Files

146 lines
4.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""Scan env files and source code for likely secret exposure patterns."""
from __future__ import annotations
import argparse
import json
import os
import re
from pathlib import Path
from typing import Dict, Iterable, List
# Directory names pruned from the repository walk. Matching is by exact
# directory name, at any depth below the scan root.
IGNORED_DIRS = {
    # Version-control metadata.
    ".git",
    # JavaScript dependencies and build/test output.
    "node_modules", ".next", "dist", "build", "coverage",
    # Python virtual environments and bytecode caches.
    "venv", ".venv", "__pycache__",
}
# File suffixes (compared lower-cased) that mark a file as worth scanning.
SOURCE_EXTS = {
    # Environment files such as "prod.env".
    ".env",
    # Scripts and application source.
    ".py", ".sh", ".ts", ".tsx", ".js", ".jsx",
    # Configuration and data formats.
    ".json", ".yaml", ".yml", ".toml", ".ini",
    # Documentation.
    ".md",
}
# Detection rules as (severity, rule name, compiled regex) triples. Each regex
# is applied with ``search`` to a single line of text.
PATTERNS = [
    # OpenAI keys: the legacy ``sk-<alnum>`` form plus the prefixed
    # ``sk-proj-`` / ``sk-svcacct-`` / ``sk-admin-`` forms, whose payload may
    # also contain "_" and "-".
    (
        "critical",
        "openai_key",
        re.compile(r"\bsk-(?:(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,}|[A-Za-z0-9]{20,})\b"),
    ),
    # GitHub tokens: classic ``ghp_`` PATs, the sibling ``gho_``/``ghu_``/
    # ``ghs_``/``ghr_`` token types, and fine-grained ``github_pat_`` tokens.
    (
        "critical",
        "github_pat",
        re.compile(r"\b(?:gh[pousr]_[A-Za-z0-9]{20,}|github_pat_[A-Za-z0-9_]{22,})\b"),
    ),
    ("critical", "aws_access_key_id", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    ("high", "slack_token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b")),
    # PEM private-key headers, including the DSA and ENCRYPTED variants.
    (
        "high",
        "private_key_block",
        re.compile(r"-----BEGIN (RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----"),
    ),
    # ``<name> = <value>`` / ``<name>: <value>`` where the name contains a
    # secret-ish keyword. ``\b`` is deliberately NOT used around the keyword:
    # "_" is a word character, so ``\b`` would reject DB_PASSWORD or
    # SECRET_KEY. Instead the keyword must not be preceded by a letter or
    # digit, may be followed by further ``_``/``-`` joined name parts, and
    # the name may carry a closing quote (JSON style ``"password": "..."``).
    (
        "high",
        "generic_secret_assignment",
        re.compile(
            r"(?i)(?<![A-Za-z0-9])(secret|token|password|passwd|api[_-]?key)"
            r"(?:[_-][A-Za-z0-9]+)*['\"]?\s*[:=]\s*['\"]?[A-Za-z0-9_\-\/.+=]{8,}"
        ),
    ),
    ("medium", "jwt_like", re.compile(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b")),
]
def iter_files(root: Path) -> Iterable[Path]:
    """Yield each file path found beneath *root*.

    Directories whose name appears in ``IGNORED_DIRS`` are pruned, so nothing
    below them is visited. Only entries for which ``Path.is_file()`` is true
    are yielded.
    """
    for current_dir, subdirs, file_names in os.walk(root):
        # Slice-assign so os.walk itself sees the pruned list and skips
        # descending into the removed directories.
        subdirs[:] = [sub for sub in subdirs if sub not in IGNORED_DIRS]
        base = Path(current_dir)
        for file_name in file_names:
            candidate = base / file_name
            if not candidate.is_file():
                continue
            yield candidate
def is_candidate(path: Path) -> bool:
    """Return True when *path* should be scanned.

    A file qualifies when its name begins with ``.env`` (for example ``.env``
    or ``.env.local``) or when its lower-cased suffix is in ``SOURCE_EXTS``.
    """
    dotenv_style = path.name.startswith(".env")
    return dotenv_style or path.suffix.lower() in SOURCE_EXTS
def scan_file(path: Path, max_bytes: int, root: Path) -> List[Dict[str, object]]:
    """Scan one file line by line and report every pattern hit.

    Args:
        path: File to scan.
        max_bytes: Files whose size exceeds this many bytes are skipped.
        root: Directory that reported file paths are made relative to.

    Returns:
        One dict per (line, pattern) match with the keys ``severity``,
        ``pattern``, ``file``, ``line`` (1-based) and ``snippet`` (the
        stripped line, capped at 180 characters). The list is empty when the
        file is too large or cannot be read.
    """
    findings: List[Dict[str, object]] = []
    try:
        if path.stat().st_size > max_bytes:
            return findings
        # Bytes that are not valid UTF-8 are dropped, not raised on.
        text = path.read_text(encoding="utf-8", errors="ignore")
    except (OSError, ValueError):
        # Best effort: a file that vanished, is unreadable, or has a path the
        # OS rejects is skipped so the rest of the scan can continue.
        return findings

    # Loop-invariant, so computed once. Fall back to the path as given when
    # it does not live under *root* instead of failing mid-scan.
    try:
        display_path = str(path.relative_to(root))
    except ValueError:
        display_path = str(path)

    for lineno, line in enumerate(text.splitlines(), start=1):
        for severity, kind, pattern in PATTERNS:
            if pattern.search(line):
                findings.append(
                    {
                        "severity": severity,
                        "pattern": kind,
                        "file": display_path,
                        "line": lineno,
                        "snippet": line.strip()[:180],
                    }
                )
    return findings
def severity_counts(findings: List[Dict[str, object]]) -> Dict[str, int]:
    """Tally findings per severity level.

    The result always contains ``critical``, ``high``, ``medium`` and ``low``
    (in that order, zero when absent). A finding without a ``severity`` key
    counts as ``low``; any other severity value is stringified and appended
    as an extra key.
    """
    tally = dict.fromkeys(("critical", "high", "medium", "low"), 0)
    for finding in findings:
        level = str(finding.get("severity", "low"))
        tally[level] = tally.get(level, 0) + 1
    return tally
def parse_args() -> argparse.Namespace:
    """Parse the script's command-line arguments.

    Returns:
        A namespace with ``path`` (positional string), ``max_file_size_kb``
        (int, default 512) and ``json`` (bool flag, default False).
    """
    cli = argparse.ArgumentParser(
        description="Audit a repository for likely secret leaks in env files and source.",
    )
    cli.add_argument("path", help="Path to repository root")
    cli.add_argument(
        "--max-file-size-kb",
        type=int,
        default=512,
        help="Skip files larger than this size (default: 512)",
    )
    cli.add_argument("--json", action="store_true", help="Output JSON")
    namespace = cli.parse_args()
    return namespace
def main() -> int:
    """Run the audit for the path given on the command line and print it.

    Prints either a JSON document (``--json``) or a plain-text report that
    lists at most the first 200 findings.

    Returns:
        Always 0 once the report has been printed.

    Raises:
        SystemExit: with a message when the given path is not a directory.
    """
    args = parse_args()
    root = Path(args.path).expanduser().resolve()
    if not (root.exists() and root.is_dir()):
        raise SystemExit(f"Path is not a directory: {root}")

    # Clamp to at least 1 KiB so a zero or negative flag value still yields a
    # positive byte limit.
    size_limit = max(1, args.max_file_size_kb) * 1024

    findings: List[Dict[str, object]] = []
    for candidate in filter(is_candidate, iter_files(root)):
        findings.extend(scan_file(candidate, max_bytes=size_limit, root=root))

    report = {
        "root": str(root),
        "total_findings": len(findings),
        "severity_counts": severity_counts(findings),
        "findings": findings,
    }

    if args.json:
        print(json.dumps(report, indent=2))
        return 0

    # Plain-text report: header, per-severity totals, then the findings.
    print("Env/Secrets Audit Report")
    print(f"Root: {report['root']}")
    print(f"Total findings: {report['total_findings']}")
    print("Severity:")
    for sev, count in report["severity_counts"].items():
        print(f"- {sev}: {count}")
    print("")
    # Only the first 200 findings are listed in text mode.
    for item in findings[:200]:
        print(f"[{item['severity'].upper()}] {item['file']}:{item['line']} ({item['pattern']})")
        print(f" {item['snippet']}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)