#!/usr/bin/env python3
"""Scan env files and source code for likely secret exposure patterns."""

from __future__ import annotations

import argparse
import json
import os
import re
from pathlib import Path
from typing import Dict, Iterable, List

# Directory names that are never descended into (VCS metadata, dependency
# trees, build output, virtualenvs, bytecode caches).
IGNORED_DIRS = {
    ".git",
    "node_modules",
    ".next",
    "dist",
    "build",
    "coverage",
    "venv",
    ".venv",
    "__pycache__",
}

# File suffixes (lower-case, with leading dot) that are scanned.
SOURCE_EXTS = {
    ".env",
    ".py",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".json",
    ".yaml",
    ".yml",
    ".toml",
    ".ini",
    ".sh",
    ".md",
}

# Maximum number of characters of the offending line kept in a finding.
SNIPPET_MAX_CHARS = 180

# Maximum number of findings listed in the human-readable report.
MAX_TEXT_FINDINGS = 200

# (severity, kind, compiled regex) triples; every pattern is tried on every line.
PATTERNS = [
    ("critical", "openai_key", re.compile(r"\bsk-[A-Za-z0-9]{20,}\b")),
    ("critical", "github_pat", re.compile(r"\bghp_[A-Za-z0-9]{20,}\b")),
    ("critical", "aws_access_key_id", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    ("high", "slack_token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b")),
    (
        "high",
        "private_key_block",
        # Any PEM private-key label: plain, RSA/EC/DSA/OPENSSH, ENCRYPTED,
        # and PGP "PRIVATE KEY BLOCK".
        re.compile(r"-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY(?: BLOCK)?-----"),
    ),
    (
        "high",
        "generic_secret_assignment",
        re.compile(
            # "_" counts as a word character, so a plain \b would reject
            # DB_PASSWORD / GITHUB_TOKEN; only require a non-alphanumeric
            # character (or start of line) before the keyword.
            r"(?i)(?<![A-Za-z0-9])"
            r"(secret|token|password|passwd|api[_-]?key)"
            # Optional trailing name segments, e.g. SECRET_KEY, SECRET_ACCESS_KEY.
            r"(?:[_-][A-Za-z0-9]+)*"
            # Optional closing quote of a quoted key ("password": ...).
            r"['\"]?\s*[:=]\s*"
            r"['\"]?[A-Za-z0-9_\-\/.+=]{8,}"
        ),
    ),
    (
        "medium",
        "jwt_like",
        re.compile(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"),
    ),
]


def iter_files(root: Path) -> Iterable[Path]:
    """Yield every regular file below *root*, skipping ``IGNORED_DIRS``.

    Directories and files are visited in sorted order so that repeated runs
    over the same tree produce findings in the same order.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into ignored directories.
        dirnames[:] = sorted(d for d in dirnames if d not in IGNORED_DIRS)
        base = Path(dirpath)
        for name in sorted(filenames):
            candidate = base / name
            if candidate.is_file():
                yield candidate


def is_candidate(path: Path) -> bool:
    """Return True if *path* should be scanned.

    Any file whose name starts with ``.env`` qualifies (``.env``,
    ``.env.local``, ...); otherwise the lower-cased suffix must be in
    ``SOURCE_EXTS``.
    """
    return path.name.startswith(".env") or path.suffix.lower() in SOURCE_EXTS


def scan_file(path: Path, max_bytes: int, root: Path) -> List[Dict[str, object]]:
    """Scan one file line by line and return a finding per (line, pattern) hit.

    Args:
        path: File to scan; must be located under *root*.
        max_bytes: Files larger than this many bytes are skipped.
        root: Scan root, used to report *path* relative to it.

    Returns:
        A list of dicts with the keys ``severity``, ``pattern``, ``file``,
        ``line`` (1-based) and ``snippet`` (stripped line, truncated to
        ``SNIPPET_MAX_CHARS``). Empty if the file is too large or unreadable.
    """
    findings: List[Dict[str, object]] = []
    try:
        if path.stat().st_size > max_bytes:
            return findings
        # Undecodable bytes are dropped rather than aborting the scan.
        text = path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Best effort: unreadable or vanished files are skipped, not fatal.
        return findings

    relative_name = str(path.relative_to(root))
    for lineno, line in enumerate(text.splitlines(), start=1):
        for severity, kind, pattern in PATTERNS:
            if pattern.search(line):
                findings.append(
                    {
                        "severity": severity,
                        "pattern": kind,
                        "file": relative_name,
                        "line": lineno,
                        "snippet": line.strip()[:SNIPPET_MAX_CHARS],
                    }
                )
    return findings


def severity_counts(findings: List[Dict[str, object]]) -> Dict[str, int]:
    """Count findings per severity.

    The four known severities are always present (possibly 0); a finding
    without a ``severity`` key is counted as ``low``.
    """
    counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
    for item in findings:
        sev = str(item.get("severity", "low"))
        counts[sev] = counts.get(sev, 0) + 1
    return counts


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Audit a repository for likely secret leaks in env files and source."
    )
    parser.add_argument("path", help="Path to repository root")
    parser.add_argument(
        "--max-file-size-kb",
        type=int,
        default=512,
        help="Skip files larger than this size (default: 512)",
    )
    parser.add_argument("--json", action="store_true", help="Output JSON")
    return parser.parse_args()


def main() -> int:
    """Run the audit and print the report; returns the process exit code (0).

    Raises:
        SystemExit: If the given path does not exist or is not a directory.
    """
    args = parse_args()
    root = Path(args.path).expanduser().resolve()
    if not root.exists() or not root.is_dir():
        raise SystemExit(f"Path is not a directory: {root}")

    # Clamp to at least 1 KB so zero or negative values do not skip every file.
    max_bytes = max(1, args.max_file_size_kb) * 1024

    findings: List[Dict[str, object]] = []
    for file_path in iter_files(root):
        if is_candidate(file_path):
            findings.extend(scan_file(file_path, max_bytes=max_bytes, root=root))

    counts = severity_counts(findings)
    report = {
        "root": str(root),
        "total_findings": len(findings),
        "severity_counts": counts,
        "findings": findings,
    }

    if args.json:
        print(json.dumps(report, indent=2))
        return 0

    print("Env/Secrets Audit Report")
    print(f"Root: {report['root']}")
    print(f"Total findings: {report['total_findings']}")
    print("Severity:")
    for sev, count in counts.items():
        print(f"- {sev}: {count}")
    print("")
    for item in findings[:MAX_TEXT_FINDINGS]:
        print(f"[{str(item['severity']).upper()}] {item['file']}:{item['line']} ({item['pattern']})")
        print(f"  {item['snippet']}")
    hidden = len(findings) - MAX_TEXT_FINDINGS
    if hidden > 0:
        # Make the cut-off visible instead of silently dropping findings.
        print(f"... {hidden} more finding(s) not shown; use --json for the full list")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())