#!/usr/bin/env python3
"""
Secret Scanner

Detects hardcoded secrets, API keys, and credentials in source code.
Identifies exposed secrets before they reach version control.

Usage:
    python secret_scanner.py /path/to/project
    python secret_scanner.py /path/to/file.py
    python secret_scanner.py /path/to/project --format json
    python secret_scanner.py --list-patterns
"""

import argparse
import json
import os
import re
import sys
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional


class Severity(Enum):
    """Severity levels a finding can carry, from most to least urgent."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"


@dataclass
class SecretPattern:
    """One detection rule: a regex plus the metadata reported with a hit."""

    pattern_id: str
    name: str
    description: str
    regex: str
    severity: Severity
    # Extensions (lower-case, with leading dot) this rule is applied to.
    file_extensions: List[str]
    recommendation: str


@dataclass
class SecretFinding:
    """A single match of a SecretPattern on one line of one file."""

    pattern_id: str
    name: str
    severity: Severity
    file_path: str
    line_number: int
    # Masked excerpt of the match; never the full secret.
    matched_text: str
    recommendation: str


# Sort rank for each severity: lower rank is reported first.
_SEVERITY_RANK = {
    Severity.CRITICAL: 0,
    Severity.HIGH: 1,
    Severity.MEDIUM: 2,
    Severity.LOW: 3,
}

# Directory names skipped by scan_directory() when no list is supplied.
_DEFAULT_EXCLUDE_DIRS = [
    "node_modules", ".git", "__pycache__", "venv", ".venv",
    "dist", "build", ".next", "vendor", ".idea", ".vscode"
]

# Secret patterns database
SECRET_PATTERNS = [
    # Cloud Provider Keys
    SecretPattern(
        pattern_id="AWS001",
        name="AWS Access Key ID",
        description="AWS access key identifier",
        regex=r'AKIA[0-9A-Z]{16}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml", ".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded keys"
    ),
    SecretPattern(
        pattern_id="AWS002",
        name="AWS Secret Access Key",
        description="AWS secret access key",
        regex=r'(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*["\']?[A-Za-z0-9/+=]{40}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded secrets"
    ),
    SecretPattern(
        pattern_id="GCP001",
        name="Google Cloud API Key",
        description="Google Cloud Platform API key",
        regex=r'AIza[0-9A-Za-z\-_]{35}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use service accounts or Google Secret Manager"
    ),
    SecretPattern(
        pattern_id="AZURE001",
        name="Azure Storage Key",
        description="Azure storage account key",
        regex=r'(?:AccountKey|account_key)\s*[:=]\s*["\']?[A-Za-z0-9+/=]{88}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".cs", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use Azure Key Vault or managed identities"
    ),

    # Authentication Tokens
    SecretPattern(
        pattern_id="JWT001",
        name="JSON Web Token",
        description="Hardcoded JWT token",
        regex=r'eyJ[A-Za-z0-9-_=]+\.eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_.+/=]*',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".json"],
        recommendation="Generate tokens dynamically, never hardcode"
    ),
    SecretPattern(
        pattern_id="GITHUB001",
        name="GitHub Token",
        description="GitHub personal access token or OAuth token",
        regex=r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,255}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use GitHub App authentication or environment variables"
    ),
    SecretPattern(
        pattern_id="GITLAB001",
        name="GitLab Token",
        description="GitLab personal access or pipeline token",
        regex=r'glpat-[A-Za-z0-9\-_]{20,}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml"],
        recommendation="Use CI/CD variables or environment variables"
    ),
    SecretPattern(
        pattern_id="SLACK001",
        name="Slack Token",
        description="Slack API token",
        regex=r'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables or secrets manager"
    ),
    SecretPattern(
        pattern_id="STRIPE001",
        name="Stripe API Key",
        description="Stripe secret or publishable key",
        regex=r'(?:sk|pk)_(?:test|live)_[0-9a-zA-Z]{24,}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables, never commit API keys"
    ),
    SecretPattern(
        pattern_id="TWILIO001",
        name="Twilio API Key",
        description="Twilio account SID or auth token",
        regex=r'(?:AC[a-z0-9]{32}|SK[a-z0-9]{32})',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for Twilio credentials"
    ),
    SecretPattern(
        pattern_id="SENDGRID001",
        name="SendGrid API Key",
        description="SendGrid API key",
        regex=r'SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for email service credentials"
    ),

    # Cryptographic Keys
    SecretPattern(
        pattern_id="CRYPTO001",
        name="RSA Private Key",
        description="RSA private key in PEM format",
        regex=r'-----BEGIN RSA PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key", ".txt"],
        recommendation="Store private keys in secure key management systems"
    ),
    SecretPattern(
        pattern_id="CRYPTO002",
        name="EC Private Key",
        description="Elliptic curve private key",
        regex=r'-----BEGIN EC PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key"],
        recommendation="Use hardware security modules or key management services"
    ),
    SecretPattern(
        pattern_id="CRYPTO003",
        name="OpenSSH Private Key",
        description="OpenSSH private key",
        regex=r'-----BEGIN OPENSSH PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key", ".txt"],
        recommendation="Never commit SSH keys to repositories"
    ),
    SecretPattern(
        pattern_id="CRYPTO004",
        name="PGP Private Key",
        description="PGP/GPG private key block",
        regex=r'-----BEGIN PGP PRIVATE KEY BLOCK-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".asc", ".gpg", ".txt"],
        recommendation="Store PGP keys in secure key rings, not source code"
    ),

    # Generic Patterns
    SecretPattern(
        pattern_id="GEN001",
        name="Generic API Key",
        description="Generic API key or secret pattern",
        regex=r'(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml"],
        recommendation="Use environment variables or secrets manager"
    ),
    SecretPattern(
        pattern_id="GEN002",
        name="Generic Secret",
        description="Generic secret or token pattern",
        regex=r'(?:secret|token|auth[_-]?token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Store secrets in environment variables or secret managers"
    ),
    SecretPattern(
        pattern_id="GEN003",
        name="Password in Config",
        description="Password in configuration file",
        regex=r'(?:password|passwd|pwd)\s*[:=]\s*["\'][^"\']{8,}["\']',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml", ".conf", ".ini"],
        recommendation="Never hardcode passwords. Use secret managers"
    ),
    SecretPattern(
        pattern_id="GEN004",
        name="Database Connection String",
        description="Database connection string with credentials",
        regex=r'(?:mongodb|postgres|mysql|redis|amqp)://[^:]+:[^@]+@[^/]+',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for database credentials"
    ),

    # Low Severity Patterns
    SecretPattern(
        pattern_id="LOW001",
        name="TODO with Secret",
        description="TODO comment mentioning secrets or credentials",
        regex=r'(?:#|//|/\*)\s*(?:TODO|FIXME|XXX).*(?:secret|password|credential|key)',
        severity=Severity.LOW,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php"],
        recommendation="Address security TODOs before deployment"
    ),
]


def _effective_extension(file_path: Path) -> str:
    """Return the lower-cased extension used to select patterns for a file.

    ``Path.suffix`` is empty for a bare dotfile such as ``.env`` and is
    ``.local`` for ``.env.local``, so dotenv files would never match the
    ``.env`` entry in a pattern's ``file_extensions``. Files named ``.env``
    or ``.env.<anything>`` are therefore mapped to ``.env`` explicitly.
    """
    name = file_path.name.lower()
    if name == ".env" or name.startswith(".env."):
        return ".env"
    return file_path.suffix.lower()


def _mask(matched: str) -> str:
    """Shorten a matched string so the report never contains the full secret."""
    if len(matched) > 20:
        return matched[:10] + "..." + matched[-5:]
    return matched[:5] + "..."


def scan_file(file_path: Path, patterns: List[SecretPattern]) -> List[SecretFinding]:
    """Scan a single file for secrets.

    Args:
        file_path: File to read. It is decoded as UTF-8 and undecodable
            bytes are dropped.
        patterns: Rules to apply. A rule is only run when the file's
            effective extension is listed in its ``file_extensions``.

    Returns:
        Findings in pattern order, then line order. At most one finding is
        produced per pattern per line. An unreadable file yields ``[]``.
    """
    findings: List[SecretFinding] = []
    extension = _effective_extension(file_path)

    try:
        content = file_path.read_text(encoding='utf-8', errors='ignore')
    except (OSError, ValueError):
        # Best effort: a file we cannot read is skipped, not fatal.
        return findings

    # Skip lines that talk about patterns (like the rule table in this file).
    # Computed once per file because it does not depend on the pattern.
    candidates = []
    for number, line in enumerate(content.split('\n'), 1):
        lowered = line.lower()
        if 'regex' in lowered or 'pattern' in lowered:
            continue
        candidates.append((number, line))

    for pattern in patterns:
        if extension not in pattern.file_extensions:
            continue

        try:
            regex = re.compile(pattern.regex, re.IGNORECASE)
        except re.error:
            # A malformed rule must not prevent the other rules from running.
            continue

        for number, line in candidates:
            match = regex.search(line)
            if match is None:
                continue
            findings.append(SecretFinding(
                pattern_id=pattern.pattern_id,
                name=pattern.name,
                severity=pattern.severity,
                file_path=str(file_path),
                line_number=number,
                # Mask the actual secret for safety
                matched_text=_mask(match.group(0)),
                recommendation=pattern.recommendation
            ))

    return findings


def scan_directory(dir_path: Path, patterns: List[SecretPattern],
                   exclude_dirs: Optional[List[str]] = None) -> List[SecretFinding]:
    """Scan all files in a directory tree for secrets.

    Args:
        dir_path: Root directory; it is walked recursively.
        patterns: Rules to apply to every eligible file.
        exclude_dirs: Path component names to skip. Defaults to common
            dependency, build and tooling directories.

    Returns:
        All findings, ordered from CRITICAL down to LOW. The sort is stable,
        so findings of equal severity keep their discovery order.
    """
    excluded = set(_DEFAULT_EXCLUDE_DIRS if exclude_dirs is None else exclude_dirs)

    extensions = set()
    for pattern in patterns:
        extensions.update(pattern.file_extensions)

    findings: List[SecretFinding] = []

    for file_path in dir_path.rglob("*"):
        if not file_path.is_file():
            continue

        # Test exclusions against the path *relative to the scan root*.
        # Using the absolute path would drop every file whenever an ancestor
        # of the project happens to be called "build", "dist", "vendor", ...
        relative_parts = file_path.relative_to(dir_path).parts
        if any(part in excluded for part in relative_parts):
            continue

        # Skip large files (1MB limit); a file that cannot be stat'ed is
        # skipped rather than aborting the whole scan.
        try:
            if file_path.stat().st_size > 1_000_000:
                continue
        except OSError:
            continue

        if _effective_extension(file_path) in extensions:
            findings.extend(scan_file(file_path, patterns))

    return sorted(findings, key=lambda f: _SEVERITY_RANK.get(f.severity, 3))


def format_text_report(findings: List[SecretFinding], path: str) -> str:
    """Format findings as a plain-text report.

    Args:
        findings: Findings in any order; they are grouped by severity here,
            so each severity header is printed at most once.
        path: Scan target shown in the report header.

    Returns:
        The complete report as one newline-joined string.
    """
    lines = []
    lines.append("=" * 70)
    lines.append("SECRET SCAN REPORT")
    lines.append("=" * 70)
    lines.append(f"Target: {path}")
    lines.append("")

    # Summary
    by_severity: Dict[str, int] = {}
    for finding in findings:
        sev = finding.severity.value
        by_severity[sev] = by_severity.get(sev, 0) + 1

    lines.append("SUMMARY:")
    lines.append(f" Total Secrets Found: {len(findings)}")
    for sev in ["critical", "high", "medium", "low"]:
        count = by_severity.get(sev, 0)
        if count > 0:
            lines.append(f" {sev.upper()}: {count}")
    lines.append("")

    if not findings:
        lines.append("No secrets found!")
        lines.append("=" * 70)
        return "\n".join(lines)

    # Group by severity. Single-file scans arrive in pattern order, so sort
    # here (stably) instead of trusting the caller to have done it.
    ordered = sorted(findings, key=lambda f: _SEVERITY_RANK.get(f.severity, 3))

    current_severity = None
    for finding in ordered:
        if finding.severity != current_severity:
            current_severity = finding.severity
            lines.append("-" * 70)
            lines.append(f"[{current_severity.value.upper()}]")
            lines.append("-" * 70)
            lines.append("")

        lines.append(f" [{finding.pattern_id}] {finding.name}")
        lines.append(f" File: {finding.file_path}:{finding.line_number}")
        lines.append(f" Match: {finding.matched_text}")
        lines.append(f" Fix: {finding.recommendation}")
        lines.append("")

    lines.append("=" * 70)
    lines.append("IMPORTANT: Review all findings and rotate exposed credentials!")
    lines.append("=" * 70)

    return "\n".join(lines)


def format_json_report(findings: List[SecretFinding], path: str) -> Dict:
    """Format findings as a JSON-serialisable dictionary.

    Args:
        findings: Findings to include, kept in the order given.
        path: Scan target recorded under ``"target"``.

    Returns:
        A dict with ``target``, ``scan_date`` (local time, ISO 8601),
        ``summary`` (total and per-severity counts) and ``findings``.
    """
    return {
        "target": path,
        "scan_date": datetime.now().isoformat(),
        "summary": {
            "total": len(findings),
            "by_severity": {
                sev.value: sum(1 for f in findings if f.severity == sev)
                for sev in Severity
            }
        },
        "findings": [
            {
                "pattern_id": f.pattern_id,
                "name": f.name,
                "severity": f.severity.value,
                "file_path": f.file_path,
                "line_number": f.line_number,
                "matched_text": f.matched_text,
                "recommendation": f.recommendation
            }
            for f in findings
        ]
    }


def list_patterns():
    """Print every detection pattern, ordered by pattern id, to stdout."""
    print("\n" + "=" * 60)
    print("SECRET DETECTION PATTERNS")
    print("=" * 60)

    for pattern in sorted(SECRET_PATTERNS, key=lambda p: p.pattern_id):
        print(f"\n[{pattern.pattern_id}] {pattern.name}")
        print(f" Severity: {pattern.severity.value.upper()}")
        print(f" Description: {pattern.description}")


def main():
    """Command-line entry point.

    Exits with status 1 when the target path does not exist or when any
    CRITICAL or HIGH finding is reported; otherwise returns normally.
    """
    parser = argparse.ArgumentParser(
        description="Secret Scanner - Detect hardcoded secrets in code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan a project directory
  python secret_scanner.py /path/to/project

  # Scan a single file
  python secret_scanner.py /path/to/config.py

  # Output as JSON
  python secret_scanner.py /path/to/project --format json

  # List all detection patterns
  python secret_scanner.py --list-patterns

  # Save report to file
  python secret_scanner.py /path/to/project --output report.txt
"""
    )

    parser.add_argument(
        "path",
        nargs="?",
        help="Path to scan (file or directory)"
    )
    parser.add_argument(
        "--format", "-f",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)"
    )
    parser.add_argument(
        "--output", "-o",
        help="Output file path"
    )
    parser.add_argument(
        "--list-patterns", "-l",
        action="store_true",
        help="List all detection patterns"
    )
    parser.add_argument(
        "--severity", "-s",
        choices=["critical", "high", "medium", "low"],
        help="Minimum severity to report"
    )

    args = parser.parse_args()

    if args.list_patterns:
        list_patterns()
        return

    if not args.path:
        parser.error("path is required (or use --list-patterns)")

    path = Path(args.path)
    if not path.exists():
        print(f"Error: Path does not exist: {path}", file=sys.stderr)
        sys.exit(1)

    # Filter patterns by severity: keep the chosen level and everything
    # more severe than it.
    patterns = SECRET_PATTERNS
    if args.severity:
        severity_order = ["critical", "high", "medium", "low"]
        min_index = severity_order.index(args.severity)
        allowed = {Severity(s) for s in severity_order[:min_index + 1]}
        patterns = [p for p in patterns if p.severity in allowed]

    # Scan
    if path.is_file():
        findings = scan_file(path, patterns)
    else:
        findings = scan_directory(path, patterns)

    # Format output
    if args.format == "json":
        output = json.dumps(format_json_report(findings, str(path)), indent=2)
    else:
        output = format_text_report(findings, str(path))

    # Write output
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)

    # Exit code based on findings
    if any(f.severity in (Severity.CRITICAL, Severity.HIGH) for f in findings):
        sys.exit(1)


if __name__ == "__main__":
    main()