517 lines
18 KiB
Python
517 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Secret Scanner
|
|
|
|
Detects hardcoded secrets, API keys, and credentials in source code.
|
|
Identifies exposed secrets before they reach version control.
|
|
|
|
Usage:
|
|
python secret_scanner.py /path/to/project
|
|
python secret_scanner.py /path/to/file.py
|
|
python secret_scanner.py /path/to/project --format json
|
|
python secret_scanner.py --list-patterns
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
from enum import Enum
|
|
|
|
|
|
class Severity(Enum):
    """Severity level attached to a detection pattern and its findings.

    Members are declared from most to least severe; each value is the
    lowercase name used in reports and on the command line.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
|
|
|
|
|
@dataclass
class SecretPattern:
    """One detection rule: what to look for and in which files."""

    # Short unique rule identifier, e.g. "AWS001".
    pattern_id: str
    # Human-readable rule name shown in reports.
    name: str
    # One-line explanation of what the rule detects.
    description: str
    # Regular-expression source text; compiled by the scanner at scan time.
    regex: str
    # Severity assigned to every match produced by this rule.
    severity: Severity
    # File extensions (with leading dot) the rule is applied to.
    file_extensions: List[str]
    # Remediation advice copied onto each finding.
    recommendation: str
|
|
|
|
|
|
@dataclass
class SecretFinding:
    """A single match of a detection rule at a specific file location."""

    # Identifier of the rule that matched.
    pattern_id: str
    # Human-readable name of the rule that matched.
    name: str
    # Severity inherited from the matching rule.
    severity: Severity
    # Path of the file containing the match, as a string.
    file_path: str
    # 1-based line number of the match.
    line_number: int
    # Excerpt of the matched text as stored by the scanner (masked there).
    matched_text: str
    # Remediation advice inherited from the matching rule.
    recommendation: str
|
|
|
|
|
|
# Secret patterns database
#
# NOTE: keep every regex literal on the same line as its ``regex=`` keyword.
# scan_file() skips lines containing the word "regex", which is what stops
# the scanner from reporting its own rule definitions.

# Extension lists shared by many rules. Each rule builds its own list from
# these so that no two rules share a list object.
_CODE_EXTS = [".py", ".js", ".ts", ".java", ".go", ".rb", ".php"]
_COMMON_EXTS = _CODE_EXTS + [".env", ".yml", ".yaml", ".json"]

SECRET_PATTERNS = [
    # Cloud Provider Keys
    SecretPattern(
        pattern_id="AWS001",
        name="AWS Access Key ID",
        description="AWS access key identifier",
        regex=r'AKIA[0-9A-Z]{16}',
        severity=Severity.CRITICAL,
        file_extensions=_COMMON_EXTS + [".xml", ".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded keys",
    ),
    SecretPattern(
        pattern_id="AWS002",
        name="AWS Secret Access Key",
        description="AWS secret access key",
        regex=r'(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*["\']?[A-Za-z0-9/+=]{40}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=_COMMON_EXTS + [".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded secrets",
    ),
    SecretPattern(
        pattern_id="GCP001",
        name="Google Cloud API Key",
        description="Google Cloud Platform API key",
        regex=r'AIza[0-9A-Za-z\-_]{35}',
        severity=Severity.CRITICAL,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use service accounts or Google Secret Manager",
    ),
    SecretPattern(
        pattern_id="AZURE001",
        name="Azure Storage Key",
        description="Azure storage account key",
        regex=r'(?:AccountKey|account_key)\s*[:=]\s*["\']?[A-Za-z0-9+/=]{88}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".cs", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use Azure Key Vault or managed identities",
    ),

    # Authentication Tokens
    SecretPattern(
        pattern_id="JWT001",
        name="JSON Web Token",
        description="Hardcoded JWT token",
        regex=r'eyJ[A-Za-z0-9-_=]+\.eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_.+/=]*',
        severity=Severity.HIGH,
        file_extensions=_CODE_EXTS + [".env", ".json"],
        recommendation="Generate tokens dynamically, never hardcode",
    ),
    SecretPattern(
        pattern_id="GITHUB001",
        name="GitHub Token",
        description="GitHub personal access token or OAuth token",
        regex=r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,255}',
        severity=Severity.CRITICAL,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use GitHub App authentication or environment variables",
    ),
    SecretPattern(
        pattern_id="GITLAB001",
        name="GitLab Token",
        description="GitLab personal access or pipeline token",
        regex=r'glpat-[A-Za-z0-9\-_]{20,}',
        severity=Severity.CRITICAL,
        file_extensions=_CODE_EXTS + [".env", ".yml", ".yaml"],
        recommendation="Use CI/CD variables or environment variables",
    ),
    SecretPattern(
        pattern_id="SLACK001",
        name="Slack Token",
        description="Slack API token",
        regex=r'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
        severity=Severity.HIGH,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use environment variables or secrets manager",
    ),
    SecretPattern(
        pattern_id="STRIPE001",
        name="Stripe API Key",
        description="Stripe secret or publishable key",
        regex=r'(?:sk|pk)_(?:test|live)_[0-9a-zA-Z]{24,}',
        severity=Severity.CRITICAL,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use environment variables, never commit API keys",
    ),
    SecretPattern(
        pattern_id="TWILIO001",
        name="Twilio API Key",
        description="Twilio account SID or auth token",
        regex=r'(?:AC[a-z0-9]{32}|SK[a-z0-9]{32})',
        severity=Severity.HIGH,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use environment variables for Twilio credentials",
    ),
    SecretPattern(
        pattern_id="SENDGRID001",
        name="SendGrid API Key",
        description="SendGrid API key",
        regex=r'SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}',
        severity=Severity.HIGH,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use environment variables for email service credentials",
    ),

    # Cryptographic Keys
    SecretPattern(
        pattern_id="CRYPTO001",
        name="RSA Private Key",
        description="RSA private key in PEM format",
        regex=r'-----BEGIN RSA PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=_CODE_EXTS + [".pem", ".key", ".txt"],
        recommendation="Store private keys in secure key management systems",
    ),
    SecretPattern(
        pattern_id="CRYPTO002",
        name="EC Private Key",
        description="Elliptic curve private key",
        regex=r'-----BEGIN EC PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=_CODE_EXTS + [".pem", ".key"],
        recommendation="Use hardware security modules or key management services",
    ),
    SecretPattern(
        pattern_id="CRYPTO003",
        name="OpenSSH Private Key",
        description="OpenSSH private key",
        regex=r'-----BEGIN OPENSSH PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=_CODE_EXTS + [".pem", ".key", ".txt"],
        recommendation="Never commit SSH keys to repositories",
    ),
    SecretPattern(
        pattern_id="CRYPTO004",
        name="PGP Private Key",
        description="PGP/GPG private key block",
        regex=r'-----BEGIN PGP PRIVATE KEY BLOCK-----',
        severity=Severity.CRITICAL,
        file_extensions=_CODE_EXTS + [".asc", ".gpg", ".txt"],
        recommendation="Store PGP keys in secure key rings, not source code",
    ),

    # Generic Patterns
    SecretPattern(
        pattern_id="GEN001",
        name="Generic API Key",
        description="Generic API key or secret pattern",
        regex=r'(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=_COMMON_EXTS + [".xml"],
        recommendation="Use environment variables or secrets manager",
    ),
    SecretPattern(
        pattern_id="GEN002",
        name="Generic Secret",
        description="Generic secret or token pattern",
        regex=r'(?:secret|token|auth[_-]?token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Store secrets in environment variables or secret managers",
    ),
    SecretPattern(
        pattern_id="GEN003",
        name="Password in Config",
        description="Password in configuration file",
        regex=r'(?:password|passwd|pwd)\s*[:=]\s*["\'][^"\']{8,}["\']',
        severity=Severity.CRITICAL,
        file_extensions=_COMMON_EXTS + [".xml", ".conf", ".ini"],
        recommendation="Never hardcode passwords. Use secret managers",
    ),
    SecretPattern(
        pattern_id="GEN004",
        name="Database Connection String",
        description="Database connection string with credentials",
        regex=r'(?:mongodb|postgres|mysql|redis|amqp)://[^:]+:[^@]+@[^/]+',
        severity=Severity.CRITICAL,
        file_extensions=list(_COMMON_EXTS),
        recommendation="Use environment variables for database credentials",
    ),

    # Low Severity Patterns
    SecretPattern(
        pattern_id="LOW001",
        name="TODO with Secret",
        description="TODO comment mentioning secrets or credentials",
        regex=r'(?:#|//|/\*)\s*(?:TODO|FIXME|XXX).*(?:secret|password|credential|key)',
        severity=Severity.LOW,
        file_extensions=list(_CODE_EXTS),
        recommendation="Address security TODOs before deployment",
    ),
]
|
|
|
|
|
|
def scan_file(file_path: Path, patterns: List[SecretPattern]) -> List[SecretFinding]:
    """Scan a single file for secrets.

    Args:
        file_path: File to scan.
        patterns: Detection rules to try. A rule is applied only when its
            ``file_extensions`` contain this file's effective extension.

    Returns:
        At most one finding per (rule, line), ordered by rule and then by
        line number. The list is empty when no rule applies to the file or
        the file cannot be read.
    """
    findings: List[SecretFinding] = []

    # Dotenv files (".env", ".env.local", ...) have an empty or unrelated
    # Path.suffix, so without this mapping they would never match a rule
    # that lists ".env" and would silently go unscanned.
    file_name = file_path.name.lower()
    if file_name == ".env" or file_name.startswith(".env."):
        extension = ".env"
    else:
        extension = file_path.suffix.lower()

    applicable = [p for p in patterns if extension in p.file_extensions]
    if not applicable:
        return findings

    try:
        content = file_path.read_text(encoding='utf-8', errors='ignore')
    except (OSError, ValueError):
        # Best effort: an unreadable file simply yields no findings.
        return findings

    # Skip lines that explain patterns (like the rule table in this file).
    # The filter does not depend on the rule, so compute it once per file.
    candidates = []
    for line_number, line in enumerate(content.split('\n'), 1):
        lowered = line.lower()
        if 'regex' in lowered or 'pattern' in lowered:
            continue
        candidates.append((line_number, line))

    for pattern in applicable:
        try:
            regex = re.compile(pattern.regex, re.IGNORECASE)
        except re.error:
            # An invalid rule is skipped rather than aborting the scan.
            continue

        for line_number, line in candidates:
            match = regex.search(line)
            if not match:
                continue

            # Mask the actual secret for safety
            matched = match.group(0)
            if len(matched) > 20:
                masked = matched[:10] + "..." + matched[-5:]
            else:
                masked = matched[:5] + "..."

            findings.append(SecretFinding(
                pattern_id=pattern.pattern_id,
                name=pattern.name,
                severity=pattern.severity,
                file_path=str(file_path),
                line_number=line_number,
                matched_text=masked,
                recommendation=pattern.recommendation,
            ))

    return findings
|
|
|
|
|
|
def scan_directory(dir_path: Path, patterns: List[SecretPattern],
                   exclude_dirs: Optional[List[str]] = None) -> List[SecretFinding]:
    """Scan all files in a directory tree for secrets.

    Args:
        dir_path: Root directory to walk.
        patterns: Detection rules passed on to ``scan_file``.
        exclude_dirs: Directory names to skip wherever they occur below
            ``dir_path``. Defaults to common dependency, build and tooling
            directories.

    Returns:
        All findings, sorted by severity (critical first). Within one
        severity, findings keep traversal order: directories and files are
        visited in sorted name order.
    """
    if exclude_dirs is None:
        exclude_dirs = [
            "node_modules", ".git", "__pycache__", "venv", ".venv",
            "dist", "build", ".next", "vendor", ".idea", ".vscode"
        ]
    excluded = set(exclude_dirs)

    extensions = set()
    for pattern in patterns:
        extensions.update(pattern.file_extensions)
    dotenv_names = {'.env', '.env.local', '.env.production'}

    findings = []
    for root, dir_names, file_names in os.walk(dir_path):
        # Prune excluded directories in place so they are never descended
        # into. Only names below dir_path are considered, so a scan root
        # that itself lives under e.g. "build/" is still scanned.
        dir_names[:] = sorted(d for d in dir_names if d not in excluded)

        for file_name in sorted(file_names):
            file_path = Path(root) / file_name
            if file_path.suffix.lower() not in extensions and file_name not in dotenv_names:
                continue

            try:
                # Skip non-regular entries and large files (1MB limit).
                if not file_path.is_file() or file_path.stat().st_size > 1_000_000:
                    continue
            except OSError:
                # Unreadable or vanished entry: skip it, keep scanning.
                continue

            findings.extend(scan_file(file_path, patterns))

    severity_rank = {Severity.CRITICAL: 0, Severity.HIGH: 1, Severity.MEDIUM: 2}
    return sorted(findings, key=lambda f: severity_rank.get(f.severity, 3))
|
|
|
|
|
|
def format_text_report(findings: List[SecretFinding], path: str) -> str:
    """Format findings as a plain-text report.

    Args:
        findings: Findings to report, in any order.
        path: Scan target, echoed in the report header.

    Returns:
        The report as one newline-joined string: a header, a per-severity
        summary, then the findings grouped under one heading per severity.
    """
    rule = "=" * 70
    severity_names = ["critical", "high", "medium", "low"]

    lines = [rule, "SECRET SCAN REPORT", rule, f"Target: {path}", ""]

    # Summary
    by_severity = {}
    for finding in findings:
        sev = finding.severity.value
        by_severity[sev] = by_severity.get(sev, 0) + 1

    lines.append("SUMMARY:")
    lines.append(f" Total Secrets Found: {len(findings)}")
    for sev in severity_names:
        count = by_severity.get(sev, 0)
        if count > 0:
            lines.append(f" {sev.upper()}: {count}")
    lines.append("")

    if not findings:
        lines.append("No secrets found!")
        lines.append(rule)
        return "\n".join(lines)

    # Group by severity. Sort here instead of trusting the caller: a
    # single-file scan is not pre-sorted, and unsorted input would repeat
    # the severity headings. The sort is stable, so the incoming order is
    # kept within each severity.
    rank = {name: index for index, name in enumerate(severity_names)}
    ordered = sorted(findings, key=lambda f: rank.get(f.severity.value, len(rank)))

    current_severity = None
    for finding in ordered:
        if finding.severity != current_severity:
            current_severity = finding.severity
            lines.append("-" * 70)
            lines.append(f"[{current_severity.value.upper()}]")
            lines.append("-" * 70)

        lines.append("")
        lines.append(f" [{finding.pattern_id}] {finding.name}")
        lines.append(f" File: {finding.file_path}:{finding.line_number}")
        lines.append(f" Match: {finding.matched_text}")
        lines.append(f" Fix: {finding.recommendation}")

    lines.append("")
    lines.append(rule)
    lines.append("IMPORTANT: Review all findings and rotate exposed credentials!")
    lines.append(rule)
    return "\n".join(lines)
|
|
|
|
|
|
def format_json_report(findings: List[SecretFinding], path: str) -> Dict:
    """Format findings as a JSON-serializable dictionary.

    Args:
        findings: Findings to report.
        path: Scan target, stored under ``"target"``.

    Returns:
        A dict with the target, the local scan timestamp in ISO 8601 form,
        a summary (total plus a count for every severity level, including
        zero counts), and one entry per finding.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import datetime

    by_severity = {
        sev.value: sum(1 for f in findings if f.severity == sev)
        for sev in Severity
    }

    return {
        "target": path,
        "scan_date": datetime.now().isoformat(),
        "summary": {
            "total": len(findings),
            "by_severity": by_severity,
        },
        "findings": [
            {
                "pattern_id": f.pattern_id,
                "name": f.name,
                "severity": f.severity.value,
                "file_path": f.file_path,
                "line_number": f.line_number,
                "matched_text": f.matched_text,
                "recommendation": f.recommendation,
            }
            for f in findings
        ],
    }
|
|
|
|
|
|
def list_patterns():
    """Print every registered detection pattern, ordered by pattern id."""
    banner = "=" * 60
    print("\n" + banner)
    print("SECRET DETECTION PATTERNS")
    print(banner)

    ordered = list(SECRET_PATTERNS)
    ordered.sort(key=lambda entry: entry.pattern_id)

    for entry in ordered:
        print(f"\n[{entry.pattern_id}] {entry.name}")
        print(f" Severity: {entry.severity.value.upper()}")
        print(f" Description: {entry.description}")
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, scan, and emit the report.

    Exits with status 1 when the target path does not exist or when any
    CRITICAL or HIGH severity finding is reported; argparse exits with its
    own status on usage errors. Otherwise the function returns normally.
    """
    parser = argparse.ArgumentParser(
        description="Secret Scanner - Detect hardcoded secrets in code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan a project directory
  python secret_scanner.py /path/to/project

  # Scan a single file
  python secret_scanner.py /path/to/config.py

  # Output as JSON
  python secret_scanner.py /path/to/project --format json

  # List all detection patterns
  python secret_scanner.py --list-patterns

  # Save report to file
  python secret_scanner.py /path/to/project --output report.txt
"""
    )

    parser.add_argument(
        "path",
        nargs="?",
        help="Path to scan (file or directory)"
    )
    parser.add_argument(
        "--format", "-f",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)"
    )
    parser.add_argument(
        "--output", "-o",
        help="Output file path"
    )
    parser.add_argument(
        "--list-patterns", "-l",
        action="store_true",
        help="List all detection patterns"
    )
    parser.add_argument(
        "--severity", "-s",
        choices=["critical", "high", "medium", "low"],
        help="Minimum severity to report"
    )

    args = parser.parse_args()

    if args.list_patterns:
        list_patterns()
        return

    if not args.path:
        parser.error("path is required (or use --list-patterns)")

    path = Path(args.path)
    if not path.exists():
        # Diagnostics go to stderr so they never mix with report output.
        print(f"Error: Path does not exist: {path}", file=sys.stderr)
        sys.exit(1)

    # Filter patterns by severity: keep the requested level and everything
    # more severe than it.
    patterns = SECRET_PATTERNS
    if args.severity:
        severity_order = ["critical", "high", "medium", "low"]
        min_index = severity_order.index(args.severity)
        allowed = {Severity(s) for s in severity_order[:min_index + 1]}
        patterns = [p for p in patterns if p.severity in allowed]

    # Scan
    if path.is_file():
        findings = scan_file(path, patterns)
    else:
        findings = scan_directory(path, patterns)

    # Format output
    if args.format == "json":
        output = json.dumps(format_json_report(findings, str(path)), indent=2)
    else:
        output = format_text_report(findings, str(path))

    # Write output
    if args.output:
        # Explicit UTF-8: matched text and paths may contain non-ASCII
        # characters that the platform default encoding cannot represent.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)

    # Exit code based on findings
    if any(f.severity in (Severity.CRITICAL, Severity.HIGH) for f in findings):
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|