Files
claude-skills-reference/engineering-team/senior-security/scripts/secret_scanner.py
Alireza Rezvani 5e1f6955e8 fix(skill): rewrite senior-security with real security engineering content (#87) (#168)
PROBLEM: Issue #87 feedback - senior-security scored 40/100
- Placeholder reference files with template content only
- Generic scripts without actual security functionality
- Missing trigger phrases, no TOC, no validation workflows

SOLUTION: Complete rewrite with comprehensive security content

SKILL.md (210→436 lines):
- Added 12 triggers: security architecture, threat modeling, STRIDE analysis,
  penetration testing, vulnerability assessment, secure coding, OWASP, etc.
- Added TOC with 7 sections
- 5 numbered workflows with validation checkpoints:
  1. Threat Modeling (STRIDE methodology)
  2. Security Architecture (Zero Trust, Defense-in-Depth)
  3. Vulnerability Assessment (OWASP Top 10 mapping)
  4. Secure Code Review (checklist with risk categories)
  5. Incident Response (severity levels, response phases)
- Added STRIDE per element matrix, DREAD scoring, severity matrices
- Authentication pattern selection guide
- Security tools reference (SAST, DAST, dependency scanning)
- Cryptographic algorithm selection guide
- Compliance frameworks reference (OWASP ASVS, CIS, NIST, PCI-DSS)
- Security headers checklist

References (rebuilt from scratch):
- security-architecture-patterns.md (~615 lines): Zero Trust implementation,
  Defense-in-Depth layers, OAuth 2.0 + PKCE flows, JWT patterns, API security
- threat-modeling-guide.md (~518 lines): STRIDE framework with element matrix,
  attack trees with probability calculations, DREAD scoring, DFD creation
- cryptography-implementation.md (~775 lines): AES-256-GCM, ChaCha20-Poly1305,
  envelope encryption, RSA, Ed25519 signatures, X25519 key exchange, Argon2id
  password hashing, key management strategies

Scripts (rebuilt with real functionality):
- threat_modeler.py (~675 lines): Interactive STRIDE analysis for any system
  component, DREAD risk scoring, comprehensive threat database with 70+ threats,
  mitigation recommendations, JSON/text output
- secret_scanner.py (~516 lines): Detects AWS/GCP/Azure credentials, GitHub/Slack/
  Stripe tokens, private keys (RSA/EC/OpenSSH/PGP), generic API keys, database
  connection strings, 20+ secret patterns, CI/CD integration ready

Deleted placeholder files:
- references/cryptography_implementation.md (template)
- references/penetration_testing_guide.md (template)
- references/security_architecture_patterns.md (template)
- scripts/pentest_automator.py (placeholder)
- scripts/security_auditor.py (placeholder)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 13:31:54 +01:00

517 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Secret Scanner
Detects hardcoded secrets, API keys, and credentials in source code.
Identifies exposed secrets before they reach version control.
Usage:
python secret_scanner.py /path/to/project
python secret_scanner.py /path/to/file.py
python secret_scanner.py /path/to/project --format json
python secret_scanner.py --list-patterns
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
from enum import Enum
class Severity(Enum):
    """Severity level attached to a detection rule and to each finding.

    Members are declared from most to least severe; the string values are
    the labels used in the reports and on the command line.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
@dataclass
class SecretPattern:
    """A single detection rule: a regular expression plus reporting metadata."""

    # Short rule identifier shown in reports, e.g. "AWS001".
    pattern_id: str
    # Human-readable rule name.
    name: str
    # One-line explanation of what the rule detects.
    description: str
    # Regular expression source; scan_file compiles it with re.IGNORECASE.
    regex: str
    # How serious a match of this rule is.
    severity: Severity
    # File suffixes (lower case, with leading dot) the rule is applied to.
    file_extensions: List[str]
    # Remediation advice copied into every finding produced by the rule.
    recommendation: str
@dataclass
class SecretFinding:
    """One match of a detection rule at a specific line of a specific file."""

    # Identifier, name and severity of the rule that matched.
    pattern_id: str
    name: str
    severity: Severity
    # Location of the match; line numbers are 1-based.
    file_path: str
    line_number: int
    # Masked excerpt of the matched text (the scanner truncates the match
    # before storing it here).
    matched_text: str
    # Remediation advice taken from the matching rule.
    recommendation: str
# Secret patterns database.
#
# Each entry is matched line by line; scan_file compiles every regex with
# re.IGNORECASE, so the character classes below are effectively
# case-insensitive. A rule is only applied to files whose suffix appears in
# its file_extensions list.
SECRET_PATTERNS = [
    # Cloud Provider Keys
    SecretPattern(
        pattern_id="AWS001",
        name="AWS Access Key ID",
        description="AWS access key identifier",
        regex=r'AKIA[0-9A-Z]{16}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml", ".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded keys"
    ),
    SecretPattern(
        pattern_id="AWS002",
        name="AWS Secret Access Key",
        description="AWS secret access key",
        regex=r'(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*["\']?[A-Za-z0-9/+=]{40}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".conf"],
        recommendation="Use IAM roles or AWS Secrets Manager instead of hardcoded secrets"
    ),
    SecretPattern(
        pattern_id="GCP001",
        name="Google Cloud API Key",
        description="Google Cloud Platform API key",
        regex=r'AIza[0-9A-Za-z\-_]{35}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use service accounts or Google Secret Manager"
    ),
    SecretPattern(
        pattern_id="AZURE001",
        name="Azure Storage Key",
        description="Azure storage account key",
        regex=r'(?:AccountKey|account_key)\s*[:=]\s*["\']?[A-Za-z0-9+/=]{88}["\']?',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".cs", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use Azure Key Vault or managed identities"
    ),
    # Authentication Tokens
    SecretPattern(
        pattern_id="JWT001",
        name="JSON Web Token",
        description="Hardcoded JWT token",
        regex=r'eyJ[A-Za-z0-9-_=]+\.eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_.+/=]*',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".json"],
        recommendation="Generate tokens dynamically, never hardcode"
    ),
    SecretPattern(
        pattern_id="GITHUB001",
        name="GitHub Token",
        description="GitHub personal access token or OAuth token",
        regex=r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,255}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use GitHub App authentication or environment variables"
    ),
    SecretPattern(
        pattern_id="GITLAB001",
        name="GitLab Token",
        description="GitLab personal access or pipeline token",
        regex=r'glpat-[A-Za-z0-9\-_]{20,}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml"],
        recommendation="Use CI/CD variables or environment variables"
    ),
    SecretPattern(
        pattern_id="SLACK001",
        name="Slack Token",
        description="Slack API token",
        regex=r'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables or secrets manager"
    ),
    SecretPattern(
        pattern_id="STRIPE001",
        name="Stripe API Key",
        description="Stripe secret or publishable key",
        regex=r'(?:sk|pk)_(?:test|live)_[0-9a-zA-Z]{24,}',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables, never commit API keys"
    ),
    SecretPattern(
        pattern_id="TWILIO001",
        name="Twilio API Key",
        description="Twilio account SID or auth token",
        # Twilio SIDs are a two-letter prefix followed by 32 hexadecimal
        # digits. Restricting to hex and anchoring on word boundaries keeps
        # the rule from firing inside longer hashes or base64 blobs that
        # merely contain "ac"/"sk" followed by alphanumerics.
        regex=r'\b(?:AC|SK)[0-9a-f]{32}\b',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for Twilio credentials"
    ),
    SecretPattern(
        pattern_id="SENDGRID001",
        name="SendGrid API Key",
        description="SendGrid API key",
        regex=r'SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for email service credentials"
    ),
    # Cryptographic Keys (only the PEM/armor header line is matched)
    SecretPattern(
        pattern_id="CRYPTO001",
        name="RSA Private Key",
        description="RSA private key in PEM format",
        regex=r'-----BEGIN RSA PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key", ".txt"],
        recommendation="Store private keys in secure key management systems"
    ),
    SecretPattern(
        pattern_id="CRYPTO002",
        name="EC Private Key",
        description="Elliptic curve private key",
        regex=r'-----BEGIN EC PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key"],
        recommendation="Use hardware security modules or key management services"
    ),
    SecretPattern(
        pattern_id="CRYPTO003",
        name="OpenSSH Private Key",
        description="OpenSSH private key",
        regex=r'-----BEGIN OPENSSH PRIVATE KEY-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".pem", ".key", ".txt"],
        recommendation="Never commit SSH keys to repositories"
    ),
    SecretPattern(
        pattern_id="CRYPTO004",
        name="PGP Private Key",
        description="PGP/GPG private key block",
        regex=r'-----BEGIN PGP PRIVATE KEY BLOCK-----',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".asc", ".gpg", ".txt"],
        recommendation="Store PGP keys in secure key rings, not source code"
    ),
    # Generic Patterns
    SecretPattern(
        pattern_id="GEN001",
        name="Generic API Key",
        description="Generic API key or secret pattern",
        regex=r'(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml"],
        recommendation="Use environment variables or secrets manager"
    ),
    SecretPattern(
        pattern_id="GEN002",
        name="Generic Secret",
        description="Generic secret or token pattern",
        regex=r'(?:secret|token|auth[_-]?token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']',
        severity=Severity.HIGH,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Store secrets in environment variables or secret managers"
    ),
    SecretPattern(
        pattern_id="GEN003",
        name="Password in Config",
        description="Password in configuration file",
        regex=r'(?:password|passwd|pwd)\s*[:=]\s*["\'][^"\']{8,}["\']',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json", ".xml", ".conf", ".ini"],
        recommendation="Never hardcode passwords. Use secret managers"
    ),
    SecretPattern(
        pattern_id="GEN004",
        name="Database Connection String",
        description="Database connection string with credentials",
        # The scheme part also accepts "postgresql", the TLS variants
        # "rediss"/"amqps", and an optional "+driver" suffix so that URLs
        # such as mongodb+srv://, postgresql+psycopg2:// and
        # mysql+pymysql:// are detected as well.
        regex=r'(?:mongodb|postgres(?:ql)?|mysql|rediss?|amqps?)(?:\+\w+)?://[^:]+:[^@]+@[^/]+',
        severity=Severity.CRITICAL,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php", ".env", ".yml", ".yaml", ".json"],
        recommendation="Use environment variables for database credentials"
    ),
    # Low Severity Patterns
    SecretPattern(
        pattern_id="LOW001",
        name="TODO with Secret",
        description="TODO comment mentioning secrets or credentials",
        regex=r'(?:#|//|/\*)\s*(?:TODO|FIXME|XXX).*(?:secret|password|credential|key)',
        severity=Severity.LOW,
        file_extensions=[".py", ".js", ".ts", ".java", ".go", ".rb", ".php"],
        recommendation="Address security TODOs before deployment"
    ),
]
def _candidate_extensions(file_path: Path) -> List[str]:
    """Return the extensions under which *file_path* should be matched.

    ``Path.suffix`` is empty for a file named ``.env`` and is ``.local`` for
    ``.env.local``, so dotenv files would never match a rule that lists
    ``.env``. Such files are therefore additionally reported as ``.env``.
    """
    candidates = [file_path.suffix.lower()]
    name = file_path.name.lower()
    if (name == ".env" or name.startswith(".env.")) and ".env" not in candidates:
        candidates.append(".env")
    return candidates


def _mask_match(matched: str) -> str:
    """Shorten matched text so the full secret is not echoed into reports."""
    if len(matched) > 20:
        return matched[:10] + "..." + matched[-5:]
    return matched[:5] + "..."


def scan_file(file_path: Path, patterns: List[SecretPattern]) -> List[SecretFinding]:
    """Scan a single file for secrets.

    Args:
        file_path: File to read. It is decoded as UTF-8 with undecodable
            bytes dropped.
        patterns: Rules to apply. A rule is used only when one of the file's
            candidate extensions is listed in its ``file_extensions``.

    Returns:
        Findings in rule order, then line order. At most one finding is
        produced per rule and line (the first match on that line). An
        unreadable file yields an empty list.
    """
    findings: List[SecretFinding] = []
    try:
        content = file_path.read_text(encoding='utf-8', errors='ignore')
    except (OSError, ValueError):
        # Best effort: a file that cannot be opened is skipped, not fatal.
        return findings

    extensions = _candidate_extensions(file_path)

    # Lines mentioning "regex" or "pattern" are skipped so that rule
    # definitions (such as the ones in this file) are not reported. The
    # filter does not depend on the rule, so it is evaluated once per file.
    scannable = []
    for line_number, line in enumerate(content.split('\n'), 1):
        lowered = line.lower()
        if 'regex' in lowered or 'pattern' in lowered:
            continue
        scannable.append((line_number, line))

    for pattern in patterns:
        if not any(ext in pattern.file_extensions for ext in extensions):
            continue
        try:
            regex = re.compile(pattern.regex, re.IGNORECASE)
        except re.error:
            # A malformed rule must not prevent the other rules from running.
            continue
        for line_number, line in scannable:
            match = regex.search(line)
            if match is None:
                continue
            findings.append(SecretFinding(
                pattern_id=pattern.pattern_id,
                name=pattern.name,
                severity=pattern.severity,
                file_path=str(file_path),
                line_number=line_number,
                matched_text=_mask_match(match.group(0)),
                recommendation=pattern.recommendation
            ))
    return findings
def scan_directory(dir_path: Path, patterns: List[SecretPattern],
                   exclude_dirs: Optional[List[str]] = None) -> List[SecretFinding]:
    """Scan all files in a directory tree for secrets.

    Args:
        dir_path: Root directory to walk.
        patterns: Rules to apply; only files whose suffix is listed by at
            least one rule (or that are dotenv files) are scanned.
        exclude_dirs: Names to skip anywhere below ``dir_path``. Defaults to
            common dependency, build and tooling directories.

    Returns:
        All findings, ordered from most to least severe. Within one severity
        the traversal order (sorted names, top-down) is preserved.
    """
    if exclude_dirs is None:
        exclude_dirs = [
            "node_modules", ".git", "__pycache__", "venv", ".venv",
            "dist", "build", ".next", "vendor", ".idea", ".vscode"
        ]
    excluded = set(exclude_dirs)

    extensions = set()
    for pattern in patterns:
        extensions.update(pattern.file_extensions)

    findings = []
    for root, dirnames, filenames in os.walk(dir_path):
        # Prune in place so excluded trees are never descended into. Only
        # names below dir_path are considered, so a scan root that itself
        # lives under e.g. ".../build/" is still scanned.
        dirnames[:] = sorted(d for d in dirnames if d not in excluded)
        for filename in sorted(filenames):
            if filename in excluded:
                continue
            file_path = Path(root) / filename
            if not file_path.is_file():
                continue
            try:
                size = file_path.stat().st_size
            except OSError:
                # File vanished or is unreadable; skip it and keep scanning.
                continue
            # Skip large files (1MB limit)
            if size > 1_000_000:
                continue
            lowered = filename.lower()
            is_dotenv = lowered == ".env" or lowered.startswith(".env.")
            if is_dotenv or file_path.suffix.lower() in extensions:
                findings.extend(scan_file(file_path, patterns))

    rank = {Severity.CRITICAL: 0, Severity.HIGH: 1, Severity.MEDIUM: 2}
    # sorted() is stable, so findings of equal severity keep traversal order.
    return sorted(findings, key=lambda finding: rank.get(finding.severity, 3))
def format_text_report(findings: List[SecretFinding], path: str) -> str:
    """Render findings as a plain-text report.

    The report has a header, a per-severity summary (severities with no
    findings are omitted) and one entry per finding. A severity heading is
    emitted whenever the severity differs from the previous finding's, so the
    caller is expected to pass findings already ordered by severity.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    report = [
        heavy_rule,
        "SECRET SCAN REPORT",
        heavy_rule,
        f"Target: {path}",
        "",
        "SUMMARY:",
        f" Total Secrets Found: {len(findings)}",
    ]

    # Per-severity totals, listed from most to least severe.
    for label in ("critical", "high", "medium", "low"):
        hits = sum(1 for item in findings if item.severity.value == label)
        if hits > 0:
            report.append(f" {label.upper()}: {hits}")
    report.append("")

    if not findings:
        report.extend(["No secrets found!", heavy_rule])
        return "\n".join(report)

    previous_severity = None
    for item in findings:
        if item.severity != previous_severity:
            # Start a new severity section.
            previous_severity = item.severity
            report.extend([
                light_rule,
                f"[{previous_severity.value.upper()}]",
                light_rule,
                "",
            ])
        report.extend([
            f" [{item.pattern_id}] {item.name}",
            f" File: {item.file_path}:{item.line_number}",
            f" Match: {item.matched_text}",
            f" Fix: {item.recommendation}",
            "",
        ])

    report.extend([
        heavy_rule,
        "IMPORTANT: Review all findings and rotate exposed credentials!",
        heavy_rule,
    ])
    return "\n".join(report)
def format_json_report(findings: List[SecretFinding], path: str) -> Dict:
    """Build the JSON-serialisable report structure for *findings*.

    Returns:
        A dict with the scan target, the scan time (naive local time in ISO
        8601 format), a summary holding the total and a count for every
        ``Severity`` value (zero when absent), and one dict per finding.
    """
    # Function-scope import keeps this block self-contained without touching
    # the module's import list.
    from datetime import datetime

    return {
        "target": path,
        "scan_date": datetime.now().isoformat(),
        "summary": {
            "total": len(findings),
            "by_severity": {
                sev.value: sum(1 for f in findings if f.severity == sev)
                for sev in Severity
            }
        },
        "findings": [
            {
                "pattern_id": f.pattern_id,
                "name": f.name,
                "severity": f.severity.value,
                "file_path": f.file_path,
                "line_number": f.line_number,
                "matched_text": f.matched_text,
                "recommendation": f.recommendation
            }
            for f in findings
        ]
    }
def list_patterns():
    """Print every detection rule, ordered by rule id, to standard output."""
    banner = "=" * 60
    print("\n" + banner)
    print("SECRET DETECTION PATTERNS")
    print(banner)

    ordered_rules = sorted(SECRET_PATTERNS, key=lambda rule: rule.pattern_id)
    for rule in ordered_rules:
        severity_label = rule.severity.value.upper()
        print(f"\n[{rule.pattern_id}] {rule.name}")
        print(f" Severity: {severity_label}")
        print(f" Description: {rule.description}")
def main():
    """Command-line entry point.

    Parses arguments, scans the given file or directory, then prints the
    report or writes it to ``--output``. The process exits with status 1 when
    the path does not exist or when any CRITICAL or HIGH finding is
    reported, which makes the script usable as a CI gate.
    """
    parser = argparse.ArgumentParser(
        description="Secret Scanner - Detect hardcoded secrets in code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Scan a project directory
python secret_scanner.py /path/to/project
# Scan a single file
python secret_scanner.py /path/to/config.py
# Output as JSON
python secret_scanner.py /path/to/project --format json
# List all detection patterns
python secret_scanner.py --list-patterns
# Save report to file
python secret_scanner.py /path/to/project --output report.txt
"""
    )
    parser.add_argument(
        "path",
        nargs="?",
        help="Path to scan (file or directory)"
    )
    parser.add_argument(
        "--format", "-f",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text)"
    )
    parser.add_argument(
        "--output", "-o",
        help="Output file path"
    )
    parser.add_argument(
        "--list-patterns", "-l",
        action="store_true",
        help="List all detection patterns"
    )
    parser.add_argument(
        "--severity", "-s",
        choices=["critical", "high", "medium", "low"],
        help="Minimum severity to report"
    )
    args = parser.parse_args()

    if args.list_patterns:
        list_patterns()
        return
    if not args.path:
        parser.error("path is required (or use --list-patterns)")

    path = Path(args.path)
    if not path.exists():
        # Diagnostics go to stderr so stdout stays clean for the report.
        print(f"Error: Path does not exist: {path}", file=sys.stderr)
        sys.exit(1)

    # Most severe first; used for filtering and for ordering the findings.
    severity_order = ["critical", "high", "medium", "low"]

    # Filter patterns by severity: keep the requested level and anything
    # more severe.
    patterns = SECRET_PATTERNS
    if args.severity:
        min_index = severity_order.index(args.severity)
        allowed = {Severity(s) for s in severity_order[:min_index + 1]}
        patterns = [p for p in patterns if p.severity in allowed]

    # Scan
    if path.is_file():
        findings = scan_file(path, patterns)
    else:
        findings = scan_directory(path, patterns)

    # The text report starts a new section whenever the severity changes, so
    # findings must be grouped by severity. A single-file scan returns them
    # in rule order; the stable sort groups them without reordering within
    # a severity.
    findings = sorted(
        findings,
        key=lambda finding: severity_order.index(finding.severity.value),
    )

    # Format output
    if args.format == "json":
        output = json.dumps(format_json_report(findings, str(path)), indent=2)
    else:
        output = format_text_report(findings, str(path))

    # Write output
    if args.output:
        # Explicit UTF-8: matched text comes from scanned files and may
        # contain characters the platform default encoding cannot represent.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Report written to {args.output}")
    else:
        print(output)

    # Exit code based on findings
    if any(f.severity in (Severity.CRITICAL, Severity.HIGH) for f in findings):
        sys.exit(1)


if __name__ == "__main__":
    main()