Files
antigravity-skills-reference/skills/007/scripts/scanners/secrets_scanner.py
ProgramadorBrasil 61ec71c5c7 feat: add 52 specialized AI agent skills (#217)
New skills covering 10 categories:

**Security & Audit**: 007 (STRIDE/PASTA/OWASP), cred-omega (secrets management)
**AI Personas**: Karpathy, Hinton, Sutskever, LeCun (4 sub-skills), Altman, Musk, Gates, Jobs, Buffett
**Multi-agent Orchestration**: agent-orchestrator, task-intelligence, multi-advisor
**Code Analysis**: matematico-tao (Terence Tao-inspired mathematical code analysis)
**Social & Messaging**: Instagram Graph API, Telegram Bot, WhatsApp Cloud API, social-orchestrator
**Image Generation**: AI Studio (Gemini), Stability AI, ComfyUI Gateway, image-studio router
**Brazilian Domain**: 6 auction specialist modules, 2 legal advisors, auctioneers data scraper
**Product & Growth**: design, invention, monetization, analytics, growth engine
**DevOps & LLM Ops**: Docker/CI-CD/AWS, RAG/embeddings/fine-tuning
**Skill Governance**: installer, sentinel auditor, context management

Each skill includes:
- Standardized YAML frontmatter (name, description, risk, source, tags, tools)
- Structured sections (Overview, When to Use, How it Works, Best Practices)
- Python scripts and reference documentation where applicable
- Cross-platform compatibility (Claude Code, Antigravity, Cursor, Gemini CLI, Codex CLI)

Co-authored-by: ProgramadorBrasil <214873561+ProgramadorBrasil@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 10:04:07 +01:00

1009 lines
31 KiB
Python

"""007 Secrets Scanner -- Deep scanner for secrets and credentials.
Goes deeper than quick_scan by performing entropy analysis, base64 detection,
context-aware false positive reduction, and targeted scanning of sensitive
file types (.env, config files, shell scripts, Docker, CI/CD).
Usage:
python secrets_scanner.py --target /path/to/project
python secrets_scanner.py --target /path/to/project --output json --verbose
python secrets_scanner.py --target /path/to/project --include-low
"""
import argparse
import base64
import json
import math
import os
import re
import sys
import time
from pathlib import Path
# ---------------------------------------------------------------------------
# Import from the 007 config hub (parent directory)
# ---------------------------------------------------------------------------
# Make the skill root (one level up from scripts/scanners/) importable so the
# shared `config` module loads regardless of the current working directory.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
import config  # noqa: E402
# ---------------------------------------------------------------------------
# Logger
# ---------------------------------------------------------------------------
# Module-wide logger, configured by the shared 007 config hub.
logger = config.setup_logging("007-secrets-scanner")
# ---------------------------------------------------------------------------
# Additional patterns beyond config.SECRET_PATTERNS
# ---------------------------------------------------------------------------
# Each entry: (pattern_name, compiled_regex, severity)
_EXTRA_PATTERN_DEFS = [
    # URLs with embedded credentials (http://user:pass@host)
    (
        "url_embedded_credentials",
        r"""https?://[^:\s]+:[^@\s]+@[^\s/]+""",
        "HIGH",
    ),
    # Stripe keys (secret and publishable, live and test prefixes)
    (
        "stripe_key",
        r"""(?:sk|pk)_(?:live|test)_[A-Za-z0-9]{20,}""",
        "CRITICAL",
    ),
    # Google API key (fixed "AIza" prefix, 35 trailing chars)
    (
        "google_api_key",
        r"""AIza[0-9A-Za-z\-_]{35}""",
        "HIGH",
    ),
    # Twilio Account SID / Auth Token (AC.../SK... + 32 hex chars)
    (
        "twilio_key",
        r"""(?:AC[a-f0-9]{32}|SK[a-f0-9]{32})""",
        "HIGH",
    ),
    # Heroku API key assignment (key=value form)
    (
        "heroku_api_key",
        r"""(?i)heroku[_-]?api[_-]?key\s*[:=]\s*['\"]\S{8,}['\"]""",
        "HIGH",
    ),
    # SendGrid API key (SG.<22>.<43> structure)
    (
        "sendgrid_key",
        r"""SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}""",
        "CRITICAL",
    ),
    # npm token ("npm_" prefix + 36 chars)
    (
        "npm_token",
        r"""(?:npm_)[A-Za-z0-9]{36}""",
        "CRITICAL",
    ),
    # Generic connection string (ODBC / ADO style assignment)
    (
        "connection_string",
        r"""(?i)(?:connectionstring|conn_str)\s*[:=]\s*['\"][^'\"]{10,}['\"]""",
        "HIGH",
    ),
    # JWT tokens (three base64url segments separated by dots, "eyJ" = '{"')
    (
        "jwt_token",
        r"""eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}""",
        "MEDIUM",
    ),
    # Azure storage account key assignment
    (
        "azure_storage_key",
        r"""(?i)(?:accountkey|storage[_-]?key)\s*[:=]\s*['\"]\S{44,}['\"]""",
        "CRITICAL",
    ),
]
# Compile once at import time so per-line scanning reuses the same objects.
EXTRA_PATTERNS = [
    (name, re.compile(pattern), severity)
    for name, pattern, severity in _EXTRA_PATTERN_DEFS
]
# Combined pattern set: config patterns first, then extras
ALL_SECRET_PATTERNS = list(config.SECRET_PATTERNS) + EXTRA_PATTERNS
# ---------------------------------------------------------------------------
# Targeted file categories for deep scanning
# ---------------------------------------------------------------------------
# .env variants -- always scanned regardless of SCANNABLE_EXTENSIONS
ENV_FILE_PATTERNS = {
    ".env", ".env.local", ".env.production", ".env.staging",
    ".env.development", ".env.test", ".env.example", ".env.sample",
    ".env.defaults", ".env.template",
}
# Config-style files that commonly embed credentials.
CONFIG_EXTENSIONS = {".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf"}
# Shell / batch script extensions (exported variables, inline credentials).
SHELL_EXTENSIONS = {".sh", ".bash", ".zsh", ".ps1", ".bat", ".cmd"}
# Docker file name prefixes; compared against a lowercased basename, so the
# "dockerfile" entry is the one that effectively matches.
DOCKER_PREFIXES = ("Dockerfile", "dockerfile", "docker-compose")
# CI/CD config paths / names; matched as substrings of the full path.
CICD_PATTERNS = {
    ".github/workflows",
    ".gitlab-ci.yml",
    "Jenkinsfile",
    ".circleci/config.yml",
    ".travis.yml",
    "azure-pipelines.yml",
    "bitbucket-pipelines.yml",
}
# Key-material extensions; their mere presence is reported as a finding.
PRIVATE_KEY_EXTENSIONS = {".pem", ".key", ".p12", ".pfx", ".jks", ".keystore"}
# Files that are test fixtures -- lower severity or skip
_TEST_FILE_PATTERNS = re.compile(
    r"""(?i)(?:^test_|_test\.py$|\.test\.[jt]sx?$|\.spec\.[jt]sx?$|__tests__|fixtures?[/\\])"""
)
# Placeholder / example value patterns -- these are NOT real secrets
_PLACEHOLDER_PATTERN = re.compile(
    r"""(?i)(?:example|placeholder|changeme|xxx+|your[_-]?key[_-]?here|"""
    r"""insert[_-]?here|replace[_-]?me|todo|fixme|dummy|fake|sample|test123|"""
    r"""sk_test_|pk_test_)"""
)
# ---------------------------------------------------------------------------
# Entropy calculation
# ---------------------------------------------------------------------------
def shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of *s* in bits.

    Higher entropy indicates more randomness, which may suggest a
    secret/token. Typical English text: ~3.5-4.0 bits. Random tokens:
    ~4.5-6.0 bits.

    Args:
        s: Input string.

    Returns:
        Shannon entropy in bits. Returns 0.0 for empty strings.
    """
    if not s:
        return 0.0
    n = len(s)
    # Sum -p*log2(p) over the distinct symbols of the string.
    probabilities = (s.count(symbol) / n for symbol in set(s))
    return -sum(p * math.log2(p) for p in probabilities)
# ---------------------------------------------------------------------------
# Base64 detection
# ---------------------------------------------------------------------------
# Standard base64 alphabet, 20+ chars, up to two '=' padding chars.
_BASE64_RE = re.compile(
    r"""[A-Za-z0-9+/]{20,}={0,2}"""
)
# URL-safe base64 alphabet variant.
# NOTE(review): not referenced by the visible scanning code -- presumably
# reserved for URL-safe token detection; confirm before removing.
_BASE64_URL_RE = re.compile(
    r"""[A-Za-z0-9_-]{20,}"""
)
def _check_base64_secret(token: str) -> bool:
    """Check whether a base64-looking string decodes to high-entropy data.

    Args:
        token: A candidate base64 string.

    Returns:
        True if the decoded content is at least 12 bytes and has high
        entropy (> 4.0 bits), i.e. likely a secret.
    """
    # Standard base64 requires length % 4 == 0; add padding as needed.
    candidate = token + "=" * (-len(token) % 4)
    try:
        raw = base64.b64decode(candidate, validate=True)
    except Exception:
        # Not valid base64 at all -- definitely not an encoded secret.
        return False
    if len(raw) < 12:
        return False
    # Only flag when the decoded payload itself looks random.
    return shannon_entropy(raw.decode("ascii", errors="replace")) > 4.0
# ---------------------------------------------------------------------------
# Hardcoded IP detection
# ---------------------------------------------------------------------------
# Dotted-quad candidate; octet ranges (0-255) are validated later in scan_file.
_IP_RE = re.compile(
    r"""\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b"""
)
# String prefixes considered non-reportable. NOTE: these are plain prefix
# checks, not CIDR math -- e.g. "255." also matches non-broadcast addresses;
# acceptable for a heuristic scanner.
_SAFE_IP_PREFIXES = (
    "127.",  # localhost
    "0.",  # unspecified
    "10.",  # private class A
    "192.168.",  # private class C
    "169.254.",  # link-local
    "255.",  # broadcast
)
def _is_private_or_localhost(ip: str) -> bool:
    """Return True if IP is localhost, private range, or otherwise safe."""
    for safe_prefix in _SAFE_IP_PREFIXES:
        if ip.startswith(safe_prefix):
            return True
    # Remaining private block: 172.16.0.0 - 172.31.255.255 (class B).
    octets = ip.split(".")
    try:
        return octets[0] == "172" and 16 <= int(octets[1]) <= 31
    except (IndexError, ValueError):
        return False
# ---------------------------------------------------------------------------
# Context-aware false positive reduction
# ---------------------------------------------------------------------------
# Line-start comment markers across common languages (#, //, /*, *, ;, REM).
_COMMENT_LINE_RE = re.compile(
    r"""^\s*(?:#|//|/\*|\*|;|rem\b|@rem\b)""", re.IGNORECASE
)
# Markdown fenced-code delimiter (```); toggles the in-fence state in scan_file.
_MARKDOWN_CODE_FENCE = re.compile(r"""^\s*```""")
def _is_comment_line(line: str) -> bool:
    """Heuristic: does this line begin like a comment (#, //, /*, ;, REM)?"""
    return _COMMENT_LINE_RE.match(line) is not None
def _is_test_file(filepath: Path) -> bool:
    """Return True if the file looks like a test file / test fixture.

    The basename is checked on its own because anchored patterns such as
    ``^test_`` would never match once directory components are prepended.
    """
    if _TEST_FILE_PATTERNS.search(filepath.name):
        return True
    return _TEST_FILE_PATTERNS.search(str(filepath)) is not None
def _is_placeholder_value(line: str) -> bool:
    """Return True when the line contains an obvious placeholder/example value."""
    return _PLACEHOLDER_PATTERN.search(line) is not None
def _is_env_example(filepath: Path) -> bool:
"""Return True if the file is a .env.example or similar template."""
name = filepath.name.lower()
return name in (".env.example", ".env.sample", ".env.template", ".env.defaults")
def _classify_file(filepath: Path) -> str:
    """Bucket *filepath* into a reporting category.

    Returns one of: 'env', 'config', 'shell', 'docker', 'cicd',
    'private_key', 'source', 'other'. Checks run in priority order, so a
    file matching several categories gets the earliest one.
    """
    name = filepath.name.lower()
    suffix = filepath.suffix.lower()
    # .env variants first -- they are the highest-signal files.
    if name.startswith(".env") or name in ENV_FILE_PATTERNS:
        return "env"
    if suffix in PRIVATE_KEY_EXTENSIONS:
        return "private_key"
    if suffix in CONFIG_EXTENSIONS:
        return "config"
    if suffix in SHELL_EXTENSIONS:
        return "shell"
    for docker_prefix in DOCKER_PREFIXES:
        if name.startswith(docker_prefix):
            return "docker"
    # CI/CD configs are matched as path substrings (forward-slash normalized).
    normalized_path = str(filepath).replace("\\", "/")
    if any(pattern in normalized_path for pattern in CICD_PATTERNS):
        return "cicd"
    if suffix in config.SCANNABLE_EXTENSIONS:
        return "source"
    return "other"
# ---------------------------------------------------------------------------
# File collection (deeper than quick_scan)
# ---------------------------------------------------------------------------
def _should_scan_file(filepath: Path) -> bool:
    """Determine if a file should be included in the deep scan.

    More inclusive than quick_scan: also picks up .env variants, Docker
    files, CI/CD files, and private key files even if their extension is
    not in SCANNABLE_EXTENSIONS.

    Args:
        filepath: Candidate file path.

    Returns:
        True if the file should be scanned.
    """
    name = filepath.name.lower()
    suffix = filepath.suffix.lower()
    # Always scan .env variants
    if name.startswith(".env"):
        return True
    # Always scan private key files (we detect their presence, not content)
    if suffix in PRIVATE_KEY_EXTENSIONS:
        return True
    # Always scan Docker files
    if any(name.startswith(prefix) for prefix in DOCKER_PREFIXES):
        return True
    # Always scan CI/CD files
    filepath_str = str(filepath).replace("\\", "/")
    for cicd_pattern in CICD_PATTERNS:
        # BUGFIX: `name` is lowercased, so the pattern basename must be
        # lowercased too; previously mixed-case entries such as
        # "Jenkinsfile" could never match this basename fallback.
        if cicd_pattern in filepath_str or name == Path(cicd_pattern).name.lower():
            return True
    # Standard scannable extensions; endswith() also catches compound
    # names whose Path.suffix would miss the registered extension.
    for ext in config.SCANNABLE_EXTENSIONS:
        if name.endswith(ext):
            return True
    return suffix in config.SCANNABLE_EXTENSIONS
def collect_files(target: Path) -> list[Path]:
    """Walk *target* recursively and return files for deep scanning.

    Respects SKIP_DIRECTORIES but is more inclusive on file types.
    Collection stops as soon as the configured max_files_per_scan limit
    is reached.
    """
    collected: list[Path] = []
    limit = config.LIMITS["max_files_per_scan"]
    for root, dirs, filenames in os.walk(target):
        # Prune skipped directories in place so os.walk never descends.
        dirs[:] = [d for d in dirs if d not in config.SKIP_DIRECTORIES]
        for fname in filenames:
            if len(collected) >= limit:
                logger.warning(
                    "Reached max_files_per_scan limit (%d). Stopping.", limit
                )
                return collected
            candidate = Path(root) / fname
            if _should_scan_file(candidate):
                collected.append(candidate)
    return collected
# ---------------------------------------------------------------------------
# Core scanning logic
# ---------------------------------------------------------------------------
def _redact(text: str, keep: int = 6) -> str:
"""Return a redacted version of *text*, keeping only the first few chars."""
text = text.strip()
if len(text) <= keep:
return text
return text[:keep] + "****"
def _snippet(line: str, match_start: int, context: int = 50) -> str:
    """Return a short, redacted excerpt of *line* around *match_start*."""
    lo = max(0, match_start - context // 2)
    hi = min(len(line), match_start + context)
    return _redact(line[lo:hi].strip())
def scan_file(filepath: Path, verbose: bool = False) -> list[dict]:
    """Perform deep secret scanning on a single file.

    Applies pattern matching, entropy analysis, base64 detection,
    URL credential detection, IP detection, and context-aware filtering.

    Args:
        filepath: File to scan.
        verbose: Emit debug logs for skipped/unreadable files.

    Returns:
        A list of finding dicts with keys: type, pattern, severity, file,
        line, snippet, category (plus entropy for entropy findings).
    """
    findings: list[dict] = []
    max_findings = config.LIMITS["max_findings_per_file"]
    file_str = str(filepath)
    file_category = _classify_file(filepath)
    is_test = _is_test_file(filepath)
    is_env_ex = _is_env_example(filepath)
    # --- Private key file detection (by extension, not content) ---
    if filepath.suffix.lower() in PRIVATE_KEY_EXTENSIONS:
        sev = "MEDIUM" if is_test else "CRITICAL"
        findings.append({
            "type": "secret",
            "pattern": "private_key_file",
            "severity": sev,
            "file": file_str,
            "line": 0,  # file-level finding, not tied to a line
            "snippet": f"Private key file detected: {filepath.name}",
            "category": file_category,
        })
        # Still scan content if readable
        # (fall through)
    # --- File size check ---
    try:
        size = filepath.stat().st_size
    except OSError:
        # File vanished or is not stat-able; keep what we have so far.
        return findings
    if size > config.LIMITS["max_file_size_bytes"]:
        if verbose:
            logger.debug("Skipping oversized file: %s (%d bytes)", filepath, size)
        return findings
    # --- Read content ---
    try:
        text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        if verbose:
            logger.debug("Cannot read %s: %s", filepath, exc)
        return findings
    lines = text.splitlines()
    in_markdown_code_block = False
    for line_num, line in enumerate(lines, start=1):
        # Global per-file cap on findings.
        if len(findings) >= max_findings:
            break
        stripped = line.strip()
        if not stripped:
            continue
        # Track markdown code fences for context-aware filtering
        if _MARKDOWN_CODE_FENCE.match(stripped):
            in_markdown_code_block = not in_markdown_code_block
            continue
        # Context-aware filters
        is_comment = _is_comment_line(stripped)
        is_placeholder = _is_placeholder_value(stripped)
        # --- Pattern matching (config + extra patterns) ---
        for pattern_name, regex, severity in ALL_SECRET_PATTERNS:
            m = regex.search(line)
            if not m:
                continue
            # Apply false positive reduction
            skip = False
            adjusted_severity = severity
            if is_comment and not file_category == "env":
                # Comments in source code are usually not real secrets
                # But comments in .env files might still be sensitive
                skip = True
            if in_markdown_code_block:
                skip = True
            if is_placeholder:
                skip = True
            if is_test:
                # Lower severity for test files
                sev_weight = config.SEVERITY.get(severity, 1)
                if sev_weight >= config.SEVERITY["HIGH"]:
                    adjusted_severity = "MEDIUM"
                elif sev_weight >= config.SEVERITY["MEDIUM"]:
                    adjusted_severity = "LOW"
            if is_env_ex:
                # .env.example should have placeholders, not real values
                # If pattern matches, it might be a real secret leaked into example
                if not is_placeholder:
                    adjusted_severity = "MEDIUM"  # flag but lower severity
                else:
                    skip = True  # placeholder in example = expected
            if skip:
                continue
            findings.append({
                "type": "secret",
                "pattern": pattern_name,
                "severity": adjusted_severity,
                "file": file_str,
                "line": line_num,
                "snippet": _snippet(line, m.start()),
                "category": file_category,
            })
        # --- High entropy string detection ---
        # Look for quoted strings or assignment values 16+ chars
        for token_match in re.finditer(r"""['"]([^'"]{16,})['\"]""", line):
            if len(findings) >= max_findings:
                break
            token = token_match.group(1)
            ent = shannon_entropy(token)
            if ent > 4.5:
                # Skip if already caught by pattern matching
                # (crude check: see if any finding on this line already)
                already_found = any(
                    f["file"] == file_str and f["line"] == line_num
                    for f in findings
                )
                if already_found:
                    continue
                if is_comment or in_markdown_code_block or is_placeholder:
                    continue
                sev = "MEDIUM"
                if ent > 5.0:
                    sev = "HIGH"
                if is_test:
                    sev = "LOW"
                findings.append({
                    "type": "secret",
                    "pattern": "high_entropy_string",
                    "severity": sev,
                    "file": file_str,
                    "line": line_num,
                    "snippet": _redact(token),
                    "category": file_category,
                    "entropy": round(ent, 2),
                })
        # --- Base64-encoded secret detection ---
        for b64_match in _BASE64_RE.finditer(line):
            if len(findings) >= max_findings:
                break
            token = b64_match.group(0)
            if len(token) < 20:
                continue
            # Skip if already caught
            already_found = any(
                f["file"] == file_str and f["line"] == line_num
                for f in findings
            )
            if already_found:
                continue
            if is_comment or in_markdown_code_block or is_placeholder:
                continue
            if _check_base64_secret(token):
                sev = "MEDIUM" if is_test else "HIGH"
                findings.append({
                    "type": "secret",
                    "pattern": "base64_encoded_secret",
                    "severity": sev,
                    "file": file_str,
                    "line": line_num,
                    "snippet": _redact(token),
                    "category": file_category,
                })
        # --- URL with embedded credentials ---
        # Already handled by pattern, but double-check for non-standard schemes
        # (covered by url_embedded_credentials pattern)
        # --- Hardcoded IP detection ---
        for ip_match in _IP_RE.finditer(line):
            if len(findings) >= max_findings:
                break
            ip = ip_match.group(1)
            if _is_private_or_localhost(ip):
                continue
            # Validate it looks like a real IP (each octet 0-255)
            parts = ip.split(".")
            try:
                if not all(0 <= int(p) <= 255 for p in parts):
                    continue
            except ValueError:
                continue
            if is_comment or in_markdown_code_block:
                continue
            sev = "LOW"
            if is_test:
                continue  # Skip IPs in test files entirely
            findings.append({
                "type": "hardcoded_ip",
                "pattern": "hardcoded_public_ip",
                "severity": sev,
                "file": file_str,
                "line": line_num,
                "snippet": ip,
                "category": file_category,
            })
    return findings
# ---------------------------------------------------------------------------
# Aggregation and scoring
# ---------------------------------------------------------------------------
# Points subtracted from the 100-point secrets score per finding, by severity.
SCORE_DEDUCTIONS = {
    "CRITICAL": 10,
    "HIGH": 5,
    "MEDIUM": 2,
    "LOW": 1,
    "INFO": 0,
}
def aggregate_by_severity(findings: list[dict]) -> dict[str, int]:
    """Count findings per severity level.

    Every level from config.SEVERITY appears in the result, even at zero;
    findings with an unrecognized severity are ignored.
    """
    counts = dict.fromkeys(config.SEVERITY, 0)
    for finding in findings:
        severity = finding.get("severity", "INFO")
        if severity in counts:
            counts[severity] += 1
    return counts
def aggregate_by_pattern(findings: list[dict]) -> dict[str, int]:
    """Count findings per pattern type ('unknown' when absent)."""
    tally: dict[str, int] = {}
    for finding in findings:
        key = finding.get("pattern", "unknown")
        tally.setdefault(key, 0)
        tally[key] += 1
    return tally
def aggregate_by_category(findings: list[dict]) -> dict[str, int]:
    """Count findings per file category ('other' when absent)."""
    tally: dict[str, int] = {}
    for finding in findings:
        key = finding.get("category", "other")
        tally.setdefault(key, 0)
        tally[key] += 1
    return tally
def compute_score(findings: list[dict]) -> int:
    """Compute the secrets score: 100 minus per-finding deductions, floored at 0."""
    penalty = sum(SCORE_DEDUCTIONS.get(f["severity"], 0) for f in findings)
    return max(0, 100 - penalty)
# ---------------------------------------------------------------------------
# Report formatters
# ---------------------------------------------------------------------------
def format_text_report(
    target: str,
    total_files: int,
    findings: list[dict],
    severity_counts: dict[str, int],
    pattern_counts: dict[str, int],
    category_counts: dict[str, int],
    score: int,
    verdict: dict,
    elapsed: float,
    include_low: bool = False,
) -> str:
    """Build a human-readable text report grouped by severity, then file.

    Args:
        target: Scanned directory path (for the header).
        total_files: Number of files scanned.
        findings: Finding dicts produced by scan_file().
        severity_counts: Output of aggregate_by_severity().
        pattern_counts: Output of aggregate_by_pattern().
        category_counts: Output of aggregate_by_category().
        score: 0-100 score from compute_score().
        verdict: Verdict dict with 'label', 'description', 'emoji' keys.
        elapsed: Scan duration in seconds.
        include_low: When True, also display LOW severity findings.

    Returns:
        The formatted multi-line report as a single string.
    """
    lines: list[str] = []
    lines.append("=" * 72)
    lines.append(" 007 SECRETS SCANNER -- DEEP SCAN REPORT")
    lines.append("=" * 72)
    lines.append("")
    # Metadata
    lines.append(f" Target: {target}")
    lines.append(f" Timestamp: {config.get_timestamp()}")
    lines.append(f" Duration: {elapsed:.2f}s")
    lines.append(f" Files scanned: {total_files}")
    lines.append(f" Total findings: {len(findings)}")
    lines.append("")
    # Severity breakdown
    lines.append("-" * 72)
    lines.append(" FINDINGS BY SEVERITY")
    lines.append("-" * 72)
    for sev in ("CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"):
        count = severity_counts.get(sev, 0)
        bar = "#" * min(count, 40)  # cap the histogram bar width
        lines.append(f" {sev:<10} {count:>5} {bar}")
    lines.append("")
    # Pattern type breakdown
    if pattern_counts:
        lines.append("-" * 72)
        lines.append(" FINDINGS BY TYPE")
        lines.append("-" * 72)
        # Most frequent pattern types first; top 20 only.
        sorted_patterns = sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True)
        for pattern_name, count in sorted_patterns[:20]:
            lines.append(f" {pattern_name:<35} {count:>5}")
        lines.append("")
    # Category breakdown
    if category_counts:
        lines.append("-" * 72)
        lines.append(" FINDINGS BY FILE CATEGORY")
        lines.append("-" * 72)
        sorted_cats = sorted(category_counts.items(), key=lambda x: x[1], reverse=True)
        for cat_name, count in sorted_cats:
            lines.append(f" {cat_name:<20} {count:>5}")
        lines.append("")
    # Findings grouped by severity, then by file
    min_severity = config.SEVERITY["LOW"] if include_low else config.SEVERITY["MEDIUM"]
    displayed = [
        f for f in findings
        if config.SEVERITY.get(f.get("severity", "INFO"), 0) >= min_severity
    ]
    if displayed:
        # Group by severity
        by_severity: dict[str, list[dict]] = {}
        for f in displayed:
            sev = f.get("severity", "INFO")
            by_severity.setdefault(sev, []).append(f)
        for sev in ("CRITICAL", "HIGH", "MEDIUM", "LOW"):
            sev_findings = by_severity.get(sev, [])
            if not sev_findings:
                continue
            lines.append("-" * 72)
            lines.append(f" [{sev}] FINDINGS ({len(sev_findings)})")
            lines.append("-" * 72)
            # Sub-group by file
            by_file: dict[str, list[dict]] = {}
            for f in sev_findings:
                by_file.setdefault(f["file"], []).append(f)
            for filepath, file_findings in sorted(by_file.items()):
                lines.append(f" {filepath}")
                # Findings within a file are listed in line order.
                for f in sorted(file_findings, key=lambda x: x.get("line", 0)):
                    loc = f"L{f['line']}" if f.get("line") else ""
                    snippet_part = f" [{f['snippet']}]" if f.get("snippet") else ""
                    entropy_part = f" (entropy={f['entropy']})" if f.get("entropy") else ""
                    lines.append(
                        f" {loc:>6} {f['pattern']}{snippet_part}{entropy_part}"
                    )
                lines.append("")
    else:
        lines.append(" No findings above the display threshold.")
        lines.append("")
    # Score and verdict
    lines.append("=" * 72)
    lines.append(f" SECRETS SCORE: {score} / 100")
    lines.append(f" VERDICT: {verdict['emoji']} {verdict['label']}")
    lines.append(f" {verdict['description']}")
    lines.append("=" * 72)
    lines.append("")
    return "\n".join(lines)
def build_json_report(
    target: str,
    total_files: int,
    findings: list[dict],
    severity_counts: dict[str, int],
    pattern_counts: dict[str, int],
    category_counts: dict[str, int],
    score: int,
    verdict: dict,
    elapsed: float,
) -> dict:
    """Assemble the structured, JSON-serializable report payload."""
    # Only the display-relevant verdict fields are included in the report.
    verdict_summary = {
        key: verdict[key] for key in ("label", "description", "emoji")
    }
    report: dict = {
        "scan": "secrets_scanner",
        "target": target,
        "timestamp": config.get_timestamp(),
        "duration_seconds": round(elapsed, 3),
        "total_files_scanned": total_files,
        "total_findings": len(findings),
        "severity_counts": severity_counts,
        "pattern_counts": pattern_counts,
        "category_counts": category_counts,
        "score": score,
        "verdict": verdict_summary,
        "findings": findings,
    }
    return report
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def run_scan(
    target_path: str,
    output_format: str = "text",
    verbose: bool = False,
    include_low: bool = False,
) -> dict:
    """Execute the deep secrets scan and return the report dict.

    Also prints the report to stdout in the requested format.

    Args:
        target_path: Path to the directory to scan.
        output_format: 'text' or 'json'.
        verbose: Enable debug-level logging.
        include_low: Include LOW severity findings in text output.

    Returns:
        JSON-compatible report dict.

    Raises:
        SystemExit: If the target does not exist or is not a directory.
    """
    if verbose:
        logger.setLevel("DEBUG")
    config.ensure_directories()
    target = Path(target_path).resolve()
    if not target.exists():
        logger.error("Target path does not exist: %s", target)
        sys.exit(1)
    if not target.is_dir():
        logger.error("Target is not a directory: %s", target)
        sys.exit(1)
    logger.info("Starting deep secrets scan of %s", target)
    start_time = time.time()
    # Collect files
    files = collect_files(target)
    total_files = len(files)
    logger.info("Collected %d files for deep scanning", total_files)
    # Scan each file
    all_findings: list[dict] = []
    max_report = config.LIMITS["max_report_findings"]
    for fpath in files:
        if len(all_findings) >= max_report:
            logger.warning(
                "Reached max_report_findings limit (%d). Truncating.", max_report
            )
            break
        file_findings = scan_file(fpath, verbose=verbose)
        # Trim the last batch so the global cap is never exceeded mid-file.
        remaining = max_report - len(all_findings)
        all_findings.extend(file_findings[:remaining])
    elapsed = time.time() - start_time
    logger.info(
        "Deep scan complete: %d files, %d findings in %.2fs",
        total_files, len(all_findings), elapsed,
    )
    # Aggregation
    severity_counts = aggregate_by_severity(all_findings)
    pattern_counts = aggregate_by_pattern(all_findings)
    category_counts = aggregate_by_category(all_findings)
    score = compute_score(all_findings)
    verdict = config.get_verdict(score)
    # Audit log
    config.log_audit_event(
        action="secrets_scan",
        target=str(target),
        result=f"score={score}, findings={len(all_findings)}, verdict={verdict['label']}",
        details={
            "total_files": total_files,
            "severity_counts": severity_counts,
            "pattern_counts": pattern_counts,
            "category_counts": category_counts,
            "duration_seconds": round(elapsed, 3),
        },
    )
    # Build report
    report = build_json_report(
        target=str(target),
        total_files=total_files,
        findings=all_findings,
        severity_counts=severity_counts,
        pattern_counts=pattern_counts,
        category_counts=category_counts,
        score=score,
        verdict=verdict,
        elapsed=elapsed,
    )
    # Output
    if output_format == "json":
        print(json.dumps(report, indent=2, ensure_ascii=False))
    else:
        print(format_text_report(
            target=str(target),
            total_files=total_files,
            findings=all_findings,
            severity_counts=severity_counts,
            pattern_counts=pattern_counts,
            category_counts=category_counts,
            score=score,
            verdict=verdict,
            elapsed=elapsed,
            include_low=include_low,
        ))
    return report
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the secrets scanner.

    Exposed as a function (rather than module-level code under the
    ``__main__`` guard) so the parser can be constructed and tested
    without executing a scan.
    """
    parser = argparse.ArgumentParser(
        description="007 Secrets Scanner -- Deep scanner for secrets and credentials.",
        epilog=(
            "Examples:\n"
            " python secrets_scanner.py --target ./my-project\n"
            " python secrets_scanner.py --target ./my-project --output json\n"
            " python secrets_scanner.py --target ./my-project --verbose --include-low"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--target",
        required=True,
        help="Path to the directory to scan (required).",
    )
    parser.add_argument(
        "--output",
        choices=["text", "json"],
        default="text",
        help="Output format: 'text' (default) or 'json'.",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Enable verbose/debug logging.",
    )
    parser.add_argument(
        "--include-low",
        action="store_true",
        default=False,
        help="Include LOW severity findings in text output (hidden by default).",
    )
    return parser


def main() -> None:
    """CLI entry point: parse arguments and run the deep secrets scan."""
    args = build_arg_parser().parse_args()
    run_scan(
        target_path=args.target,
        output_format=args.output,
        verbose=args.verbose,
        include_low=args.include_low,
    )


if __name__ == "__main__":
    main()