Files
daymade d1041ac203 feat(skill-creator): Add mandatory security review step before packaging
- Add security_scan.py script with gitleaks integration
- Detect hardcoded secrets, personal info, and unsafe code patterns
- Add content-based hash validation in package_skill.py
- BLOCK packaging if security scan not run or content changed
- Add reference file naming guidelines in SKILL.md
- Create .gitignore for security marker files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-28 13:10:35 +08:00

512 lines
19 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Security Scanner for Claude Code Skills
Validates skills before packaging to prevent secret leakage and security issues.
SINGLE RESPONSIBILITY: Validate skill security before distribution
ARCHITECTURE:
- Detection Layer: Gitleaks (secrets) + Pattern matching (code smells)
- Reporting Layer: Simple mode (gate) / Verbose mode (educational)
- Action Layer: Creates .security-scan-passed marker on clean scan
USAGE:
python security_scan.py <skill-dir> # Quick scan (required for packaging)
python security_scan.py <skill-dir> --verbose # Detailed educational review
"""
from __future__ import annotations
import json
import re
import subprocess
import sys
import shutil
import tempfile
import argparse
import hashlib
from pathlib import Path
from typing import List, Dict, Optional
from datetime import datetime
from dataclasses import dataclass
# ANSI escape sequences used to colorize terminal output.
# RESET must follow any colored text to restore the default style.
RED = "\033[91m"      # errors / critical findings
YELLOW = "\033[93m"   # warnings / high-severity findings
GREEN = "\033[92m"    # success messages
BLUE = "\033[94m"     # informational headings
RESET = "\033[0m"     # restore default terminal attributes
@dataclass
class SecurityIssue:
    """One finding produced by the regex-based pattern scan."""

    # Severity label; the rules in this file use CRITICAL, HIGH or MEDIUM.
    severity: str
    # Rule category, e.g. secrets, paths, emails, code_patterns.
    category: str
    # Path of the file the match was found in (as shown in reports).
    file_path: str
    # 1-based line number of the match.
    line_number: int
    # Human-readable name of the rule that matched.
    pattern_name: str
    # Short description of the problem.
    message: str
    # The text that triggered the rule (callers may truncate it).
    matched_text: str
    # Suggested remediation.
    recommendation: str
# ============================================================================
# DETECTION LAYER - What to scan for
# ============================================================================
def get_pattern_rules() -> List[Dict]:
    """
    Define the regex-based security rules used by the pattern scan.

    Each rule is a dict with the keys ``id``, ``category``, ``name``,
    ``patterns`` (list of regex strings), ``severity``, ``message`` and
    ``recommendation``; a rule may also carry ``exceptions``, a list of
    substrings that suppress a match when found in the matched text.

    NOTE: The patterns below are for DETECTION only, not usage.

    Returns:
        The list of rule dicts, in reporting order.
    """
    return [
        {
            "id": "absolute_user_paths",
            "category": "paths",
            "name": "Absolute User Paths",
            "patterns": [
                r'/[Hh]ome/[a-z_][a-z0-9_-]+/',
                r'/[Uu]sers/[A-Za-z][A-Za-z0-9_-]+/',
                # One or more backslashes per separator, so both the plain
                # form (C:\Users\name\) and the escaped form found inside
                # string literals (C:\\Users\\name\\) are detected.
                r'C:\\+Users\\+[A-Za-z][A-Za-z0-9_-]+\\+',
            ],
            "severity": "HIGH",
            "message": "Absolute path with username found",
            "recommendation": "Use relative paths or Path(__file__).parent",
        },
        {
            "id": "email_addresses",
            "category": "emails",
            "name": "Email Addresses",
            # The TLD class is [A-Za-z]; a '|' inside a character class is a
            # literal pipe, not alternation, and must not be accepted.
            "patterns": [r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'],
            "severity": "MEDIUM",
            "message": "Email address found",
            "recommendation": "Use placeholders like user@example.com",
            "exceptions": ["example.com", "test.com", "localhost", "noreply@anthropic.com"],
        },
        {
            "id": "insecure_http",
            "category": "urls",
            "name": "Insecure HTTP URLs",
            # Negative lookahead exempts local and placeholder hosts.
            "patterns": [r'http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|example\.com)'],
            "severity": "MEDIUM",
            "message": "HTTP (insecure) URL detected",
            "recommendation": "Use HTTPS for external resources",
        },
        {
            "id": "dangerous_code",
            "category": "code_patterns",
            "name": "Dangerous Code Patterns",
            "patterns": [
                r'\bos\.system\s*\(',
                r'subprocess\.[a-z_]+\([^)]*shell\s*=\s*True',
                # Patterns below detect unsafe serialization (for detection only)
                r'import\s+pickle',
                r'pickle\.load',
            ],
            "severity": "HIGH",
            "message": "Potentially dangerous code pattern",
            "recommendation": "Use safe alternatives (subprocess.run with list args, JSON instead of unsafe serialization)",
        },
    ]
def check_gitleaks_installed() -> bool:
    """Report whether a ``gitleaks`` executable can be located on PATH."""
    gitleaks_binary = shutil.which('gitleaks')
    return gitleaks_binary is not None
def print_gitleaks_installation() -> None:
    """Print a notice that gitleaks is missing, with per-platform install commands."""
    # Each entry is emitted with its own print() call, so embedded "\n"
    # characters produce the blank lines between sections.
    notice_lines = (
        f"\n{YELLOW}⚠️ gitleaks not installed{RESET}",
        "\ngitleaks is the industry-standard tool for detecting secrets.",
        "It's used by GitHub, GitLab, and thousands of companies.\n",
        f"{BLUE}Installation:{RESET}",
        " macOS: brew install gitleaks",
        " Linux: wget https://github.com/gitleaks/gitleaks/releases/download/v8.18.2/gitleaks_8.18.2_linux_x64.tar.gz",
        " tar -xzf gitleaks_8.18.2_linux_x64.tar.gz && sudo mv gitleaks /usr/local/bin/",
        " Windows: scoop install gitleaks",
        "\nAfter installation, run this script again.\n",
    )
    for notice in notice_lines:
        print(notice)
def run_gitleaks(skill_path: Path) -> Optional[List[Dict]]:
    """
    Run a gitleaks scan on the skill directory (``--no-git`` filesystem mode).

    Args:
        skill_path: Directory to scan.

    Returns:
        ``[]`` when gitleaks exits with 0 (clean), the parsed list of finding
        dicts when it reports leaks, or ``None`` when the scan could not be
        completed or its result cannot be trusted: timeout, unparsable report,
        a non-zero exit without a non-empty list of findings, or any other
        failure while launching gitleaks. An error message is printed to
        stderr in every ``None`` case.
    """
    try:
        # gitleaks writes its JSON report to a file; a temp file is used rather
        # than /dev/stdout for cross-platform compatibility (Windows has none).
        with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp_file:
            tmp_path = tmp_file.name
        try:
            result = subprocess.run(
                ['gitleaks', 'detect', '--source', str(skill_path),
                 '--report-format', 'json', '--report-path', tmp_path, '--no-git'],
                capture_output=True,
                text=True,
                timeout=60,
            )
            # gitleaks exits with 0 when the scan is clean.
            if result.returncode == 0:
                return []
            # Non-zero exit: findings (if any) are in the report file.
            with open(tmp_path, 'r', encoding='utf-8') as f:
                findings = json.load(f)
        finally:
            # delete=False above means the report must be removed explicitly.
            Path(tmp_path).unlink(missing_ok=True)
    except subprocess.TimeoutExpired:
        print(f"{RED}❌ Error: gitleaks scan timed out{RESET}", file=sys.stderr)
        return None
    except json.JSONDecodeError:
        print(f"{RED}❌ Error: Could not parse gitleaks output{RESET}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"{RED}❌ Error running gitleaks: {e}{RESET}", file=sys.stderr)
        return None

    # Fail closed: a non-zero exit must come with a non-empty list of findings.
    # Returning an empty or malformed report here would be indistinguishable
    # from a clean scan and would let the caller record a pass.
    if not isinstance(findings, list) or not findings:
        detail = (result.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        print(
            f"{RED}❌ Error: gitleaks exited with code {result.returncode} "
            f"but reported no findings{suffix}{RESET}",
            file=sys.stderr,
        )
        return None
    return findings
def scan_file_patterns(file_path: Path, patterns: List[Dict]) -> List[SecurityIssue]:
    """
    Match every rule's regexes against each line of a single file.

    Args:
        file_path: File to read as UTF-8 text.
        patterns: Rule dicts as returned by ``get_pattern_rules()``.

    Returns:
        One SecurityIssue per match, ordered by line, then rule, then regex.
        Files that cannot be read or decoded yield an empty list.
    """
    found: List[SecurityIssue] = []
    try:
        text = file_path.read_text(encoding='utf-8')
        for line_no, line_text in enumerate(text.split('\n'), start=1):
            for rule in patterns:
                for expression in rule["patterns"]:
                    # All rules are matched case-insensitively.
                    for hit in re.finditer(expression, line_text, re.IGNORECASE):
                        snippet = hit.group(0)
                        # A rule may whitelist substrings (e.g. example.com).
                        if "exceptions" in rule and any(
                            allowed in snippet for allowed in rule["exceptions"]
                        ):
                            continue
                        # Path is reported relative to the file's grandparent.
                        display_path = str(file_path.relative_to(file_path.parent.parent))
                        found.append(SecurityIssue(
                            severity=rule["severity"],
                            category=rule["category"],
                            file_path=display_path,
                            line_number=line_no,
                            pattern_name=rule["name"],
                            message=rule["message"],
                            matched_text=snippet[:80],  # cap very long matches
                            recommendation=rule["recommendation"],
                        ))
    except (UnicodeDecodeError, IOError):
        # Best effort: unreadable or non-UTF-8 files are skipped silently.
        pass
    return found
def scan_skill_patterns(skill_path: Path) -> tuple[List[SecurityIssue], Dict[str, int]]:
    """
    Scan all text/code files in the skill directory using the regex rules.

    Hidden files/directories, ``__pycache__`` and ``node_modules`` are skipped.
    These filters are evaluated on the path RELATIVE to ``skill_path``, so a
    skill that lives under a hidden ancestor (e.g. ``~/.claude/skills/x``) is
    still scanned instead of having every file silently excluded.

    Args:
        skill_path: Root directory of the skill.

    Returns:
        ``(issues, stats)`` where ``issues`` is the list of findings (with
        ``file_path`` expressed relative to ``skill_path``) and ``stats`` maps
        each severity label to its number of findings.
    """
    patterns = get_pattern_rules()
    all_issues = []
    stats = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0}
    code_extensions = {'.py', '.js', '.ts', '.jsx', '.tsx', '.sh', '.bash',
                       '.md', '.yml', '.yaml', '.json', '.toml'}
    skipped_dirs = {'__pycache__', 'node_modules'}

    # Sorted so the report order is stable from run to run.
    for file_path in sorted(skill_path.rglob('*')):
        if not file_path.is_file() or file_path.suffix not in code_extensions:
            continue
        rel_path = file_path.relative_to(skill_path)
        if any(part.startswith('.') for part in rel_path.parts):
            continue
        if skipped_dirs.intersection(rel_path.parts):
            continue
        for issue in scan_file_patterns(file_path, patterns):
            # Report every file relative to the skill root, whatever its depth.
            issue.file_path = str(rel_path)
            all_issues.append(issue)
            stats[issue.severity] += 1
    return all_issues, stats
def categorize_gitleaks_severity(rule_id: str) -> str:
    """
    Map a gitleaks rule id to a severity label.

    Returns "CRITICAL" when the lower-cased rule id contains any of the
    credential-related keywords below, otherwise "HIGH".
    """
    lowered_id = rule_id.lower()
    for keyword in ('api', 'key', 'token', 'password', 'secret', 'credential'):
        if keyword in lowered_id:
            return "CRITICAL"
    return "HIGH"
# ============================================================================
# REPORTING LAYER - How to present findings
# ============================================================================
def print_simple_report(gitleaks_findings: List[Dict], skill_name: str) -> int:
    """
    Print the short pass/fail report used by the packaging workflow.

    Args:
        gitleaks_findings: Finding dicts from gitleaks (may be empty).
        skill_name: Name of the scanned skill (not used in the output).

    Returns:
        Exit code: 0 when there are no findings, 2 when at least one finding
        is CRITICAL, otherwise 1.
    """
    if not gitleaks_findings:
        print(f"{GREEN}✅ Security scan passed: No secrets detected{RESET}")
        return 0

    total = len(gitleaks_findings)
    critical_findings = [
        entry for entry in gitleaks_findings
        if categorize_gitleaks_severity(entry.get('RuleID', '')) == 'CRITICAL'
    ]
    critical_count = len(critical_findings)

    print(f"\n{RED}❌ Security scan FAILED: {len(gitleaks_findings)} issue(s) found{RESET}")
    print(f" {RED}Critical: {critical_count}{RESET}")
    print(f" {YELLOW}High: {len(gitleaks_findings) - critical_count}{RESET}\n")

    print(f"{RED}BLOCKING ISSUES:{RESET}")
    shown_limit = 5  # only the first few findings are listed
    for entry in gitleaks_findings[:shown_limit]:
        file_path = entry.get('File', 'unknown')
        line = entry.get('StartLine', '?')
        rule_id = entry.get('RuleID', 'unknown')
        print(f"{file_path}:{line} - {rule_id}")
    if total > shown_limit:
        print(f" ... and {len(gitleaks_findings) - 5} more\n")

    print(f"{RED}REQUIRED ACTIONS:{RESET}")
    print(f" 1. Remove all hardcoded secrets from code")
    print(f" 2. Use environment variables: os.environ.get('KEY_NAME')")
    print(f" 3. Re-run scan after fixes\n")

    if critical_count > 0:
        return 2
    return 1
def print_verbose_report(gitleaks_findings: List[Dict], pattern_issues: List[SecurityIssue],
                         pattern_stats: Dict[str, int], skill_name: str) -> int:
    """
    Print the detailed educational report.

    Args:
        gitleaks_findings: Finding dicts from gitleaks (may be empty).
        pattern_issues: Findings from the regex-based pattern scan.
        pattern_stats: Count of pattern findings per severity; the "HIGH" and
            "MEDIUM" keys are read.
        skill_name: Name shown in the report header.

    Returns:
        Exit code: 2 when any gitleaks finding is CRITICAL; 1 when there are
        other gitleaks findings or HIGH pattern findings; otherwise 0
        (MEDIUM pattern findings alone do not change the exit code).
    """
    print(f"\n{'=' * 80}")
    print(f"🔒 Security Review Report: {skill_name}")
    print(f"{'=' * 80}\n")

    # Classify each gitleaks finding once; reused by the listing and the summary.
    finding_severities = [
        categorize_gitleaks_severity(f.get('RuleID', '')) for f in gitleaks_findings
    ]
    critical_count = finding_severities.count("CRITICAL")

    # Section 1: Gitleaks findings (secrets)
    if gitleaks_findings:
        print(f"📊 Secret Detection (via gitleaks):")
        print(f" {RED}🔴 CRITICAL: {critical_count}{RESET} (API keys, passwords, tokens)")
        print(f" {YELLOW}🟠 HIGH: {len(gitleaks_findings) - critical_count}{RESET} (Other secrets)")
        print(f" Total: {len(gitleaks_findings)}\n")
        for finding, severity in zip(gitleaks_findings, finding_severities):
            color = RED if severity == "CRITICAL" else YELLOW
            file_path = finding.get('File', 'unknown')
            line = finding.get('StartLine', '?')
            rule_id = finding.get('RuleID', 'unknown')
            description = finding.get('Description', 'No description')
            print(f"{color}[{severity}]{RESET} {file_path}:{line}")
            print(f" Rule: {rule_id}")
            print(f" {description}\n")
    else:
        print(f"{GREEN}✅ Secret Detection: Clean{RESET}\n")

    # Section 2: Pattern-based findings
    if pattern_issues:
        print(f"📊 Code Quality & Security Patterns:")
        print(f" {YELLOW}🟠 HIGH: {pattern_stats['HIGH']}{RESET}")
        print(f" 🟡 MEDIUM: {pattern_stats['MEDIUM']}")
        print(f" Total: {sum(pattern_stats.values())}\n")
        for severity in ["HIGH", "MEDIUM"]:
            severity_issues = [i for i in pattern_issues if i.severity == severity]
            if severity_issues:
                color = YELLOW if severity == "HIGH" else RESET
                print(f"{color}{severity} Issues ({len(severity_issues)}):{RESET}")
                # Divider under the heading (an empty string times 80 is still
                # empty, so a real character is required here).
                print("-" * 80)
                for issue in severity_issues[:10]:  # Limit to 10 per severity
                    print(f"\n{color}[{issue.severity}]{RESET} {issue.file_path}:{issue.line_number}")
                    print(f" Issue: {issue.pattern_name}")
                    print(f" {issue.message}")
                    print(f" Matched: {issue.matched_text}")
                    print(f" Fix: {issue.recommendation}")
                if len(severity_issues) > 10:
                    print(f"\n ... and {len(severity_issues) - 10} more {severity} issues")
                print()
    else:
        print(f"{GREEN}✅ Code Patterns: Clean{RESET}\n")

    # Summary
    print(f"{'=' * 80}")
    has_critical = critical_count > 0
    # Any gitleaks finding counts as at least HIGH.
    has_high = len(gitleaks_findings) > 0 or pattern_stats['HIGH'] > 0
    if has_critical:
        print(f"{RED}🔴 CRITICAL issues MUST be fixed before distribution{RESET}")
        exit_code = 2
    elif has_high:
        print(f"{YELLOW}🟠 HIGH issues SHOULD be fixed before distribution{RESET}")
        exit_code = 1
    else:
        print(f"{GREEN}✅ No critical security issues found!{RESET}")
        exit_code = 0
    print(f"{'=' * 80}\n")
    return exit_code
# ============================================================================
# ACTION LAYER - What to do with results
# ============================================================================
def calculate_skill_hash(skill_path: Path) -> str:
    """
    Calculate a deterministic hash of all security-relevant files in a skill.

    Implementation:
    - Considers the same file types as the pattern scanner (code_extensions)
    - Ignores .security-scan-passed itself, hidden files/directories,
      ``__pycache__`` and ``node_modules``; these filters are evaluated on the
      path RELATIVE to ``skill_path`` so that a hidden ancestor directory
      (e.g. ``~/.claude/skills/x``) does not exclude every file and collapse
      the result to the hash of empty input
    - Sorts files deterministically by path
    - Hashes, for each file, its relative path and content, each followed by
      a NUL separator

    NOTE(review): if another script recomputes this hash on its own instead of
    calling this function, it must apply the same filters — verify.

    Args:
        skill_path: Root directory of the skill.

    Returns:
        SHA256 hex digest of the combined paths and contents.
    """
    code_extensions = {'.py', '.js', '.ts', '.jsx', '.tsx', '.sh', '.bash',
                       '.md', '.yml', '.yaml', '.json', '.toml'}
    ignored_dirs = {'__pycache__', 'node_modules'}

    # Collect all relevant files
    files_to_hash = []
    for file_path in skill_path.rglob('*'):
        if not file_path.is_file() or file_path.suffix not in code_extensions:
            continue
        if file_path.name == '.security-scan-passed':
            continue
        rel_parts = file_path.relative_to(skill_path).parts
        if any(part.startswith('.') for part in rel_parts):
            continue
        if ignored_dirs.intersection(rel_parts):
            continue
        files_to_hash.append(file_path)

    # Sort for deterministic order
    files_to_hash.sort()

    # Hash each file (path + content)
    hasher = hashlib.sha256()
    for file_path in files_to_hash:
        try:
            # Include relative path in hash for file rename detection
            rel_path = file_path.relative_to(skill_path)
            hasher.update(str(rel_path).encode('utf-8'))
            hasher.update(b'\0')  # Null separator
            # Include file content
            hasher.update(file_path.read_bytes())
            hasher.update(b'\0')  # Null separator
        except (IOError, UnicodeDecodeError):
            # Best effort: a file that cannot be read contributes only its
            # path (already fed to the hasher above), not its content.
            pass
    return hasher.hexdigest()
def create_security_marker(skill_path: Path) -> None:
    """
    Write the ``.security-scan-passed`` marker into the skill directory.

    The marker records the scan time, the tool description and the content
    hash from ``calculate_skill_hash`` so the scanned content can be checked
    later. A confirmation line is printed once the file is written.
    """
    marker_file = skill_path / ".security-scan-passed"
    content_hash = calculate_skill_hash(skill_path)
    scanned_at = datetime.now().isoformat()
    marker_text = (
        f"Security scan passed\n"
        f"Scanned at: {scanned_at}\n"
        f"Tool: gitleaks + pattern-based validation\n"
        f"Content hash: {content_hash}\n"
    )
    marker_file.write_text(marker_text)
    print(f"{GREEN}✓ Security marker created: {marker_file.name}{RESET}")
# ============================================================================
# MAIN ORCHESTRATION
# ============================================================================
def main():
    """
    Command-line entry point: scan a skill directory and exit with a status code.

    Runs gitleaks (always) and, with --verbose, the regex pattern scan, prints
    the matching report, and terminates via ``sys.exit`` with the report's
    exit code. On a clean result (exit code 0) the pass marker is written;
    when gitleaks reports findings, any pass marker left over from an earlier
    run is removed so a stale pass cannot outlive a failing secret scan.
    """
    parser = argparse.ArgumentParser(
        description="Security scanner for Claude Code skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python security_scan.py ../my-skill # Quick scan (for packaging)
python security_scan.py ../my-skill --verbose # Detailed educational review
Exit codes:
0 - Clean (no issues)
1 - High severity issues found
2 - Critical issues found (MUST fix)
3 - gitleaks not installed
4 - Scan error
"""
    )
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Show detailed educational review with pattern-based checks")
    args = parser.parse_args()

    # Validate skill directory. Errors go to stderr, like the other error
    # messages in this script.
    # NOTE(review): these exits use code 1, which the help text documents as
    # "High severity issues found"; left unchanged for existing callers.
    skill_path = Path(args.skill_dir).resolve()
    if not skill_path.exists():
        print(f"{RED}❌ Error: Skill directory not found: {skill_path}{RESET}", file=sys.stderr)
        sys.exit(1)
    if not skill_path.is_dir():
        print(f"{RED}❌ Error: Path is not a directory: {skill_path}{RESET}", file=sys.stderr)
        sys.exit(1)

    # Check gitleaks availability
    if not check_gitleaks_installed():
        print_gitleaks_installation()
        sys.exit(3)

    # Run gitleaks scan (always)
    print(f"🔍 Scanning: {skill_path.name}")
    print(f" Tool: gitleaks (industry standard)")
    print(f" Mode: {'verbose (educational)' if args.verbose else 'simple (packaging gate)'}")
    gitleaks_findings = run_gitleaks(skill_path)
    if gitleaks_findings is None:
        sys.exit(4)

    # Run pattern-based scan (only in verbose mode)
    pattern_issues = []
    pattern_stats = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0}
    if args.verbose:
        print(f" Running pattern-based checks...")
        pattern_issues, pattern_stats = scan_skill_patterns(skill_path)

    # Generate report
    if args.verbose:
        exit_code = print_verbose_report(gitleaks_findings, pattern_issues,
                                         pattern_stats, skill_path.name)
    else:
        exit_code = print_simple_report(gitleaks_findings, skill_path.name)

    if exit_code == 0:
        # Create marker file on clean scan
        create_security_marker(skill_path)
    elif gitleaks_findings:
        # Secrets were detected: invalidate any marker from a previous pass.
        (skill_path / ".security-scan-passed").unlink(missing_ok=True)
    sys.exit(exit_code)


if __name__ == "__main__":
    main()