claude-skills-reference/engineering/skill-tester/scripts/security_scorer.py

#!/usr/bin/env python3
"""
Security Scorer - Security dimension scoring module

This module provides comprehensive security assessment for Python scripts,
evaluating sensitive data exposure, safe file operations, command injection
prevention, and input validation quality.

Author: Claude Skills Engineering Team
Version: 2.0.0
"""

import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any

# =============================================================================
# CONSTANTS - Scoring thresholds and weights
# =============================================================================

# Maximum score per component (25 points each, 4 components = 100 total).
# Upper bound applied by SecurityScorer._clamp_score.
MAX_COMPONENT_SCORE: int = 25

# Minimum score floor (never go below 0); lower bound applied by
# SecurityScorer._clamp_score.
MIN_SCORE: int = 0

# Security score thresholds for tier recommendations.
# NOTE(review): neither threshold is referenced in the visible part of this
# module; presumably they are consumed by callers that interpret the overall
# score - confirm they assume the same score scale.
SECURITY_SCORE_POWERFUL_TIER: int = 70  # Required for POWERFUL tier
SECURITY_SCORE_STANDARD_TIER: int = 50  # Required for STANDARD tier

# Base scores: the starting value of each component before penalties and
# bonuses are applied. A component that starts below MAX_COMPONENT_SCORE can
# only earn the remaining points through safe-pattern bonuses.
BASE_SCORE_SENSITIVE_DATA: int = 25  # Start with full points
BASE_SCORE_FILE_OPS: int = 15  # Base score for file operations
BASE_SCORE_COMMAND_INJECTION: int = 25  # Start with full points
BASE_SCORE_INPUT_VALIDATION: int = 10  # Base score for input validation

# Penalty amounts (negative values). A penalty is added to the component score
# once per matching pattern, regardless of how many times the pattern occurs.
CRITICAL_VULNERABILITY_PENALTY: int = -25  # Critical issues (hardcoded passwords, etc.)
HIGH_SEVERITY_PENALTY: int = -10  # High severity issues
MEDIUM_SEVERITY_PENALTY: int = -5  # Medium severity issues
LOW_SEVERITY_PENALTY: int = -2  # Low severity issues

# Bonus amounts (positive values), added once per safe pattern that is found.
SAFE_PATTERN_BONUS: int = 2  # Bonus for using safe patterns
GOOD_PRACTICE_BONUS: int = 3  # Bonus for good security practices

# =============================================================================
# PRE-COMPILED REGEX PATTERNS - Sensitive Data Detection
# =============================================================================

# Hardcoded credentials patterns (CRITICAL severity).
# Each one matches `<name> = "<literal>"` where the quoted literal has a minimum
# length, so an assignment from a function call or a variable is not flagged.
# The names are matched as substrings: `secret\s*=` also matches
# `client_secret = "..."` and `aws_secret = "..."`.
PATTERN_HARDCODED_PASSWORD = re.compile(
    r'password\s*=\s*["\'][^"\']{4,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_API_KEY = re.compile(
    r'api_key\s*=\s*["\'][^"\']{8,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_SECRET = re.compile(
    r'secret\s*=\s*["\'][^"\']{4,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_TOKEN = re.compile(
    r'token\s*=\s*["\'][^"\']{8,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_PRIVATE_KEY = re.compile(
    r'private_key\s*=\s*["\'][^"\']{20,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_AWS_KEY = re.compile(
    r'aws_access_key\s*=\s*["\'][^"\']{16,}["\']',
    re.IGNORECASE
)
PATTERN_HARDCODED_AWS_SECRET = re.compile(
    r'aws_secret\s*=\s*["\'][^"\']{20,}["\']',
    re.IGNORECASE
)

# Multi-line string patterns (CRITICAL severity).
# Matches a triple-quoted string that contains no other quote characters and
# mentions one of the sensitive keywords.
# NOTE(review): this also matches ordinary docstrings that merely mention
# "password", "token", etc.; consider requiring an assignment after the keyword.
PATTERN_MULTILINE_STRING = re.compile(
    r'["\']{3}[^"\']*?(?:password|api_key|secret|token|private_key)[^"\']*?["\']{3}',
    re.IGNORECASE | re.DOTALL
)

# F-string patterns (HIGH severity): an f-string opener followed on the same
# line by `<sensitive name> =`.
PATTERN_FSTRING_SENSITIVE = re.compile(
    r'f["\'].*?(?:password|api_key|secret|token)\s*=',
    re.IGNORECASE
)

# Base64 encoded secrets (MEDIUM severity): a base64 call whose argument list
# mentions a sensitive name.
PATTERN_BASE64_SECRET = re.compile(
    r'(?:base64|b64encode|b64decode)\s*\([^)]*(?:password|api_key|secret|token)',
    re.IGNORECASE
)

# JWT tokens (HIGH severity): three dot-separated base64url segments where the
# first two start with "eyJ" (the base64 encoding of '{"').
PATTERN_JWT_TOKEN = re.compile(
    r'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*'
)

# Connection strings (HIGH severity): a connection-string assignment whose
# quoted value contains a password keyword.
PATTERN_CONNECTION_STRING = re.compile(
    r'(?:connection_string|conn_string|database_url)\s*=\s*["\'][^"\']*(?:password|pwd|passwd)[^"\']*["\']',
    re.IGNORECASE
)

# Safe credential patterns (environment variables are OK).
# Covers the real access forms: os.getenv("X"), os.environ.get("X") and
# os.environ["X"]. The previous pattern only recognised a call on os.environ
# itself (`os.environ("X")`), which is not valid usage of the mapping, so the
# two os.environ forms never earned the bonus.
PATTERN_SAFE_ENV_VAR = re.compile(
    r'os\.(?:getenv|environ(?:\.get)?)\s*[(\[]\s*["\'][^"\']+["\']',
    re.IGNORECASE
)

# =============================================================================
# PRE-COMPILED REGEX PATTERNS - Path Traversal Detection
# =============================================================================

# Basic path traversal patterns: a literal "../" or "..\".
PATTERN_PATH_TRAVERSAL_BASIC = re.compile(r'\.\.\/')
PATTERN_PATH_TRAVERSAL_WINDOWS = re.compile(r'\.\.\\')

# URL encoded path traversal (SecurityScorer applies the HIGH severity penalty):
# "%2e%2e%2f", its double-encoded form, and the mixed form "..%2f".
PATTERN_PATH_TRAVERSAL_URL_ENCODED = re.compile(
    r'%2e%2e%2f|%252e%252e%252f|\.\.%2f',
    re.IGNORECASE
)

# Unicode encoded path traversal (SecurityScorer applies the HIGH severity
# penalty): the escape sequences "\u002e\u002e" (full stop) and
# "\uff0e\uff0e" (fullwidth full stop) written out as text in the source.
# A former third alternative, `\u002e\u002e\/`, was decoded by the regex engine
# to a plain "../", so every basic traversal was also reported - and
# penalised - as a Unicode-encoded one. It has been removed.
PATTERN_PATH_TRAVERSAL_UNICODE = re.compile(
    r'\\u002e\\u002e|\\uff0e\\uff0e',
    re.IGNORECASE
)

# Null byte injection (HIGH severity): "%00", the text "\x00", or an actual
# NUL character in the script.
PATTERN_NULL_BYTE = re.compile(r'%00|\\x00|\0')

# Risky file operation patterns
# open(...) whose argument list contains "+" (string concatenation).
PATTERN_PATH_CONCAT = re.compile(
    r'open\s*\(\s*[^)]*\+',
    re.IGNORECASE
)
# A join call mentioning "input", or os.path.join mentioning "request".
PATTERN_USER_INPUT_PATH = re.compile(
    r'\.join\s*\(\s*[^)]*input|os\.path\.join\s*\([^)]*request',
    re.IGNORECASE
)

# Safe file operation patterns
PATTERN_SAFE_BASENAME = re.compile(r'os\.path\.basename', re.IGNORECASE)
PATTERN_SAFE_PATHLIB = re.compile(r'pathlib\.Path\s*\(', re.IGNORECASE)
PATTERN_PATH_VALIDATION = re.compile(r'validate.*path', re.IGNORECASE)
PATTERN_PATH_RESOLVE = re.compile(r'\.resolve\s*\(', re.IGNORECASE)

# =============================================================================
# PRE-COMPILED REGEX PATTERNS - Command Injection Detection
# =============================================================================

# Dangerous patterns. SecurityScorer applies the CRITICAL penalty to os.system,
# eval and exec, and the HIGH penalty to os.popen.
PATTERN_OS_SYSTEM = re.compile(r'os\.system\s*\(')
PATTERN_OS_POPEN = re.compile(r'os\.popen\s*\(')
# The lookbehind restricts eval/exec to bare calls of the builtins. Without it,
# safe or unrelated calls such as ast.literal_eval(...), model.eval() and
# asyncio.create_subprocess_exec(...) were reported as critical code injection.
# Trade-off: attribute-qualified calls like builtins.eval(...) are not matched.
PATTERN_EVAL = re.compile(r'(?<![\w.])eval\s*\(')
PATTERN_EXEC = re.compile(r'(?<![\w.])exec\s*\(')

# Subprocess with shell=True (HIGH severity).
# `[^)]*` stops at the first ")" so a nested call before shell=True is missed.
PATTERN_SUBPROCESS_SHELL_TRUE = re.compile(
    r'subprocess\.(?:call|run|Popen|check_output)\s*\([^)]*shell\s*=\s*True',
    re.IGNORECASE
)

# Asyncio subprocess shell (HIGH severity)
PATTERN_ASYNCIO_SHELL = re.compile(
    r'asyncio\.create_subprocess_shell\s*\(',
    re.IGNORECASE
)

# Pexpect spawn (SecurityScorer applies the MEDIUM severity penalty)
PATTERN_PEXPECT_SPAWN = re.compile(r'pexpect\.spawn\s*\(', re.IGNORECASE)

# Safe subprocess patterns
PATTERN_SAFE_SUBPROCESS = re.compile(
    r'subprocess\.(?:run|call|Popen)\s*\([^)]*shell\s*=\s*False',
    re.IGNORECASE
)
PATTERN_SHLEX_QUOTE = re.compile(r'shlex\.quote', re.IGNORECASE)
PATTERN_SHLEX_SPLIT = re.compile(r'shlex\.split', re.IGNORECASE)

# =============================================================================
# PRE-COMPILED REGEX PATTERNS - Input Validation Detection
# =============================================================================

# Good validation patterns. Each is a plain text search over the whole script,
# so a match inside a comment or string literal counts the same as real code.
PATTERN_ARGPARSE = re.compile(r'argparse')  # any mention of argparse
# A `try:` followed later by `except <Name>Error`; a bare `except:`,
# `except Exception` or a parenthesised tuple of exceptions does not match.
PATTERN_TRY_EXCEPT = re.compile(r'try\s*:[\s\S]*?except\s+\w*Error')
PATTERN_INPUT_CHECK = re.compile(r'if\s+not\s+\w+\s*:')  # `if not name:` guard
PATTERN_ISINSTANCE = re.compile(r'isinstance\s*\(')  # runtime type check
PATTERN_ISDIGIT = re.compile(r'\.isdigit\s*\(\)')  # `.isdigit()` call
PATTERN_REGEX_VALIDATION = re.compile(r're\.(?:match|search|fullmatch)\s*\(')  # re-based check
PATTERN_VALIDATOR_CLASS = re.compile(r'Validator', re.IGNORECASE)  # "validator" anywhere, any case
PATTERN_VALIDATE_FUNC = re.compile(r'validate', re.IGNORECASE)  # "validate" anywhere, any case
PATTERN_SANITIZE_FUNC = re.compile(r'sanitize', re.IGNORECASE)  # "sanitize" anywhere, any case


class SecurityScorer:
    """
    Security dimension scoring engine.

    This class evaluates Python scripts for security vulnerabilities and best practices
    across four components, each scored from MIN_SCORE to MAX_COMPONENT_SCORE:
    1. Sensitive Data Exposure Prevention (25% of security score)
    2. Safe File Operations (25% of security score)
    3. Command Injection Prevention (25% of security score)
    4. Input Validation Quality (25% of security score)

    The overall score is the sum of the four component scores (0-100), capped at
    CRITICAL_SCORE_CAP when a critical vulnerability is detected.

    Attributes:
        scripts: List of Python script paths to evaluate
        verbose: Whether to output verbose logging
    """

    # Ceiling applied to the overall score when a critical vulnerability exists.
    CRITICAL_SCORE_CAP: int = 30

    # Average input-validation score below which an improvement is suggested.
    INPUT_VALIDATION_SUGGESTION_THRESHOLD: int = 15

    def __init__(self, scripts: List[Path], verbose: bool = False):
        """
        Initialize the SecurityScorer.

        Args:
            scripts: List of Path objects pointing to Python scripts
            verbose: Enable verbose output for debugging
        """
        self.scripts = scripts
        self.verbose = verbose
        self._findings: List[str] = []

    def _log_verbose(self, message: str) -> None:
        """Print *message* with a [SECURITY] prefix when verbose mode is enabled."""
        if self.verbose:
            print(f"[SECURITY] {message}")

    def _get_script_content(self, script_path: Path) -> Optional[str]:
        """
        Safely read script content.

        Args:
            script_path: Path to the Python script

        Returns:
            Script content as string, or None if the file cannot be read or is
            not valid UTF-8 (the failure is reported in verbose mode)
        """
        try:
            return script_path.read_text(encoding='utf-8')
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError covers
            # UnicodeDecodeError and invalid paths (e.g. embedded NUL).
            self._log_verbose(f"Failed to read {script_path}: {e}")
            return None

    def _clamp_score(self, score: int) -> int:
        """
        Clamp score to valid range [MIN_SCORE, MAX_COMPONENT_SCORE].

        Args:
            score: Raw score value

        Returns:
            Score clamped to valid range
        """
        return max(MIN_SCORE, min(score, MAX_COMPONENT_SCORE))

    def _score_patterns(
        self,
        content: str,
        script_name: str,
        dangerous_patterns: List[Tuple[re.Pattern, str, int]],
        safe_patterns: List[Tuple[re.Pattern, str, int]],
        base_score: int
    ) -> Tuple[int, List[str]]:
        """
        Generic pattern scoring method.

        Starts from ``base_score``, adds the (negative) penalty of every dangerous
        pattern that matches at least once, adds the bonus of every safe pattern
        that matches, and clamps the result. Penalties and bonuses are applied
        once per pattern, not once per occurrence.

        Args:
            content: Script content to analyze
            script_name: Name of the script (for findings)
            dangerous_patterns: List of (pattern, description, penalty) tuples
            safe_patterns: List of (pattern, description, bonus) tuples
            base_score: Starting score before adjustments

        Returns:
            Tuple of (final_score, findings_list)
        """
        score = base_score
        findings = []

        # Check for dangerous patterns
        for pattern, description, penalty in dangerous_patterns:
            matches = pattern.findall(content)
            if matches:
                score += penalty  # Penalty is negative
                findings.append(f"{script_name}: {description} ({len(matches)} occurrence(s))")

        # Check for safe patterns
        for pattern, description, bonus in safe_patterns:
            if pattern.search(content):
                score += bonus
                self._log_verbose(f"Safe pattern found in {script_name}: {description}")

        return self._clamp_score(score), findings

    def _score_component(
        self,
        dangerous_patterns: List[Tuple[re.Pattern, str, int]],
        safe_patterns: List[Tuple[re.Pattern, str, int]],
        base_score: int
    ) -> Tuple[float, List[str]]:
        """
        Score one pattern-based component across all scripts.

        Args:
            dangerous_patterns: List of (pattern, description, penalty) tuples
            safe_patterns: List of (pattern, description, bonus) tuples
            base_score: Starting score for every script

        Returns:
            Tuple of (average_score, findings_list). With no scripts configured
            the component gets full marks; scripts that cannot be read are
            skipped, and the average is 0.0 when none could be read.
        """
        if not self.scripts:
            return float(MAX_COMPONENT_SCORE), []

        scores = []
        all_findings = []

        for script_path in self.scripts:
            content = self._get_script_content(script_path)
            if content is None:
                continue

            score, findings = self._score_patterns(
                content=content,
                script_name=script_path.name,
                dangerous_patterns=dangerous_patterns,
                safe_patterns=safe_patterns,
                base_score=base_score
            )
            scores.append(score)
            all_findings.extend(findings)

        avg_score = sum(scores) / len(scores) if scores else 0.0
        return avg_score, all_findings

    def score_sensitive_data_exposure(self) -> Tuple[float, List[str]]:
        """
        Score sensitive data exposure prevention.

        Evaluates scripts for:
        - Hardcoded passwords, API keys, secrets, tokens, private keys
        - Multi-line string credentials
        - F-string sensitive data
        - Base64 encoded secrets
        - JWT tokens
        - Connection strings with credentials

        Returns:
            Tuple of (average_score, findings_list)
        """
        # Dangerous patterns with severity-based penalties
        dangerous_patterns = [
            (PATTERN_HARDCODED_PASSWORD, 'hardcoded password', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_API_KEY, 'hardcoded API key', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_SECRET, 'hardcoded secret', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_TOKEN, 'hardcoded token', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_PRIVATE_KEY, 'hardcoded private key', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_AWS_KEY, 'hardcoded AWS key', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_HARDCODED_AWS_SECRET, 'hardcoded AWS secret', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_MULTILINE_STRING, 'multi-line string credential', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_FSTRING_SENSITIVE, 'f-string sensitive data', HIGH_SEVERITY_PENALTY),
            (PATTERN_BASE64_SECRET, 'base64 encoded secret', MEDIUM_SEVERITY_PENALTY),
            (PATTERN_JWT_TOKEN, 'JWT token in code', HIGH_SEVERITY_PENALTY),
            (PATTERN_CONNECTION_STRING, 'connection string with credentials', HIGH_SEVERITY_PENALTY),
        ]

        # Safe patterns get bonus points
        safe_patterns = [
            (PATTERN_SAFE_ENV_VAR, 'safe environment variable usage', SAFE_PATTERN_BONUS),
        ]

        return self._score_component(
            dangerous_patterns, safe_patterns, BASE_SCORE_SENSITIVE_DATA
        )

    def score_safe_file_operations(self) -> Tuple[float, List[str]]:
        """
        Score safe file operations.

        Evaluates scripts for:
        - Path traversal vulnerabilities (basic, URL-encoded, Unicode, null bytes)
        - Unsafe path construction
        - Safe patterns (pathlib, basename, validation)

        Returns:
            Tuple of (average_score, findings_list)
        """
        # Dangerous patterns with severity-based penalties
        dangerous_patterns = [
            (PATTERN_PATH_TRAVERSAL_BASIC, 'basic path traversal', HIGH_SEVERITY_PENALTY),
            (PATTERN_PATH_TRAVERSAL_WINDOWS, 'Windows-style path traversal', HIGH_SEVERITY_PENALTY),
            (PATTERN_PATH_TRAVERSAL_URL_ENCODED, 'URL-encoded path traversal', HIGH_SEVERITY_PENALTY),
            (PATTERN_PATH_TRAVERSAL_UNICODE, 'Unicode-encoded path traversal', HIGH_SEVERITY_PENALTY),
            (PATTERN_NULL_BYTE, 'null byte injection', HIGH_SEVERITY_PENALTY),
            (PATTERN_PATH_CONCAT, 'potential path injection via concatenation', MEDIUM_SEVERITY_PENALTY),
            (PATTERN_USER_INPUT_PATH, 'user input in path construction', MEDIUM_SEVERITY_PENALTY),
        ]

        # Safe patterns get bonus points
        safe_patterns = [
            (PATTERN_SAFE_BASENAME, 'uses basename for safety', SAFE_PATTERN_BONUS),
            (PATTERN_SAFE_PATHLIB, 'uses pathlib', SAFE_PATTERN_BONUS),
            (PATTERN_PATH_VALIDATION, 'path validation', SAFE_PATTERN_BONUS),
            (PATTERN_PATH_RESOLVE, 'path resolution', SAFE_PATTERN_BONUS),
        ]

        return self._score_component(
            dangerous_patterns, safe_patterns, BASE_SCORE_FILE_OPS
        )

    def score_command_injection_prevention(self) -> Tuple[float, List[str]]:
        """
        Score command injection prevention.

        Evaluates scripts for:
        - os.system(), os.popen() usage
        - subprocess with shell=True
        - eval(), exec() usage
        - asyncio.create_subprocess_shell()
        - pexpect.spawn()
        - Safe patterns (shlex.quote, shell=False)

        Returns:
            Tuple of (average_score, findings_list)
        """
        # Dangerous patterns with severity-based penalties
        dangerous_patterns = [
            (PATTERN_OS_SYSTEM, 'os.system usage - potential command injection', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_OS_POPEN, 'os.popen usage', HIGH_SEVERITY_PENALTY),
            (PATTERN_EVAL, 'eval usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_EXEC, 'exec usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY),
            (PATTERN_SUBPROCESS_SHELL_TRUE, 'subprocess with shell=True', HIGH_SEVERITY_PENALTY),
            (PATTERN_ASYNCIO_SHELL, 'asyncio.create_subprocess_shell()', HIGH_SEVERITY_PENALTY),
            (PATTERN_PEXPECT_SPAWN, 'pexpect.spawn()', MEDIUM_SEVERITY_PENALTY),
        ]

        # Safe patterns get bonus points
        safe_patterns = [
            (PATTERN_SAFE_SUBPROCESS, 'safe subprocess usage (shell=False)', GOOD_PRACTICE_BONUS),
            (PATTERN_SHLEX_QUOTE, 'shell escaping with shlex.quote', GOOD_PRACTICE_BONUS),
            (PATTERN_SHLEX_SPLIT, 'safe argument splitting with shlex.split', GOOD_PRACTICE_BONUS),
        ]

        return self._score_component(
            dangerous_patterns, safe_patterns, BASE_SCORE_COMMAND_INJECTION
        )

    def score_input_validation(self) -> Tuple[float, List[str]]:
        """
        Score input validation quality.

        Evaluates scripts for:
        - argparse usage for CLI validation
        - Error handling patterns
        - Type checking (isinstance)
        - Regex validation
        - Validation/sanitization functions

        Returns:
            Tuple of (average_score, suggestions_list)
        """
        if not self.scripts:
            return float(MAX_COMPONENT_SCORE), []

        # Good validation patterns (each gives bonus points)
        validation_patterns = [
            (PATTERN_ARGPARSE, GOOD_PRACTICE_BONUS),
            (PATTERN_TRY_EXCEPT, SAFE_PATTERN_BONUS),
            (PATTERN_INPUT_CHECK, SAFE_PATTERN_BONUS),
            (PATTERN_ISINSTANCE, SAFE_PATTERN_BONUS),
            (PATTERN_ISDIGIT, SAFE_PATTERN_BONUS),
            (PATTERN_REGEX_VALIDATION, SAFE_PATTERN_BONUS),
            (PATTERN_VALIDATOR_CLASS, GOOD_PRACTICE_BONUS),
            (PATTERN_VALIDATE_FUNC, SAFE_PATTERN_BONUS),
            (PATTERN_SANITIZE_FUNC, SAFE_PATTERN_BONUS),
        ]

        scores = []
        for script_path in self.scripts:
            content = self._get_script_content(script_path)
            if content is None:
                continue

            earned = sum(
                bonus for pattern, bonus in validation_patterns
                if pattern.search(content)
            )
            scores.append(self._clamp_score(BASE_SCORE_INPUT_VALIDATION + earned))

        avg_score = sum(scores) / len(scores) if scores else 0.0

        suggestions = []
        if avg_score < self.INPUT_VALIDATION_SUGGESTION_THRESHOLD:
            suggestions.append("Add input validation with argparse, type checking, and error handling")

        return avg_score, suggestions

    def get_overall_score(self) -> Dict[str, Any]:
        """
        Calculate overall security score and return detailed results.

        Returns:
            Dictionary containing:
            - overall_score: Sum of the four component scores (0-100), capped at
              CRITICAL_SCORE_CAP when a critical vulnerability is present
            - components: Individual component scores (0-25 each)
            - findings: List of security issues found
            - suggestions: Improvement suggestions
            - has_critical_vulnerabilities: Whether any critical pattern matched
        """
        # Score each component
        sensitive_score, sensitive_findings = self.score_sensitive_data_exposure()
        file_ops_score, file_ops_findings = self.score_safe_file_operations()
        command_injection_score, command_findings = self.score_command_injection_prevention()
        input_validation_score, input_suggestions = self.score_input_validation()

        # Each component is worth up to MAX_COMPONENT_SCORE (25) points, i.e. 25%
        # of the total, so the 0-100 overall score is their sum. Multiplying the
        # 0-25 components by 0.25 produced a 0-25 result, which made the cap
        # below and the 50/70 tier thresholds meaningless.
        overall_score = (
            sensitive_score +
            file_ops_score +
            command_injection_score +
            input_validation_score
        )

        # Collect all findings
        all_findings = sensitive_findings + file_ops_findings + command_findings

        # Generate suggestions based on findings
        suggestions = input_suggestions.copy()
        if sensitive_findings:
            suggestions.append("Remove hardcoded credentials and use environment variables or secure config")
        if file_ops_findings:
            suggestions.append("Validate and sanitize file paths, use pathlib for safe path handling")
        if command_findings:
            suggestions.append("Avoid shell=True in subprocess, use shlex.quote for shell arguments")

        # Critical vulnerability check - if any critical issues, cap the score
        critical_patterns = [
            PATTERN_HARDCODED_PASSWORD, PATTERN_HARDCODED_API_KEY,
            PATTERN_HARDCODED_PRIVATE_KEY, PATTERN_OS_SYSTEM,
            PATTERN_EVAL, PATTERN_EXEC
        ]

        has_critical = False
        for script_path in self.scripts:
            content = self._get_script_content(script_path)
            if content is None:
                continue
            if any(pattern.search(content) for pattern in critical_patterns):
                has_critical = True
                break

        if has_critical:
            overall_score = min(overall_score, self.CRITICAL_SCORE_CAP)

        return {
            'overall_score': round(overall_score, 1),
            'components': {
                'sensitive_data_exposure': round(sensitive_score, 1),
                'safe_file_operations': round(file_ops_score, 1),
                'command_injection_prevention': round(command_injection_score, 1),
                'input_validation': round(input_validation_score, 1),
            },
            'findings': all_findings,
            'suggestions': suggestions,
            'has_critical_vulnerabilities': has_critical,
        }