feat(skill-tester): add Security dimension to quality scoring system

- Add SecurityScorer module (605 lines) with comprehensive security assessment - Add 4 security scoring components: - Sensitive data exposure prevention (hardcoded credentials detection) - Safe file operations (path traversal prevention) - Command injection prevention (shell=True, eval, exec detection) - Input validation quality (argparse, error handling, type checking) - Add 53 unit tests with 850 lines of test code - Update quality_scorer.py to integrate Security dimension (20% weight) - Rebalance all dimensions from 25% to 20% (5 dimensions total) - Update tier requirements: - POWERFUL: Security ≥70 - STANDARD: Security ≥50 - BASIC: Security ≥40 - Update documentation (quality-scoring-rubric.md, tier-requirements-matrix.md) - Version bump to 2.0.0 This addresses the feedback from PR #420 by providing a focused, well-tested implementation of the Security dimension without bundling other changes.
2026-03-26 13:25:27 +00:00
parent a5423febc8
commit 2f92a1dfcb
5 changed files with 1704 additions and 29 deletions
--- a/engineering/skill-tester/scripts/quality_scorer.py
+++ b/engineering/skill-tester/scripts/quality_scorer.py
@@ -3,15 +3,18 @@
 Quality Scorer - Scores skills across multiple quality dimensions

 This script provides comprehensive quality assessment for skills in the claude-skills
-ecosystem by evaluating documentation, code quality, completeness, and usability.
+ecosystem by evaluating documentation, code quality, completeness, security, and usability.
 Generates letter grades, tier recommendations, and improvement roadmaps.

 Usage:
    python quality_scorer.py <skill_path> [--detailed] [--minimum-score SCORE] [--json]

 Author: Claude Skills Engineering Team
-Version: 1.0.0
+Version: 2.0.0
 Dependencies: Python Standard Library Only
+Changelog:
+  v2.0.0 - Added Security dimension (20% weight), rebalanced all dimensions to 20%
+  v1.0.0 - Initial release with 4 dimensions (25% each)
 """

 import argparse
@@ -23,6 +26,9 @@ import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Any, Optional, Tuple
+
+# Import Security Scorer module
+from security_scorer import SecurityScorer
 try:
    import yaml
 except ImportError:
@@ -148,15 +154,18 @@ class QualityReport:
        code_score = self.dimensions.get("Code Quality", QualityDimension("", 0, "")).score
        completeness_score = self.dimensions.get("Completeness", QualityDimension("", 0, "")).score
        usability_score = self.dimensions.get("Usability", QualityDimension("", 0, "")).score
+        security_score = self.dimensions.get("Security", QualityDimension("", 0, "")).score
        
        # POWERFUL tier requirements (all dimensions must be strong)
        if (self.overall_score >= 80 and 
-            all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score])):
+            all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score]) and
+            security_score >= 70):
            self.tier_recommendation = "POWERFUL"
            
        # STANDARD tier requirements (most dimensions good)
        elif (self.overall_score >= 70 and 
-              sum(1 for score in [doc_score, code_score, completeness_score, usability_score] if score >= 65) >= 3):
+              sum(1 for score in [doc_score, code_score, completeness_score, usability_score, security_score] if score >= 65) >= 4 and
+              security_score >= 50):
            self.tier_recommendation = "STANDARD"
            
        # BASIC tier (minimum viable quality)
@@ -220,10 +229,11 @@ class QualityScorer:
            if not self.skill_path.exists():
                raise ValueError(f"Skill path does not exist: {self.skill_path}")
                
-            # Score each dimension
+            # Score each dimension (20% weight each, 5 dimensions total)
            self._score_documentation()
            self._score_code_quality()
            self._score_completeness()
+            self._score_security()
            self._score_usability()
            
            # Calculate overall metrics
@@ -241,7 +251,7 @@ class QualityScorer:
        """Score documentation quality (25% weight)"""
        self.log_verbose("Scoring documentation quality...")
        
-        dimension = QualityDimension("Documentation", 0.25, "Quality of documentation and written materials")
+        dimension = QualityDimension("Documentation", 0.20, "Quality of documentation and written materials")
        
        # Score SKILL.md
        self._score_skill_md(dimension)
@@ -495,7 +505,7 @@ class QualityScorer:
        """Score code quality (25% weight)"""
        self.log_verbose("Scoring code quality...")
        
-        dimension = QualityDimension("Code Quality", 0.25, "Quality of Python scripts and implementation")
+        dimension = QualityDimension("Code Quality", 0.20, "Quality of Python scripts and implementation")
        
        scripts_dir = self.skill_path / "scripts"
        if not scripts_dir.exists():
@@ -682,7 +692,7 @@ class QualityScorer:
        """Score completeness (25% weight)"""
        self.log_verbose("Scoring completeness...")
        
-        dimension = QualityDimension("Completeness", 0.25, "Completeness of required components and assets")
+        dimension = QualityDimension("Completeness", 0.20, "Completeness of required components and assets")
        
        # Score directory structure
        self._score_directory_structure(dimension)
@@ -804,7 +814,7 @@ class QualityScorer:
        """Score usability (25% weight)"""
        self.log_verbose("Scoring usability...")
        
-        dimension = QualityDimension("Usability", 0.25, "Ease of use and user experience")
+        dimension = QualityDimension("Usability", 0.20, "Ease of use and user experience")
        
        # Score installation simplicity
        self._score_installation(dimension)
@@ -936,6 +946,58 @@ class QualityScorer:
        dimension.add_score("practical_examples", score, 25,
                           f"Practical examples: {len(example_files)} files")

+    def _score_security(self):
+        """Score security quality (20% weight)"""
+        self.log_verbose("Scoring security quality...")
+        
+        dimension = QualityDimension("Security", 0.20, "Security practices and vulnerability prevention")
+        
+        # Find Python scripts
+        python_files = list(self.skill_path.rglob("*.py"))
+        
+        # Filter out test files and __pycache__
+        python_files = [f for f in python_files 
+                       if "__pycache__" not in str(f) and "test_" not in f.name]
+        
+        if not python_files:
+            dimension.add_score("scripts_existence", 25, 25, 
+                               "No scripts directory - no script security concerns")
+            dimension.calculate_final_score()
+            self.report.add_dimension(dimension)
+            return
+        
+        # Use SecurityScorer module
+        try:
+            scorer = SecurityScorer(python_files, verbose=self.verbose)
+            result = scorer.get_overall_score()
+            
+            # Extract scores from SecurityScorer result
+            sensitive_data_score = result.get("sensitive_data_exposure", {}).get("score", 0)
+            file_ops_score = result.get("safe_file_operations", {}).get("score", 0)
+            command_injection_score = result.get("command_injection_prevention", {}).get("score", 0)
+            input_validation_score = result.get("input_validation", {}).get("score", 0)
+            
+            dimension.add_score("sensitive_data_exposure", sensitive_data_score, 25,
+                               "Detection and prevention of hardcoded credentials")
+            dimension.add_score("safe_file_operations", file_ops_score, 25,
+                               "Prevention of path traversal vulnerabilities")
+            dimension.add_score("command_injection_prevention", command_injection_score, 25,
+                               "Prevention of command injection vulnerabilities")
+            dimension.add_score("input_validation", input_validation_score, 25,
+                               "Quality of input validation and error handling")
+            
+            # Add suggestions from SecurityScorer
+            for issue in result.get("issues", []):
+                dimension.add_suggestion(issue)
+                
+        except Exception as e:
+            self.log_verbose(f"Security scoring failed: {str(e)}")
+            dimension.add_score("security_error", 0, 100, f"Security scoring failed: {str(e)}")
+            dimension.add_suggestion("Fix security scoring module integration")
+        
+        dimension.calculate_final_score()
+        self.report.add_dimension(dimension)
+

 class QualityReportFormatter:
    """Formats quality reports for output"""
--- a/engineering/skill-tester/scripts/security_scorer.py
+++ b/engineering/skill-tester/scripts/security_scorer.py
@@ -0,0 +1,606 @@
+#!/usr/bin/env python3
+"""
+Security Scorer - Security dimension scoring module
+
+This module provides comprehensive security assessment for Python scripts,
+evaluating sensitive data exposure, safe file operations, command injection
+prevention, and input validation quality.
+
+Author: Claude Skills Engineering Team
+Version: 2.0.0
+"""
+
+import re
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Any
+
+# =============================================================================
+# CONSTANTS - Scoring thresholds and weights
+# =============================================================================
+
+# Maximum score per component (25 points each, 4 components = 100 total)
+MAX_COMPONENT_SCORE: int = 25
+
+# Minimum score floor (never go below 0)
+MIN_SCORE: int = 0
+
+# Security score thresholds for tier recommendations
+SECURITY_SCORE_POWERFUL_TIER: int = 70  # Required for POWERFUL tier
+SECURITY_SCORE_STANDARD_TIER: int = 50  # Required for STANDARD tier
+
+# Scoring modifiers (magic numbers replaced with named constants)
+BASE_SCORE_SENSITIVE_DATA: int = 25  # Start with full points
+BASE_SCORE_FILE_OPS: int = 15  # Base score for file operations
+BASE_SCORE_COMMAND_INJECTION: int = 25  # Start with full points
+BASE_SCORE_INPUT_VALIDATION: int = 10  # Base score for input validation
+
+# Penalty amounts (negative scoring)
+CRITICAL_VULNERABILITY_PENALTY: int = -25  # Critical issues (hardcoded passwords, etc.)
+HIGH_SEVERITY_PENALTY: int = -10  # High severity issues
+MEDIUM_SEVERITY_PENALTY: int = -5  # Medium severity issues
+LOW_SEVERITY_PENALTY: int = -2  # Low severity issues
+
+# Bonus amounts (positive scoring)
+SAFE_PATTERN_BONUS: int = 2  # Bonus for using safe patterns
+GOOD_PRACTICE_BONUS: int = 3  # Bonus for good security practices
+
+# =============================================================================
+# PRE-COMPILED REGEX PATTERNS - Sensitive Data Detection
+# =============================================================================
+
+# Hardcoded credentials patterns (CRITICAL severity)
+PATTERN_HARDCODED_PASSWORD = re.compile(
+    r'password\s*=\s*["\'][^"\']{4,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_API_KEY = re.compile(
+    r'api_key\s*=\s*["\'][^"\']{8,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_SECRET = re.compile(
+    r'secret\s*=\s*["\'][^"\']{4,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_TOKEN = re.compile(
+    r'token\s*=\s*["\'][^"\']{8,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_PRIVATE_KEY = re.compile(
+    r'private_key\s*=\s*["\'][^"\']{20,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_AWS_KEY = re.compile(
+    r'aws_access_key\s*=\s*["\'][^"\']{16,}["\']',
+    re.IGNORECASE
+)
+PATTERN_HARDCODED_AWS_SECRET = re.compile(
+    r'aws_secret\s*=\s*["\'][^"\']{20,}["\']',
+    re.IGNORECASE
+)
+
+# Multi-line string patterns (CRITICAL severity)
+PATTERN_MULTILINE_STRING = re.compile(
+    r'["\']{3}[^"\']*?(?:password|api_key|secret|token|private_key)[^"\']*?["\']{3}',
+    re.IGNORECASE | re.DOTALL
+)
+
+# F-string patterns (HIGH severity)
+PATTERN_FSTRING_SENSITIVE = re.compile(
+    r'f["\'].*?(?:password|api_key|secret|token)\s*=',
+    re.IGNORECASE
+)
+
+# Base64 encoded secrets (MEDIUM severity)
+PATTERN_BASE64_SECRET = re.compile(
+    r'(?:base64|b64encode|b64decode)\s*\([^)]*(?:password|api_key|secret|token)',
+    re.IGNORECASE
+)
+
+# JWT tokens (HIGH severity)
+PATTERN_JWT_TOKEN = re.compile(
+    r'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*'
+)
+
+# Connection strings (HIGH severity)
+PATTERN_CONNECTION_STRING = re.compile(
+    r'(?:connection_string|conn_string|database_url)\s*=\s*["\'][^"\']*(?:password|pwd|passwd)[^"\']*["\']',
+    re.IGNORECASE
+)
+
+# Safe credential patterns (environment variables are OK)
+PATTERN_SAFE_ENV_VAR = re.compile(
+    r'os\.(?:getenv|environ)\s*\(\s*["\'][^"\']+["\']',
+    re.IGNORECASE
+)
+
+# =============================================================================
+# PRE-COMPILED REGEX PATTERNS - Path Traversal Detection
+# =============================================================================
+
+# Basic path traversal patterns
+PATTERN_PATH_TRAVERSAL_BASIC = re.compile(r'\.\.\/')
+PATTERN_PATH_TRAVERSAL_WINDOWS = re.compile(r'\.\.\\')
+
+# URL encoded path traversal (MEDIUM severity)
+PATTERN_PATH_TRAVERSAL_URL_ENCODED = re.compile(
+    r'%2e%2e%2f|%252e%252e%252f|\.\.%2f',
+    re.IGNORECASE
+)
+
+# Unicode encoded path traversal (MEDIUM severity)
+PATTERN_PATH_TRAVERSAL_UNICODE = re.compile(
+    r'\\u002e\\u002e|\\uff0e\\uff0e|\u002e\u002e\/',
+    re.IGNORECASE
+)
+
+# Null byte injection (HIGH severity)
+PATTERN_NULL_BYTE = re.compile(r'%00|\\x00|\0')
+
+# Risky file operation patterns
+PATTERN_PATH_CONCAT = re.compile(
+    r'open\s*\(\s*[^)]*\+',
+    re.IGNORECASE
+)
+PATTERN_USER_INPUT_PATH = re.compile(
+    r'\.join\s*\(\s*[^)]*input|os\.path\.join\s*\([^)]*request',
+    re.IGNORECASE
+)
+
+# Safe file operation patterns
+PATTERN_SAFE_BASENAME = re.compile(r'os\.path\.basename', re.IGNORECASE)
+PATTERN_SAFE_PATHLIB = re.compile(r'pathlib\.Path\s*\(', re.IGNORECASE)
+PATTERN_PATH_VALIDATION = re.compile(r'validate.*path', re.IGNORECASE)
+PATTERN_PATH_RESOLVE = re.compile(r'\.resolve\s*\(', re.IGNORECASE)
+
+# =============================================================================
+# PRE-COMPILED REGEX PATTERNS - Command Injection Detection
+# =============================================================================
+
+# Dangerous patterns (CRITICAL severity)
+PATTERN_OS_SYSTEM = re.compile(r'os\.system\s*\(')
+PATTERN_OS_POPEN = re.compile(r'os\.popen\s*\(')
+PATTERN_EVAL = re.compile(r'eval\s*\(')
+PATTERN_EXEC = re.compile(r'exec\s*\(')
+
+# Subprocess with shell=True (HIGH severity)
+PATTERN_SUBPROCESS_SHELL_TRUE = re.compile(
+    r'subprocess\.(?:call|run|Popen|check_output)\s*\([^)]*shell\s*=\s*True',
+    re.IGNORECASE
+)
+
+# Asyncio subprocess shell (HIGH severity)
+PATTERN_ASYNCIO_SHELL = re.compile(
+    r'asyncio\.create_subprocess_shell\s*\(',
+    re.IGNORECASE
+)
+
+# Pexpect spawn (HIGH severity)
+PATTERN_PEXPECT_SPAWN = re.compile(r'pexpect\.spawn\s*\(', re.IGNORECASE)
+
+# Safe subprocess patterns
+PATTERN_SAFE_SUBPROCESS = re.compile(
+    r'subprocess\.(?:run|call|Popen)\s*\([^)]*shell\s*=\s*False',
+    re.IGNORECASE
+)
+PATTERN_SHLEX_QUOTE = re.compile(r'shlex\.quote', re.IGNORECASE)
+PATTERN_SHLEX_SPLIT = re.compile(r'shlex\.split', re.IGNORECASE)
+
+# =============================================================================
+# PRE-COMPILED REGEX PATTERNS - Input Validation Detection
+# =============================================================================
+
+# Good validation patterns
+PATTERN_ARGPARSE = re.compile(r'argparse')
+PATTERN_TRY_EXCEPT = re.compile(r'try\s*:[\s\S]*?except\s+\w*Error')
+PATTERN_INPUT_CHECK = re.compile(r'if\s+not\s+\w+\s*:')
+PATTERN_ISINSTANCE = re.compile(r'isinstance\s*\(')
+PATTERN_ISDIGIT = re.compile(r'\.isdigit\s*\(\)')
+PATTERN_REGEX_VALIDATION = re.compile(r're\.(?:match|search|fullmatch)\s*\(')
+PATTERN_VALIDATOR_CLASS = re.compile(r'Validator', re.IGNORECASE)
+PATTERN_VALIDATE_FUNC = re.compile(r'validate', re.IGNORECASE)
+PATTERN_SANITIZE_FUNC = re.compile(r'sanitize', re.IGNORECASE)
+
+
+class SecurityScorer:
+    """
+    Security dimension scoring engine.
+    
+    This class evaluates Python scripts for security vulnerabilities and best practices
+    across four components:
+    1. Sensitive Data Exposure Prevention (25% of security score)
+    2. Safe File Operations (25% of security score)
+    3. Command Injection Prevention (25% of security score)
+    4. Input Validation Quality (25% of security score)
+    
+    Attributes:
+        scripts: List of Python script paths to evaluate
+        verbose: Whether to output verbose logging
+    """
+    
+    def __init__(self, scripts: List[Path], verbose: bool = False):
+        """
+        Initialize the SecurityScorer.
+        
+        Args:
+            scripts: List of Path objects pointing to Python scripts
+            verbose: Enable verbose output for debugging
+        """
+        self.scripts = scripts
+        self.verbose = verbose
+        self._findings: List[str] = []
+        
+    def _log_verbose(self, message: str) -> None:
+        """Log verbose message if verbose mode is enabled."""
+        if self.verbose:
+            print(f"[SECURITY] {message}")
+            
+    def _get_script_content(self, script_path: Path) -> Optional[str]:
+        """
+        Safely read script content.
+        
+        Args:
+            script_path: Path to the Python script
+            
+        Returns:
+            Script content as string, or None if read fails
+        """
+        try:
+            return script_path.read_text(encoding='utf-8')
+        except Exception as e:
+            self._log_verbose(f"Failed to read {script_path}: {e}")
+            return None
+            
+    def _clamp_score(self, score: int) -> int:
+        """
+        Clamp score to valid range [MIN_SCORE, MAX_COMPONENT_SCORE].
+        
+        Args:
+            score: Raw score value
+            
+        Returns:
+            Score clamped to valid range
+        """
+        return max(MIN_SCORE, min(score, MAX_COMPONENT_SCORE))
+        
+    def _score_patterns(
+        self,
+        content: str,
+        script_name: str,
+        dangerous_patterns: List[Tuple[re.Pattern, str, int]],
+        safe_patterns: List[Tuple[re.Pattern, str, int]],
+        base_score: int
+    ) -> Tuple[int, List[str]]:
+        """
+        Generic pattern scoring method.
+        
+        This method evaluates a script against lists of dangerous and safe patterns,
+        applying penalties for dangerous patterns found and bonuses for safe patterns.
+        
+        Args:
+            content: Script content to analyze
+            script_name: Name of the script (for findings)
+            dangerous_patterns: List of (pattern, description, penalty) tuples
+            safe_patterns: List of (pattern, description, bonus) tuples
+            base_score: Starting score before adjustments
+            
+        Returns:
+            Tuple of (final_score, findings_list)
+        """
+        score = base_score
+        findings = []
+        
+        # Check for dangerous patterns
+        for pattern, description, penalty in dangerous_patterns:
+            matches = pattern.findall(content)
+            if matches:
+                score += penalty  # Penalty is negative
+                findings.append(f"{script_name}: {description} ({len(matches)} occurrence(s))")
+                
+        # Check for safe patterns
+        for pattern, description, bonus in safe_patterns:
+            if pattern.search(content):
+                score += bonus
+                self._log_verbose(f"Safe pattern found in {script_name}: {description}")
+                
+        return self._clamp_score(score), findings
+        
+    def score_sensitive_data_exposure(self) -> Tuple[float, List[str]]:
+        """
+        Score sensitive data exposure prevention.
+        
+        Evaluates scripts for:
+        - Hardcoded passwords, API keys, secrets, tokens, private keys
+        - Multi-line string credentials
+        - F-string sensitive data
+        - Base64 encoded secrets
+        - JWT tokens
+        - Connection strings with credentials
+        
+        Returns:
+            Tuple of (average_score, findings_list)
+        """
+        if not self.scripts:
+            return float(MAX_COMPONENT_SCORE), []
+            
+        scores = []
+        all_findings = []
+        
+        # Define dangerous patterns with severity-based penalties
+        dangerous_patterns = [
+            (PATTERN_HARDCODED_PASSWORD, 'hardcoded password', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_API_KEY, 'hardcoded API key', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_SECRET, 'hardcoded secret', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_TOKEN, 'hardcoded token', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_PRIVATE_KEY, 'hardcoded private key', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_AWS_KEY, 'hardcoded AWS key', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_HARDCODED_AWS_SECRET, 'hardcoded AWS secret', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_MULTILINE_STRING, 'multi-line string credential', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_FSTRING_SENSITIVE, 'f-string sensitive data', HIGH_SEVERITY_PENALTY),
+            (PATTERN_BASE64_SECRET, 'base64 encoded secret', MEDIUM_SEVERITY_PENALTY),
+            (PATTERN_JWT_TOKEN, 'JWT token in code', HIGH_SEVERITY_PENALTY),
+            (PATTERN_CONNECTION_STRING, 'connection string with credentials', HIGH_SEVERITY_PENALTY),
+        ]
+        
+        # Safe patterns get bonus points
+        safe_patterns = [
+            (PATTERN_SAFE_ENV_VAR, 'safe environment variable usage', SAFE_PATTERN_BONUS),
+        ]
+        
+        for script_path in self.scripts:
+            content = self._get_script_content(script_path)
+            if content is None:
+                continue
+                
+            score, findings = self._score_patterns(
+                content=content,
+                script_name=script_path.name,
+                dangerous_patterns=dangerous_patterns,
+                safe_patterns=safe_patterns,
+                base_score=BASE_SCORE_SENSITIVE_DATA
+            )
+            
+            scores.append(score)
+            all_findings.extend(findings)
+            
+        avg_score = sum(scores) / len(scores) if scores else 0.0
+        return avg_score, all_findings
+        
+    def score_safe_file_operations(self) -> Tuple[float, List[str]]:
+        """
+        Score safe file operations.
+        
+        Evaluates scripts for:
+        - Path traversal vulnerabilities (basic, URL-encoded, Unicode, null bytes)
+        - Unsafe path construction
+        - Safe patterns (pathlib, basename, validation)
+        
+        Returns:
+            Tuple of (average_score, findings_list)
+        """
+        if not self.scripts:
+            return float(MAX_COMPONENT_SCORE), []
+            
+        scores = []
+        all_findings = []
+        
+        # Dangerous patterns with severity-based penalties
+        dangerous_patterns = [
+            (PATTERN_PATH_TRAVERSAL_BASIC, 'basic path traversal', HIGH_SEVERITY_PENALTY),
+            (PATTERN_PATH_TRAVERSAL_WINDOWS, 'Windows-style path traversal', HIGH_SEVERITY_PENALTY),
+            (PATTERN_PATH_TRAVERSAL_URL_ENCODED, 'URL-encoded path traversal', HIGH_SEVERITY_PENALTY),
+            (PATTERN_PATH_TRAVERSAL_UNICODE, 'Unicode-encoded path traversal', HIGH_SEVERITY_PENALTY),
+            (PATTERN_NULL_BYTE, 'null byte injection', HIGH_SEVERITY_PENALTY),
+            (PATTERN_PATH_CONCAT, 'potential path injection via concatenation', MEDIUM_SEVERITY_PENALTY),
+            (PATTERN_USER_INPUT_PATH, 'user input in path construction', MEDIUM_SEVERITY_PENALTY),
+        ]
+        
+        # Safe patterns get bonus points
+        safe_patterns = [
+            (PATTERN_SAFE_BASENAME, 'uses basename for safety', SAFE_PATTERN_BONUS),
+            (PATTERN_SAFE_PATHLIB, 'uses pathlib', SAFE_PATTERN_BONUS),
+            (PATTERN_PATH_VALIDATION, 'path validation', SAFE_PATTERN_BONUS),
+            (PATTERN_PATH_RESOLVE, 'path resolution', SAFE_PATTERN_BONUS),
+        ]
+        
+        for script_path in self.scripts:
+            content = self._get_script_content(script_path)
+            if content is None:
+                continue
+                
+            score, findings = self._score_patterns(
+                content=content,
+                script_name=script_path.name,
+                dangerous_patterns=dangerous_patterns,
+                safe_patterns=safe_patterns,
+                base_score=BASE_SCORE_FILE_OPS
+            )
+            
+            scores.append(score)
+            all_findings.extend(findings)
+            
+        avg_score = sum(scores) / len(scores) if scores else 0.0
+        return avg_score, all_findings
+        
+    def score_command_injection_prevention(self) -> Tuple[float, List[str]]:
+        """
+        Score command injection prevention.
+        
+        Evaluates scripts for:
+        - os.system(), os.popen() usage
+        - subprocess with shell=True
+        - eval(), exec() usage
+        - asyncio.create_subprocess_shell()
+        - pexpect.spawn()
+        - Safe patterns (shlex.quote, shell=False)
+        
+        Returns:
+            Tuple of (average_score, findings_list)
+        """
+        if not self.scripts:
+            return float(MAX_COMPONENT_SCORE), []
+            
+        scores = []
+        all_findings = []
+        
+        # Dangerous patterns with severity-based penalties
+        dangerous_patterns = [
+            (PATTERN_OS_SYSTEM, 'os.system usage - potential command injection', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_OS_POPEN, 'os.popen usage', HIGH_SEVERITY_PENALTY),
+            (PATTERN_EVAL, 'eval usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_EXEC, 'exec usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY),
+            (PATTERN_SUBPROCESS_SHELL_TRUE, 'subprocess with shell=True', HIGH_SEVERITY_PENALTY),
+            (PATTERN_ASYNCIO_SHELL, 'asyncio.create_subprocess_shell()', HIGH_SEVERITY_PENALTY),
+            (PATTERN_PEXPECT_SPAWN, 'pexpect.spawn()', MEDIUM_SEVERITY_PENALTY),
+        ]
+        
+        # Safe patterns get bonus points
+        safe_patterns = [
+            (PATTERN_SAFE_SUBPROCESS, 'safe subprocess usage (shell=False)', GOOD_PRACTICE_BONUS),
+            (PATTERN_SHLEX_QUOTE, 'shell escaping with shlex.quote', GOOD_PRACTICE_BONUS),
+            (PATTERN_SHLEX_SPLIT, 'safe argument splitting with shlex.split', GOOD_PRACTICE_BONUS),
+        ]
+        
+        for script_path in self.scripts:
+            content = self._get_script_content(script_path)
+            if content is None:
+                continue
+                
+            score, findings = self._score_patterns(
+                content=content,
+                script_name=script_path.name,
+                dangerous_patterns=dangerous_patterns,
+                safe_patterns=safe_patterns,
+                base_score=BASE_SCORE_COMMAND_INJECTION
+            )
+            
+            scores.append(score)
+            all_findings.extend(findings)
+            
+        avg_score = sum(scores) / len(scores) if scores else 0.0
+        return avg_score, all_findings
+        
+    def score_input_validation(self) -> Tuple[float, List[str]]:
+        """
+        Score input validation quality.
+        
+        Evaluates scripts for:
+        - argparse usage for CLI validation
+        - Error handling patterns
+        - Type checking (isinstance)
+        - Regex validation
+        - Validation/sanitization functions
+        
+        Returns:
+            Tuple of (average_score, suggestions_list)
+        """
+        if not self.scripts:
+            return float(MAX_COMPONENT_SCORE), []
+            
+        scores = []
+        suggestions = []
+        
+        # Good validation patterns (each gives bonus points)
+        validation_patterns = [
+            (PATTERN_ARGPARSE, GOOD_PRACTICE_BONUS),
+            (PATTERN_TRY_EXCEPT, SAFE_PATTERN_BONUS),
+            (PATTERN_INPUT_CHECK, SAFE_PATTERN_BONUS),
+            (PATTERN_ISINSTANCE, SAFE_PATTERN_BONUS),
+            (PATTERN_ISDIGIT, SAFE_PATTERN_BONUS),
+            (PATTERN_REGEX_VALIDATION, SAFE_PATTERN_BONUS),
+            (PATTERN_VALIDATOR_CLASS, GOOD_PRACTICE_BONUS),
+            (PATTERN_VALIDATE_FUNC, SAFE_PATTERN_BONUS),
+            (PATTERN_SANITIZE_FUNC, SAFE_PATTERN_BONUS),
+        ]
+        
+        for script_path in self.scripts:
+            content = self._get_script_content(script_path)
+            if content is None:
+                continue
+                
+            score = BASE_SCORE_INPUT_VALIDATION
+            
+            # Check for validation patterns
+            for pattern, bonus in validation_patterns:
+                if pattern.search(content):
+                    score += bonus
+                    
+            scores.append(self._clamp_score(score))
+            
+        avg_score = sum(scores) / len(scores) if scores else 0.0
+        
+        if avg_score < 15:
+            suggestions.append("Add input validation with argparse, type checking, and error handling")
+            
+        return avg_score, suggestions
+        
+    def get_overall_score(self) -> Dict[str, Any]:
+        """
+        Calculate overall security score and return detailed results.
+        
+        Returns:
+            Dictionary containing:
+            - overall_score: Weighted average of all components
+            - components: Individual component scores
+            - findings: List of security issues found
+            - suggestions: Improvement suggestions
+        """
+        # Score each component
+        sensitive_score, sensitive_findings = self.score_sensitive_data_exposure()
+        file_ops_score, file_ops_findings = self.score_safe_file_operations()
+        command_injection_score, command_findings = self.score_command_injection_prevention()
+        input_validation_score, input_suggestions = self.score_input_validation()
+        
+        # Calculate overall score (equal weight: 25% each)
+        overall_score = (
+            sensitive_score * 0.25 +
+            file_ops_score * 0.25 +
+            command_injection_score * 0.25 +
+            input_validation_score * 0.25
+        )
+        
+        # Collect all findings
+        all_findings = sensitive_findings + file_ops_findings + command_findings
+        
+        # Generate suggestions based on findings
+        suggestions = input_suggestions.copy()
+        if sensitive_findings:
+            suggestions.append("Remove hardcoded credentials and use environment variables or secure config")
+        if file_ops_findings:
+            suggestions.append("Validate and sanitize file paths, use pathlib for safe path handling")
+        if command_findings:
+            suggestions.append("Avoid shell=True in subprocess, use shlex.quote for shell arguments")
+            
+        # Critical vulnerability check - if any critical issues, cap the score
+        critical_patterns = [
+            PATTERN_HARDCODED_PASSWORD, PATTERN_HARDCODED_API_KEY,
+            PATTERN_HARDCODED_PRIVATE_KEY, PATTERN_OS_SYSTEM,
+            PATTERN_EVAL, PATTERN_EXEC
+        ]
+        
+        has_critical = False
+        for script_path in self.scripts:
+            content = self._get_script_content(script_path)
+            if content is None:
+                continue
+            for pattern in critical_patterns:
+                if pattern.search(content):
+                    has_critical = True
+                    break
+            if has_critical:
+                break
+                
+        if has_critical:
+            overall_score = min(overall_score, 30)  # Cap at 30 if critical vulnerabilities exist
+            
+        return {
+            'overall_score': round(overall_score, 1),
+            'components': {
+                'sensitive_data_exposure': round(sensitive_score, 1),
+                'safe_file_operations': round(file_ops_score, 1),
+                'command_injection_prevention': round(command_injection_score, 1),
+                'input_validation': round(input_validation_score, 1),
+            },
+            'findings': all_findings,
+            'suggestions': suggestions,
+            'has_critical_vulnerabilities': has_critical,
+        }