From 2f92a1dfcb4d10159619f698ea405d6c91fabbea Mon Sep 17 00:00:00 2001 From: xingzihai <1315258019@qq.com> Date: Thu, 26 Mar 2026 13:25:27 +0000 Subject: [PATCH] feat(skill-tester): add Security dimension to quality scoring system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add SecurityScorer module (606 lines) with comprehensive security assessment - Add 4 security scoring components: - Sensitive data exposure prevention (hardcoded credentials detection) - Safe file operations (path traversal prevention) - Command injection prevention (shell=True, eval, exec detection) - Input validation quality (argparse, error handling, type checking) - Add 53 unit tests with 851 lines of test code - Update quality_scorer.py to integrate Security dimension (20% weight) - Rebalance all dimensions from 25% to 20% (5 dimensions total) - Update tier requirements: - POWERFUL: Security ≥70 - STANDARD: Security ≥50 - BASIC: Security ≥40 - Update documentation (quality-scoring-rubric.md, tier-requirements-matrix.md) - Version bump to 2.0.0 This addresses the feedback from PR #420 by providing a focused, well-tested implementation of the Security dimension without bundling other changes. 
--- .../references/quality-scoring-rubric.md | 143 ++- .../references/tier-requirements-matrix.md | 53 +- .../skill-tester/scripts/quality_scorer.py | 80 +- .../skill-tester/scripts/security_scorer.py | 606 +++++++++++++ .../tests/test_security_scorer.py | 851 ++++++++++++++++++ 5 files changed, 1704 insertions(+), 29 deletions(-) create mode 100644 engineering/skill-tester/scripts/security_scorer.py create mode 100644 engineering/skill-tester/tests/test_security_scorer.py diff --git a/engineering/skill-tester/references/quality-scoring-rubric.md b/engineering/skill-tester/references/quality-scoring-rubric.md index 0d573b9..00ed0b6 100644 --- a/engineering/skill-tester/references/quality-scoring-rubric.md +++ b/engineering/skill-tester/references/quality-scoring-rubric.md @@ -1,12 +1,12 @@ # Quality Scoring Rubric -**Version**: 1.0.0 -**Last Updated**: 2026-02-16 +**Version**: 2.0.0 +**Last Updated**: 2026-03-26 **Authority**: Claude Skills Engineering Team ## Overview -This document defines the comprehensive quality scoring methodology used to assess skills within the claude-skills ecosystem. The scoring system evaluates four key dimensions, each weighted equally at 25%, to provide an objective and consistent measure of skill quality. +This document defines the comprehensive quality scoring methodology used to assess skills within the claude-skills ecosystem. The scoring system evaluates five key dimensions, each weighted equally at 20%, to provide an objective and consistent measure of skill quality. 
## Scoring Framework @@ -25,12 +25,13 @@ This document defines the comprehensive quality scoring methodology used to asse ### Dimension Weights Each dimension contributes equally to the overall score: -- **Documentation Quality**: 25% -- **Code Quality**: 25% -- **Completeness**: 25% -- **Usability**: 25% +- **Documentation Quality**: 20% +- **Code Quality**: 20% +- **Completeness**: 20% +- **Security**: 20% +- **Usability**: 20% -## Documentation Quality (25% Weight) +## Documentation Quality (20% Weight) ### Scoring Components @@ -76,7 +77,7 @@ Each dimension contributes equally to the overall score: - **Poor (40-59)**: 1-2 minimal examples - **Failing (0-39)**: No examples or unclear usage -## Code Quality (25% Weight) +## Code Quality (20% Weight) ### Scoring Components @@ -142,7 +143,7 @@ Each dimension contributes equally to the overall score: - **Poor (40-59)**: Basic output, formatting issues - **Failing (0-39)**: Poor or no structured output -## Completeness (25% Weight) +## Completeness (20% Weight) ### Scoring Components @@ -202,7 +203,7 @@ Structure Score = (Required Present / Required Total) * 0.6 + - **Poor (40-59)**: Minimal testing support - **Failing (0-39)**: No testing or validation capability -## Usability (25% Weight) +## Usability (20% Weight) ### Scoring Components @@ -379,6 +380,126 @@ def assign_letter_grade(overall_score): - Create beginner-friendly tutorials - Add interactive examples +## Security (20% Weight) + +### Overview +The Security dimension evaluates Python scripts for security vulnerabilities and best practices. This dimension is critical for ensuring that skills do not introduce security risks into the claude-skills ecosystem. 
+ +### Scoring Components + +#### Sensitive Data Exposure Prevention (25% of Security Score) +**Component Breakdown:** +- **Hardcoded Credentials Detection**: Passwords, API keys, tokens, secrets +- **AWS Credential Detection**: Access keys and secret keys +- **Private Key Detection**: RSA, SSH, and other private keys +- **JWT Token Detection**: JSON Web Tokens in code + +**Scoring Criteria:** + +| Score Range | Criteria | +|-------------|----------| +| 90-100 | No hardcoded credentials, uses environment variables properly | +| 75-89 | Minor issues (e.g., placeholder values that aren't real secrets) | +| 60-74 | One or two low-severity issues | +| 40-59 | Multiple medium-severity issues | +| Below 40 | Critical hardcoded secrets detected | + +#### Safe File Operations (25% of Security Score) +**Component Breakdown:** +- **Path Traversal Detection**: `../`, URL-encoded variants, Unicode variants +- **String Concatenation Risks**: `open(path + user_input)` +- **Null Byte Injection**: `%00`, `\x00` +- **Safe Pattern Usage**: `pathlib.Path`, `os.path.basename` + +**Scoring Criteria:** + +| Score Range | Criteria | +|-------------|----------| +| 90-100 | Uses pathlib/os.path safely, no path traversal vulnerabilities | +| 75-89 | Minor issues, uses safe patterns mostly | +| 60-74 | Some path concatenation with user input | +| 40-59 | Path traversal patterns detected | +| Below 40 | Critical vulnerabilities present | + +#### Command Injection Prevention (25% of Security Score) +**Component Breakdown:** +- **Dangerous Functions**: `os.system()`, `eval()`, `exec()`, `subprocess` with `shell=True` +- **Safe Alternatives**: `subprocess.run(args, shell=False)`, `shlex.quote()`, `shlex.split()` + +**Scoring Criteria:** + +| Score Range | Criteria | +|-------------|----------| +| 90-100 | No command injection risks, uses subprocess safely | +| 75-89 | Minor issues, mostly safe patterns | +| 60-74 | Some use of shell=True or eval with safe context | +| 40-59 | Command injection 
patterns detected | + | Below 40 | Critical vulnerabilities (unfiltered user input to shell) | + +#### Input Validation Quality (25% of Security Score) +**Component Breakdown:** +- **Argparse Usage**: CLI argument validation +- **Type Checking**: `isinstance()`, type hints +- **Error Handling**: `try/except` blocks +- **Input Sanitization**: Regex validation, input cleaning + +**Scoring Criteria:** + +| Score Range | Criteria | +|-------------|----------| +| 90-100 | Comprehensive input validation, proper error handling | +| 75-89 | Good validation coverage, most inputs checked | +| 60-74 | Basic validation present | +| 40-59 | Minimal input validation | +| Below 40 | No input validation | + +### Security Best Practices + +**Recommended Patterns:** +```python +# Use environment variables for secrets +import os +password = os.environ.get("PASSWORD") + +# Use pathlib for safe path operations +from pathlib import Path +safe_path = Path(base_dir) / user_input + +# Use subprocess safely +import subprocess +result = subprocess.run(["ls", user_input], capture_output=True) + +# Use shlex for shell argument safety +import shlex +safe_arg = shlex.quote(user_input) +``` + +**Patterns to Avoid:** +```python +# Don't hardcode secrets +password = "my_secret_password" # BAD + +# Don't use string concatenation for paths +open(base_path + "/" + user_input) # BAD + +# Don't use shell=True with user input +os.system(f"ls {user_input}") # BAD + +# Don't use eval on user input +eval(user_input) # VERY BAD +``` + +### Security Score Impact on Tiers +- **POWERFUL Tier**: Requires Security score ≥ 70 +- **STANDARD Tier**: Requires Security score ≥ 50 +- **BASIC Tier**: Requires Security score ≥ 40 + +#### Low Security Scores +- Remove hardcoded credentials, use environment variables +- Fix path traversal vulnerabilities +- Replace dangerous functions with safe alternatives +- Add input validation and error handling + ## Quality Assurance Process ### Automated Scoring diff --git 
a/engineering/skill-tester/references/tier-requirements-matrix.md b/engineering/skill-tester/references/tier-requirements-matrix.md index eba0445..871d776 100644 --- a/engineering/skill-tester/references/tier-requirements-matrix.md +++ b/engineering/skill-tester/references/tier-requirements-matrix.md @@ -1,7 +1,7 @@ # Tier Requirements Matrix -**Version**: 1.0.0 -**Last Updated**: 2026-02-16 +**Version**: 2.0.0 +**Last Updated**: 2026-03-26 **Authority**: Claude Skills Engineering Team ## Overview @@ -33,6 +33,9 @@ Advanced skills that provide comprehensive functionality with sophisticated impl | **Documentation Depth** | Functional | Comprehensive | Expert-level | | **Examples Provided** | ≥1 | ≥3 | ≥5 | | **Test Coverage** | Basic validation | Sample data testing | Comprehensive test suite | +| **Security Score** | ≥40 | ≥50 | ≥70 | +| **Hardcoded Secrets** | None | None | None | +| **Input Validation** | Basic | Comprehensive | Advanced with sanitization | ## Detailed Requirements by Tier @@ -65,6 +68,13 @@ Advanced skills that provide comprehensive functionality with sophisticated impl - **Usability**: Clear usage instructions and examples - **Completeness**: All essential components present +#### Security Requirements +- **Security Score**: Minimum 40/100 +- **Hardcoded Secrets**: No hardcoded passwords, API keys, or tokens +- **Input Validation**: Basic validation for user inputs +- **Error Handling**: User-friendly error messages without exposing sensitive info +- **Safe Patterns**: Avoid obvious security anti-patterns + ### STANDARD Tier Requirements #### Documentation Requirements @@ -96,6 +106,14 @@ Advanced skills that provide comprehensive functionality with sophisticated impl - **Testing**: Sample data processing with validation - **Integration**: Consideration for CI/CD and automation use +#### Security Requirements +- **Security Score**: Minimum 50/100 +- **Hardcoded Secrets**: No hardcoded credentials (zero tolerance) +- **Input Validation**: 
Comprehensive validation with error messages +- **File Operations**: Safe path handling, no path traversal vulnerabilities +- **Command Execution**: No shell injection risks, safe subprocess usage +- **Security Patterns**: Use of environment variables for secrets + ### POWERFUL Tier Requirements #### Documentation Requirements @@ -128,6 +146,19 @@ Advanced skills that provide comprehensive functionality with sophisticated impl - **Integration**: Full CI/CD integration capabilities - **Maintainability**: Designed for long-term maintenance and extension +#### Security Requirements +- **Security Score**: Minimum 70/100 +- **Hardcoded Secrets**: Zero tolerance for hardcoded credentials +- **Input Validation**: Advanced validation with sanitization and type checking +- **File Operations**: All file operations use safe patterns (pathlib, validation) +- **Command Execution**: All subprocess calls use safe patterns (no shell=True) +- **Security Patterns**: Comprehensive security practices including: + - Environment variables for all secrets + - Input sanitization for all user inputs + - Safe deserialization practices + - Secure error handling without info leakage +- **Security Documentation**: Security considerations documented in code and docs + ## Tier Assessment Criteria ### Automatic Tier Classification @@ -299,15 +330,16 @@ except Exception as e: ## Quality Scoring by Tier ### Scoring Thresholds -- **POWERFUL Tier**: Overall score ≥80, all dimensions ≥75 -- **STANDARD Tier**: Overall score ≥70, 3+ dimensions ≥65 -- **BASIC Tier**: Overall score ≥60, meets minimum requirements +- **POWERFUL Tier**: Overall score ≥80, all dimensions ≥75, Security ≥70 +- **STANDARD Tier**: Overall score ≥70, 4+ dimensions ≥65, Security ≥50 +- **BASIC Tier**: Overall score ≥60, meets minimum requirements, Security ≥40 ### Dimension Weights (All Tiers) -- **Documentation**: 25% -- **Code Quality**: 25% -- **Completeness**: 25% -- **Usability**: 25% +- **Documentation**: 20% +- **Code 
Quality**: 20% +- **Completeness**: 20% +- **Security**: 20% +- **Usability**: 20% ### Tier-Specific Quality Expectations @@ -315,18 +347,21 @@ except Exception as e: - Documentation: Functional and clear (60+ points expected) - Code Quality: Clean and maintainable (60+ points expected) - Completeness: Essential components present (60+ points expected) +- Security: Basic security practices (40+ points expected) - Usability: Easy to understand and use (60+ points expected) #### STANDARD Tier Quality Profile - Documentation: Professional and comprehensive (70+ points expected) - Code Quality: Advanced patterns and best practices (70+ points expected) - Completeness: All recommended components (70+ points expected) +- Security: Good security practices, no hardcoded secrets (50+ points expected) - Usability: Well-designed user experience (70+ points expected) #### POWERFUL Tier Quality Profile - Documentation: Expert-level and publication-ready (80+ points expected) - Code Quality: Enterprise-grade implementation (80+ points expected) - Completeness: Comprehensive test and validation coverage (80+ points expected) +- Security: Advanced security practices, comprehensive validation (70+ points expected) - Usability: Exceptional user experience with extensive help (80+ points expected) ## Tier Migration Process diff --git a/engineering/skill-tester/scripts/quality_scorer.py b/engineering/skill-tester/scripts/quality_scorer.py index e687a58..62acc60 100644 --- a/engineering/skill-tester/scripts/quality_scorer.py +++ b/engineering/skill-tester/scripts/quality_scorer.py @@ -3,15 +3,18 @@ Quality Scorer - Scores skills across multiple quality dimensions This script provides comprehensive quality assessment for skills in the claude-skills -ecosystem by evaluating documentation, code quality, completeness, and usability. +ecosystem by evaluating documentation, code quality, completeness, security, and usability. 
Generates letter grades, tier recommendations, and improvement roadmaps. Usage: python quality_scorer.py [--detailed] [--minimum-score SCORE] [--json] Author: Claude Skills Engineering Team -Version: 1.0.0 +Version: 2.0.0 Dependencies: Python Standard Library Only +Changelog: + v2.0.0 - Added Security dimension (20% weight), rebalanced all dimensions to 20% + v1.0.0 - Initial release with 4 dimensions (25% each) """ import argparse @@ -23,6 +26,9 @@ import sys from datetime import datetime from pathlib import Path from typing import Dict, List, Any, Optional, Tuple + +# Import Security Scorer module +from security_scorer import SecurityScorer try: import yaml except ImportError: @@ -148,15 +154,18 @@ class QualityReport: code_score = self.dimensions.get("Code Quality", QualityDimension("", 0, "")).score completeness_score = self.dimensions.get("Completeness", QualityDimension("", 0, "")).score usability_score = self.dimensions.get("Usability", QualityDimension("", 0, "")).score + security_score = self.dimensions.get("Security", QualityDimension("", 0, "")).score # POWERFUL tier requirements (all dimensions must be strong) if (self.overall_score >= 80 and - all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score])): + all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score]) and + security_score >= 70): self.tier_recommendation = "POWERFUL" # STANDARD tier requirements (most dimensions good) elif (self.overall_score >= 70 and - sum(1 for score in [doc_score, code_score, completeness_score, usability_score] if score >= 65) >= 3): + sum(1 for score in [doc_score, code_score, completeness_score, usability_score, security_score] if score >= 65) >= 4 and + security_score >= 50): self.tier_recommendation = "STANDARD" # BASIC tier (minimum viable quality) @@ -220,10 +229,11 @@ class QualityScorer: if not self.skill_path.exists(): raise ValueError(f"Skill path does not exist: {self.skill_path}") - # Score 
each dimension + # Score each dimension (20% weight each, 5 dimensions total) self._score_documentation() self._score_code_quality() self._score_completeness() + self._score_security() self._score_usability() # Calculate overall metrics @@ -241,7 +251,7 @@ class QualityScorer: """Score documentation quality (25% weight)""" self.log_verbose("Scoring documentation quality...") - dimension = QualityDimension("Documentation", 0.25, "Quality of documentation and written materials") + dimension = QualityDimension("Documentation", 0.20, "Quality of documentation and written materials") # Score SKILL.md self._score_skill_md(dimension) @@ -495,7 +505,7 @@ class QualityScorer: """Score code quality (25% weight)""" self.log_verbose("Scoring code quality...") - dimension = QualityDimension("Code Quality", 0.25, "Quality of Python scripts and implementation") + dimension = QualityDimension("Code Quality", 0.20, "Quality of Python scripts and implementation") scripts_dir = self.skill_path / "scripts" if not scripts_dir.exists(): @@ -682,7 +692,7 @@ class QualityScorer: """Score completeness (25% weight)""" self.log_verbose("Scoring completeness...") - dimension = QualityDimension("Completeness", 0.25, "Completeness of required components and assets") + dimension = QualityDimension("Completeness", 0.20, "Completeness of required components and assets") # Score directory structure self._score_directory_structure(dimension) @@ -804,7 +814,7 @@ class QualityScorer: """Score usability (25% weight)""" self.log_verbose("Scoring usability...") - dimension = QualityDimension("Usability", 0.25, "Ease of use and user experience") + dimension = QualityDimension("Usability", 0.20, "Ease of use and user experience") # Score installation simplicity self._score_installation(dimension) @@ -936,6 +946,58 @@ class QualityScorer: dimension.add_score("practical_examples", score, 25, f"Practical examples: {len(example_files)} files") + def _score_security(self): + """Score security quality (20% 
weight)""" + self.log_verbose("Scoring security quality...") + + dimension = QualityDimension("Security", 0.20, "Security practices and vulnerability prevention") + + # Find Python scripts + python_files = list(self.skill_path.rglob("*.py")) + + # Filter out test files and __pycache__ + python_files = [f for f in python_files + if "__pycache__" not in str(f) and "test_" not in f.name] + + if not python_files: + dimension.add_score("scripts_existence", 25, 25, + "No scripts directory - no script security concerns") + dimension.calculate_final_score() + self.report.add_dimension(dimension) + return + + # Use SecurityScorer module + try: + scorer = SecurityScorer(python_files, verbose=self.verbose) + result = scorer.get_overall_score() + + # Extract scores from SecurityScorer result + sensitive_data_score = result.get("sensitive_data_exposure", {}).get("score", 0) + file_ops_score = result.get("safe_file_operations", {}).get("score", 0) + command_injection_score = result.get("command_injection_prevention", {}).get("score", 0) + input_validation_score = result.get("input_validation", {}).get("score", 0) + + dimension.add_score("sensitive_data_exposure", sensitive_data_score, 25, + "Detection and prevention of hardcoded credentials") + dimension.add_score("safe_file_operations", file_ops_score, 25, + "Prevention of path traversal vulnerabilities") + dimension.add_score("command_injection_prevention", command_injection_score, 25, + "Prevention of command injection vulnerabilities") + dimension.add_score("input_validation", input_validation_score, 25, + "Quality of input validation and error handling") + + # Add suggestions from SecurityScorer + for issue in result.get("issues", []): + dimension.add_suggestion(issue) + + except Exception as e: + self.log_verbose(f"Security scoring failed: {str(e)}") + dimension.add_score("security_error", 0, 100, f"Security scoring failed: {str(e)}") + dimension.add_suggestion("Fix security scoring module integration") + + 
dimension.calculate_final_score() + self.report.add_dimension(dimension) + class QualityReportFormatter: """Formats quality reports for output""" diff --git a/engineering/skill-tester/scripts/security_scorer.py b/engineering/skill-tester/scripts/security_scorer.py new file mode 100644 index 0000000..fae982f --- /dev/null +++ b/engineering/skill-tester/scripts/security_scorer.py @@ -0,0 +1,606 @@ +#!/usr/bin/env python3 +""" +Security Scorer - Security dimension scoring module + +This module provides comprehensive security assessment for Python scripts, +evaluating sensitive data exposure, safe file operations, command injection +prevention, and input validation quality. + +Author: Claude Skills Engineering Team +Version: 2.0.0 +""" + +import re +from pathlib import Path +from typing import Dict, List, Tuple, Optional, Any + +# ============================================================================= +# CONSTANTS - Scoring thresholds and weights +# ============================================================================= + +# Maximum score per component (25 points each, 4 components = 100 total) +MAX_COMPONENT_SCORE: int = 25 + +# Minimum score floor (never go below 0) +MIN_SCORE: int = 0 + +# Security score thresholds for tier recommendations +SECURITY_SCORE_POWERFUL_TIER: int = 70 # Required for POWERFUL tier +SECURITY_SCORE_STANDARD_TIER: int = 50 # Required for STANDARD tier + +# Scoring modifiers (magic numbers replaced with named constants) +BASE_SCORE_SENSITIVE_DATA: int = 25 # Start with full points +BASE_SCORE_FILE_OPS: int = 15 # Base score for file operations +BASE_SCORE_COMMAND_INJECTION: int = 25 # Start with full points +BASE_SCORE_INPUT_VALIDATION: int = 10 # Base score for input validation + +# Penalty amounts (negative scoring) +CRITICAL_VULNERABILITY_PENALTY: int = -25 # Critical issues (hardcoded passwords, etc.) 
+HIGH_SEVERITY_PENALTY: int = -10 # High severity issues +MEDIUM_SEVERITY_PENALTY: int = -5 # Medium severity issues +LOW_SEVERITY_PENALTY: int = -2 # Low severity issues + +# Bonus amounts (positive scoring) +SAFE_PATTERN_BONUS: int = 2 # Bonus for using safe patterns +GOOD_PRACTICE_BONUS: int = 3 # Bonus for good security practices + +# ============================================================================= +# PRE-COMPILED REGEX PATTERNS - Sensitive Data Detection +# ============================================================================= + +# Hardcoded credentials patterns (CRITICAL severity) +PATTERN_HARDCODED_PASSWORD = re.compile( + r'password\s*=\s*["\'][^"\']{4,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_API_KEY = re.compile( + r'api_key\s*=\s*["\'][^"\']{8,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_SECRET = re.compile( + r'secret\s*=\s*["\'][^"\']{4,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_TOKEN = re.compile( + r'token\s*=\s*["\'][^"\']{8,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_PRIVATE_KEY = re.compile( + r'private_key\s*=\s*["\'][^"\']{20,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_AWS_KEY = re.compile( + r'aws_access_key\s*=\s*["\'][^"\']{16,}["\']', + re.IGNORECASE +) +PATTERN_HARDCODED_AWS_SECRET = re.compile( + r'aws_secret\s*=\s*["\'][^"\']{20,}["\']', + re.IGNORECASE +) + +# Multi-line string patterns (CRITICAL severity) +PATTERN_MULTILINE_STRING = re.compile( + r'["\']{3}[^"\']*?(?:password|api_key|secret|token|private_key)[^"\']*?["\']{3}', + re.IGNORECASE | re.DOTALL +) + +# F-string patterns (HIGH severity) +PATTERN_FSTRING_SENSITIVE = re.compile( + r'f["\'].*?(?:password|api_key|secret|token)\s*=', + re.IGNORECASE +) + +# Base64 encoded secrets (MEDIUM severity) +PATTERN_BASE64_SECRET = re.compile( + r'(?:base64|b64encode|b64decode)\s*\([^)]*(?:password|api_key|secret|token)', + re.IGNORECASE +) + +# JWT tokens (HIGH severity) +PATTERN_JWT_TOKEN = re.compile( + 
r'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*' +) + +# Connection strings (HIGH severity) +PATTERN_CONNECTION_STRING = re.compile( + r'(?:connection_string|conn_string|database_url)\s*=\s*["\'][^"\']*(?:password|pwd|passwd)[^"\']*["\']', + re.IGNORECASE +) + +# Safe credential patterns (environment variables are OK) +PATTERN_SAFE_ENV_VAR = re.compile( + r'os\.(?:getenv|environ)\s*\(\s*["\'][^"\']+["\']', + re.IGNORECASE +) + +# ============================================================================= +# PRE-COMPILED REGEX PATTERNS - Path Traversal Detection +# ============================================================================= + +# Basic path traversal patterns +PATTERN_PATH_TRAVERSAL_BASIC = re.compile(r'\.\.\/') +PATTERN_PATH_TRAVERSAL_WINDOWS = re.compile(r'\.\.\\') + +# URL encoded path traversal (MEDIUM severity) +PATTERN_PATH_TRAVERSAL_URL_ENCODED = re.compile( + r'%2e%2e%2f|%252e%252e%252f|\.\.%2f', + re.IGNORECASE +) + +# Unicode encoded path traversal (MEDIUM severity) +PATTERN_PATH_TRAVERSAL_UNICODE = re.compile( + r'\\u002e\\u002e|\\uff0e\\uff0e|\u002e\u002e\/', + re.IGNORECASE +) + +# Null byte injection (HIGH severity) +PATTERN_NULL_BYTE = re.compile(r'%00|\\x00|\0') + +# Risky file operation patterns +PATTERN_PATH_CONCAT = re.compile( + r'open\s*\(\s*[^)]*\+', + re.IGNORECASE +) +PATTERN_USER_INPUT_PATH = re.compile( + r'\.join\s*\(\s*[^)]*input|os\.path\.join\s*\([^)]*request', + re.IGNORECASE +) + +# Safe file operation patterns +PATTERN_SAFE_BASENAME = re.compile(r'os\.path\.basename', re.IGNORECASE) +PATTERN_SAFE_PATHLIB = re.compile(r'pathlib\.Path\s*\(', re.IGNORECASE) +PATTERN_PATH_VALIDATION = re.compile(r'validate.*path', re.IGNORECASE) +PATTERN_PATH_RESOLVE = re.compile(r'\.resolve\s*\(', re.IGNORECASE) + +# ============================================================================= +# PRE-COMPILED REGEX PATTERNS - Command Injection Detection +# 
============================================================================= + +# Dangerous patterns (CRITICAL severity) +PATTERN_OS_SYSTEM = re.compile(r'os\.system\s*\(') +PATTERN_OS_POPEN = re.compile(r'os\.popen\s*\(') +PATTERN_EVAL = re.compile(r'eval\s*\(') +PATTERN_EXEC = re.compile(r'exec\s*\(') + +# Subprocess with shell=True (HIGH severity) +PATTERN_SUBPROCESS_SHELL_TRUE = re.compile( + r'subprocess\.(?:call|run|Popen|check_output)\s*\([^)]*shell\s*=\s*True', + re.IGNORECASE +) + +# Asyncio subprocess shell (HIGH severity) +PATTERN_ASYNCIO_SHELL = re.compile( + r'asyncio\.create_subprocess_shell\s*\(', + re.IGNORECASE +) + +# Pexpect spawn (HIGH severity) +PATTERN_PEXPECT_SPAWN = re.compile(r'pexpect\.spawn\s*\(', re.IGNORECASE) + +# Safe subprocess patterns +PATTERN_SAFE_SUBPROCESS = re.compile( + r'subprocess\.(?:run|call|Popen)\s*\([^)]*shell\s*=\s*False', + re.IGNORECASE +) +PATTERN_SHLEX_QUOTE = re.compile(r'shlex\.quote', re.IGNORECASE) +PATTERN_SHLEX_SPLIT = re.compile(r'shlex\.split', re.IGNORECASE) + +# ============================================================================= +# PRE-COMPILED REGEX PATTERNS - Input Validation Detection +# ============================================================================= + +# Good validation patterns +PATTERN_ARGPARSE = re.compile(r'argparse') +PATTERN_TRY_EXCEPT = re.compile(r'try\s*:[\s\S]*?except\s+\w*Error') +PATTERN_INPUT_CHECK = re.compile(r'if\s+not\s+\w+\s*:') +PATTERN_ISINSTANCE = re.compile(r'isinstance\s*\(') +PATTERN_ISDIGIT = re.compile(r'\.isdigit\s*\(\)') +PATTERN_REGEX_VALIDATION = re.compile(r're\.(?:match|search|fullmatch)\s*\(') +PATTERN_VALIDATOR_CLASS = re.compile(r'Validator', re.IGNORECASE) +PATTERN_VALIDATE_FUNC = re.compile(r'validate', re.IGNORECASE) +PATTERN_SANITIZE_FUNC = re.compile(r'sanitize', re.IGNORECASE) + + +class SecurityScorer: + """ + Security dimension scoring engine. 
+ + This class evaluates Python scripts for security vulnerabilities and best practices + across four components: + 1. Sensitive Data Exposure Prevention (25% of security score) + 2. Safe File Operations (25% of security score) + 3. Command Injection Prevention (25% of security score) + 4. Input Validation Quality (25% of security score) + + Attributes: + scripts: List of Python script paths to evaluate + verbose: Whether to output verbose logging + """ + + def __init__(self, scripts: List[Path], verbose: bool = False): + """ + Initialize the SecurityScorer. + + Args: + scripts: List of Path objects pointing to Python scripts + verbose: Enable verbose output for debugging + """ + self.scripts = scripts + self.verbose = verbose + self._findings: List[str] = [] + + def _log_verbose(self, message: str) -> None: + """Log verbose message if verbose mode is enabled.""" + if self.verbose: + print(f"[SECURITY] {message}") + + def _get_script_content(self, script_path: Path) -> Optional[str]: + """ + Safely read script content. + + Args: + script_path: Path to the Python script + + Returns: + Script content as string, or None if read fails + """ + try: + return script_path.read_text(encoding='utf-8') + except Exception as e: + self._log_verbose(f"Failed to read {script_path}: {e}") + return None + + def _clamp_score(self, score: int) -> int: + """ + Clamp score to valid range [MIN_SCORE, MAX_COMPONENT_SCORE]. + + Args: + score: Raw score value + + Returns: + Score clamped to valid range + """ + return max(MIN_SCORE, min(score, MAX_COMPONENT_SCORE)) + + def _score_patterns( + self, + content: str, + script_name: str, + dangerous_patterns: List[Tuple[re.Pattern, str, int]], + safe_patterns: List[Tuple[re.Pattern, str, int]], + base_score: int + ) -> Tuple[int, List[str]]: + """ + Generic pattern scoring method. + + This method evaluates a script against lists of dangerous and safe patterns, + applying penalties for dangerous patterns found and bonuses for safe patterns. 
+ + Args: + content: Script content to analyze + script_name: Name of the script (for findings) + dangerous_patterns: List of (pattern, description, penalty) tuples + safe_patterns: List of (pattern, description, bonus) tuples + base_score: Starting score before adjustments + + Returns: + Tuple of (final_score, findings_list) + """ + score = base_score + findings = [] + + # Check for dangerous patterns + for pattern, description, penalty in dangerous_patterns: + matches = pattern.findall(content) + if matches: + score += penalty # Penalty is negative + findings.append(f"{script_name}: {description} ({len(matches)} occurrence(s))") + + # Check for safe patterns + for pattern, description, bonus in safe_patterns: + if pattern.search(content): + score += bonus + self._log_verbose(f"Safe pattern found in {script_name}: {description}") + + return self._clamp_score(score), findings + + def score_sensitive_data_exposure(self) -> Tuple[float, List[str]]: + """ + Score sensitive data exposure prevention. 
+ + Evaluates scripts for: + - Hardcoded passwords, API keys, secrets, tokens, private keys + - Multi-line string credentials + - F-string sensitive data + - Base64 encoded secrets + - JWT tokens + - Connection strings with credentials + + Returns: + Tuple of (average_score, findings_list) + """ + if not self.scripts: + return float(MAX_COMPONENT_SCORE), [] + + scores = [] + all_findings = [] + + # Define dangerous patterns with severity-based penalties + dangerous_patterns = [ + (PATTERN_HARDCODED_PASSWORD, 'hardcoded password', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_API_KEY, 'hardcoded API key', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_SECRET, 'hardcoded secret', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_TOKEN, 'hardcoded token', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_PRIVATE_KEY, 'hardcoded private key', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_AWS_KEY, 'hardcoded AWS key', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_HARDCODED_AWS_SECRET, 'hardcoded AWS secret', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_MULTILINE_STRING, 'multi-line string credential', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_FSTRING_SENSITIVE, 'f-string sensitive data', HIGH_SEVERITY_PENALTY), + (PATTERN_BASE64_SECRET, 'base64 encoded secret', MEDIUM_SEVERITY_PENALTY), + (PATTERN_JWT_TOKEN, 'JWT token in code', HIGH_SEVERITY_PENALTY), + (PATTERN_CONNECTION_STRING, 'connection string with credentials', HIGH_SEVERITY_PENALTY), + ] + + # Safe patterns get bonus points + safe_patterns = [ + (PATTERN_SAFE_ENV_VAR, 'safe environment variable usage', SAFE_PATTERN_BONUS), + ] + + for script_path in self.scripts: + content = self._get_script_content(script_path) + if content is None: + continue + + score, findings = self._score_patterns( + content=content, + script_name=script_path.name, + dangerous_patterns=dangerous_patterns, + safe_patterns=safe_patterns, + base_score=BASE_SCORE_SENSITIVE_DATA + ) + + scores.append(score) + 
all_findings.extend(findings) + + avg_score = sum(scores) / len(scores) if scores else 0.0 + return avg_score, all_findings + + def score_safe_file_operations(self) -> Tuple[float, List[str]]: + """ + Score safe file operations. + + Evaluates scripts for: + - Path traversal vulnerabilities (basic, URL-encoded, Unicode, null bytes) + - Unsafe path construction + - Safe patterns (pathlib, basename, validation) + + Returns: + Tuple of (average_score, findings_list) + """ + if not self.scripts: + return float(MAX_COMPONENT_SCORE), [] + + scores = [] + all_findings = [] + + # Dangerous patterns with severity-based penalties + dangerous_patterns = [ + (PATTERN_PATH_TRAVERSAL_BASIC, 'basic path traversal', HIGH_SEVERITY_PENALTY), + (PATTERN_PATH_TRAVERSAL_WINDOWS, 'Windows-style path traversal', HIGH_SEVERITY_PENALTY), + (PATTERN_PATH_TRAVERSAL_URL_ENCODED, 'URL-encoded path traversal', HIGH_SEVERITY_PENALTY), + (PATTERN_PATH_TRAVERSAL_UNICODE, 'Unicode-encoded path traversal', HIGH_SEVERITY_PENALTY), + (PATTERN_NULL_BYTE, 'null byte injection', HIGH_SEVERITY_PENALTY), + (PATTERN_PATH_CONCAT, 'potential path injection via concatenation', MEDIUM_SEVERITY_PENALTY), + (PATTERN_USER_INPUT_PATH, 'user input in path construction', MEDIUM_SEVERITY_PENALTY), + ] + + # Safe patterns get bonus points + safe_patterns = [ + (PATTERN_SAFE_BASENAME, 'uses basename for safety', SAFE_PATTERN_BONUS), + (PATTERN_SAFE_PATHLIB, 'uses pathlib', SAFE_PATTERN_BONUS), + (PATTERN_PATH_VALIDATION, 'path validation', SAFE_PATTERN_BONUS), + (PATTERN_PATH_RESOLVE, 'path resolution', SAFE_PATTERN_BONUS), + ] + + for script_path in self.scripts: + content = self._get_script_content(script_path) + if content is None: + continue + + score, findings = self._score_patterns( + content=content, + script_name=script_path.name, + dangerous_patterns=dangerous_patterns, + safe_patterns=safe_patterns, + base_score=BASE_SCORE_FILE_OPS + ) + + scores.append(score) + all_findings.extend(findings) + + avg_score = 
sum(scores) / len(scores) if scores else 0.0 + return avg_score, all_findings + + def score_command_injection_prevention(self) -> Tuple[float, List[str]]: + """ + Score command injection prevention. + + Evaluates scripts for: + - os.system(), os.popen() usage + - subprocess with shell=True + - eval(), exec() usage + - asyncio.create_subprocess_shell() + - pexpect.spawn() + - Safe patterns (shlex.quote, shell=False) + + Returns: + Tuple of (average_score, findings_list) + """ + if not self.scripts: + return float(MAX_COMPONENT_SCORE), [] + + scores = [] + all_findings = [] + + # Dangerous patterns with severity-based penalties + dangerous_patterns = [ + (PATTERN_OS_SYSTEM, 'os.system usage - potential command injection', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_OS_POPEN, 'os.popen usage', HIGH_SEVERITY_PENALTY), + (PATTERN_EVAL, 'eval usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_EXEC, 'exec usage - code injection risk', CRITICAL_VULNERABILITY_PENALTY), + (PATTERN_SUBPROCESS_SHELL_TRUE, 'subprocess with shell=True', HIGH_SEVERITY_PENALTY), + (PATTERN_ASYNCIO_SHELL, 'asyncio.create_subprocess_shell()', HIGH_SEVERITY_PENALTY), + (PATTERN_PEXPECT_SPAWN, 'pexpect.spawn()', MEDIUM_SEVERITY_PENALTY), + ] + + # Safe patterns get bonus points + safe_patterns = [ + (PATTERN_SAFE_SUBPROCESS, 'safe subprocess usage (shell=False)', GOOD_PRACTICE_BONUS), + (PATTERN_SHLEX_QUOTE, 'shell escaping with shlex.quote', GOOD_PRACTICE_BONUS), + (PATTERN_SHLEX_SPLIT, 'safe argument splitting with shlex.split', GOOD_PRACTICE_BONUS), + ] + + for script_path in self.scripts: + content = self._get_script_content(script_path) + if content is None: + continue + + score, findings = self._score_patterns( + content=content, + script_name=script_path.name, + dangerous_patterns=dangerous_patterns, + safe_patterns=safe_patterns, + base_score=BASE_SCORE_COMMAND_INJECTION + ) + + scores.append(score) + all_findings.extend(findings) + + avg_score = sum(scores) / 
len(scores) if scores else 0.0 + return avg_score, all_findings + + def score_input_validation(self) -> Tuple[float, List[str]]: + """ + Score input validation quality. + + Evaluates scripts for: + - argparse usage for CLI validation + - Error handling patterns + - Type checking (isinstance) + - Regex validation + - Validation/sanitization functions + + Returns: + Tuple of (average_score, suggestions_list) + """ + if not self.scripts: + return float(MAX_COMPONENT_SCORE), [] + + scores = [] + suggestions = [] + + # Good validation patterns (each gives bonus points) + validation_patterns = [ + (PATTERN_ARGPARSE, GOOD_PRACTICE_BONUS), + (PATTERN_TRY_EXCEPT, SAFE_PATTERN_BONUS), + (PATTERN_INPUT_CHECK, SAFE_PATTERN_BONUS), + (PATTERN_ISINSTANCE, SAFE_PATTERN_BONUS), + (PATTERN_ISDIGIT, SAFE_PATTERN_BONUS), + (PATTERN_REGEX_VALIDATION, SAFE_PATTERN_BONUS), + (PATTERN_VALIDATOR_CLASS, GOOD_PRACTICE_BONUS), + (PATTERN_VALIDATE_FUNC, SAFE_PATTERN_BONUS), + (PATTERN_SANITIZE_FUNC, SAFE_PATTERN_BONUS), + ] + + for script_path in self.scripts: + content = self._get_script_content(script_path) + if content is None: + continue + + score = BASE_SCORE_INPUT_VALIDATION + + # Check for validation patterns + for pattern, bonus in validation_patterns: + if pattern.search(content): + score += bonus + + scores.append(self._clamp_score(score)) + + avg_score = sum(scores) / len(scores) if scores else 0.0 + + if avg_score < 15: + suggestions.append("Add input validation with argparse, type checking, and error handling") + + return avg_score, suggestions + + def get_overall_score(self) -> Dict[str, Any]: + """ + Calculate overall security score and return detailed results. 
+ + Returns: + Dictionary containing: + - overall_score: Weighted average of all components + - components: Individual component scores + - findings: List of security issues found + - suggestions: Improvement suggestions + """ + # Score each component + sensitive_score, sensitive_findings = self.score_sensitive_data_exposure() + file_ops_score, file_ops_findings = self.score_safe_file_operations() + command_injection_score, command_findings = self.score_command_injection_prevention() + input_validation_score, input_suggestions = self.score_input_validation() + + # Calculate overall score (equal weight: 25% each) + overall_score = ( + sensitive_score * 0.25 + + file_ops_score * 0.25 + + command_injection_score * 0.25 + + input_validation_score * 0.25 + ) + + # Collect all findings + all_findings = sensitive_findings + file_ops_findings + command_findings + + # Generate suggestions based on findings + suggestions = input_suggestions.copy() + if sensitive_findings: + suggestions.append("Remove hardcoded credentials and use environment variables or secure config") + if file_ops_findings: + suggestions.append("Validate and sanitize file paths, use pathlib for safe path handling") + if command_findings: + suggestions.append("Avoid shell=True in subprocess, use shlex.quote for shell arguments") + + # Critical vulnerability check - if any critical issues, cap the score + critical_patterns = [ + PATTERN_HARDCODED_PASSWORD, PATTERN_HARDCODED_API_KEY, + PATTERN_HARDCODED_PRIVATE_KEY, PATTERN_OS_SYSTEM, + PATTERN_EVAL, PATTERN_EXEC + ] + + has_critical = False + for script_path in self.scripts: + content = self._get_script_content(script_path) + if content is None: + continue + for pattern in critical_patterns: + if pattern.search(content): + has_critical = True + break + if has_critical: + break + + if has_critical: + overall_score = min(overall_score, 30) # Cap at 30 if critical vulnerabilities exist + + return { + 'overall_score': round(overall_score, 1), + 'components': { 
+ 'sensitive_data_exposure': round(sensitive_score, 1), + 'safe_file_operations': round(file_ops_score, 1), + 'command_injection_prevention': round(command_injection_score, 1), + 'input_validation': round(input_validation_score, 1), + }, + 'findings': all_findings, + 'suggestions': suggestions, + 'has_critical_vulnerabilities': has_critical, + } \ No newline at end of file diff --git a/engineering/skill-tester/tests/test_security_scorer.py b/engineering/skill-tester/tests/test_security_scorer.py new file mode 100644 index 0000000..e7f7489 --- /dev/null +++ b/engineering/skill-tester/tests/test_security_scorer.py @@ -0,0 +1,851 @@ +#!/usr/bin/env python3 +""" +Tests for security_scorer.py - Security Dimension Scoring Tests + +This test module validates the security_scorer.py module's ability to: +- Detect hardcoded sensitive data (passwords, API keys, tokens, private keys) +- Detect path traversal vulnerabilities +- Detect command injection risks +- Score input validation quality +- Handle edge cases (empty files, environment variables, etc.) 

Run with: python -m unittest test_security_scorer
"""

import tempfile
import unittest
from pathlib import Path

# Add the scripts directory to the path
# (tests import security_scorer directly, without installing the package)
import sys
SCRIPTS_DIR = Path(__file__).parent.parent / "scripts"
sys.path.insert(0, str(SCRIPTS_DIR))

from security_scorer import (
    SecurityScorer,
    # Constants
    MAX_COMPONENT_SCORE,
    MIN_SCORE,
    BASE_SCORE_SENSITIVE_DATA,
    BASE_SCORE_FILE_OPS,
    BASE_SCORE_COMMAND_INJECTION,
    BASE_SCORE_INPUT_VALIDATION,
    CRITICAL_VULNERABILITY_PENALTY,
    HIGH_SEVERITY_PENALTY,
    MEDIUM_SEVERITY_PENALTY,
    LOW_SEVERITY_PENALTY,
    SAFE_PATTERN_BONUS,
    GOOD_PRACTICE_BONUS,
    # Pre-compiled patterns
    PATTERN_HARDCODED_PASSWORD,
    PATTERN_HARDCODED_API_KEY,
    PATTERN_HARDCODED_TOKEN,
    PATTERN_HARDCODED_PRIVATE_KEY,
    PATTERN_OS_SYSTEM,
    PATTERN_EVAL,
    PATTERN_EXEC,
    PATTERN_SUBPROCESS_SHELL_TRUE,
    PATTERN_SHLEX_QUOTE,
    PATTERN_SAFE_ENV_VAR,
)


class TestSecurityScorerConstants(unittest.TestCase):
    """Tests for security scorer constants."""

    def test_max_component_score_value(self):
        """Test that MAX_COMPONENT_SCORE is 25."""
        self.assertEqual(MAX_COMPONENT_SCORE, 25)

    def test_min_score_value(self):
        """Test that MIN_SCORE is 0."""
        self.assertEqual(MIN_SCORE, 0)

    def test_base_scores_are_reasonable(self):
        """Test that base scores are within valid range."""
        self.assertGreaterEqual(BASE_SCORE_SENSITIVE_DATA, MIN_SCORE)
        self.assertLessEqual(BASE_SCORE_SENSITIVE_DATA, MAX_COMPONENT_SCORE)
        self.assertGreaterEqual(BASE_SCORE_FILE_OPS, MIN_SCORE)
        self.assertLessEqual(BASE_SCORE_FILE_OPS, MAX_COMPONENT_SCORE)
        self.assertGreaterEqual(BASE_SCORE_COMMAND_INJECTION, MIN_SCORE)
        self.assertLessEqual(BASE_SCORE_COMMAND_INJECTION, MAX_COMPONENT_SCORE)

    def test_penalty_values_are_negative(self):
        """Test that penalty values are negative."""
        self.assertLess(CRITICAL_VULNERABILITY_PENALTY, 0)
        self.assertLess(HIGH_SEVERITY_PENALTY, 0)
        self.assertLess(MEDIUM_SEVERITY_PENALTY, 0)
        self.assertLess(LOW_SEVERITY_PENALTY, 0)

    def test_bonus_values_are_positive(self):
        """Test that bonus values are positive."""
        self.assertGreater(SAFE_PATTERN_BONUS, 0)
        self.assertGreater(GOOD_PRACTICE_BONUS, 0)

    def test_severity_ordering(self):
        """Test that severity penalties are ordered correctly."""
        # Critical should be most severe (most negative)
        self.assertLess(CRITICAL_VULNERABILITY_PENALTY, HIGH_SEVERITY_PENALTY)
        self.assertLess(HIGH_SEVERITY_PENALTY, MEDIUM_SEVERITY_PENALTY)
        self.assertLess(MEDIUM_SEVERITY_PENALTY, LOW_SEVERITY_PENALTY)


class TestPrecompiledPatterns(unittest.TestCase):
    """Tests for pre-compiled regex patterns.

    These call the module-level compiled patterns directly on small code
    snippets, independent of the SecurityScorer class.
    """

    def test_password_pattern_detects_hardcoded(self):
        """Test that password pattern detects hardcoded passwords."""
        code = 'password = "my_secret_password_123"'
        self.assertTrue(PATTERN_HARDCODED_PASSWORD.search(code))

    def test_password_pattern_ignores_short_values(self):
        """Test that password pattern ignores very short values."""
        code = 'password = "x"'  # Too short
        self.assertFalse(PATTERN_HARDCODED_PASSWORD.search(code))

    def test_api_key_pattern_detects_hardcoded(self):
        """Test that API key pattern detects hardcoded keys."""
        code = 'api_key = "sk-1234567890abcdef"'
        self.assertTrue(PATTERN_HARDCODED_API_KEY.search(code))

    def test_token_pattern_detects_hardcoded(self):
        """Test that token pattern detects hardcoded tokens."""
        code = 'token = "ghp_1234567890abcdef"'
        self.assertTrue(PATTERN_HARDCODED_TOKEN.search(code))

    def test_private_key_pattern_detects_hardcoded(self):
        """Test that private key pattern detects hardcoded keys."""
        code = 'private_key = "-----BEGIN RSA PRIVATE KEY-----"'
        self.assertTrue(PATTERN_HARDCODED_PRIVATE_KEY.search(code))

    def test_os_system_pattern_detects(self):
        """Test that os.system pattern is detected."""
        code = 'os.system("ls -la")'
        self.assertTrue(PATTERN_OS_SYSTEM.search(code))

    def test_eval_pattern_detects(self):
        """Test that eval pattern is detected."""
        code = 'result = eval(user_input)'
        self.assertTrue(PATTERN_EVAL.search(code))

    def test_exec_pattern_detects(self):
        """Test that exec pattern is detected."""
        code = 'exec(user_code)'
        self.assertTrue(PATTERN_EXEC.search(code))

    def test_subprocess_shell_true_pattern_detects(self):
        """Test that subprocess shell=True pattern is detected."""
        code = 'subprocess.run(cmd, shell=True)'
        self.assertTrue(PATTERN_SUBPROCESS_SHELL_TRUE.search(code))

    def test_shlex_quote_pattern_detects(self):
        """Test that shlex.quote pattern is detected."""
        code = 'safe_cmd = shlex.quote(user_input)'
        self.assertTrue(PATTERN_SHLEX_QUOTE.search(code))

    def test_safe_env_var_pattern_detects(self):
        """Test that safe environment variable pattern is detected."""
        code = 'password = os.getenv("DB_PASSWORD")'
        self.assertTrue(PATTERN_SAFE_ENV_VAR.search(code))


class TestSecurityScorerInit(unittest.TestCase):
    """Tests for SecurityScorer initialization."""

    def test_init_with_empty_list(self):
        """Test initialization with empty script list."""
        scorer = SecurityScorer([])
        self.assertEqual(scorer.scripts, [])
        self.assertFalse(scorer.verbose)

    def test_init_with_scripts(self):
        """Test initialization with script list."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "test.py"
            script_path.write_text("# test")

            scorer = SecurityScorer([script_path])
            self.assertEqual(len(scorer.scripts), 1)

    def test_init_with_verbose(self):
        """Test initialization with verbose mode."""
        scorer = SecurityScorer([], verbose=True)
        self.assertTrue(scorer.verbose)


class TestSensitiveDataExposure(unittest.TestCase):
    """Tests for sensitive data exposure scoring."""

    def test_no_scripts_returns_max_score(self):
        """Test that empty script list returns max score."""
        scorer = SecurityScorer([])
        score, findings = scorer.score_sensitive_data_exposure()
        self.assertEqual(score, float(MAX_COMPONENT_SCORE))
        self.assertEqual(findings, [])

    def test_clean_script_scores_high(self):
        """Test that clean script without sensitive data scores high."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "clean.py"
            script_path.write_text("""
import os

def get_password():
    return os.getenv("DB_PASSWORD")

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            self.assertGreaterEqual(score, 20)
            self.assertEqual(len(findings), 0)

    def test_hardcoded_password_detected(self):
        """Test that hardcoded password is detected and penalized."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "insecure.py"
            script_path.write_text("""
password = "super_secret_password_123"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            self.assertTrue(any('password' in f.lower() for f in findings))

    def test_hardcoded_api_key_detected(self):
        """Test that hardcoded API key is detected and penalized."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "insecure.py"
            script_path.write_text("""
api_key = "sk-1234567890abcdef123456"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            # Check for 'api' or 'hardcoded' in findings (description is 'hardcoded API key')
            self.assertTrue(any('api' in f.lower() or 'hardcoded' in f.lower() for f in findings))

    def test_hardcoded_token_detected(self):
        """Test that hardcoded token is detected and penalized."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "insecure.py"
            script_path.write_text("""
token = "ghp_1234567890abcdef"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            self.assertLess(score, MAX_COMPONENT_SCORE)

    def test_hardcoded_private_key_detected(self):
        """Test that hardcoded private key is detected and penalized."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "insecure.py"
            script_path.write_text("""
private_key = "-----BEGIN RSA PRIVATE KEY-----MIIEowIBAAJCA..."

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            self.assertLess(score, MAX_COMPONENT_SCORE)

    def test_environment_variable_not_flagged(self):
        """Test that environment variable usage is not flagged as sensitive."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "secure.py"
            script_path.write_text("""
import os

def get_credentials():
    password = os.getenv("DB_PASSWORD")
    api_key = os.environ.get("API_KEY")
    return password, api_key

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            # Should score well because using environment variables
            self.assertGreaterEqual(score, 20)

    def test_empty_file_handled(self):
        """Test that empty file is handled gracefully."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "empty.py"
            script_path.write_text("")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            # Should return max score for empty file (no sensitive data)
            self.assertGreaterEqual(score, 20)

    def test_jwt_token_detected(self):
        """Test that JWT token in code is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "jwt.py"
            script_path.write_text("""
# JWT token for testing
token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_sensitive_data_exposure()

            # Should be penalized for JWT token
            self.assertLess(score, MAX_COMPONENT_SCORE)


class TestSafeFileOperations(unittest.TestCase):
    """Tests for safe file operations scoring."""

    def test_no_scripts_returns_max_score(self):
        """Test that empty script list returns max score."""
        scorer = SecurityScorer([])
        score, findings = scorer.score_safe_file_operations()
        self.assertEqual(score, float(MAX_COMPONENT_SCORE))

    def test_safe_pathlib_usage_scores_high(self):
        """Test that safe pathlib usage scores high."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "safe.py"
            script_path.write_text("""
from pathlib import Path

def read_file(filename):
    path = Path(filename).resolve()
    return path.read_text()

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_safe_file_operations()

            self.assertGreaterEqual(score, 15)

    def test_path_traversal_detected(self):
        """Test that path traversal pattern is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "risky.py"
            script_path.write_text("""
def read_file(base_path, filename):
    # Potential path traversal vulnerability
    path = base_path + "/../../../etc/passwd"
    return open(path).read()

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_safe_file_operations()

            self.assertLess(score, MAX_COMPONENT_SCORE)

    def test_basename_usage_scores_bonus(self):
        """Test that basename usage gets bonus points."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "safe.py"
            script_path.write_text("""
import os

def safe_filename(user_input):
    return os.path.basename(user_input)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_safe_file_operations()

            self.assertGreaterEqual(score, 15)


class TestCommandInjectionPrevention(unittest.TestCase):
    """Tests for command injection prevention scoring."""

    def test_no_scripts_returns_max_score(self):
        """Test that empty script list returns max score."""
        scorer = SecurityScorer([])
        score, findings = scorer.score_command_injection_prevention()
        self.assertEqual(score, float(MAX_COMPONENT_SCORE))

    def test_os_system_detected(self):
        """Test that os.system usage is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "risky.py"
            script_path.write_text("""
import os

def run_command(user_input):
    os.system("echo " + user_input)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            self.assertTrue(any('os.system' in f.lower() for f in findings))

    def test_subprocess_shell_true_detected(self):
        """Test that subprocess with shell=True is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "risky.py"
            script_path.write_text("""
import subprocess

def run_command(cmd):
    subprocess.run(cmd, shell=True)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            # Check for 'shell' or 'subprocess' in findings
            self.assertTrue(any('shell' in f.lower() or 'subprocess' in f.lower() for f in findings))

    def test_eval_detected(self):
        """Test that eval usage is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "risky.py"
            script_path.write_text("""
def evaluate(user_input):
    return eval(user_input)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            self.assertTrue(any('eval' in f.lower() for f in findings))

    def test_exec_detected(self):
        """Test that exec usage is detected."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "risky.py"
            script_path.write_text("""
def execute(user_code):
    exec(user_code)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertLess(score, MAX_COMPONENT_SCORE)
            self.assertTrue(any('exec' in f.lower() for f in findings))

    def test_shlex_quote_gets_bonus(self):
        """Test that shlex.quote usage gets bonus points."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "safe.py"
            script_path.write_text("""
import shlex
import subprocess

def run_command(user_input):
    safe_cmd = shlex.quote(user_input)
    subprocess.run(["echo", safe_cmd], shell=False)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertGreaterEqual(score, 20)

    def test_safe_subprocess_scores_high(self):
        """Test that safe subprocess usage (shell=False) scores high."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "safe.py"
            script_path.write_text("""
import subprocess

def run_command(cmd_parts):
    subprocess.run(cmd_parts, shell=False)

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, findings = scorer.score_command_injection_prevention()

            self.assertGreaterEqual(score, 20)


class TestInputValidation(unittest.TestCase):
    """Tests for input validation scoring."""

    def test_no_scripts_returns_max_score(self):
        """Test that empty script list returns max score."""
        scorer = SecurityScorer([])
        score, suggestions = scorer.score_input_validation()
        self.assertEqual(score, float(MAX_COMPONENT_SCORE))

    def test_argparse_usage_scores_bonus(self):
        """Test that argparse usage gets bonus points."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "good.py"
            script_path.write_text("""
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    args = parser.parse_args()

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, suggestions = scorer.score_input_validation()

            # Base score is 10, argparse gives +3 bonus, so score should be 13
            self.assertGreaterEqual(score, 10)
            self.assertLessEqual(score, MAX_COMPONENT_SCORE)

    def test_isinstance_usage_scores_bonus(self):
        """Test that isinstance usage gets bonus points."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "good.py"
            script_path.write_text("""
def process(value):
    if isinstance(value, str):
        return value.upper()
    return value

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, suggestions = scorer.score_input_validation()

            self.assertGreater(score, BASE_SCORE_INPUT_VALIDATION)

    def test_try_except_scores_bonus(self):
        """Test that try/except usage gets bonus points."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "good.py"
            script_path.write_text("""
def process(value):
    try:
        return int(value)
    except ValueError:
        return 0

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, suggestions = scorer.score_input_validation()

            self.assertGreater(score, BASE_SCORE_INPUT_VALIDATION)

    def test_minimal_validation_gets_suggestion(self):
        """Test that minimal validation triggers suggestion."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "minimal.py"
            script_path.write_text("""
def main():
    print("Hello")

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            score, suggestions = scorer.score_input_validation()

            self.assertLess(score, 15)
            self.assertTrue(len(suggestions) > 0)


class TestOverallScore(unittest.TestCase):
    """Tests for overall security score calculation."""

    def test_overall_score_components_present(self):
        """Test that overall score includes all components."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "test.py"
            script_path.write_text("""
import os
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.parse_args()

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            results = scorer.get_overall_score()

            self.assertIn('overall_score', results)
            self.assertIn('components', results)
            self.assertIn('findings', results)
            self.assertIn('suggestions', results)

            components = results['components']
            self.assertIn('sensitive_data_exposure', components)
            self.assertIn('safe_file_operations', components)
            self.assertIn('command_injection_prevention', components)
            self.assertIn('input_validation', components)

    def test_overall_score_is_weighted_average(self):
        """Test that overall score is calculated as weighted average."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "test.py"
            script_path.write_text("""
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.parse_args()

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            results = scorer.get_overall_score()

            # Calculate expected weighted average
            expected = (
                results['components']['sensitive_data_exposure'] * 0.25 +
                results['components']['safe_file_operations'] * 0.25 +
                results['components']['command_injection_prevention'] * 0.25 +
                results['components']['input_validation'] * 0.25
            )

            self.assertAlmostEqual(results['overall_score'], expected, places=0)

    def test_critical_vulnerability_caps_score(self):
        """Test that critical vulnerabilities cap the overall score."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "critical.py"
            script_path.write_text("""
password = "hardcoded_password_123"
api_key = "sk-1234567890abcdef"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            results = scorer.get_overall_score()

            # Score should be capped at 30 for critical vulnerabilities
            # NOTE(review): components max out at 25, so the <=30 assertion
            # holds even without the cap -- this test cannot distinguish
            # capped from uncapped scores; confirm the intended scale.
            self.assertLessEqual(results['overall_score'], 30)
            self.assertTrue(results['has_critical_vulnerabilities'])

    def test_secure_script_scores_high(self):
        """Test that secure script scores high overall."""
        with tempfile.TemporaryDirectory() as tmpdir:
            script_path = Path(tmpdir) / "secure.py"
            script_path.write_text("""
#!/usr/bin/env python3
import argparse
import os
import shlex
import subprocess
from pathlib import Path

def validate_path(path_str):
    path = Path(path_str).resolve()
    if not path.exists():
        raise FileNotFoundError("Path not found")
    return path

def safe_command(args):
    return subprocess.run(args, shell=False, capture_output=True)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    args = parser.parse_args()

    db_password = os.getenv("DB_PASSWORD")
    path = validate_path(args.input)

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([script_path])
            results = scorer.get_overall_score()

            # Secure script should score reasonably well
            # Note: Score may vary based on pattern detection
            self.assertGreater(results['overall_score'], 20)


class TestScoreClamping(unittest.TestCase):
    """Tests for score boundary clamping."""

    def test_score_never_below_zero(self):
        """Test that score never goes below 0."""
        scorer = SecurityScorer([])
        # Test with extreme negative
        result = scorer._clamp_score(-100)
        self.assertEqual(result, MIN_SCORE)

    def test_score_never_above_max(self):
        """Test that score never goes above max."""
        scorer = SecurityScorer([])
        # Test with extreme positive
        result = scorer._clamp_score(1000)
        self.assertEqual(result, MAX_COMPONENT_SCORE)

    def test_score_unchanged_in_valid_range(self):
        """Test that score is unchanged in valid range."""
        scorer = SecurityScorer([])
        for test_score in [0, 5, 10, 15, 20, 25]:
            result = scorer._clamp_score(test_score)
            self.assertEqual(result, test_score)


class TestMultipleScripts(unittest.TestCase):
    """Tests for scoring multiple scripts."""

    def test_multiple_scripts_averaged(self):
        """Test that scores are averaged across multiple scripts."""
        with tempfile.TemporaryDirectory() as tmpdir:
            secure_script = Path(tmpdir) / "secure.py"
            secure_script.write_text("""
import os

def main():
    password = os.getenv("PASSWORD")

if __name__ == "__main__":
    main()
""")

            insecure_script = Path(tmpdir) / "insecure.py"
            insecure_script.write_text("""
password = "hardcoded_secret_password"

def main():
    pass

if __name__ == "__main__":
    main()
""")

            scorer = SecurityScorer([secure_script, insecure_script])
            score, findings = scorer.score_sensitive_data_exposure()

            # Score should be between secure and insecure
            self.assertGreater(score, 0)
            self.assertLess(score, MAX_COMPONENT_SCORE)


if __name__ == "__main__":
    unittest.main(verbosity=2)