fix(skill-tester): make Security dimension opt-in with --include-security flag
- Add --include-security flag to quality_scorer.py - Default: 4 dimensions × 25% (backward compatible) - With --include-security: 5 dimensions × 20% - Update tier recommendation logic for optional Security - Update documentation to reflect opt-in behavior This addresses the breaking change concern from PR review: the weight change from 25% to 20% would affect all existing audit baselines. The new opt-in approach preserves backward compatibility.
This commit is contained in:
@@ -13,7 +13,9 @@ Author: Claude Skills Engineering Team
|
||||
Version: 2.0.0
|
||||
Dependencies: Python Standard Library Only
|
||||
Changelog:
|
||||
v2.0.0 - Added Security dimension (20% weight), rebalanced all dimensions to 20%
|
||||
v2.0.0 - Added Security dimension (opt-in via --include-security flag)
|
||||
Default: 4 dimensions × 25% (backward compatible)
|
||||
With --include-security: 5 dimensions × 20%
|
||||
v1.0.0 - Initial release with 4 dimensions (25% each)
|
||||
"""
|
||||
|
||||
@@ -156,21 +158,35 @@ class QualityReport:
|
||||
usability_score = self.dimensions.get("Usability", QualityDimension("", 0, "")).score
|
||||
security_score = self.dimensions.get("Security", QualityDimension("", 0, "")).score
|
||||
|
||||
# POWERFUL tier requirements (all dimensions must be strong)
|
||||
if (self.overall_score >= 80 and
|
||||
all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score]) and
|
||||
security_score >= 70):
|
||||
self.tier_recommendation = "POWERFUL"
|
||||
|
||||
# STANDARD tier requirements (most dimensions good)
|
||||
elif (self.overall_score >= 70 and
|
||||
sum(1 for score in [doc_score, code_score, completeness_score, usability_score, security_score] if score >= 65) >= 4 and
|
||||
security_score >= 50):
|
||||
self.tier_recommendation = "STANDARD"
|
||||
|
||||
# BASIC tier (minimum viable quality)
|
||||
# Check if Security dimension is included
|
||||
has_security = "Security" in self.dimensions
|
||||
|
||||
# POWERFUL tier requirements
|
||||
if has_security:
|
||||
# With Security: all 5 dimensions must be strong
|
||||
if (self.overall_score >= 80 and
|
||||
all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score]) and
|
||||
security_score >= 70):
|
||||
self.tier_recommendation = "POWERFUL"
|
||||
|
||||
# STANDARD tier requirements (with Security)
|
||||
elif (self.overall_score >= 70 and
|
||||
sum(1 for score in [doc_score, code_score, completeness_score, usability_score, security_score] if score >= 65) >= 4 and
|
||||
security_score >= 50):
|
||||
self.tier_recommendation = "STANDARD"
|
||||
else:
|
||||
self.tier_recommendation = "BASIC"
|
||||
# Without Security: 4 dimensions must be strong
|
||||
if (self.overall_score >= 80 and
|
||||
all(score >= 75 for score in [doc_score, code_score, completeness_score, usability_score])):
|
||||
self.tier_recommendation = "POWERFUL"
|
||||
|
||||
# STANDARD tier requirements (without Security)
|
||||
elif (self.overall_score >= 70 and
|
||||
sum(1 for score in [doc_score, code_score, completeness_score, usability_score] if score >= 65) >= 3):
|
||||
self.tier_recommendation = "STANDARD"
|
||||
|
||||
# BASIC tier (minimum viable quality)
|
||||
# Falls through to BASIC if no other tier matched
|
||||
|
||||
def _generate_improvement_roadmap(self):
|
||||
"""Generate prioritized improvement suggestions"""
|
||||
@@ -209,10 +225,11 @@ class QualityReport:
|
||||
class QualityScorer:
|
||||
"""Main quality scoring engine"""
|
||||
|
||||
def __init__(self, skill_path: str, detailed: bool = False, verbose: bool = False):
|
||||
def __init__(self, skill_path: str, detailed: bool = False, verbose: bool = False, include_security: bool = False):
|
||||
self.skill_path = Path(skill_path).resolve()
|
||||
self.detailed = detailed
|
||||
self.verbose = verbose
|
||||
self.include_security = include_security
|
||||
self.report = QualityReport(str(self.skill_path))
|
||||
|
||||
def log_verbose(self, message: str):
|
||||
@@ -229,12 +246,20 @@ class QualityScorer:
|
||||
if not self.skill_path.exists():
|
||||
raise ValueError(f"Skill path does not exist: {self.skill_path}")
|
||||
|
||||
# Score each dimension (20% weight each, 5 dimensions total)
|
||||
self._score_documentation()
|
||||
self._score_code_quality()
|
||||
self._score_completeness()
|
||||
self._score_security()
|
||||
self._score_usability()
|
||||
# Score each dimension
|
||||
# Default: 4 dimensions at 25% each (backward compatible)
|
||||
# With --include-security: 5 dimensions at 20% each
|
||||
weight = 0.20 if self.include_security else 0.25
|
||||
|
||||
self._score_documentation(weight)
|
||||
self._score_code_quality(weight)
|
||||
self._score_completeness(weight)
|
||||
|
||||
if self.include_security:
|
||||
self._score_security(0.20)
|
||||
self._score_usability(0.20)
|
||||
else:
|
||||
self._score_usability(0.25)
|
||||
|
||||
# Calculate overall metrics
|
||||
self.report.calculate_overall_score()
|
||||
@@ -247,11 +272,11 @@ class QualityScorer:
|
||||
|
||||
return self.report
|
||||
|
||||
def _score_documentation(self):
|
||||
"""Score documentation quality (25% weight)"""
|
||||
def _score_documentation(self, weight: float = 0.25):
|
||||
"""Score documentation quality"""
|
||||
self.log_verbose("Scoring documentation quality...")
|
||||
|
||||
dimension = QualityDimension("Documentation", 0.20, "Quality of documentation and written materials")
|
||||
dimension = QualityDimension("Documentation", weight, "Quality of documentation and written materials")
|
||||
|
||||
# Score SKILL.md
|
||||
self._score_skill_md(dimension)
|
||||
@@ -501,11 +526,11 @@ class QualityScorer:
|
||||
dimension.add_score("examples_availability", score, 25,
|
||||
f"Found {len(example_files)} example/sample files")
|
||||
|
||||
def _score_code_quality(self):
|
||||
"""Score code quality (25% weight)"""
|
||||
def _score_code_quality(self, weight: float = 0.25):
|
||||
"""Score code quality"""
|
||||
self.log_verbose("Scoring code quality...")
|
||||
|
||||
dimension = QualityDimension("Code Quality", 0.20, "Quality of Python scripts and implementation")
|
||||
dimension = QualityDimension("Code Quality", weight, "Quality of Python scripts and implementation")
|
||||
|
||||
scripts_dir = self.skill_path / "scripts"
|
||||
if not scripts_dir.exists():
|
||||
@@ -688,11 +713,11 @@ class QualityScorer:
|
||||
if avg_output_score < 15:
|
||||
dimension.add_suggestion("Add support for both JSON and human-readable output formats")
|
||||
|
||||
def _score_completeness(self):
|
||||
"""Score completeness (25% weight)"""
|
||||
def _score_completeness(self, weight: float = 0.25):
|
||||
"""Score completeness"""
|
||||
self.log_verbose("Scoring completeness...")
|
||||
|
||||
dimension = QualityDimension("Completeness", 0.20, "Completeness of required components and assets")
|
||||
dimension = QualityDimension("Completeness", weight, "Completeness of required components and assets")
|
||||
|
||||
# Score directory structure
|
||||
self._score_directory_structure(dimension)
|
||||
@@ -810,11 +835,11 @@ class QualityScorer:
|
||||
if not test_files:
|
||||
dimension.add_suggestion("Add test files or validation scripts")
|
||||
|
||||
def _score_usability(self):
|
||||
"""Score usability (25% weight)"""
|
||||
def _score_usability(self, weight: float = 0.25):
|
||||
"""Score usability"""
|
||||
self.log_verbose("Scoring usability...")
|
||||
|
||||
dimension = QualityDimension("Usability", 0.20, "Ease of use and user experience")
|
||||
dimension = QualityDimension("Usability", weight, "Ease of use and user experience")
|
||||
|
||||
# Score installation simplicity
|
||||
self._score_installation(dimension)
|
||||
@@ -946,11 +971,11 @@ class QualityScorer:
|
||||
dimension.add_score("practical_examples", score, 25,
|
||||
f"Practical examples: {len(example_files)} files")
|
||||
|
||||
def _score_security(self):
|
||||
"""Score security quality (20% weight)"""
|
||||
def _score_security(self, weight: float = 0.20):
|
||||
"""Score security quality"""
|
||||
self.log_verbose("Scoring security quality...")
|
||||
|
||||
dimension = QualityDimension("Security", 0.20, "Security practices and vulnerability prevention")
|
||||
dimension = QualityDimension("Security", weight, "Security practices and vulnerability prevention")
|
||||
|
||||
# Find Python scripts
|
||||
python_files = list(self.skill_path.rglob("*.py"))
|
||||
@@ -1078,13 +1103,17 @@ Examples:
|
||||
python quality_scorer.py engineering/my-skill
|
||||
python quality_scorer.py engineering/my-skill --detailed --json
|
||||
python quality_scorer.py engineering/my-skill --minimum-score 75
|
||||
python quality_scorer.py engineering/my-skill --include-security
|
||||
|
||||
Quality Dimensions (each 25%):
|
||||
Quality Dimensions (default: 4 dimensions × 25%):
|
||||
Documentation - SKILL.md quality, README, references, examples
|
||||
Code Quality - Script complexity, error handling, structure, output
|
||||
Completeness - Directory structure, assets, expected outputs, tests
|
||||
Usability - Installation simplicity, usage clarity, help accessibility
|
||||
|
||||
With --include-security (5 dimensions × 20%):
|
||||
Security - Sensitive data exposure, command injection, input validation
|
||||
|
||||
Letter Grades: A+ (95+), A (90+), A- (85+), B+ (80+), B (75+), B- (70+), C+ (65+), C (60+), C- (55+), D (50+), F (<50)
|
||||
"""
|
||||
)
|
||||
@@ -1104,12 +1133,15 @@ Letter Grades: A+ (95+), A (90+), A- (85+), B+ (80+), B (75+), B- (70+), C+ (65+
|
||||
parser.add_argument("--verbose",
|
||||
action="store_true",
|
||||
help="Enable verbose logging")
|
||||
parser.add_argument("--include-security",
|
||||
action="store_true",
|
||||
help="Include Security dimension (switches to 5 dimensions × 20%% each)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
# Create scorer and assess quality
|
||||
scorer = QualityScorer(args.skill_path, args.detailed, args.verbose)
|
||||
scorer = QualityScorer(args.skill_path, args.detailed, args.verbose, args.include_security)
|
||||
report = scorer.assess_quality()
|
||||
|
||||
# Format and output report
|
||||
|
||||
Reference in New Issue
Block a user