diff --git a/src/skill_seekers/cli/quality_metrics.py b/src/skill_seekers/cli/quality_metrics.py
new file mode 100644
index 0000000..49d0649
--- /dev/null
+++ b/src/skill_seekers/cli/quality_metrics.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python3
+"""
+Quality Metrics Dashboard
+
+Provides comprehensive quality monitoring and reporting for skills.
+Tracks completeness, accuracy, coverage, and health metrics.
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Any
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from enum import Enum
+
+
+class MetricLevel(Enum):
+    """Metric severity level."""
+    INFO = "info"
+    WARNING = "warning"
+    ERROR = "error"
+    CRITICAL = "critical"
+
+
+@dataclass
+class QualityMetric:
+    """Individual quality metric."""
+    name: str
+    value: float  # 0-100 percentage
+    level: MetricLevel
+    description: str
+    suggestions: List[str] = field(default_factory=list)
+
+
+@dataclass
+class QualityScore:
+    """Overall quality score."""
+    total_score: float   # 0-100
+    completeness: float  # 0-100
+    accuracy: float      # 0-100
+    coverage: float      # 0-100
+    health: float        # 0-100
+    grade: str           # A+, A, B+, B, C, D, F
+
+
+@dataclass
+class QualityReport:
+    """Complete quality report."""
+    timestamp: str
+    skill_name: str
+    overall_score: QualityScore
+    metrics: List[QualityMetric]
+    statistics: Dict[str, Any]
+    recommendations: List[str]
+    history: List[Dict[str, Any]] = field(default_factory=list)
+
+
+class QualityAnalyzer:
+    """
+    Analyze skill quality across multiple dimensions.
+
+    Provides comprehensive quality assessment and reporting.
+    """
+
+    # Thresholds for quality grades, ordered from highest to lowest
+    # (the first-match grade scan in calculate_overall_score relies on this)
+    GRADE_THRESHOLDS = {
+        'A+': 95, 'A': 90, 'A-': 85,
+        'B+': 80, 'B': 75, 'B-': 70,
+        'C+': 65, 'C': 60, 'C-': 55,
+        'D': 50, 'F': 0
+    }
+
+    def __init__(self, skill_dir: Path):
+        """Initialize quality analyzer."""
+        self.skill_dir = Path(skill_dir)
+        self.metrics: List[QualityMetric] = []
+        self.statistics: Dict[str, Any] = {}
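Since GRADE_THRESHOLDS is a plain dict, the grading logic depends on Python 3.7+ insertion-order preservation and on the table staying sorted high-to-low. A standalone sanity check, offered as a reviewer's sketch rather than part of the patch:

    # Verify the grade table is descending; the first-match scan in
    # calculate_overall_score (below) silently misgrades if it is not.
    from skill_seekers.cli.quality_metrics import QualityAnalyzer

    values = list(QualityAnalyzer.GRADE_THRESHOLDS.values())
    assert values == sorted(values, reverse=True), "grade table must be descending"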
+    def analyze_completeness(self) -> float:
+        """
+        Analyze skill completeness.
+
+        Checks for:
+        - SKILL.md exists and has content
+        - References directory exists
+        - Minimum documentation coverage
+
+        Returns:
+            Completeness score (0-100)
+        """
+        score = 0.0
+        max_score = 100.0
+
+        # SKILL.md exists (40 points)
+        skill_md = self.skill_dir / "SKILL.md"
+        if skill_md.exists():
+            score += 40
+            content = skill_md.read_text(encoding="utf-8")
+
+            # Has substantial content (10 points)
+            if len(content) > 500:
+                score += 10
+
+            # Has sections (10 points)
+            if content.count('#') >= 5:
+                score += 10
+
+        # References directory (20 points)
+        refs_dir = self.skill_dir / "references"
+        if refs_dir.exists():
+            score += 10
+
+            # Has reference files (10 points)
+            refs = list(refs_dir.glob("*.md"))
+            if len(refs) > 0:
+                score += 10
+
+        # Metadata/config (20 points)
+        if (self.skill_dir / "skill.json").exists():
+            score += 10
+        if (self.skill_dir / ".skill_version.json").exists():
+            score += 10
+
+        completeness = (score / max_score) * 100
+
+        # Add metric
+        level = MetricLevel.INFO if completeness >= 70 else MetricLevel.WARNING
+        suggestions = []
+        if completeness < 100:
+            if not skill_md.exists():
+                suggestions.append("Create SKILL.md file")
+            if not refs_dir.exists():
+                suggestions.append("Add references directory")
+            if len(suggestions) == 0:
+                suggestions.append("Expand documentation coverage")
+
+        self.metrics.append(QualityMetric(
+            name="Completeness",
+            value=completeness,
+            level=level,
+            description=f"Documentation completeness: {completeness:.1f}%",
+            suggestions=suggestions
+        ))
+
+        return completeness
+
+    def analyze_accuracy(self) -> float:
+        """
+        Analyze skill accuracy.
+
+        Checks for:
+        - No leftover TODO markers
+        - No placeholder text
+        - Valid JSON metadata
+
+        Returns:
+            Accuracy score (0-100)
+        """
+        score = 100.0
+        issues = []
+
+        # Scan SKILL.md for content issues
+        skill_md = self.skill_dir / "SKILL.md"
+        if skill_md.exists():
+            content = skill_md.read_text(encoding="utf-8")
+
+            # Check for TODO markers (deduct 5 points each, max 20)
+            todo_count = content.lower().count('todo')
+            if todo_count > 0:
+                deduction = min(todo_count * 5, 20)
+                score -= deduction
+                issues.append(f"Found {todo_count} TODO markers")
+
+            # Check for placeholder text (deduct 10)
+            placeholders = ['lorem ipsum', 'placeholder', 'coming soon']
+            for placeholder in placeholders:
+                if placeholder in content.lower():
+                    score -= 10
+                    issues.append(f"Found placeholder text: {placeholder}")
+                    break
+
+        # Check JSON validity
+        for json_file in self.skill_dir.glob("*.json"):
+            try:
+                json.loads(json_file.read_text())
+            except json.JSONDecodeError:
+                score -= 15
+                issues.append(f"Invalid JSON: {json_file.name}")
+
+        accuracy = max(score, 0.0)
+
+        level = MetricLevel.INFO if accuracy >= 80 else MetricLevel.WARNING
+        suggestions = []
+        if accuracy < 100 and issues:
+            suggestions.extend(issues[:3])  # Top 3 issues
+
+        self.metrics.append(QualityMetric(
+            name="Accuracy",
+            value=accuracy,
+            level=level,
+            description=f"Documentation accuracy: {accuracy:.1f}%",
+            suggestions=suggestions
+        ))
+
+        return accuracy
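To make the deduction rules concrete: three TODO markers cost 15 points (capped at 20) and one malformed JSON file costs 15 more, so such a skill scores 70. A quick standalone walk-through against a throwaway directory (paths illustrative):

    # Sketch: exercising the accuracy deductions defined above.
    import tempfile
    from pathlib import Path
    from skill_seekers.cli.quality_metrics import QualityAnalyzer

    with tempfile.TemporaryDirectory() as tmp:
        skill = Path(tmp)
        (skill / "SKILL.md").write_text("# Demo\nTODO a\nTODO b\nTODO c\n")
        (skill / "skill.json").write_text("{not valid json")
        analyzer = QualityAnalyzer(skill)
        print(analyzer.analyze_accuracy())  # expected: 70.0 (100 - 15 - 15)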
+    def analyze_coverage(self) -> float:
+        """
+        Analyze documentation coverage.
+
+        Checks for:
+        - Multiple document types
+        - Code examples
+        - API references
+        - Getting started guide
+
+        Returns:
+            Coverage score (0-100)
+        """
+        score = 0.0
+        max_score = 100.0
+
+        refs_dir = self.skill_dir / "references"
+        if refs_dir.exists():
+            ref_files = list(refs_dir.glob("*.md"))
+
+            # Has multiple references (30 points)
+            if len(ref_files) >= 3:
+                score += 30
+            elif len(ref_files) >= 1:
+                score += 15
+
+            # Check for specific types (20 points each)
+            ref_names = [f.stem.lower() for f in ref_files]
+
+            if any('getting' in name or 'start' in name for name in ref_names):
+                score += 20
+
+            if any('api' in name or 'reference' in name for name in ref_names):
+                score += 20
+
+            if any('example' in name or 'tutorial' in name for name in ref_names):
+                score += 20
+
+            # Has diverse content (10 points)
+            if len(ref_files) >= 5:
+                score += 10
+
+        coverage = (score / max_score) * 100
+
+        level = MetricLevel.INFO if coverage >= 60 else MetricLevel.WARNING
+        suggestions = []
+        if coverage < 100:
+            if coverage < 30:
+                suggestions.append("Add getting started guide")
+            if coverage < 60:
+                suggestions.append("Add API reference documentation")
+            suggestions.append("Expand documentation coverage")
+
+        self.metrics.append(QualityMetric(
+            name="Coverage",
+            value=coverage,
+            level=level,
+            description=f"Documentation coverage: {coverage:.1f}%",
+            suggestions=suggestions
+        ))
+
+        return coverage
+
+    def analyze_health(self) -> float:
+        """
+        Analyze skill health.
+
+        Checks for:
+        - Reasonable file sizes
+        - No empty files
+        - Proper directory structure
+
+        Returns:
+            Health score (0-100)
+        """
+        score = 100.0
+        issues = []
+
+        # Check for empty files (deduct 15 each)
+        for md_file in self.skill_dir.rglob("*.md"):
+            if md_file.stat().st_size == 0:
+                score -= 15
+                issues.append(f"Empty file: {md_file.name}")
+
+        # Check for very large files (deduct 10)
+        for md_file in self.skill_dir.rglob("*.md"):
+            if md_file.stat().st_size > 500_000:  # > 500KB
+                score -= 10
+                issues.append(f"Very large file: {md_file.name}")
+
+        # Check directory structure (deduct 20 if missing)
+        if not (self.skill_dir / "references").exists():
+            score -= 20
+            issues.append("Missing references directory")
+
+        health = max(score, 0.0)
+
+        level = MetricLevel.INFO if health >= 80 else MetricLevel.WARNING
+        suggestions = []
+        if health < 100:
+            suggestions.extend(issues[:3])
+
+        self.metrics.append(QualityMetric(
+            name="Health",
+            value=health,
+            level=level,
+            description=f"Skill health: {health:.1f}%",
+            suggestions=suggestions
+        ))
+
+        return health
+
+    def calculate_statistics(self) -> Dict[str, Any]:
+        """Calculate skill statistics."""
+        stats = {
+            'total_files': 0,
+            'total_size_bytes': 0,
+            'markdown_files': 0,
+            'reference_files': 0,
+            'total_characters': 0,
+            'total_words': 0
+        }
+
+        # Count files and sizes
+        for md_file in self.skill_dir.rglob("*.md"):
+            stats['total_files'] += 1
+            stats['markdown_files'] += 1
+            size = md_file.stat().st_size
+            stats['total_size_bytes'] += size
+
+            # Count words
+            try:
+                content = md_file.read_text(encoding="utf-8")
+                stats['total_characters'] += len(content)
+                stats['total_words'] += len(content.split())
+            except Exception:
+                pass
+
+        # Count references
+        refs_dir = self.skill_dir / "references"
+        if refs_dir.exists():
+            stats['reference_files'] = len(list(refs_dir.glob("*.md")))
+
+        self.statistics = stats
+        return stats
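The weighting in calculate_overall_score, defined just below, can be sanity-checked by hand: component scores of 80/90/70/85 give 80*0.30 + 90*0.25 + 70*0.25 + 85*0.20 = 81.0, which lands on a B+ under the thresholds above. A worked sketch:

    # Worked example of the weighted overall score defined below.
    weights = {'completeness': 0.30, 'accuracy': 0.25, 'coverage': 0.25, 'health': 0.20}
    scores = {'completeness': 80.0, 'accuracy': 90.0, 'coverage': 70.0, 'health': 85.0}
    total = sum(scores[k] * weights[k] for k in weights)
    print(total)  # 81.0 -> grade 'B+' (first threshold at or below the total is 80)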
+    def calculate_overall_score(
+        self,
+        completeness: float,
+        accuracy: float,
+        coverage: float,
+        health: float
+    ) -> QualityScore:
+        """
+        Calculate overall quality score.
+
+        Weighted average:
+        - Completeness: 30%
+        - Accuracy: 25%
+        - Coverage: 25%
+        - Health: 20%
+        """
+        total = (
+            completeness * 0.30 +
+            accuracy * 0.25 +
+            coverage * 0.25 +
+            health * 0.20
+        )
+
+        # Determine grade (first matching threshold wins; the table is
+        # ordered from highest grade to lowest)
+        grade = 'F'
+        for g, threshold in self.GRADE_THRESHOLDS.items():
+            if total >= threshold:
+                grade = g
+                break
+
+        return QualityScore(
+            total_score=total,
+            completeness=completeness,
+            accuracy=accuracy,
+            coverage=coverage,
+            health=health,
+            grade=grade
+        )
+
+    def generate_recommendations(self, score: QualityScore) -> List[str]:
+        """Generate improvement recommendations."""
+        recommendations = []
+
+        # Priority recommendations
+        if score.completeness < 70:
+            recommendations.append("šŸ”“ PRIORITY: Improve documentation completeness")
+
+        if score.accuracy < 80:
+            recommendations.append("🟔 Address accuracy issues (TODOs, placeholders)")
+
+        if score.coverage < 60:
+            recommendations.append("🟔 Expand documentation coverage (API, examples)")
+
+        if score.health < 80:
+            recommendations.append("🟔 Fix health issues (empty files, structure)")
+
+        # General recommendations
+        if score.total_score < 80:
+            recommendations.append("šŸ“ Review and enhance overall documentation quality")
+
+        if score.total_score >= 90:
+            recommendations.append("āœ… Excellent quality! Consider adding advanced topics")
+
+        return recommendations
+
+    def generate_report(self) -> QualityReport:
+        """
+        Generate comprehensive quality report.
+
+        Returns:
+            Complete quality report
+        """
+        # Start from a clean slate so repeated calls don't duplicate metrics
+        self.metrics = []
+
+        # Run all analyses
+        completeness = self.analyze_completeness()
+        accuracy = self.analyze_accuracy()
+        coverage = self.analyze_coverage()
+        health = self.analyze_health()
+
+        # Calculate overall score
+        overall_score = self.calculate_overall_score(
+            completeness, accuracy, coverage, health
+        )
+
+        # Calculate statistics
+        stats = self.calculate_statistics()
+
+        # Generate recommendations
+        recommendations = self.generate_recommendations(overall_score)
+
+        return QualityReport(
+            timestamp=datetime.now().isoformat(),
+            skill_name=self.skill_dir.name,
+            overall_score=overall_score,
+            metrics=self.metrics,
+            statistics=stats,
+            recommendations=recommendations
+        )
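A consumer-side note: asdict() leaves MetricLevel enum members in place, so the report is not directly json.dumps-able; example_usage() below passes default=str for exactly this reason. A minimal runnable sketch of the round-trip (an empty directory still yields a valid, low-scoring report):

    import json
    import tempfile
    from dataclasses import asdict
    from pathlib import Path
    from skill_seekers.cli.quality_metrics import QualityAnalyzer

    with tempfile.TemporaryDirectory() as tmp:
        report = QualityAnalyzer(Path(tmp)).generate_report()
        # default=str makes the MetricLevel enums JSON-safe
        print(json.dumps(asdict(report), indent=2, default=str)[:200])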
+    def format_report(self, report: QualityReport) -> str:
+        """Format report as human-readable text."""
+        lines = ["=" * 70]
+        lines.append("QUALITY METRICS DASHBOARD")
+        lines.append("=" * 70)
+        lines.append("")
+
+        # Header
+        lines.append(f"šŸ“Š Skill: {report.skill_name}")
+        lines.append(f"šŸ• Time: {report.timestamp}")
+        lines.append("")
+
+        # Overall Score
+        score = report.overall_score
+        lines.append("šŸŽÆ OVERALL SCORE")
+        lines.append(f"   Grade: {score.grade}")
+        lines.append(f"   Score: {score.total_score:.1f}/100")
+        lines.append("")
+
+        # Component Scores
+        lines.append("šŸ“ˆ COMPONENT SCORES")
+        lines.append(f"   Completeness: {score.completeness:.1f}% (30% weight)")
+        lines.append(f"   Accuracy: {score.accuracy:.1f}% (25% weight)")
+        lines.append(f"   Coverage: {score.coverage:.1f}% (25% weight)")
+        lines.append(f"   Health: {score.health:.1f}% (20% weight)")
+        lines.append("")
+
+        # Metrics
+        lines.append("šŸ“‹ DETAILED METRICS")
+        for metric in report.metrics:
+            icon = {
+                MetricLevel.INFO: "āœ…",
+                MetricLevel.WARNING: "āš ļø",
+                MetricLevel.ERROR: "āŒ",
+                MetricLevel.CRITICAL: "šŸ”“"
+            }.get(metric.level, "ā„¹ļø")
+
+            lines.append(f"   {icon} {metric.name}: {metric.value:.1f}%")
+            if metric.suggestions:
+                for suggestion in metric.suggestions[:2]:
+                    lines.append(f"      → {suggestion}")
+        lines.append("")
+
+        # Statistics
+        lines.append("šŸ“Š STATISTICS")
+        stats = report.statistics
+        lines.append(f"   Total files: {stats.get('total_files', 0)}")
+        lines.append(f"   Markdown files: {stats.get('markdown_files', 0)}")
+        lines.append(f"   Reference files: {stats.get('reference_files', 0)}")
+        lines.append(f"   Total words: {stats.get('total_words', 0):,}")
+        lines.append(f"   Total size: {stats.get('total_size_bytes', 0):,} bytes")
+        lines.append("")
+
+        # Recommendations
+        if report.recommendations:
+            lines.append("šŸ’” RECOMMENDATIONS")
+            for rec in report.recommendations:
+                lines.append(f"   {rec}")
+            lines.append("")
+
+        lines.append("=" * 70)
+
+        return "\n".join(lines)
+
+
+def example_usage():
+    """Example usage of quality metrics."""
+    # Analyze skill
+    skill_dir = Path("output/ansible")
+    analyzer = QualityAnalyzer(skill_dir)
+
+    # Generate report
+    report = analyzer.generate_report()
+
+    # Display report
+    formatted = analyzer.format_report(report)
+    print(formatted)
+
+    # Save report
+    report_path = skill_dir / "quality_report.json"
+    report_path.write_text(json.dumps(asdict(report), indent=2, default=str))
+    print(f"\nāœ… Report saved: {report_path}")
+
+
+if __name__ == "__main__":
+    example_usage()
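Running python src/skill_seekers/cli/quality_metrics.py from the repository root invokes example_usage(), which analyzes the hard-coded output/ansible directory (it must exist), prints the dashboard, and writes quality_report.json next to the skill. The printed output is shaped like this, with the skill name, timestamp, and scores here purely illustrative:

    ======================================================================
    QUALITY METRICS DASHBOARD
    ======================================================================

    šŸ“Š Skill: ansible
    šŸ• Time: 2025-01-01T12:00:00

    šŸŽÆ OVERALL SCORE
       Grade: B+
       Score: 81.0/100
    ...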
" * 20)) + + # Create references + refs_dir = skill_dir / "references" + refs_dir.mkdir() + + (refs_dir / "getting_started.md").write_text("# Getting Started\nGuide content") + (refs_dir / "api_reference.md").write_text("# API Reference\nAPI docs") + (refs_dir / "examples.md").write_text("# Examples\nExample code") + + yield skill_dir + + +@pytest.fixture +def minimal_skill_dir(): + """Create minimal skill directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "minimal_skill" + skill_dir.mkdir() + + # Only SKILL.md + (skill_dir / "SKILL.md").write_text("# Minimal") + + yield skill_dir + + +def test_completeness_full(complete_skill_dir): + """Test completeness analysis with complete skill.""" + analyzer = QualityAnalyzer(complete_skill_dir) + score = analyzer.analyze_completeness() + + assert score >= 70 # Should be high (70 is good for test fixture) + + +def test_completeness_minimal(minimal_skill_dir): + """Test completeness analysis with minimal skill.""" + analyzer = QualityAnalyzer(minimal_skill_dir) + score = analyzer.analyze_completeness() + + assert score < 80 # Should be lower + + +def test_accuracy_clean(): + """Test accuracy analysis with clean content.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "clean_skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("# Clean Skill\n\nNo issues here.") + + analyzer = QualityAnalyzer(skill_dir) + score = analyzer.analyze_accuracy() + + assert score == 100 # Perfect score + + +def test_accuracy_with_todos(): + """Test accuracy detects TODO markers.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "todo_skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("# Skill\n\nTODO: Add content\nTODO: Fix this") + + analyzer = QualityAnalyzer(skill_dir) + score = analyzer.analyze_accuracy() + + assert score < 100 # Deducted for TODOs + + +def test_accuracy_with_placeholder(): + """Test accuracy detects placeholder text.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "placeholder_skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("# Skill\n\nLorem ipsum dolor sit amet") + + analyzer = QualityAnalyzer(skill_dir) + score = analyzer.analyze_accuracy() + + assert score < 100 # Deducted for placeholder + + +def test_coverage_high(complete_skill_dir): + """Test coverage analysis with good coverage.""" + analyzer = QualityAnalyzer(complete_skill_dir) + score = analyzer.analyze_coverage() + + assert score >= 60 # Should have decent coverage + + +def test_coverage_low(): + """Test coverage analysis with low coverage.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "low_coverage" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("# Skill") + + analyzer = QualityAnalyzer(skill_dir) + score = analyzer.analyze_coverage() + + assert score < 50 # Low coverage + + +def test_health_good(): + """Test health analysis with healthy skill.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "healthy_skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("# Healthy Skill\n\nGood content") + + refs_dir = skill_dir / "references" + refs_dir.mkdir() + + analyzer = QualityAnalyzer(skill_dir) + score = analyzer.analyze_health() + + assert score >= 80 # Healthy + + +def test_health_empty_files(): + """Test health detects empty files.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "empty_files" 
+def test_completeness_full(complete_skill_dir):
+    """Test completeness analysis with complete skill."""
+    analyzer = QualityAnalyzer(complete_skill_dir)
+    score = analyzer.analyze_completeness()
+
+    # The fixture scores exactly 70: no metadata files, SKILL.md under 500 chars
+    assert score >= 70
+
+
+def test_completeness_minimal(minimal_skill_dir):
+    """Test completeness analysis with minimal skill."""
+    analyzer = QualityAnalyzer(minimal_skill_dir)
+    score = analyzer.analyze_completeness()
+
+    assert score < 80  # Should be lower
+
+
+def test_accuracy_clean():
+    """Test accuracy analysis with clean content."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "clean_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Clean Skill\n\nNo issues here.")
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_accuracy()
+
+        assert score == 100  # Perfect score
+
+
+def test_accuracy_with_todos():
+    """Test accuracy detects TODO markers."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "todo_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Skill\n\nTODO: Add content\nTODO: Fix this")
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_accuracy()
+
+        assert score < 100  # Deducted for TODOs
+
+
+def test_accuracy_with_placeholder():
+    """Test accuracy detects placeholder text."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "placeholder_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Skill\n\nLorem ipsum dolor sit amet")
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_accuracy()
+
+        assert score < 100  # Deducted for placeholder
+
+
+def test_coverage_high(complete_skill_dir):
+    """Test coverage analysis with good coverage."""
+    analyzer = QualityAnalyzer(complete_skill_dir)
+    score = analyzer.analyze_coverage()
+
+    assert score >= 60  # Should have decent coverage
+
+
+def test_coverage_low():
+    """Test coverage analysis with low coverage."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "low_coverage"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Skill")
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_coverage()
+
+        assert score < 50  # Low coverage
+
+
+def test_health_good():
+    """Test health analysis with healthy skill."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "healthy_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Healthy Skill\n\nGood content")
+
+        refs_dir = skill_dir / "references"
+        refs_dir.mkdir()
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_health()
+
+        assert score >= 80  # Healthy
+
+
+def test_health_empty_files():
+    """Test health detects empty files."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "empty_files"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("")  # Empty
+
+        analyzer = QualityAnalyzer(skill_dir)
+        score = analyzer.analyze_health()
+
+        assert score < 100  # Deducted for empty file
+
+
+def test_calculate_statistics(complete_skill_dir):
+    """Test statistics calculation."""
+    analyzer = QualityAnalyzer(complete_skill_dir)
+    stats = analyzer.calculate_statistics()
+
+    assert stats['total_files'] > 0
+    assert stats['markdown_files'] > 0
+    assert stats['total_words'] > 0
+
+
+def test_overall_score_calculation():
+    """Test overall score calculation."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "test_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Test Skill\n\nContent")
+
+        analyzer = QualityAnalyzer(skill_dir)
+
+        # Manually set scores
+        completeness = 80.0
+        accuracy = 90.0
+        coverage = 70.0
+        health = 85.0
+
+        overall = analyzer.calculate_overall_score(
+            completeness, accuracy, coverage, health
+        )
+
+        assert overall.completeness == 80.0
+        assert overall.accuracy == 90.0
+        assert overall.coverage == 70.0
+        assert overall.health == 85.0
+        assert 70 <= overall.total_score <= 90  # Weighted average
+
+
+def test_grade_assignment():
+    """Test grade assignment based on score."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "test_skill"
+        skill_dir.mkdir()
+
+        analyzer = QualityAnalyzer(skill_dir)
+
+        # Test various scores
+        score_95 = analyzer.calculate_overall_score(95, 95, 95, 95)
+        assert score_95.grade == 'A+'
+
+        score_85 = analyzer.calculate_overall_score(85, 85, 85, 85)
+        assert score_85.grade in ['A-', 'B+']
+
+        score_70 = analyzer.calculate_overall_score(70, 70, 70, 70)
+        assert score_70.grade in ['B-', 'C+', 'C']
+
+
+def test_generate_recommendations():
+    """Test recommendation generation."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "test_skill"
+        skill_dir.mkdir()
+
+        analyzer = QualityAnalyzer(skill_dir)
+
+        # Low completeness
+        score = analyzer.calculate_overall_score(60, 80, 70, 80)
+        recommendations = analyzer.generate_recommendations(score)
+
+        assert len(recommendations) > 0
+        assert any('completeness' in r.lower() for r in recommendations)
+
+
+def test_generate_report(complete_skill_dir):
+    """Test full report generation."""
+    analyzer = QualityAnalyzer(complete_skill_dir)
+    report = analyzer.generate_report()
+
+    assert report.skill_name == "complete_skill"
+    assert report.overall_score is not None
+    assert len(report.metrics) == 4  # 4 analyses
+    assert len(report.statistics) > 0
+    assert report.timestamp is not None
+
+
+def test_format_report(complete_skill_dir):
+    """Test report formatting."""
+    analyzer = QualityAnalyzer(complete_skill_dir)
+    report = analyzer.generate_report()
+    formatted = analyzer.format_report(report)
+
+    assert "QUALITY METRICS DASHBOARD" in formatted
+    assert "OVERALL SCORE" in formatted
+    assert "COMPONENT SCORES" in formatted
+
+    # RECOMMENDATIONS only appears if there are recommendations
+    if report.recommendations:
+        assert "RECOMMENDATIONS" in formatted
+
+
+def test_metric_levels():
+    """Test metric level assignment."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "test_skill"
+        skill_dir.mkdir()
+
+        (skill_dir / "SKILL.md").write_text("# Test")
+
+        analyzer = QualityAnalyzer(skill_dir)
+        analyzer.analyze_completeness()
+
+        assert len(analyzer.metrics) > 0
+        assert analyzer.metrics[0].level in [MetricLevel.INFO, MetricLevel.WARNING]
+ """Test handling empty skill directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + empty_dir = Path(tmpdir) / "empty" + empty_dir.mkdir() + + analyzer = QualityAnalyzer(empty_dir) + report = analyzer.generate_report() + + assert report.overall_score.total_score < 50 # Very low score + + +def test_metric_suggestions(): + """Test metrics include suggestions.""" + with tempfile.TemporaryDirectory() as tmpdir: + skill_dir = Path(tmpdir) / "incomplete_skill" + skill_dir.mkdir() + + # Minimal content to trigger suggestions + (skill_dir / "SKILL.md").write_text("# Minimal") + + analyzer = QualityAnalyzer(skill_dir) + analyzer.analyze_completeness() + + # Should have suggestions + assert len(analyzer.metrics) > 0 + if analyzer.metrics[0].value < 100: + assert len(analyzer.metrics[0].suggestions) > 0 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])