# Confidence thresholds for pattern filtering (Issue #240)
CONFIDENCE_THRESHOLDS = {
    "critical": 0.80,  # High-confidence patterns for ARCHITECTURE.md
    "high": 0.70,  # Include in detailed analysis
    "medium": 0.60,  # Include with warning/context
    "low": 0.50,  # Minimum detection threshold
}

# Default minimum confidence for pattern detection
DEFAULT_MIN_CONFIDENCE = CONFIDENCE_THRESHOLDS["low"]


# ============================================================================
# PATTERN FILTERING UTILITIES (Issue #240 - C4.2)
# ============================================================================


def _confidence_of(pattern: dict) -> float:
    """Return the pattern's confidence, treating a missing or null value as 0.0."""
    # dict.get's default only covers a *missing* key; an explicit None
    # (e.g. a JSON null) would otherwise reach the comparison/sort and raise.
    value = pattern.get("confidence")
    return 0.0 if value is None else value


def filter_patterns_by_confidence(patterns: list[dict], min_confidence: float) -> list[dict]:
    """
    Filter patterns by minimum confidence threshold.

    Args:
        patterns: List of pattern dictionaries (from PatternReport.to_dict())
        min_confidence: Minimum confidence threshold (0.0-1.0)

    Returns:
        New list holding, in their original order, the patterns whose
        "confidence" is >= min_confidence. Patterns with a missing or null
        confidence are treated as 0.0.
    """
    return [pattern for pattern in patterns if _confidence_of(pattern) >= min_confidence]


def create_multi_level_report(pattern_results: list[dict]) -> dict:
    """
    Create multi-level pattern report with different confidence thresholds.

    Args:
        pattern_results: List of PatternReport dictionaries. Each report's
            "file_path" (default "unknown") is copied onto its patterns;
            a missing or null "patterns" entry is treated as empty.

    Returns:
        Dictionary with patterns grouped by confidence level:
        - all_patterns: All detected patterns, sorted by confidence (highest first)
        - critical: Patterns >= 0.80 (for ARCHITECTURE.md)
        - high_confidence: Patterns >= 0.70 (for detailed analysis)
        - medium: Patterns >= 0.60
        - statistics: Pattern count by level. The critical/high/medium counts
          are cumulative (each level includes the levels above it);
          low_count is everything below the medium threshold.
        - thresholds: Copy of CONFIDENCE_THRESHOLDS used for the grouping
    """
    # Flatten all patterns from all files, tagging each with its source file.
    # A new dict is built per pattern so the caller's input is never mutated.
    all_patterns = [
        {**pattern, "file_path": report.get("file_path", "unknown")}
        for report in pattern_results
        for pattern in report.get("patterns") or []
    ]

    # Sort by confidence (highest first); the sort is stable, so ties keep
    # their discovery order.
    all_patterns_sorted = sorted(all_patterns, key=_confidence_of, reverse=True)

    # Filter by confidence levels
    critical = filter_patterns_by_confidence(all_patterns_sorted, CONFIDENCE_THRESHOLDS["critical"])
    high_confidence = filter_patterns_by_confidence(all_patterns_sorted, CONFIDENCE_THRESHOLDS["high"])
    medium = filter_patterns_by_confidence(all_patterns_sorted, CONFIDENCE_THRESHOLDS["medium"])

    return {
        "all_patterns": all_patterns_sorted,
        "critical": critical,
        "high_confidence": high_confidence,
        "medium": medium,
        "statistics": {
            "total": len(all_patterns_sorted),
            "critical_count": len(critical),
            "high_confidence_count": len(high_confidence),
            "medium_count": len(medium),
            "low_count": len(all_patterns_sorted) - len(medium),
        },
        # Hand out a copy so callers cannot mutate the module-level thresholds.
        "thresholds": dict(CONFIDENCE_THRESHOLDS),
    }