#!/usr/bin/env python3 """ AI Enhancement Module for Pattern Detection and Test Examples Enhances C3.1 (Pattern Detection) and C3.2 (Test Example Extraction) with AI analysis. Features: - Explains why patterns were detected - Suggests improvements and identifies issues - Recommends related patterns - Adds context to test examples - Groups related examples into tutorials - Identifies best practices Modes: - API mode: Uses Claude API (requires ANTHROPIC_API_KEY) - LOCAL mode: Uses Claude Code CLI (no API key needed, uses your Claude Max plan) - AUTO mode: Tries API first, falls back to LOCAL Credits: - Uses Claude AI (Anthropic) for analysis - Graceful degradation if API unavailable """ import json import logging import os import subprocess import tempfile from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from pathlib import Path logger = logging.getLogger(__name__) # Import config manager for settings try: from skill_seekers.cli.config_manager import get_config_manager CONFIG_AVAILABLE = True except ImportError: CONFIG_AVAILABLE = False @dataclass class AIAnalysis: """AI analysis result for patterns or examples""" explanation: str issues: list[str] recommendations: list[str] related_items: list[str] # Related patterns or examples best_practices: list[str] confidence_boost: float # -0.2 to +0.2 adjustment to confidence class AIEnhancer: """Base class for AI enhancement""" def __init__(self, api_key: str | None = None, enabled: bool = True, mode: str = "auto"): """ Initialize AI enhancer. Args: api_key: Anthropic API key (uses ANTHROPIC_API_KEY env if None) enabled: Enable AI enhancement (default: True) mode: Enhancement mode - "auto" (default), "api", or "local" - "auto": Use API if key available, otherwise fall back to LOCAL - "api": Force API mode (fails if no key) - "local": Use Claude Code CLI (no API key needed) """ self.enabled = enabled self.mode = mode self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY") self.client = None # Get settings from config (with defaults) if CONFIG_AVAILABLE: config = get_config_manager() self.local_batch_size = config.get_local_batch_size() self.local_parallel_workers = config.get_local_parallel_workers() else: self.local_batch_size = 20 # Default self.local_parallel_workers = 3 # Default # Determine actual mode if mode == "auto": if self.api_key: self.mode = "api" else: # Fall back to LOCAL mode (Claude Code CLI) self.mode = "local" logger.info("ℹ️ No API key found, using LOCAL mode (Claude Code CLI)") if self.mode == "api" and self.enabled: try: import anthropic # Support custom base_url for GLM-4.7 and other Claude-compatible APIs client_kwargs = {"api_key": self.api_key} base_url = os.environ.get("ANTHROPIC_BASE_URL") if base_url: client_kwargs["base_url"] = base_url logger.info(f"✅ Using custom API base URL: {base_url}") self.client = anthropic.Anthropic(**client_kwargs) logger.info("✅ AI enhancement enabled (using Claude API)") except ImportError: logger.warning("⚠️ anthropic package not installed, falling back to LOCAL mode") self.mode = "local" except Exception as e: logger.warning(f"⚠️ Failed to initialize API client: {e}, falling back to LOCAL mode") self.mode = "local" if self.mode == "local" and self.enabled: # Verify Claude CLI is available if self._check_claude_cli(): logger.info("✅ AI enhancement enabled (using LOCAL mode - Claude Code CLI)") else: logger.warning("⚠️ Claude Code CLI not found. AI enhancement disabled.") logger.warning(" Install with: npm install -g @anthropic-ai/claude-code") self.enabled = False def _check_claude_cli(self) -> bool: """Check if Claude Code CLI is available""" try: result = subprocess.run( ["claude", "--version"], capture_output=True, text=True, timeout=5, ) return result.returncode == 0 except (FileNotFoundError, subprocess.TimeoutExpired): return False def _call_claude(self, prompt: str, max_tokens: int = 1000) -> str | None: """Call Claude (API or LOCAL mode) with error handling""" if self.mode == "api": return self._call_claude_api(prompt, max_tokens) elif self.mode == "local": return self._call_claude_local(prompt) return None def _call_claude_api(self, prompt: str, max_tokens: int = 1000) -> str | None: """Call Claude API""" if not self.client: return None try: response = self.client.messages.create( model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}], ) return response.content[0].text except Exception as e: logger.warning(f"⚠️ AI API call failed: {e}") return None def _call_claude_local(self, prompt: str) -> str | None: """Call Claude using LOCAL mode (Claude Code CLI)""" try: # Create a temporary directory for this enhancement with tempfile.TemporaryDirectory(prefix="ai_enhance_") as temp_dir: temp_path = Path(temp_dir) # Create prompt file prompt_file = temp_path / "prompt.md" output_file = temp_path / "response.json" # Write prompt with instructions to output JSON full_prompt = f"""# AI Analysis Task IMPORTANT: You MUST write your response as valid JSON to this file: {output_file} ## Task {prompt} ## Instructions 1. Analyze the input carefully 2. Generate the JSON response as specified 3. Use the Write tool to save the JSON to: {output_file} 4. The JSON must be valid and parseable DO NOT include any explanation - just write the JSON file. """ prompt_file.write_text(full_prompt) # Run Claude CLI result = subprocess.run( ["claude", "--dangerously-skip-permissions", str(prompt_file)], capture_output=True, text=True, timeout=120, # 2 minute timeout per call cwd=str(temp_path), ) if result.returncode != 0: logger.warning(f"⚠️ Claude CLI returned error: {result.returncode}") return None # Read output file if output_file.exists(): response_text = output_file.read_text() # Try to extract JSON from response try: # Validate it's valid JSON json.loads(response_text) return response_text except json.JSONDecodeError: # Try to find JSON in the response import re json_match = re.search(r'\[[\s\S]*\]|\{[\s\S]*\}', response_text) if json_match: return json_match.group() logger.warning("⚠️ Could not parse JSON from LOCAL response") return None else: # Look for any JSON file created for json_file in temp_path.glob("*.json"): if json_file.name != "prompt.json": return json_file.read_text() logger.warning("⚠️ No output file from LOCAL mode") return None except subprocess.TimeoutExpired: logger.warning("⚠️ Claude CLI timeout (2 minutes)") return None except Exception as e: logger.warning(f"⚠️ LOCAL mode error: {e}") return None class PatternEnhancer(AIEnhancer): """Enhance design pattern detection with AI analysis""" def enhance_patterns(self, patterns: list[dict]) -> list[dict]: """ Enhance detected patterns with AI analysis. Args: patterns: List of detected pattern instances Returns: Enhanced patterns with AI analysis """ if not self.enabled or not patterns: return patterns # Use larger batch size for LOCAL mode (configurable) if self.mode == "local": batch_size = self.local_batch_size parallel_workers = self.local_parallel_workers logger.info( f"🤖 Enhancing {len(patterns)} patterns with AI " f"(LOCAL mode: {batch_size} per batch, {parallel_workers} parallel workers)..." ) else: batch_size = 5 # API mode uses smaller batches parallel_workers = 1 # API mode is sequential logger.info(f"🤖 Enhancing {len(patterns)} detected patterns with AI...") # Create batches batches = [] for i in range(0, len(patterns), batch_size): batches.append(patterns[i : i + batch_size]) # Process batches (parallel for LOCAL, sequential for API) if parallel_workers > 1 and len(batches) > 1: enhanced = self._enhance_patterns_parallel(batches, parallel_workers) else: enhanced = [] for batch in batches: batch_results = self._enhance_pattern_batch(batch) enhanced.extend(batch_results) logger.info(f"✅ Enhanced {len(enhanced)} patterns") return enhanced def _enhance_patterns_parallel(self, batches: list[list[dict]], workers: int) -> list[dict]: """Process pattern batches in parallel using ThreadPoolExecutor.""" results = [None] * len(batches) # Preserve order with ThreadPoolExecutor(max_workers=workers) as executor: # Submit all batches future_to_idx = { executor.submit(self._enhance_pattern_batch, batch): idx for idx, batch in enumerate(batches) } # Collect results as they complete completed = 0 total = len(batches) for future in as_completed(future_to_idx): idx = future_to_idx[future] try: results[idx] = future.result() completed += 1 # Show progress: always for small jobs (<10), every 5 for larger jobs if total < 10 or completed % 5 == 0 or completed == total: logger.info(f" Progress: {completed}/{total} batches completed") except Exception as e: logger.warning(f"⚠️ Batch {idx} failed: {e}") results[idx] = batches[idx] # Return unenhanced on failure # Flatten results enhanced = [] for batch_result in results: if batch_result: enhanced.extend(batch_result) return enhanced def _enhance_pattern_batch(self, patterns: list[dict]) -> list[dict]: """Enhance a batch of patterns""" # Prepare prompt pattern_descriptions = [] for idx, p in enumerate(patterns): desc = f"{idx + 1}. {p['pattern_type']} in {p.get('class_name', 'unknown')}" desc += f"\n Evidence: {', '.join(p.get('evidence', []))}" pattern_descriptions.append(desc) prompt = f"""Analyze these detected design patterns and provide insights: {chr(10).join(pattern_descriptions)} For EACH pattern, provide (in JSON format): 1. "explanation": Brief why this pattern was detected (1-2 sentences) 2. "issues": List of potential issues or anti-patterns (if any) 3. "recommendations": Suggestions for improvement (if any) 4. "related_patterns": Other patterns that might be relevant 5. "confidence_boost": Confidence adjustment from -0.2 to +0.2 based on evidence quality Format as JSON array matching input order. Be concise and actionable. """ response = self._call_claude(prompt, max_tokens=2000) if not response: # Return patterns unchanged if API fails return patterns try: analyses = json.loads(response) # Merge AI analysis into patterns for idx, pattern in enumerate(patterns): if idx < len(analyses): analysis = analyses[idx] pattern["ai_analysis"] = { "explanation": analysis.get("explanation", ""), "issues": analysis.get("issues", []), "recommendations": analysis.get("recommendations", []), "related_patterns": analysis.get("related_patterns", []), "confidence_boost": analysis.get("confidence_boost", 0.0), } # Adjust confidence boost = analysis.get("confidence_boost", 0.0) if -0.2 <= boost <= 0.2: pattern["confidence"] = min(1.0, max(0.0, pattern["confidence"] + boost)) return patterns except json.JSONDecodeError: logger.warning("⚠️ Failed to parse AI response, returning patterns unchanged") return patterns except Exception as e: logger.warning(f"⚠️ Error processing AI analysis: {e}") return patterns class TestExampleEnhancer(AIEnhancer): """Enhance test examples with AI analysis""" def enhance_examples(self, examples: list[dict]) -> list[dict]: """ Enhance test examples with AI context and explanations. Args: examples: List of extracted test examples Returns: Enhanced examples with AI analysis """ if not self.enabled or not examples: return examples # Use larger batch size for LOCAL mode (configurable) if self.mode == "local": batch_size = self.local_batch_size parallel_workers = self.local_parallel_workers logger.info( f"🤖 Enhancing {len(examples)} test examples with AI " f"(LOCAL mode: {batch_size} per batch, {parallel_workers} parallel workers)..." ) else: batch_size = 5 # API mode uses smaller batches parallel_workers = 1 # API mode is sequential logger.info(f"🤖 Enhancing {len(examples)} test examples with AI...") # Create batches batches = [] for i in range(0, len(examples), batch_size): batches.append(examples[i : i + batch_size]) # Process batches (parallel for LOCAL, sequential for API) if parallel_workers > 1 and len(batches) > 1: enhanced = self._enhance_examples_parallel(batches, parallel_workers) else: enhanced = [] for batch in batches: batch_results = self._enhance_example_batch(batch) enhanced.extend(batch_results) logger.info(f"✅ Enhanced {len(enhanced)} examples") return enhanced def _enhance_examples_parallel(self, batches: list[list[dict]], workers: int) -> list[dict]: """Process example batches in parallel using ThreadPoolExecutor.""" results = [None] * len(batches) # Preserve order with ThreadPoolExecutor(max_workers=workers) as executor: # Submit all batches future_to_idx = { executor.submit(self._enhance_example_batch, batch): idx for idx, batch in enumerate(batches) } # Collect results as they complete completed = 0 total = len(batches) for future in as_completed(future_to_idx): idx = future_to_idx[future] try: results[idx] = future.result() completed += 1 # Show progress: always for small jobs (<10), every 5 for larger jobs if total < 10 or completed % 5 == 0 or completed == total: logger.info(f" Progress: {completed}/{total} batches completed") except Exception as e: logger.warning(f"⚠️ Batch {idx} failed: {e}") results[idx] = batches[idx] # Return unenhanced on failure # Flatten results enhanced = [] for batch_result in results: if batch_result: enhanced.extend(batch_result) return enhanced def _enhance_example_batch(self, examples: list[dict]) -> list[dict]: """Enhance a batch of examples""" # Prepare prompt example_descriptions = [] for idx, ex in enumerate(examples): desc = f"{idx + 1}. {ex.get('category', 'unknown')} - {ex.get('test_name', 'unknown')}" desc += f"\n Code: {ex.get('code', '')[:100]}..." if ex.get("expected_behavior"): desc += f"\n Expected: {ex['expected_behavior']}" example_descriptions.append(desc) prompt = f"""Analyze these test examples and provide educational context: {chr(10).join(example_descriptions)} For EACH example, provide (in JSON format): 1. "explanation": What this example demonstrates (1-2 sentences, beginner-friendly) 2. "best_practices": List of best practices shown in this example 3. "common_mistakes": Common mistakes this example helps avoid 4. "related_examples": Related test scenarios or patterns 5. "tutorial_group": Suggested tutorial category (e.g., "User Authentication", "Database Operations") Format as JSON array matching input order. Focus on educational value. """ response = self._call_claude(prompt, max_tokens=2000) if not response: return examples try: analyses = json.loads(response) # Merge AI analysis into examples for idx, example in enumerate(examples): if idx < len(analyses): analysis = analyses[idx] example["ai_analysis"] = { "explanation": analysis.get("explanation", ""), "best_practices": analysis.get("best_practices", []), "common_mistakes": analysis.get("common_mistakes", []), "related_examples": analysis.get("related_examples", []), "tutorial_group": analysis.get("tutorial_group", ""), } return examples except json.JSONDecodeError: logger.warning("⚠️ Failed to parse AI response, returning examples unchanged") return examples except Exception as e: logger.warning(f"⚠️ Error processing AI analysis: {e}") return examples def generate_tutorials(self, examples: list[dict]) -> dict[str, list[dict]]: """ Group enhanced examples into tutorial sections. Args: examples: Enhanced examples with AI analysis Returns: Dictionary mapping tutorial groups to examples """ tutorials = {} for example in examples: ai_analysis = example.get("ai_analysis", {}) group = ai_analysis.get("tutorial_group", "Miscellaneous") if group not in tutorials: tutorials[group] = [] tutorials[group].append(example) return tutorials