feat: C3.5 - Architectural Overview & Skill Integrator

Implements comprehensive integration of ALL C3.x codebase analysis features into unified skills, transforming basic GitHub scraping into comprehensive codebase intelligence with architectural insights.

**What C3.5 Does:**
- Generates comprehensive ARCHITECTURE.md with 8 sections
- Integrates ALL C3.x outputs (patterns, examples, guides, configs, architecture)
- Defaults to ON for GitHub sources with local_repo_path
- Adds --skip-codebase-analysis CLI flag

**ARCHITECTURE.md Sections:**
1. Overview - Project description
2. Architectural Patterns (C3.7) - MVC, MVVM, Clean Architecture, etc.
3. Technology Stack - Frameworks, libraries, languages
4. Design Patterns (C3.1) - Factory, Singleton, Observer, etc.
5. Configuration Overview (C3.4) - Config files with security warnings
6. Common Workflows (C3.3) - How-to guides summary
7. Usage Examples (C3.2) - Test examples statistics
8. Entry Points & Directory Structure - File organization

**Directory Structure:**
```
output/{name}/references/codebase_analysis/
├── ARCHITECTURE.md (main deliverable)
├── patterns/ (C3.1 design patterns)
├── examples/ (C3.2 test examples)
├── guides/ (C3.3 how-to tutorials)
├── configuration/ (C3.4 config patterns)
└── architecture_details/ (C3.7 architectural patterns)
```

**Key Features:**
- Default ON: enable_codebase_analysis=true when local_repo_path exists
- CLI flag: --skip-codebase-analysis to disable
- Enhanced SKILL.md with Architecture & Code Analysis summary
- Graceful degradation on C3.x failures
- New config properties: enable_codebase_analysis, ai_mode

**Changes:**
- unified_scraper.py: Added _run_c3_analysis(), modified _scrape_github(), CLI flag
- unified_skill_builder.py: Added 7 methods for C3.x generation + SKILL.md enhancement
- config_validator.py: Added validation for C3.x properties
- Updated 5 configs: react, django, fastapi, godot, svelte-cli
- Added 9 integration tests in test_c3_integration.py
- Updated CHANGELOG.md with complete C3.5 documentation

**Related:**
- Closes #75
- Creates #238 (type: "local" support - separate task)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-04 22:03:46 +03:00
parent 1298f7bd57
commit 9e772351fe
11 changed files with 1091 additions and 6 deletions
--- a/src/skill_seekers/cli/unified_skill_builder.py
+++ b/src/skill_seekers/cli/unified_skill_builder.py
@@ -108,6 +108,11 @@ This skill combines knowledge from multiple sources:
            elif source_type == 'pdf':
                content += f"- ✅ **PDF Document**: {source.get('path', 'N/A')}\n"

+        # C3.x Architecture & Code Analysis section (if available)
+        github_data = self.scraped_data.get('github', {}).get('data', {})
+        if github_data.get('c3_analysis'):
+            content += self._format_c3_summary_section(github_data['c3_analysis'])
+
        # Data quality section
        if self.conflicts:
            content += f"\n## ⚠️ Data Quality\n\n"
@@ -282,6 +287,11 @@ This skill combines knowledge from multiple sources:
        if self.merged_data:
            self._generate_merged_api_reference()

+        # Generate C3.x codebase analysis references if available
+        github_data = self.scraped_data.get('github', {}).get('data', {})
+        if github_data.get('c3_analysis'):
+            self._generate_c3_analysis_references()
+
    def _generate_docs_references(self):
        """Generate references from documentation source."""
        docs_dir = os.path.join(self.skill_dir, 'references', 'documentation')
@@ -401,6 +411,463 @@ This skill combines knowledge from multiple sources:

        logger.info(f"Created merged API reference ({len(apis)} APIs)")

+    def _generate_c3_analysis_references(self):
+        """Generate codebase analysis references (C3.5)."""
+        github_data = self.scraped_data.get('github', {}).get('data', {})
+        c3_data = github_data.get('c3_analysis')
+
+        if not c3_data:
+            return
+
+        # Create main directory
+        c3_dir = os.path.join(self.skill_dir, 'references', 'codebase_analysis')
+        os.makedirs(c3_dir, exist_ok=True)
+
+        logger.info("Generating C3.x codebase analysis references...")
+
+        # Generate ARCHITECTURE.md (main deliverable)
+        self._generate_architecture_overview(c3_dir, c3_data)
+
+        # Generate subdirectories for each C3.x component
+        self._generate_pattern_references(c3_dir, c3_data.get('patterns'))
+        self._generate_example_references(c3_dir, c3_data.get('test_examples'))
+        self._generate_guide_references(c3_dir, c3_data.get('how_to_guides'))
+        self._generate_config_references(c3_dir, c3_data.get('config_patterns'))
+        self._copy_architecture_details(c3_dir, c3_data.get('architecture'))
+
+        logger.info("✅ Created codebase analysis references")
+
+    def _generate_architecture_overview(self, c3_dir: str, c3_data: Dict):
+        """Generate comprehensive ARCHITECTURE.md (C3.5 main deliverable)."""
+        arch_path = os.path.join(c3_dir, 'ARCHITECTURE.md')
+
+        with open(arch_path, 'w', encoding='utf-8') as f:
+            f.write(f"# {self.name.title()} Architecture Overview\n\n")
+            f.write("*Generated from C3.x automated codebase analysis*\n\n")
+
+            # Section 1: Overview
+            f.write("## 1. Overview\n\n")
+            f.write(f"{self.description}\n\n")
+
+            # Section 2: Architectural Patterns (C3.7)
+            if c3_data.get('architecture'):
+                arch = c3_data['architecture']
+                patterns = arch.get('patterns', [])
+                if patterns:
+                    f.write("## 2. Architectural Patterns\n\n")
+                    f.write("*Detected architectural patterns from codebase structure*\n\n")
+                    for pattern in patterns[:5]:  # Top 5 patterns
+                        f.write(f"### {pattern['pattern_name']}\n\n")
+                        f.write(f"- **Confidence**: {pattern['confidence']:.2f}\n")
+                        if pattern.get('framework'):
+                            f.write(f"- **Framework**: {pattern['framework']}\n")
+                        if pattern.get('evidence'):
+                            f.write(f"- **Evidence**: {', '.join(pattern['evidence'][:3])}\n")
+                        f.write("\n")
+
+            # Section 3: Technology Stack
+            if c3_data.get('architecture'):
+                f.write("## 3. Technology Stack\n\n")
+                frameworks = c3_data['architecture'].get('frameworks_detected', [])
+                if frameworks:
+                    f.write("**Frameworks & Libraries**:\n")
+                    for fw in frameworks[:10]:
+                        f.write(f"- {fw}\n")
+                    f.write("\n")
+
+                # Add language info if available
+                languages = c3_data['architecture'].get('languages', {})
+                if languages:
+                    f.write("**Languages Detected**:\n")
+                    for lang, count in sorted(languages.items(), key=lambda x: x[1], reverse=True)[:5]:
+                        f.write(f"- {lang}: {count} files\n")
+                    f.write("\n")
+
+            # Section 4: Design Patterns (C3.1)
+            if c3_data.get('patterns'):
+                f.write("## 4. Design Patterns\n\n")
+                f.write("*Classic design patterns identified in the codebase*\n\n")
+
+                # Summarize pattern types
+                pattern_summary = {}
+                for file_data in c3_data['patterns']:
+                    for pattern in file_data.get('patterns', []):
+                        ptype = pattern['pattern_type']
+                        pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
+
+                if pattern_summary:
+                    for ptype, count in sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True):
+                        f.write(f"- **{ptype}**: {count} instance(s)\n")
+                    f.write(f"\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n")
+                else:
+                    f.write("*No design patterns detected.*\n\n")
+
+            # Section 5: Configuration Overview (C3.4)
+            if c3_data.get('config_patterns'):
+                f.write("## 5. Configuration Overview\n\n")
+                config = c3_data['config_patterns']
+                config_files = config.get('config_files', [])
+
+                if config_files:
+                    f.write(f"**{len(config_files)} configuration file(s) detected**:\n\n")
+                    for cf in config_files[:10]:  # Top 10
+                        f.write(f"- **`{cf['relative_path']}`**: {cf['config_type']}\n")
+                        if cf.get('purpose'):
+                            f.write(f"  - Purpose: {cf['purpose']}\n")
+
+                    # Add security warnings if available
+                    if config.get('ai_enhancements'):
+                        insights = config['ai_enhancements'].get('overall_insights', {})
+                        security_issues = insights.get('security_issues_found', 0)
+                        if security_issues > 0:
+                            f.write(f"\n🔐 **Security Alert**: {security_issues} potential security issue(s) found in configurations.\n")
+                            if insights.get('recommended_actions'):
+                                f.write("\n**Recommended Actions**:\n")
+                                for action in insights['recommended_actions'][:5]:
+                                    f.write(f"- {action}\n")
+                    f.write(f"\n📁 See `references/codebase_analysis/configuration/` for details.\n\n")
+                else:
+                    f.write("*No configuration files detected.*\n\n")
+
+            # Section 6: Common Workflows (C3.3)
+            if c3_data.get('how_to_guides'):
+                f.write("## 6. Common Workflows\n\n")
+                guides = c3_data['how_to_guides'].get('guides', [])
+
+                if guides:
+                    f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n")
+                    for guide in guides[:10]:  # Top 10
+                        f.write(f"- {guide.get('title', 'Untitled Guide')}\n")
+                    f.write(f"\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n")
+                else:
+                    f.write("*No workflow guides extracted.*\n\n")
+
+            # Section 7: Usage Examples (C3.2)
+            if c3_data.get('test_examples'):
+                f.write("## 7. Usage Examples\n\n")
+                examples = c3_data['test_examples']
+                total = examples.get('total_examples', 0)
+                high_value = examples.get('high_value_count', 0)
+
+                if total > 0:
+                    f.write(f"**{total} usage example(s) extracted from tests**:\n")
+                    f.write(f"- High-value examples: {high_value}\n")
+
+                    # Category breakdown
+                    if examples.get('examples_by_category'):
+                        f.write("\n**By Category**:\n")
+                        for cat, count in sorted(examples['examples_by_category'].items(), key=lambda x: x[1], reverse=True):
+                            f.write(f"- {cat}: {count}\n")
+
+                    f.write(f"\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n")
+                else:
+                    f.write("*No test examples extracted.*\n\n")
+
+            # Section 8: Entry Points & Directory Structure
+            f.write("## 8. Entry Points & Directory Structure\n\n")
+            f.write("*Analysis based on codebase organization*\n\n")
+
+            if c3_data.get('architecture'):
+                dir_struct = c3_data['architecture'].get('directory_structure', {})
+                if dir_struct:
+                    f.write("**Main Directories**:\n")
+                    for dir_name, file_count in sorted(dir_struct.items(), key=lambda x: x[1], reverse=True)[:15]:
+                        f.write(f"- `{dir_name}/`: {file_count} file(s)\n")
+                    f.write("\n")
+
+            # Footer
+            f.write("---\n\n")
+            f.write("*This architecture overview was automatically generated by C3.x codebase analysis.*\n")
+            f.write("*Last updated: skill build time*\n")
+
+        logger.info(f"📐 Created ARCHITECTURE.md")
+
+    def _generate_pattern_references(self, c3_dir: str, patterns_data: Dict):
+        """Generate design pattern references (C3.1)."""
+        if not patterns_data:
+            return
+
+        patterns_dir = os.path.join(c3_dir, 'patterns')
+        os.makedirs(patterns_dir, exist_ok=True)
+
+        # Save JSON data
+        json_path = os.path.join(patterns_dir, 'detected_patterns.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(patterns_data, f, indent=2, ensure_ascii=False)
+
+        # Create summary markdown
+        md_path = os.path.join(patterns_dir, 'index.md')
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write("# Design Patterns\n\n")
+            f.write("*Detected patterns from C3.1 analysis*\n\n")
+
+            for file_data in patterns_data:
+                patterns = file_data.get('patterns', [])
+                if patterns:
+                    f.write(f"## {file_data['file_path']}\n\n")
+                    for p in patterns:
+                        f.write(f"### {p['pattern_type']}\n\n")
+                        if p.get('class_name'):
+                            f.write(f"- **Class**: `{p['class_name']}`\n")
+                        if p.get('confidence'):
+                            f.write(f"- **Confidence**: {p['confidence']:.2f}\n")
+                        if p.get('indicators'):
+                            f.write(f"- **Indicators**: {', '.join(p['indicators'][:3])}\n")
+                        f.write("\n")
+
+        logger.info(f"   ✓ Design patterns: {len(patterns_data)} files")
+
+    def _generate_example_references(self, c3_dir: str, examples_data: Dict):
+        """Generate test example references (C3.2)."""
+        if not examples_data:
+            return
+
+        examples_dir = os.path.join(c3_dir, 'examples')
+        os.makedirs(examples_dir, exist_ok=True)
+
+        # Save JSON data
+        json_path = os.path.join(examples_dir, 'test_examples.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(examples_data, f, indent=2, ensure_ascii=False)
+
+        # Create summary markdown
+        md_path = os.path.join(examples_dir, 'index.md')
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write("# Usage Examples\n\n")
+            f.write("*Extracted from test files (C3.2)*\n\n")
+
+            total = examples_data.get('total_examples', 0)
+            high_value = examples_data.get('high_value_count', 0)
+
+            f.write(f"**Total Examples**: {total}\n")
+            f.write(f"**High-Value Examples**: {high_value}\n\n")
+
+            # List high-value examples
+            examples = examples_data.get('examples', [])
+            high_value_examples = [e for e in examples if e.get('confidence', 0) > 0.7]
+
+            if high_value_examples:
+                f.write("## High-Value Examples\n\n")
+                for ex in high_value_examples[:20]:  # Top 20
+                    f.write(f"### {ex.get('description', 'Example')}\n\n")
+                    f.write(f"- **Category**: {ex.get('category', 'unknown')}\n")
+                    f.write(f"- **Confidence**: {ex.get('confidence', 0):.2f}\n")
+                    f.write(f"- **File**: `{ex.get('file_path', 'N/A')}`\n")
+                    if ex.get('code_snippet'):
+                        f.write(f"\n```python\n{ex['code_snippet'][:300]}\n```\n")
+                    f.write("\n")
+
+        logger.info(f"   ✓ Test examples: {total} total, {high_value} high-value")
+
+    def _generate_guide_references(self, c3_dir: str, guides_data: Dict):
+        """Generate how-to guide references (C3.3)."""
+        if not guides_data:
+            return
+
+        guides_dir = os.path.join(c3_dir, 'guides')
+        os.makedirs(guides_dir, exist_ok=True)
+
+        # Save JSON collection data
+        json_path = os.path.join(guides_dir, 'guide_collection.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(guides_data, f, indent=2, ensure_ascii=False)
+
+        guides = guides_data.get('guides', [])
+
+        # Create index
+        md_path = os.path.join(guides_dir, 'index.md')
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write("# How-To Guides\n\n")
+            f.write("*Workflow tutorials extracted from codebase (C3.3)*\n\n")
+
+            f.write(f"**Total Guides**: {len(guides)}\n\n")
+
+            if guides:
+                f.write("## Available Guides\n\n")
+                for guide in guides:
+                    f.write(f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n")
+                f.write("\n")
+
+        # Save individual guide markdown files
+        for guide in guides:
+            guide_id = guide.get('id', 'unknown')
+            guide_path = os.path.join(guides_dir, f"guide_{guide_id}.md")
+
+            with open(guide_path, 'w', encoding='utf-8') as f:
+                f.write(f"# {guide.get('title', 'Untitled Guide')}\n\n")
+
+                if guide.get('description'):
+                    f.write(f"{guide['description']}\n\n")
+
+                steps = guide.get('steps', [])
+                if steps:
+                    f.write("## Steps\n\n")
+                    for i, step in enumerate(steps, 1):
+                        f.write(f"### {i}. {step.get('action', 'Step')}\n\n")
+                        if step.get('code_example'):
+                            lang = step.get('language', 'python')
+                            f.write(f"```{lang}\n{step['code_example']}\n```\n\n")
+                        if step.get('explanation'):
+                            f.write(f"{step['explanation']}\n\n")
+
+        logger.info(f"   ✓ How-to guides: {len(guides)}")
+
+    def _generate_config_references(self, c3_dir: str, config_data: Dict):
+        """Generate configuration pattern references (C3.4)."""
+        if not config_data:
+            return
+
+        config_dir = os.path.join(c3_dir, 'configuration')
+        os.makedirs(config_dir, exist_ok=True)
+
+        # Save JSON data
+        json_path = os.path.join(config_dir, 'config_patterns.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(config_data, f, indent=2, ensure_ascii=False)
+
+        # Create summary markdown
+        md_path = os.path.join(config_dir, 'index.md')
+        config_files = config_data.get('config_files', [])
+
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write("# Configuration Patterns\n\n")
+            f.write("*Detected configuration files (C3.4)*\n\n")
+
+            f.write(f"**Total Config Files**: {len(config_files)}\n\n")
+
+            if config_files:
+                f.write("## Configuration Files\n\n")
+                for cf in config_files:
+                    f.write(f"### `{cf['relative_path']}`\n\n")
+                    f.write(f"- **Type**: {cf['config_type']}\n")
+                    f.write(f"- **Purpose**: {cf.get('purpose', 'N/A')}\n")
+                    f.write(f"- **Settings**: {len(cf.get('settings', []))}\n")
+
+                    # Show AI enhancements if available
+                    if cf.get('ai_enhancement'):
+                        enh = cf['ai_enhancement']
+                        if enh.get('security_concern'):
+                            f.write(f"- **Security**: {enh['security_concern']}\n")
+                        if enh.get('best_practice'):
+                            f.write(f"- **Best Practice**: {enh['best_practice']}\n")
+
+                    f.write("\n")
+
+                # Overall insights
+                if config_data.get('ai_enhancements'):
+                    insights = config_data['ai_enhancements'].get('overall_insights', {})
+                    if insights:
+                        f.write("## Overall Insights\n\n")
+                        if insights.get('security_issues_found'):
+                            f.write(f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n")
+                        if insights.get('recommended_actions'):
+                            f.write("**Recommended Actions**:\n")
+                            for action in insights['recommended_actions']:
+                                f.write(f"- {action}\n")
+                            f.write("\n")
+
+        logger.info(f"   ✓ Configuration files: {len(config_files)}")
+
+    def _copy_architecture_details(self, c3_dir: str, arch_data: Dict):
+        """Copy architectural pattern JSON details (C3.7)."""
+        if not arch_data:
+            return
+
+        arch_dir = os.path.join(c3_dir, 'architecture_details')
+        os.makedirs(arch_dir, exist_ok=True)
+
+        # Save full JSON data
+        json_path = os.path.join(arch_dir, 'architectural_patterns.json')
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(arch_data, f, indent=2, ensure_ascii=False)
+
+        # Create summary markdown
+        md_path = os.path.join(arch_dir, 'index.md')
+        with open(md_path, 'w', encoding='utf-8') as f:
+            f.write("# Architectural Patterns (Detailed)\n\n")
+            f.write("*Comprehensive architectural analysis (C3.7)*\n\n")
+
+            patterns = arch_data.get('patterns', [])
+            if patterns:
+                f.write("## Detected Patterns\n\n")
+                for p in patterns:
+                    f.write(f"### {p['pattern_name']}\n\n")
+                    f.write(f"- **Confidence**: {p['confidence']:.2f}\n")
+                    if p.get('framework'):
+                        f.write(f"- **Framework**: {p['framework']}\n")
+                    if p.get('evidence'):
+                        f.write(f"- **Evidence**:\n")
+                        for e in p['evidence'][:5]:
+                            f.write(f"  - {e}\n")
+                    f.write("\n")
+
+        logger.info(f"   ✓ Architectural details: {len(patterns)} patterns")
+
+    def _format_c3_summary_section(self, c3_data: Dict) -> str:
+        """Format C3.x analysis summary for SKILL.md."""
+        content = "\n## 🏗️ Architecture & Code Analysis\n\n"
+        content += "*This skill includes comprehensive codebase analysis*\n\n"
+
+        # Add architectural pattern summary
+        if c3_data.get('architecture'):
+            patterns = c3_data['architecture'].get('patterns', [])
+            if patterns:
+                top_pattern = patterns[0]
+                content += f"**Primary Architecture**: {top_pattern['pattern_name']}"
+                if top_pattern.get('framework'):
+                    content += f" ({top_pattern['framework']})"
+                content += f" - Confidence: {top_pattern['confidence']:.0%}\n\n"
+
+        # Add design patterns summary
+        if c3_data.get('patterns'):
+            total_patterns = sum(len(f.get('patterns', [])) for f in c3_data['patterns'])
+            if total_patterns > 0:
+                content += f"**Design Patterns**: {total_patterns} detected\n"
+
+                # Show top 3 pattern types
+                pattern_summary = {}
+                for file_data in c3_data['patterns']:
+                    for pattern in file_data.get('patterns', []):
+                        ptype = pattern['pattern_type']
+                        pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
+
+                top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3]
+                if top_patterns:
+                    content += f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
+                content += "\n"
+
+        # Add test examples summary
+        if c3_data.get('test_examples'):
+            total = c3_data['test_examples'].get('total_examples', 0)
+            high_value = c3_data['test_examples'].get('high_value_count', 0)
+            if total > 0:
+                content += f"**Usage Examples**: {total} extracted from tests ({high_value} high-value)\n\n"
+
+        # Add how-to guides summary
+        if c3_data.get('how_to_guides'):
+            guide_count = len(c3_data['how_to_guides'].get('guides', []))
+            if guide_count > 0:
+                content += f"**How-To Guides**: {guide_count} workflow tutorials\n\n"
+
+        # Add configuration summary
+        if c3_data.get('config_patterns'):
+            config_files = c3_data['config_patterns'].get('config_files', [])
+            if config_files:
+                content += f"**Configuration Files**: {len(config_files)} analyzed\n"
+
+                # Add security warning if present
+                if c3_data['config_patterns'].get('ai_enhancements'):
+                    insights = c3_data['config_patterns']['ai_enhancements'].get('overall_insights', {})
+                    security_issues = insights.get('security_issues_found', 0)
+                    if security_issues > 0:
+                        content += f"- 🔐 **Security Alert**: {security_issues} issue(s) detected\n"
+                content += "\n"
+
+        # Add link to ARCHITECTURE.md
+        content += "📖 **See** `references/codebase_analysis/ARCHITECTURE.md` for complete architectural overview.\n\n"
+
+        return content
+
    def _generate_conflicts_report(self):
        """Generate detailed conflicts report."""
        conflicts_path = os.path.join(self.skill_dir, 'references', 'conflicts.md')