feat: C3.5 - Architectural Overview & Skill Integrator

Implements comprehensive integration of ALL C3.x codebase analysis features
into unified skills, transforming basic GitHub scraping into comprehensive
codebase intelligence with architectural insights.

**What C3.5 Does:**
- Generates comprehensive ARCHITECTURE.md with 8 sections
- Integrates ALL C3.x outputs (patterns, examples, guides, configs, architecture)
- Defaults to ON for GitHub sources with local_repo_path
- Adds --skip-codebase-analysis CLI flag

**ARCHITECTURE.md Sections:**
1. Overview - Project description
2. Architectural Patterns (C3.7) - MVC, MVVM, Clean Architecture, etc.
3. Technology Stack - Frameworks, libraries, languages
4. Design Patterns (C3.1) - Factory, Singleton, Observer, etc.
5. Configuration Overview (C3.4) - Config files with security warnings
6. Common Workflows (C3.3) - How-to guides summary
7. Usage Examples (C3.2) - Test examples statistics
8. Entry Points & Directory Structure - File organization

**Directory Structure:**
output/{name}/references/codebase_analysis/
├── ARCHITECTURE.md (main deliverable)
├── patterns/ (C3.1 design patterns)
├── examples/ (C3.2 test examples)
├── guides/ (C3.3 how-to tutorials)
├── configuration/ (C3.4 config patterns)
└── architecture_details/ (C3.7 architectural patterns)

**Key Features:**
- Default ON: enable_codebase_analysis=true when local_repo_path exists
- CLI flag: --skip-codebase-analysis to disable
- Enhanced SKILL.md with Architecture & Code Analysis summary
- Graceful degradation on C3.x failures
- New config properties: enable_codebase_analysis, ai_mode

**Changes:**
- unified_scraper.py: Added _run_c3_analysis(), modified _scrape_github(), CLI flag
- unified_skill_builder.py: Added 7 methods for C3.x generation + SKILL.md enhancement
- config_validator.py: Added validation for C3.x properties
- Updated 5 configs: react, django, fastapi, godot, svelte-cli
- Added 9 integration tests in test_c3_integration.py
- Updated CHANGELOG.md with complete C3.5 documentation

**Related:**
- Closes #75
- Creates #238 (type: "local" support - separate task)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-04 22:03:46 +03:00
parent 1298f7bd57
commit 9e772351fe
11 changed files with 1091 additions and 6 deletions

View File

@@ -18,6 +18,7 @@ import json
import logging
import argparse
import subprocess
import shutil
from pathlib import Path
from typing import Dict, List, Any, Optional
@@ -211,6 +212,23 @@ class UnifiedScraper:
scraper = GitHubScraper(github_config)
github_data = scraper.scrape()
# Run C3.x codebase analysis if enabled and local_repo_path available
enable_codebase_analysis = source.get('enable_codebase_analysis', True)
local_repo_path = source.get('local_repo_path')
if enable_codebase_analysis and local_repo_path:
logger.info("🔬 Running C3.x codebase analysis...")
try:
c3_data = self._run_c3_analysis(local_repo_path, source)
if c3_data:
github_data['c3_analysis'] = c3_data
logger.info("✅ C3.x analysis complete")
else:
logger.warning("⚠️ C3.x analysis returned no data")
except Exception as e:
logger.warning(f"⚠️ C3.x analysis failed: {e}")
# Continue without C3.x data - graceful degradation
# Save data
github_data_file = os.path.join(self.data_dir, 'github_data.json')
with open(github_data_file, 'w', encoding='utf-8') as f:
@@ -257,6 +275,138 @@ class UnifiedScraper:
logger.info(f"✅ PDF: {len(pdf_data.get('pages', []))} pages extracted")
def _load_json(self, file_path: Path) -> Dict:
"""
Load JSON file safely.
Args:
file_path: Path to JSON file
Returns:
Dict with JSON data, or empty dict if file doesn't exist or is invalid
"""
if not file_path.exists():
logger.warning(f"JSON file not found: {file_path}")
return {}
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.warning(f"Failed to load JSON {file_path}: {e}")
return {}
def _load_guide_collection(self, tutorials_dir: Path) -> Dict:
"""
Load how-to guide collection from tutorials directory.
Args:
tutorials_dir: Path to tutorials directory
Returns:
Dict with guide collection data
"""
if not tutorials_dir.exists():
logger.warning(f"Tutorials directory not found: {tutorials_dir}")
return {'guides': []}
collection_file = tutorials_dir / 'guide_collection.json'
if collection_file.exists():
return self._load_json(collection_file)
# Fallback: scan for individual guide JSON files
guides = []
for guide_file in tutorials_dir.glob('guide_*.json'):
guide_data = self._load_json(guide_file)
if guide_data:
guides.append(guide_data)
return {'guides': guides, 'total_count': len(guides)}
def _run_c3_analysis(self, local_repo_path: str, source: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run comprehensive C3.x codebase analysis.

    Calls codebase_scraper.analyze_codebase() with all C3.x features enabled,
    loads the results into memory, and cleans up temporary files.

    Args:
        local_repo_path: Path to local repository
        source: GitHub source configuration dict

    Returns:
        Dict with keys: patterns, test_examples, how_to_guides,
        config_patterns, architecture. Empty dict on failure.
    """
    try:
        from skill_seekers.cli.codebase_scraper import analyze_codebase
    except ImportError:
        logger.error("codebase_scraper.py not found")
        return {}

    # Create temp output dir for C3.x analysis
    temp_output = Path(self.data_dir) / 'c3_analysis_temp'
    temp_output.mkdir(parents=True, exist_ok=True)
    logger.info(f"  Analyzing codebase: {local_repo_path}")
    try:
        # Run full C3.x analysis
        results = analyze_codebase(
            directory=Path(local_repo_path),
            output_dir=temp_output,
            depth='deep',
            languages=None,  # Analyze all languages
            file_patterns=source.get('file_patterns'),
            build_api_reference=False,  # Not needed in skill
            extract_comments=False,  # Not needed
            build_dependency_graph=False,  # Can add later if needed
            detect_patterns=True,  # C3.1: Design patterns
            extract_test_examples=True,  # C3.2: Test examples
            build_how_to_guides=True,  # C3.3: How-to guides
            extract_config_patterns=True,  # C3.4: Config patterns
            enhance_with_ai=source.get('ai_mode', 'auto') != 'none',
            ai_mode=source.get('ai_mode', 'auto')
        )
        # Load C3.x outputs into memory
        c3_data = {
            'patterns': self._load_json(temp_output / 'patterns' / 'detected_patterns.json'),
            'test_examples': self._load_json(temp_output / 'test_examples' / 'test_examples.json'),
            'how_to_guides': self._load_guide_collection(temp_output / 'tutorials'),
            'config_patterns': self._load_json(temp_output / 'config_patterns' / 'config_patterns.json'),
            'architecture': self._load_json(temp_output / 'architecture' / 'architectural_patterns.json')
        }
        # Summary logging is best-effort: it must never abort a successful
        # analysis (previously a bad schema here discarded all results).
        self._log_c3_summary(c3_data)
        return c3_data
    except Exception as e:
        logger.error(f"C3.x analysis failed: {e}")
        import traceback
        traceback.print_exc()
        return {}
    finally:
        # Clean up temp directory
        if temp_output.exists():
            try:
                shutil.rmtree(temp_output)
            except Exception as e:
                logger.warning(f"Failed to clean up temp directory: {e}")

def _log_c3_summary(self, c3_data: Dict[str, Any]) -> None:
    """
    Log per-feature result counts for a C3.x analysis run.

    Purely informational — swallows any error so malformed stats can
    never discard the analysis results themselves.
    """
    try:
        # BUG FIX: 'patterns' is a dict (from _load_json); iterating it
        # directly yields string keys, and key.get(...) raised
        # AttributeError, which the caller's broad except turned into a
        # total loss of results. Iterate the per-file entries instead.
        # Assumes each value is a dict with a 'patterns' list — matches
        # the original author's per-entry .get('patterns', []) intent.
        patterns_data = c3_data.get('patterns') or {}
        total_patterns = sum(
            len(entry.get('patterns', []))
            for entry in patterns_data.values()
            if isinstance(entry, dict)
        )
        total_examples = c3_data.get('test_examples', {}).get('total_examples', 0)
        total_guides = len(c3_data.get('how_to_guides', {}).get('guides', []))
        total_configs = len(c3_data.get('config_patterns', {}).get('config_files', []))
        arch_patterns = len(c3_data.get('architecture', {}).get('patterns', []))
        logger.info(f"  ✓ Design Patterns: {total_patterns}")
        logger.info(f"  ✓ Test Examples: {total_examples}")
        logger.info(f"  ✓ How-To Guides: {total_guides}")
        logger.info(f"  ✓ Config Files: {total_configs}")
        logger.info(f"  ✓ Architecture Patterns: {arch_patterns}")
    except Exception as e:
        logger.warning(f"Failed to summarize C3.x results: {e}")
def detect_conflicts(self) -> List:
"""
Detect conflicts between documentation and code.
@@ -451,11 +601,23 @@ Examples:
parser.add_argument('--merge-mode', '-m',
choices=['rule-based', 'claude-enhanced'],
help='Override config merge mode')
parser.add_argument('--skip-codebase-analysis',
action='store_true',
help='Skip C3.x codebase analysis for GitHub sources (default: enabled)')
args = parser.parse_args()
# Create and run scraper
# Create scraper
scraper = UnifiedScraper(args.config, args.merge_mode)
# Disable codebase analysis if requested
if args.skip_codebase_analysis:
for source in scraper.config.get('sources', []):
if source['type'] == 'github':
source['enable_codebase_analysis'] = False
logger.info(f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}")
# Run scraper
scraper.run()