run ruff
This commit is contained in:
@@ -24,65 +24,80 @@ Credits:
|
||||
- pathspec for .gitignore support: https://pypi.org/project/pathspec/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from typing import Any
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from skill_seekers.cli.code_analyzer import CodeAnalyzer
|
||||
from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
|
||||
from skill_seekers.cli.dependency_analyzer import DependencyAnalyzer
|
||||
from skill_seekers.cli.code_analyzer import CodeAnalyzer
|
||||
from skill_seekers.cli.config_extractor import ConfigExtractor
|
||||
from skill_seekers.cli.dependency_analyzer import DependencyAnalyzer
|
||||
|
||||
# Try to import pathspec for .gitignore support.
# pathspec is an optional dependency: PATHSPEC_AVAILABLE records whether the
# import succeeded so callers can degrade gracefully instead of crashing.
# NOTE(review): signatures in this module annotate with `pathspec.PathSpec`;
# without `from __future__ import annotations` those are evaluated at def time
# and would raise NameError when pathspec is missing - confirm and guard.
try:
    import pathspec

    PATHSPEC_AVAILABLE = True
except ImportError:
    PATHSPEC_AVAILABLE = False
|
||||
|
||||
# Configure logging.
# Sets up the root logger once at import time (INFO level, with timestamp,
# level name and message), then creates this module's own logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Language extension mapping.
# Keys are lower-case file suffixes including the leading dot; values are the
# language names reported by detect_language(). Suffixes not listed here are
# reported as "Unknown" by that function.
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".cpp": "C++",
    ".cc": "C++",
    ".cxx": "C++",
    ".h": "C++",  # plain .h headers are classified as C++, not C
    ".hpp": "C++",
    ".hxx": "C++",
    ".c": "C",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".rb": "Ruby",
    ".php": "PHP",
}
|
||||
|
||||
# Default directories to exclude.
# Directory names skipped by default: dependency folders, VCS metadata,
# build output, tool caches and editor settings. Every element ends with a
# comma so adjacent string literals can never be implicitly concatenated.
# NOTE(review): "*.egg-info" is a glob pattern, not a literal name - confirm
# that should_exclude_dir() pattern-matches rather than testing membership.
DEFAULT_EXCLUDED_DIRS = {
    "node_modules",
    "venv",
    "__pycache__",
    ".git",
    ".svn",
    ".hg",
    "build",
    "dist",
    "target",
    ".pytest_cache",
    ".tox",
    ".mypy_cache",
    "htmlcov",
    "coverage",
    ".coverage",
    ".eggs",
    "*.egg-info",
    ".idea",
    ".vscode",
    ".vs",
    "__pypackages__",
}
|
||||
|
||||
|
||||
@@ -97,10 +112,10 @@ def detect_language(file_path: Path) -> str:
|
||||
Language name or 'Unknown'
|
||||
"""
|
||||
extension = file_path.suffix.lower()
|
||||
return LANGUAGE_EXTENSIONS.get(extension, 'Unknown')
|
||||
return LANGUAGE_EXTENSIONS.get(extension, "Unknown")
|
||||
|
||||
|
||||
def load_gitignore(directory: Path) -> Optional[pathspec.PathSpec]:
|
||||
def load_gitignore(directory: Path) -> pathspec.PathSpec | None:
|
||||
"""
|
||||
Load .gitignore file and create pathspec matcher.
|
||||
|
||||
@@ -115,14 +130,14 @@ def load_gitignore(directory: Path) -> Optional[pathspec.PathSpec]:
|
||||
logger.warning("Install with: pip install pathspec")
|
||||
return None
|
||||
|
||||
gitignore_path = directory / '.gitignore'
|
||||
gitignore_path = directory / ".gitignore"
|
||||
if not gitignore_path.exists():
|
||||
logger.debug(f"No .gitignore found in {directory}")
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(gitignore_path, 'r', encoding='utf-8') as f:
|
||||
spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
|
||||
with open(gitignore_path, encoding="utf-8") as f:
|
||||
spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
|
||||
logger.info(f"Loaded .gitignore from {gitignore_path}")
|
||||
return spec
|
||||
except Exception as e:
|
||||
@@ -146,10 +161,10 @@ def should_exclude_dir(dir_name: str, excluded_dirs: set) -> bool:
|
||||
|
||||
def walk_directory(
|
||||
root: Path,
|
||||
patterns: Optional[List[str]] = None,
|
||||
gitignore_spec: Optional[pathspec.PathSpec] = None,
|
||||
excluded_dirs: Optional[set] = None
|
||||
) -> List[Path]:
|
||||
patterns: list[str] | None = None,
|
||||
gitignore_spec: pathspec.PathSpec | None = None,
|
||||
excluded_dirs: set | None = None,
|
||||
) -> list[Path]:
|
||||
"""
|
||||
Walk directory tree and collect source files.
|
||||
|
||||
@@ -205,9 +220,9 @@ def walk_directory(
|
||||
def analyze_codebase(
|
||||
directory: Path,
|
||||
output_dir: Path,
|
||||
depth: str = 'deep',
|
||||
languages: Optional[List[str]] = None,
|
||||
file_patterns: Optional[List[str]] = None,
|
||||
depth: str = "deep",
|
||||
languages: list[str] | None = None,
|
||||
file_patterns: list[str] | None = None,
|
||||
build_api_reference: bool = True,
|
||||
extract_comments: bool = True,
|
||||
build_dependency_graph: bool = True,
|
||||
@@ -216,8 +231,8 @@ def analyze_codebase(
|
||||
build_how_to_guides: bool = True,
|
||||
extract_config_patterns: bool = True,
|
||||
enhance_with_ai: bool = True,
|
||||
ai_mode: str = "auto"
|
||||
) -> Dict[str, Any]:
|
||||
ai_mode: str = "auto",
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Analyze local codebase and extract code knowledge.
|
||||
|
||||
@@ -255,11 +270,7 @@ def analyze_codebase(
|
||||
|
||||
# Walk directory tree
|
||||
logger.info("Scanning directory tree...")
|
||||
files = walk_directory(
|
||||
directory,
|
||||
patterns=file_patterns,
|
||||
gitignore_spec=gitignore_spec
|
||||
)
|
||||
files = walk_directory(directory, patterns=file_patterns, gitignore_spec=gitignore_spec)
|
||||
|
||||
logger.info(f"Found {len(files)} source files")
|
||||
|
||||
@@ -273,27 +284,25 @@ def analyze_codebase(
|
||||
analyzer = CodeAnalyzer(depth=depth)
|
||||
|
||||
# Analyze each file
|
||||
results = {'files': []}
|
||||
results = {"files": []}
|
||||
analyzed_count = 0
|
||||
|
||||
for file_path in files:
|
||||
try:
|
||||
content = file_path.read_text(encoding='utf-8', errors='ignore')
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
language = detect_language(file_path)
|
||||
|
||||
if language == 'Unknown':
|
||||
if language == "Unknown":
|
||||
continue
|
||||
|
||||
# Analyze file
|
||||
analysis = analyzer.analyze_file(str(file_path), content, language)
|
||||
|
||||
# Only include files with actual analysis results
|
||||
if analysis and (analysis.get('classes') or analysis.get('functions')):
|
||||
results['files'].append({
|
||||
'file': str(file_path.relative_to(directory)),
|
||||
'language': language,
|
||||
**analysis
|
||||
})
|
||||
if analysis and (analysis.get("classes") or analysis.get("functions")):
|
||||
results["files"].append(
|
||||
{"file": str(file_path.relative_to(directory)), "language": language, **analysis}
|
||||
)
|
||||
analyzed_count += 1
|
||||
|
||||
if analyzed_count % 10 == 0:
|
||||
@@ -306,17 +315,17 @@ def analyze_codebase(
|
||||
logger.info(f"✅ Successfully analyzed {analyzed_count} files")
|
||||
|
||||
# Save results
|
||||
output_json = output_dir / 'code_analysis.json'
|
||||
with open(output_json, 'w', encoding='utf-8') as f:
|
||||
output_json = output_dir / "code_analysis.json"
|
||||
with open(output_json, "w", encoding="utf-8") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
|
||||
logger.info(f"📁 Saved analysis to: {output_json}")
|
||||
|
||||
# Build API reference if requested
|
||||
if build_api_reference and results['files']:
|
||||
if build_api_reference and results["files"]:
|
||||
logger.info("Building API reference documentation...")
|
||||
builder = APIReferenceBuilder(results)
|
||||
api_output_dir = output_dir / 'api_reference'
|
||||
api_output_dir = output_dir / "api_reference"
|
||||
generated_files = builder.build_reference(api_output_dir)
|
||||
logger.info(f"✅ Generated {len(generated_files)} API reference files")
|
||||
logger.info(f"📁 API reference: {api_output_dir}")
|
||||
@@ -329,10 +338,10 @@ def analyze_codebase(
|
||||
# Analyze dependencies for all files
|
||||
for file_path in files:
|
||||
try:
|
||||
content = file_path.read_text(encoding='utf-8', errors='ignore')
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
language = detect_language(file_path)
|
||||
|
||||
if language != 'Unknown':
|
||||
if language != "Unknown":
|
||||
# Use relative path from directory for better graph readability
|
||||
rel_path = str(file_path.relative_to(directory))
|
||||
dep_analyzer.analyze_file(rel_path, content, language)
|
||||
@@ -348,7 +357,7 @@ def analyze_codebase(
|
||||
if cycles:
|
||||
logger.warning(f"⚠️ Found {len(cycles)} circular dependencies:")
|
||||
for i, cycle in enumerate(cycles[:5], 1): # Show first 5
|
||||
cycle_str = ' → '.join(cycle) + f" → {cycle[0]}"
|
||||
cycle_str = " → ".join(cycle) + f" → {cycle[0]}"
|
||||
logger.warning(f" {i}. {cycle_str}")
|
||||
if len(cycles) > 5:
|
||||
logger.warning(f" ... and {len(cycles) - 5} more")
|
||||
@@ -356,32 +365,34 @@ def analyze_codebase(
|
||||
logger.info("✅ No circular dependencies found")
|
||||
|
||||
# Save dependency graph data
|
||||
dep_output_dir = output_dir / 'dependencies'
|
||||
dep_output_dir = output_dir / "dependencies"
|
||||
dep_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Export as JSON
|
||||
dep_json = dep_output_dir / 'dependency_graph.json'
|
||||
with open(dep_json, 'w', encoding='utf-8') as f:
|
||||
dep_json = dep_output_dir / "dependency_graph.json"
|
||||
with open(dep_json, "w", encoding="utf-8") as f:
|
||||
json.dump(dep_analyzer.export_json(), f, indent=2)
|
||||
logger.info(f"📁 Saved dependency graph: {dep_json}")
|
||||
|
||||
# Export as Mermaid diagram
|
||||
mermaid_file = dep_output_dir / 'dependency_graph.mmd'
|
||||
mermaid_file = dep_output_dir / "dependency_graph.mmd"
|
||||
mermaid_file.write_text(dep_analyzer.export_mermaid())
|
||||
logger.info(f"📁 Saved Mermaid diagram: {mermaid_file}")
|
||||
|
||||
# Save statistics
|
||||
stats = dep_analyzer.get_statistics()
|
||||
stats_file = dep_output_dir / 'statistics.json'
|
||||
with open(stats_file, 'w', encoding='utf-8') as f:
|
||||
stats_file = dep_output_dir / "statistics.json"
|
||||
with open(stats_file, "w", encoding="utf-8") as f:
|
||||
json.dump(stats, f, indent=2)
|
||||
logger.info(f"📊 Statistics: {stats['total_files']} files, "
|
||||
f"{stats['total_dependencies']} dependencies, "
|
||||
f"{stats['circular_dependencies']} cycles")
|
||||
logger.info(
|
||||
f"📊 Statistics: {stats['total_files']} files, "
|
||||
f"{stats['total_dependencies']} dependencies, "
|
||||
f"{stats['circular_dependencies']} cycles"
|
||||
)
|
||||
|
||||
# Try to export as DOT (requires pydot)
|
||||
try:
|
||||
dot_file = dep_output_dir / 'dependency_graph.dot'
|
||||
dot_file = dep_output_dir / "dependency_graph.dot"
|
||||
dep_analyzer.export_dot(str(dot_file))
|
||||
except:
|
||||
pass # pydot not installed, skip DOT export
|
||||
@@ -396,13 +407,11 @@ def analyze_codebase(
|
||||
|
||||
for file_path in files:
|
||||
try:
|
||||
content = file_path.read_text(encoding='utf-8', errors='ignore')
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
language = detect_language(file_path)
|
||||
|
||||
if language != 'Unknown':
|
||||
report = pattern_recognizer.analyze_file(
|
||||
str(file_path), content, language
|
||||
)
|
||||
if language != "Unknown":
|
||||
report = pattern_recognizer.analyze_file(str(file_path), content, language)
|
||||
|
||||
if report.patterns:
|
||||
pattern_results.append(report.to_dict())
|
||||
@@ -412,14 +421,14 @@ def analyze_codebase(
|
||||
|
||||
# Save pattern results
|
||||
if pattern_results:
|
||||
pattern_output = output_dir / 'patterns'
|
||||
pattern_output = output_dir / "patterns"
|
||||
pattern_output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
pattern_json = pattern_output / 'detected_patterns.json'
|
||||
with open(pattern_json, 'w', encoding='utf-8') as f:
|
||||
pattern_json = pattern_output / "detected_patterns.json"
|
||||
with open(pattern_json, "w", encoding="utf-8") as f:
|
||||
json.dump(pattern_results, f, indent=2)
|
||||
|
||||
total_patterns = sum(len(r['patterns']) for r in pattern_results)
|
||||
total_patterns = sum(len(r["patterns"]) for r in pattern_results)
|
||||
logger.info(f"✅ Detected {total_patterns} patterns in {len(pattern_results)} files")
|
||||
logger.info(f"📁 Saved to: {pattern_json}")
|
||||
else:
|
||||
@@ -432,35 +441,31 @@ def analyze_codebase(
|
||||
|
||||
# Create extractor
|
||||
test_extractor = TestExampleExtractor(
|
||||
min_confidence=0.5,
|
||||
max_per_file=10,
|
||||
languages=languages,
|
||||
enhance_with_ai=enhance_with_ai
|
||||
min_confidence=0.5, max_per_file=10, languages=languages, enhance_with_ai=enhance_with_ai
|
||||
)
|
||||
|
||||
# Extract examples from directory
|
||||
try:
|
||||
example_report = test_extractor.extract_from_directory(
|
||||
directory,
|
||||
recursive=True
|
||||
)
|
||||
example_report = test_extractor.extract_from_directory(directory, recursive=True)
|
||||
|
||||
if example_report.total_examples > 0:
|
||||
# Save results
|
||||
examples_output = output_dir / 'test_examples'
|
||||
examples_output = output_dir / "test_examples"
|
||||
examples_output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Save as JSON
|
||||
examples_json = examples_output / 'test_examples.json'
|
||||
with open(examples_json, 'w', encoding='utf-8') as f:
|
||||
examples_json = examples_output / "test_examples.json"
|
||||
with open(examples_json, "w", encoding="utf-8") as f:
|
||||
json.dump(example_report.to_dict(), f, indent=2)
|
||||
|
||||
# Save as Markdown
|
||||
examples_md = examples_output / 'test_examples.md'
|
||||
examples_md.write_text(example_report.to_markdown(), encoding='utf-8')
|
||||
examples_md = examples_output / "test_examples.md"
|
||||
examples_md.write_text(example_report.to_markdown(), encoding="utf-8")
|
||||
|
||||
logger.info(f"✅ Extracted {example_report.total_examples} test examples "
|
||||
f"({example_report.high_value_count} high-value)")
|
||||
logger.info(
|
||||
f"✅ Extracted {example_report.total_examples} test examples "
|
||||
f"({example_report.high_value_count} high-value)"
|
||||
)
|
||||
logger.info(f"📁 Saved to: {examples_output}")
|
||||
else:
|
||||
logger.info("No test examples extracted")
|
||||
@@ -479,25 +484,25 @@ def analyze_codebase(
|
||||
guide_builder = HowToGuideBuilder(enhance_with_ai=enhance_with_ai)
|
||||
|
||||
# Build guides from workflow examples
|
||||
tutorials_dir = output_dir / 'tutorials'
|
||||
tutorials_dir = output_dir / "tutorials"
|
||||
|
||||
# Get workflow examples from the example_report if available
|
||||
if 'example_report' in locals() and example_report and example_report.total_examples > 0:
|
||||
if "example_report" in locals() and example_report and example_report.total_examples > 0:
|
||||
# Convert example_report to list of dicts for processing
|
||||
examples_list = example_report.to_dict().get('examples', [])
|
||||
examples_list = example_report.to_dict().get("examples", [])
|
||||
|
||||
guide_collection = guide_builder.build_guides_from_examples(
|
||||
examples_list,
|
||||
grouping_strategy='ai-tutorial-group',
|
||||
grouping_strategy="ai-tutorial-group",
|
||||
output_dir=tutorials_dir,
|
||||
enhance_with_ai=enhance_with_ai,
|
||||
ai_mode=ai_mode
|
||||
ai_mode=ai_mode,
|
||||
)
|
||||
|
||||
if guide_collection and guide_collection.total_guides > 0:
|
||||
# Save collection summary
|
||||
collection_json = tutorials_dir / 'guide_collection.json'
|
||||
with open(collection_json, 'w', encoding='utf-8') as f:
|
||||
collection_json = tutorials_dir / "guide_collection.json"
|
||||
with open(collection_json, "w", encoding="utf-8") as f:
|
||||
json.dump(guide_collection.to_dict(), f, indent=2)
|
||||
|
||||
logger.info(f"✅ Built {guide_collection.total_guides} how-to guides")
|
||||
@@ -524,9 +529,10 @@ def analyze_codebase(
|
||||
result_dict = config_extractor.to_dict(extraction_result)
|
||||
|
||||
# AI Enhancement (if enabled)
|
||||
if enhance_with_ai and ai_mode != 'none':
|
||||
if enhance_with_ai and ai_mode != "none":
|
||||
try:
|
||||
from skill_seekers.cli.config_enhancer import ConfigEnhancer
|
||||
|
||||
logger.info(f"🤖 Enhancing config analysis with AI (mode: {ai_mode})...")
|
||||
enhancer = ConfigEnhancer(mode=ai_mode)
|
||||
result_dict = enhancer.enhance_config_result(result_dict)
|
||||
@@ -535,28 +541,30 @@ def analyze_codebase(
|
||||
logger.warning(f"⚠️ Config AI enhancement failed: {e}")
|
||||
|
||||
# Save results
|
||||
config_output = output_dir / 'config_patterns'
|
||||
config_output = output_dir / "config_patterns"
|
||||
config_output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Save as JSON
|
||||
config_json = config_output / 'config_patterns.json'
|
||||
with open(config_json, 'w', encoding='utf-8') as f:
|
||||
config_json = config_output / "config_patterns.json"
|
||||
with open(config_json, "w", encoding="utf-8") as f:
|
||||
json.dump(result_dict, f, indent=2)
|
||||
|
||||
# Save as Markdown (basic - AI enhancements in JSON only for now)
|
||||
config_md = config_output / 'config_patterns.md'
|
||||
config_md.write_text(extraction_result.to_markdown(), encoding='utf-8')
|
||||
config_md = config_output / "config_patterns.md"
|
||||
config_md.write_text(extraction_result.to_markdown(), encoding="utf-8")
|
||||
|
||||
# Count total settings across all files
|
||||
total_settings = sum(len(cf.settings) for cf in extraction_result.config_files)
|
||||
total_patterns = sum(len(cf.patterns) for cf in extraction_result.config_files)
|
||||
|
||||
logger.info(f"✅ Extracted {len(extraction_result.config_files)} config files "
|
||||
f"with {total_settings} settings and {total_patterns} detected patterns")
|
||||
logger.info(
|
||||
f"✅ Extracted {len(extraction_result.config_files)} config files "
|
||||
f"with {total_settings} settings and {total_patterns} detected patterns"
|
||||
)
|
||||
|
||||
if 'ai_enhancements' in result_dict:
|
||||
insights = result_dict['ai_enhancements'].get('overall_insights', {})
|
||||
if insights.get('security_issues_found'):
|
||||
if "ai_enhancements" in result_dict:
|
||||
insights = result_dict["ai_enhancements"].get("overall_insights", {})
|
||||
if insights.get("security_issues_found"):
|
||||
logger.info(f"🔐 Security issues found: {insights['security_issues_found']}")
|
||||
|
||||
logger.info(f"📁 Saved to: {config_output}")
|
||||
@@ -572,15 +580,15 @@ def analyze_codebase(
|
||||
from skill_seekers.cli.architectural_pattern_detector import ArchitecturalPatternDetector
|
||||
|
||||
arch_detector = ArchitecturalPatternDetector(enhance_with_ai=enhance_with_ai)
|
||||
arch_report = arch_detector.analyze(directory, results['files'])
|
||||
arch_report = arch_detector.analyze(directory, results["files"])
|
||||
|
||||
if arch_report.patterns:
|
||||
arch_output = output_dir / 'architecture'
|
||||
arch_output = output_dir / "architecture"
|
||||
arch_output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Save as JSON
|
||||
arch_json = arch_output / 'architectural_patterns.json'
|
||||
with open(arch_json, 'w', encoding='utf-8') as f:
|
||||
arch_json = arch_output / "architectural_patterns.json"
|
||||
with open(arch_json, "w", encoding="utf-8") as f:
|
||||
json.dump(arch_report.to_dict(), f, indent=2)
|
||||
|
||||
logger.info(f"🏗️ Detected {len(arch_report.patterns)} architectural patterns")
|
||||
@@ -601,7 +609,7 @@ def analyze_codebase(
|
||||
build_dependency_graph=build_dependency_graph,
|
||||
detect_patterns=detect_patterns,
|
||||
extract_test_examples=extract_test_examples,
|
||||
extract_config_patterns=extract_config_patterns
|
||||
extract_config_patterns=extract_config_patterns,
|
||||
)
|
||||
|
||||
return results
|
||||
@@ -610,13 +618,13 @@ def analyze_codebase(
|
||||
def _generate_skill_md(
|
||||
output_dir: Path,
|
||||
directory: Path,
|
||||
results: Dict[str, Any],
|
||||
results: dict[str, Any],
|
||||
depth: str,
|
||||
build_api_reference: bool,
|
||||
build_dependency_graph: bool,
|
||||
detect_patterns: bool,
|
||||
extract_test_examples: bool,
|
||||
extract_config_patterns: bool
|
||||
extract_config_patterns: bool,
|
||||
):
|
||||
"""
|
||||
Generate rich SKILL.md from codebase analysis results.
|
||||
@@ -635,14 +643,14 @@ def _generate_skill_md(
|
||||
repo_name = directory.name
|
||||
|
||||
# Generate skill name (lowercase, hyphens only, max 64 chars)
|
||||
skill_name = repo_name.lower().replace('_', '-').replace(' ', '-')[:64]
|
||||
skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]
|
||||
|
||||
# Generate description
|
||||
description = f"Local codebase analysis for {repo_name}"
|
||||
|
||||
# Count files by language
|
||||
language_stats = _get_language_stats(results.get('files', []))
|
||||
total_files = len(results.get('files', []))
|
||||
language_stats = _get_language_stats(results.get("files", []))
|
||||
total_files = len(results.get("files", []))
|
||||
|
||||
# Start building content
|
||||
skill_content = f"""---
|
||||
@@ -658,7 +666,7 @@ Local codebase analysis and documentation generated from code analysis.
|
||||
|
||||
**Path:** `{directory}`
|
||||
**Files Analyzed:** {total_files}
|
||||
**Languages:** {', '.join(language_stats.keys())}
|
||||
**Languages:** {", ".join(language_stats.keys())}
|
||||
**Analysis Depth:** {depth}
|
||||
|
||||
## When to Use This Skill
|
||||
@@ -732,22 +740,22 @@ Use this skill when you need to:
|
||||
skill_content += "This skill includes detailed reference documentation:\n\n"
|
||||
|
||||
refs_added = False
|
||||
if build_api_reference and (output_dir / 'api_reference').exists():
|
||||
if build_api_reference and (output_dir / "api_reference").exists():
|
||||
skill_content += "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
|
||||
refs_added = True
|
||||
if build_dependency_graph and (output_dir / 'dependencies').exists():
|
||||
if build_dependency_graph and (output_dir / "dependencies").exists():
|
||||
skill_content += "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
|
||||
refs_added = True
|
||||
if detect_patterns and (output_dir / 'patterns').exists():
|
||||
if detect_patterns and (output_dir / "patterns").exists():
|
||||
skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
|
||||
refs_added = True
|
||||
if extract_test_examples and (output_dir / 'test_examples').exists():
|
||||
if extract_test_examples and (output_dir / "test_examples").exists():
|
||||
skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
|
||||
refs_added = True
|
||||
if extract_config_patterns and (output_dir / 'config_patterns').exists():
|
||||
if extract_config_patterns and (output_dir / "config_patterns").exists():
|
||||
skill_content += "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
|
||||
refs_added = True
|
||||
if (output_dir / 'architecture').exists():
|
||||
if (output_dir / "architecture").exists():
|
||||
skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
|
||||
refs_added = True
|
||||
|
||||
@@ -762,34 +770,34 @@ Use this skill when you need to:
|
||||
|
||||
# Write SKILL.md
|
||||
skill_path = output_dir / "SKILL.md"
|
||||
skill_path.write_text(skill_content, encoding='utf-8')
|
||||
skill_path.write_text(skill_content, encoding="utf-8")
|
||||
|
||||
line_count = len(skill_content.split('\n'))
|
||||
line_count = len(skill_content.split("\n"))
|
||||
logger.info(f"✅ Generated SKILL.md: {skill_path} ({line_count} lines)")
|
||||
|
||||
# Generate references/ directory structure
|
||||
_generate_references(output_dir)
|
||||
|
||||
|
||||
def _get_language_stats(files: List[Dict]) -> Dict[str, int]:
|
||||
def _get_language_stats(files: list[dict]) -> dict[str, int]:
|
||||
"""Count files by language from analysis results."""
|
||||
stats = {}
|
||||
for file_data in files:
|
||||
# files is a list of dicts with 'language' key
|
||||
lang = file_data.get('language', 'Unknown')
|
||||
if lang != 'Unknown':
|
||||
lang = file_data.get("language", "Unknown")
|
||||
if lang != "Unknown":
|
||||
stats[lang] = stats.get(lang, 0) + 1
|
||||
return stats
|
||||
|
||||
|
||||
def _format_patterns_section(output_dir: Path) -> str:
|
||||
"""Format design patterns section from patterns/detected_patterns.json."""
|
||||
patterns_file = output_dir / 'patterns' / 'detected_patterns.json'
|
||||
patterns_file = output_dir / "patterns" / "detected_patterns.json"
|
||||
if not patterns_file.exists():
|
||||
return ""
|
||||
|
||||
try:
|
||||
with open(patterns_file, 'r', encoding='utf-8') as f:
|
||||
with open(patterns_file, encoding="utf-8") as f:
|
||||
patterns_data = json.load(f)
|
||||
except Exception:
|
||||
return ""
|
||||
@@ -802,10 +810,10 @@ def _format_patterns_section(output_dir: Path) -> str:
|
||||
by_class = {}
|
||||
|
||||
for pattern_file in patterns_data:
|
||||
for pattern in pattern_file.get('patterns', []):
|
||||
ptype = pattern.get('pattern_type', 'Unknown')
|
||||
cls = pattern.get('class_name', '')
|
||||
confidence = pattern.get('confidence', 0)
|
||||
for pattern in pattern_file.get("patterns", []):
|
||||
ptype = pattern.get("pattern_type", "Unknown")
|
||||
cls = pattern.get("class_name", "")
|
||||
confidence = pattern.get("confidence", 0)
|
||||
|
||||
# Skip low confidence
|
||||
if confidence < 0.7:
|
||||
@@ -813,7 +821,7 @@ def _format_patterns_section(output_dir: Path) -> str:
|
||||
|
||||
# Deduplicate by class
|
||||
key = f"{cls}:{ptype}"
|
||||
if key not in by_class or by_class[key]['confidence'] < confidence:
|
||||
if key not in by_class or by_class[key]["confidence"] < confidence:
|
||||
by_class[key] = pattern
|
||||
|
||||
# Count by type
|
||||
@@ -836,22 +844,22 @@ def _format_patterns_section(output_dir: Path) -> str:
|
||||
|
||||
def _format_examples_section(output_dir: Path) -> str:
|
||||
"""Format code examples section from test_examples/test_examples.json."""
|
||||
examples_file = output_dir / 'test_examples' / 'test_examples.json'
|
||||
examples_file = output_dir / "test_examples" / "test_examples.json"
|
||||
if not examples_file.exists():
|
||||
return ""
|
||||
|
||||
try:
|
||||
with open(examples_file, 'r', encoding='utf-8') as f:
|
||||
with open(examples_file, encoding="utf-8") as f:
|
||||
examples_data = json.load(f)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
examples = examples_data.get('examples', [])
|
||||
examples = examples_data.get("examples", [])
|
||||
if not examples:
|
||||
return ""
|
||||
|
||||
# Filter high-value examples (complexity > 0.7)
|
||||
high_value = [ex for ex in examples if ex.get('complexity_score', 0) > 0.7]
|
||||
high_value = [ex for ex in examples if ex.get("complexity_score", 0) > 0.7]
|
||||
|
||||
if not high_value:
|
||||
# If no high complexity, take any examples
|
||||
@@ -864,11 +872,11 @@ def _format_examples_section(output_dir: Path) -> str:
|
||||
content += "*High-quality examples extracted from test files (C3.2)*\n\n"
|
||||
|
||||
# Top 10 examples
|
||||
for ex in sorted(high_value, key=lambda x: x.get('complexity_score', 0), reverse=True)[:10]:
|
||||
desc = ex.get('description', 'Example')
|
||||
lang = ex.get('language', 'python').lower()
|
||||
code = ex.get('code', '')
|
||||
complexity = ex.get('complexity_score', 0)
|
||||
for ex in sorted(high_value, key=lambda x: x.get("complexity_score", 0), reverse=True)[:10]:
|
||||
desc = ex.get("description", "Example")
|
||||
lang = ex.get("language", "python").lower()
|
||||
code = ex.get("code", "")
|
||||
complexity = ex.get("complexity_score", 0)
|
||||
|
||||
content += f"**{desc}** (complexity: {complexity:.2f})\n\n"
|
||||
content += f"```{lang}\n{code}\n```\n\n"
|
||||
@@ -879,16 +887,16 @@ def _format_examples_section(output_dir: Path) -> str:
|
||||
|
||||
def _format_api_section(output_dir: Path) -> str:
|
||||
"""Format API reference section."""
|
||||
api_dir = output_dir / 'api_reference'
|
||||
api_dir = output_dir / "api_reference"
|
||||
if not api_dir.exists():
|
||||
return ""
|
||||
|
||||
api_md = api_dir / 'api_reference.md'
|
||||
api_md = api_dir / "api_reference.md"
|
||||
if not api_md.exists():
|
||||
return ""
|
||||
|
||||
try:
|
||||
api_content = api_md.read_text(encoding='utf-8')
|
||||
api_content = api_md.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@@ -906,17 +914,17 @@ def _format_api_section(output_dir: Path) -> str:
|
||||
|
||||
def _format_architecture_section(output_dir: Path) -> str:
|
||||
"""Format architecture section from architecture/architectural_patterns.json."""
|
||||
arch_file = output_dir / 'architecture' / 'architectural_patterns.json'
|
||||
arch_file = output_dir / "architecture" / "architectural_patterns.json"
|
||||
if not arch_file.exists():
|
||||
return ""
|
||||
|
||||
try:
|
||||
with open(arch_file, 'r', encoding='utf-8') as f:
|
||||
with open(arch_file, encoding="utf-8") as f:
|
||||
arch_data = json.load(f)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
patterns = arch_data.get('patterns', [])
|
||||
patterns = arch_data.get("patterns", [])
|
||||
if not patterns:
|
||||
return ""
|
||||
|
||||
@@ -925,9 +933,9 @@ def _format_architecture_section(output_dir: Path) -> str:
|
||||
|
||||
content += "**Detected Architectural Patterns:**\n\n"
|
||||
for pattern in patterns[:5]:
|
||||
name = pattern.get('pattern_name', 'Unknown')
|
||||
confidence = pattern.get('confidence', 0)
|
||||
indicators = pattern.get('indicators', [])
|
||||
name = pattern.get("pattern_name", "Unknown")
|
||||
confidence = pattern.get("confidence", 0)
|
||||
indicators = pattern.get("indicators", [])
|
||||
|
||||
content += f"- **{name}** (confidence: {confidence:.2f})\n"
|
||||
if indicators:
|
||||
@@ -940,22 +948,22 @@ def _format_architecture_section(output_dir: Path) -> str:
|
||||
|
||||
def _format_config_section(output_dir: Path) -> str:
|
||||
"""Format configuration patterns section."""
|
||||
config_file = output_dir / 'config_patterns' / 'config_patterns.json'
|
||||
config_file = output_dir / "config_patterns" / "config_patterns.json"
|
||||
if not config_file.exists():
|
||||
return ""
|
||||
|
||||
try:
|
||||
with open(config_file, 'r', encoding='utf-8') as f:
|
||||
with open(config_file, encoding="utf-8") as f:
|
||||
config_data = json.load(f)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
config_files = config_data.get('config_files', [])
|
||||
config_files = config_data.get("config_files", [])
|
||||
if not config_files:
|
||||
return ""
|
||||
|
||||
total_settings = sum(len(cf.get('settings', [])) for cf in config_files)
|
||||
total_patterns = sum(len(cf.get('patterns', [])) for cf in config_files)
|
||||
total_settings = sum(len(cf.get("settings", [])) for cf in config_files)
|
||||
total_patterns = sum(len(cf.get("patterns", [])) for cf in config_files)
|
||||
|
||||
content = "## ⚙️ Configuration Patterns\n\n"
|
||||
content += "*From C3.4 configuration analysis*\n\n"
|
||||
@@ -966,7 +974,7 @@ def _format_config_section(output_dir: Path) -> str:
|
||||
# List config file types found
|
||||
file_types = {}
|
||||
for cf in config_files:
|
||||
ctype = cf.get('config_type', 'unknown')
|
||||
ctype = cf.get("config_type", "unknown")
|
||||
file_types[ctype] = file_types.get(ctype, 0) + 1
|
||||
|
||||
if file_types:
|
||||
@@ -985,18 +993,18 @@ def _generate_references(output_dir: Path):
|
||||
|
||||
Creates a clean references/ directory that links to all analysis outputs.
|
||||
"""
|
||||
references_dir = output_dir / 'references'
|
||||
references_dir = output_dir / "references"
|
||||
references_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Map analysis directories to reference names
|
||||
mappings = {
|
||||
'api_reference': 'api_reference',
|
||||
'dependencies': 'dependencies',
|
||||
'patterns': 'patterns',
|
||||
'test_examples': 'test_examples',
|
||||
'tutorials': 'tutorials',
|
||||
'config_patterns': 'config_patterns',
|
||||
'architecture': 'architecture'
|
||||
"api_reference": "api_reference",
|
||||
"dependencies": "dependencies",
|
||||
"patterns": "patterns",
|
||||
"test_examples": "test_examples",
|
||||
"tutorials": "tutorials",
|
||||
"config_patterns": "config_patterns",
|
||||
"architecture": "architecture",
|
||||
}
|
||||
|
||||
for source, target in mappings.items():
|
||||
@@ -1007,9 +1015,11 @@ def _generate_references(output_dir: Path):
|
||||
# Copy directory to references/ (not symlink, for portability)
|
||||
if target_dir.exists():
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(target_dir)
|
||||
|
||||
import shutil
|
||||
|
||||
shutil.copytree(source_dir, target_dir)
|
||||
logger.debug(f"Copied {source} → references/{target}")
|
||||
|
||||
@@ -1019,7 +1029,7 @@ def _generate_references(output_dir: Path):
|
||||
def main():
|
||||
"""Command-line interface for codebase analysis."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Analyze local codebases and extract code knowledge',
|
||||
description="Analyze local codebases and extract code knowledge",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
@@ -1043,101 +1053,78 @@ Examples:
|
||||
|
||||
# Skip specific features
|
||||
codebase-scraper --directory . --skip-patterns --skip-test-examples
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("--directory", required=True, help="Directory to analyze")
|
||||
parser.add_argument("--output", default="output/codebase/", help="Output directory (default: output/codebase/)")
|
||||
parser.add_argument(
|
||||
'--directory',
|
||||
required=True,
|
||||
help='Directory to analyze'
|
||||
"--depth", choices=["surface", "deep", "full"], default="deep", help="Analysis depth (default: deep)"
|
||||
)
|
||||
parser.add_argument("--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)")
|
||||
parser.add_argument("--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)")
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='output/codebase/',
|
||||
help='Output directory (default: output/codebase/)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--depth',
|
||||
choices=['surface', 'deep', 'full'],
|
||||
default='deep',
|
||||
help='Analysis depth (default: deep)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--languages',
|
||||
help='Comma-separated languages to analyze (e.g., Python,JavaScript,C++)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--file-patterns',
|
||||
help='Comma-separated file patterns (e.g., *.py,src/**/*.js)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-api-reference',
|
||||
action='store_true',
|
||||
"--skip-api-reference",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip API reference markdown documentation generation (default: enabled)'
|
||||
help="Skip API reference markdown documentation generation (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-dependency-graph',
|
||||
action='store_true',
|
||||
"--skip-dependency-graph",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip dependency graph and circular dependency detection (default: enabled)'
|
||||
help="Skip dependency graph and circular dependency detection (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-patterns',
|
||||
action='store_true',
|
||||
"--skip-patterns",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)'
|
||||
help="Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-test-examples',
|
||||
action='store_true',
|
||||
"--skip-test-examples",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)'
|
||||
help="Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-how-to-guides',
|
||||
action='store_true',
|
||||
"--skip-how-to-guides",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip how-to guide generation from workflow examples (default: enabled)'
|
||||
help="Skip how-to guide generation from workflow examples (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-config-patterns',
|
||||
action='store_true',
|
||||
"--skip-config-patterns",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help='Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)'
|
||||
help="Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--ai-mode',
|
||||
choices=['auto', 'api', 'local', 'none'],
|
||||
default='auto',
|
||||
help='AI enhancement mode for how-to guides: auto (detect best), api (Claude API), local (Claude Code CLI), none (disable) (default: auto)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--no-comments',
|
||||
action='store_true',
|
||||
help='Skip comment extraction'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--verbose',
|
||||
action='store_true',
|
||||
help='Enable verbose logging'
|
||||
"--ai-mode",
|
||||
choices=["auto", "api", "local", "none"],
|
||||
default="auto",
|
||||
help="AI enhancement mode for how-to guides: auto (detect best), api (Claude API), local (Claude Code CLI), none (disable) (default: auto)",
|
||||
)
|
||||
parser.add_argument("--no-comments", action="store_true", help="Skip comment extraction")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
# Check for deprecated flags
|
||||
deprecated_flags = {
|
||||
'--build-api-reference': '--skip-api-reference',
|
||||
'--build-dependency-graph': '--skip-dependency-graph',
|
||||
'--detect-patterns': '--skip-patterns',
|
||||
'--extract-test-examples': '--skip-test-examples',
|
||||
'--build-how-to-guides': '--skip-how-to-guides',
|
||||
'--extract-config-patterns': '--skip-config-patterns'
|
||||
"--build-api-reference": "--skip-api-reference",
|
||||
"--build-dependency-graph": "--skip-dependency-graph",
|
||||
"--detect-patterns": "--skip-patterns",
|
||||
"--extract-test-examples": "--skip-test-examples",
|
||||
"--build-how-to-guides": "--skip-how-to-guides",
|
||||
"--extract-config-patterns": "--skip-config-patterns",
|
||||
}
|
||||
|
||||
for old_flag, new_flag in deprecated_flags.items():
|
||||
if old_flag in sys.argv:
|
||||
logger.warning(f"⚠️ DEPRECATED: {old_flag} is deprecated. "
|
||||
f"All features are now enabled by default. "
|
||||
f"Use {new_flag} to disable this feature.")
|
||||
logger.warning(
|
||||
f"⚠️ DEPRECATED: {old_flag} is deprecated. "
|
||||
f"All features are now enabled by default. "
|
||||
f"Use {new_flag} to disable this feature."
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1158,12 +1145,12 @@ Examples:
|
||||
# Parse languages
|
||||
languages = None
|
||||
if args.languages:
|
||||
languages = [lang.strip() for lang in args.languages.split(',')]
|
||||
languages = [lang.strip() for lang in args.languages.split(",")]
|
||||
|
||||
# Parse file patterns
|
||||
file_patterns = None
|
||||
if args.file_patterns:
|
||||
file_patterns = [p.strip() for p in args.file_patterns.split(',')]
|
||||
file_patterns = [p.strip() for p in args.file_patterns.split(",")]
|
||||
|
||||
# Analyze codebase
|
||||
try:
|
||||
@@ -1181,18 +1168,18 @@ Examples:
|
||||
build_how_to_guides=not args.skip_how_to_guides,
|
||||
extract_config_patterns=not args.skip_config_patterns,
|
||||
enhance_with_ai=True, # Auto-disables if no API key present
|
||||
ai_mode=args.ai_mode # NEW: AI enhancement mode for how-to guides
|
||||
ai_mode=args.ai_mode, # NEW: AI enhancement mode for how-to guides
|
||||
)
|
||||
|
||||
# Print summary
|
||||
print(f"\n{'='*60}")
|
||||
print(f"CODEBASE ANALYSIS COMPLETE")
|
||||
print(f"{'='*60}")
|
||||
print(f"\n{'=' * 60}")
|
||||
print("CODEBASE ANALYSIS COMPLETE")
|
||||
print(f"{'=' * 60}")
|
||||
print(f"Files analyzed: {len(results['files'])}")
|
||||
print(f"Output directory: {args.output}")
|
||||
if args.build_api_reference:
|
||||
print(f"API reference: {Path(args.output) / 'api_reference'}")
|
||||
print(f"{'='*60}\n")
|
||||
print(f"{'=' * 60}\n")
|
||||
|
||||
return 0
|
||||
|
||||
@@ -1202,9 +1189,10 @@ Examples:
|
||||
except Exception as e:
|
||||
logger.error(f"Analysis failed: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user