Thanks @franklegolasyoung for the excellent work on the core fixes for issues #267, #242, and #260! 🙏 Your comprehensive approach to fixing PDF processing, expanding workflow detection, and improving the Chinese README documentation is much appreciated. I've added code quality fixes and comprehensive tests to ensure everything passes CI. All 1266+ tests are now passing, and the issues are resolved! 🎉
#!/usr/bin/env python3
"""
Codebase Scraper CLI Tool

Standalone tool for analyzing local codebases without the GitHub API.
Extracts code signatures, comments, and optionally generates API documentation.

Usage:
    codebase-scraper --directory /path/to/repo --output output/codebase/
    codebase-scraper --directory . --depth deep --languages Python,JavaScript
    codebase-scraper --directory /path/to/repo --enhance-level 2

Features:
- File tree walking with .gitignore support
- Multi-language code analysis (9 languages: Python, JavaScript/TypeScript, C/C++, C#, Go, Rust, Java, Ruby, PHP)
- API reference generation
- Comment extraction
- Dependency graph analysis
- Configurable depth levels

Credits:
- Language parsing patterns inspired by official language specifications
- NetworkX for dependency graph analysis: https://networkx.org/
- pathspec for .gitignore support: https://pypi.org/project/pathspec/
"""

# Postpone evaluation of annotations so the `pathspec.PathSpec | None` hints
# below do not raise NameError when pathspec is not installed
from __future__ import annotations

import argparse
import fnmatch
import json
import logging
import os
import shutil
import sys
from pathlib import Path
from typing import Any

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
from skill_seekers.cli.code_analyzer import CodeAnalyzer
from skill_seekers.cli.config_extractor import ConfigExtractor
from skill_seekers.cli.dependency_analyzer import DependencyAnalyzer

# Try to import pathspec for .gitignore support
try:
    import pathspec

    PATHSPEC_AVAILABLE = True
except ImportError:
    PATHSPEC_AVAILABLE = False

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Language extension mapping
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".cpp": "C++",
    ".cc": "C++",
    ".cxx": "C++",
    ".h": "C++",
    ".hpp": "C++",
    ".hxx": "C++",
    ".c": "C",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".rb": "Ruby",
    ".php": "PHP",
}

# Markdown extension mapping
MARKDOWN_EXTENSIONS = {".md", ".markdown", ".mdown", ".mkd"}

# Common documentation folders to scan
DOC_FOLDERS = {"docs", "doc", "documentation", "wiki", ".github"}

# Root-level doc files → category mapping
ROOT_DOC_CATEGORIES = {
    "readme": "overview",
    "contributing": "contributing",
    "changelog": "changelog",
    "history": "changelog",
    "license": "license",
    "authors": "authors",
    "code_of_conduct": "community",
    "security": "security",
    "architecture": "architecture",
    "design": "architecture",
}

# Folder name → category mapping.
# Keys must be in normalized form: folder names have "-" and " " replaced
# with "_" before matching (so "how_to" matches a "how-to" folder).
FOLDER_CATEGORIES = {
    "architecture": "architecture",
    "arch": "architecture",
    "design": "architecture",
    "guides": "guides",
    "guide": "guides",
    "tutorials": "guides",
    "tutorial": "guides",
    "howto": "guides",
    "how_to": "guides",
    "workflows": "workflows",
    "workflow": "workflows",
    "templates": "templates",
    "template": "templates",
    "api": "api",
    "reference": "api",
    "examples": "examples",
    "example": "examples",
    "specs": "specifications",
    "spec": "specifications",
    "rfcs": "specifications",
    "rfc": "specifications",
    "features": "features",
    "feature": "features",
}

# Default directories to exclude (entries containing "*" are glob patterns)
DEFAULT_EXCLUDED_DIRS = {
    "node_modules",
    "venv",
    "__pycache__",
    ".git",
    ".svn",
    ".hg",
    "build",
    "dist",
    "target",
    ".pytest_cache",
    ".tox",
    ".mypy_cache",
    "htmlcov",
    "coverage",
    ".coverage",
    ".eggs",
    "*.egg-info",
    ".idea",
    ".vscode",
    ".vs",
    "__pypackages__",
}


def detect_language(file_path: Path) -> str:
    """
    Detect programming language from file extension.

    Args:
        file_path: Path to source file

    Returns:
        Language name or 'Unknown'
    """
    extension = file_path.suffix.lower()
    return LANGUAGE_EXTENSIONS.get(extension, "Unknown")
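
# Illustrative lookups (derived from LANGUAGE_EXTENSIONS above):
#   detect_language(Path("src/main.rs"))  -> "Rust"
#   detect_language(Path("App.PY"))       -> "Python"   (suffix is lowercased first)
#   detect_language(Path("notes.txt"))    -> "Unknown"  (extension not mapped)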


def load_gitignore(directory: Path) -> pathspec.PathSpec | None:
    """
    Load .gitignore file and create pathspec matcher.

    Args:
        directory: Root directory to search for .gitignore

    Returns:
        PathSpec object if .gitignore found, None otherwise
    """
    if not PATHSPEC_AVAILABLE:
        logger.warning("pathspec not installed - .gitignore support disabled")
        logger.warning("Install with: pip install pathspec")
        return None

    gitignore_path = directory / ".gitignore"
    if not gitignore_path.exists():
        logger.debug(f"No .gitignore found in {directory}")
        return None

    try:
        with open(gitignore_path, encoding="utf-8") as f:
            spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
        logger.info(f"Loaded .gitignore from {gitignore_path}")
        return spec
    except Exception as e:
        logger.warning(f"Failed to load .gitignore: {e}")
        return None
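
# Illustrative pathspec semantics (gitwildmatch patterns behave like .gitignore):
#   spec = pathspec.PathSpec.from_lines("gitwildmatch", ["*.log", "build/"])
#   spec.match_file("logs/app.log")  -> True   (unanchored pattern matches at any depth)
#   spec.match_file("src/main.py")   -> False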


def should_exclude_dir(dir_name: str, excluded_dirs: set) -> bool:
    """
    Check if directory should be excluded from analysis.

    Args:
        dir_name: Directory name
        excluded_dirs: Set of directory names to exclude

    Returns:
        True if directory should be excluded
    """
    if dir_name in excluded_dirs:
        return True
    # Glob entries such as "*.egg-info" need pattern matching, not set membership
    return any("*" in pat and fnmatch.fnmatch(dir_name, pat) for pat in excluded_dirs)


def walk_directory(
    root: Path,
    patterns: list[str] | None = None,
    gitignore_spec: pathspec.PathSpec | None = None,
    excluded_dirs: set | None = None,
) -> list[Path]:
    """
    Walk directory tree and collect source files.

    Args:
        root: Root directory to walk
        patterns: Optional file patterns to include (e.g., ['*.py', '*.js'])
        gitignore_spec: Optional PathSpec object for .gitignore rules
        excluded_dirs: Set of directory names to exclude

    Returns:
        List of source file paths
    """
    if excluded_dirs is None:
        excluded_dirs = DEFAULT_EXCLUDED_DIRS

    files = []
    root = Path(root).resolve()

    for dirpath, dirnames, filenames in os.walk(root):
        current_dir = Path(dirpath)

        # Filter out excluded directories (in-place modification)
        dirnames[:] = [d for d in dirnames if not should_exclude_dir(d, excluded_dirs)]

        for filename in filenames:
            file_path = current_dir / filename

            # Check .gitignore rules
            if gitignore_spec:
                try:
                    rel_path = file_path.relative_to(root)
                    if gitignore_spec.match_file(str(rel_path)):
                        logger.debug(f"Skipping (gitignore): {rel_path}")
                        continue
                except ValueError:
                    # File is outside root, skip it
                    continue

            # Check file extension
            if file_path.suffix.lower() not in LANGUAGE_EXTENSIONS:
                continue

            # Check file patterns if provided
            if patterns and not any(file_path.match(pattern) for pattern in patterns):
                continue

            files.append(file_path)

    return sorted(files)
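
# Usage sketch (illustrative paths):
#   spec = load_gitignore(Path("."))
#   sources = walk_directory(Path("."), patterns=["*.py"], gitignore_spec=spec)
#   # -> sorted absolute Paths. Note that the dirnames[:] slice assignment above
#   #    is what actually prunes excluded subtrees: os.walk only honors in-place
#   #    mutation of the list it yields, not rebinding.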


def walk_markdown_files(
    root: Path,
    gitignore_spec: pathspec.PathSpec | None = None,
    excluded_dirs: set | None = None,
) -> list[Path]:
    """
    Walk directory tree and collect markdown documentation files.

    Args:
        root: Root directory to walk
        gitignore_spec: Optional PathSpec object for .gitignore rules
        excluded_dirs: Set of directory names to exclude

    Returns:
        List of markdown file paths
    """
    if excluded_dirs is None:
        excluded_dirs = DEFAULT_EXCLUDED_DIRS

    files = []
    root = Path(root).resolve()

    for dirpath, dirnames, filenames in os.walk(root):
        current_dir = Path(dirpath)

        # Filter out excluded directories (in-place modification)
        dirnames[:] = [d for d in dirnames if not should_exclude_dir(d, excluded_dirs)]

        for filename in filenames:
            file_path = current_dir / filename

            # Check .gitignore rules
            if gitignore_spec:
                try:
                    rel_path = file_path.relative_to(root)
                    if gitignore_spec.match_file(str(rel_path)):
                        logger.debug(f"Skipping (gitignore): {rel_path}")
                        continue
                except ValueError:
                    continue

            # Check if markdown file
            if file_path.suffix.lower() not in MARKDOWN_EXTENSIONS:
                continue

            files.append(file_path)

    return sorted(files)


def categorize_markdown_file(file_path: Path, root: Path) -> str:
    """
    Categorize a markdown file based on its location and filename.

    Args:
        file_path: Path to the markdown file
        root: Root directory of the project

    Returns:
        Category name (e.g., 'overview', 'guides', 'architecture')
    """
    try:
        rel_path = file_path.relative_to(root)
    except ValueError:
        return "other"

    # Check root-level files by filename
    if len(rel_path.parts) == 1:
        filename_lower = file_path.stem.lower().replace("-", "_").replace(" ", "_")
        for key, category in ROOT_DOC_CATEGORIES.items():
            if key in filename_lower:
                return category
        return "overview"  # Default for root .md files

    # Check folder-based categorization
    for part in rel_path.parts[:-1]:  # Exclude filename
        part_lower = part.lower().replace("-", "_").replace(" ", "_")
        for key, category in FOLDER_CATEGORIES.items():
            if key in part_lower:
                return category

    # Default category
    return "other"
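
# Illustrative categorizations, assuming root=Path("/repo"):
#   /repo/README.md             -> "overview"   (root file, "readme" key)
#   /repo/CHANGELOG.md          -> "changelog"
#   /repo/docs/guides/setup.md  -> "guides"     (matched on the "guides" folder)
#   /repo/src/notes.md          -> "other"      (no folder key matches)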


def extract_markdown_structure(content: str) -> dict[str, Any]:
    """
    Extract structure from markdown content (headers, code blocks, links).

    Args:
        content: Markdown file content

    Returns:
        Dictionary with extracted structure
    """
    import re

    structure = {
        "title": None,
        "headers": [],
        "code_blocks": [],
        "links": [],
        "word_count": len(content.split()),
        "line_count": len(content.split("\n")),
    }

    lines = content.split("\n")

    # Extract headers
    for i, line in enumerate(lines):
        header_match = re.match(r"^(#{1,6})\s+(.+)$", line)
        if header_match:
            level = len(header_match.group(1))
            text = header_match.group(2).strip()
            structure["headers"].append(
                {
                    "level": level,
                    "text": text,
                    "line": i + 1,
                }
            )
            # First h1 is the title
            if level == 1 and structure["title"] is None:
                structure["title"] = text

    # Extract code blocks (fenced)
    code_block_pattern = re.compile(r"```(\w*)\n(.*?)```", re.DOTALL)
    for match in code_block_pattern.finditer(content):
        language = match.group(1) or "text"
        code = match.group(2).strip()
        if len(code) > 0:
            structure["code_blocks"].append(
                {
                    "language": language,
                    "code": code[:500],  # Truncate long code blocks
                    "full_length": len(code),
                }
            )

    # Extract links
    link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
    for match in link_pattern.finditer(content):
        structure["links"].append(
            {
                "text": match.group(1),
                "url": match.group(2),
            }
        )

    return structure
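
# Illustrative output for a two-header document:
#   extract_markdown_structure("# Title\n\nIntro text.\n\n## Setup\n")
#   -> {"title": "Title",
#       "headers": [{"level": 1, "text": "Title", "line": 1},
#                   {"level": 2, "text": "Setup", "line": 5}],
#       "code_blocks": [], "links": [], "word_count": 6, "line_count": 6}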


def generate_markdown_summary(
    content: str, structure: dict[str, Any], max_length: int = 500
) -> str:
    """
    Generate a summary of markdown content.

    Args:
        content: Full markdown content
        structure: Extracted structure from extract_markdown_structure()
        max_length: Maximum summary length

    Returns:
        Summary string
    """
    # Start with title if available
    summary_parts = []

    if structure.get("title"):
        summary_parts.append(f"**{structure['title']}**")

    # Add header outline (first 5 h2/h3 headers)
    h2_h3 = [h for h in structure.get("headers", []) if h["level"] in (2, 3)][:5]
    if h2_h3:
        sections = [h["text"] for h in h2_h3]
        summary_parts.append(f"Sections: {', '.join(sections)}")

    # Extract first paragraph (skip headers and empty lines)
    lines = content.split("\n")
    first_para = []
    in_para = False
    for line in lines:
        stripped = line.strip()
        if stripped.startswith("#") or stripped.startswith("```"):
            if in_para:
                break
            continue
        if stripped:
            in_para = True
            first_para.append(stripped)
        elif in_para:
            break

    if first_para:
        para_text = " ".join(first_para)
        if len(para_text) > 200:
            para_text = para_text[:200] + "..."
        summary_parts.append(para_text)

    # Add stats
    stats = f"({structure.get('word_count', 0)} words, {len(structure.get('code_blocks', []))} code blocks)"
    summary_parts.append(stats)

    summary = "\n".join(summary_parts)
    if len(summary) > max_length:
        summary = summary[:max_length] + "..."

    return summary
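
# Illustrative summary for a README titled "MyLib" with h2 sections Install/Usage:
#   **MyLib**
#   Sections: Install, Usage
#   A small library for ...
#   (312 words, 4 code blocks)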


def process_markdown_docs(
    directory: Path,
    output_dir: Path,
    depth: str = "deep",
    gitignore_spec: pathspec.PathSpec | None = None,
    enhance_with_ai: bool = False,
    ai_mode: str = "none",
) -> dict[str, Any]:
    """
    Process all markdown documentation files in a directory.

    Args:
        directory: Root directory to scan
        output_dir: Output directory for processed docs
        depth: Processing depth ('surface', 'deep', 'full')
        gitignore_spec: Optional .gitignore spec
        enhance_with_ai: Whether to use AI enhancement
        ai_mode: AI mode ('none', 'auto', 'api', 'local')

    Returns:
        Dictionary with processed documentation data
    """
    logger.info("Scanning for markdown documentation...")

    # Find all markdown files
    md_files = walk_markdown_files(directory, gitignore_spec)
    logger.info(f"Found {len(md_files)} markdown files")

    if not md_files:
        return {"files": [], "categories": {}, "total_files": 0}

    # Process each file
    processed_docs = []
    categories = {}

    for md_path in md_files:
        try:
            content = md_path.read_text(encoding="utf-8", errors="ignore")
            rel_path = str(md_path.relative_to(directory))
            category = categorize_markdown_file(md_path, directory)

            doc_data = {
                "path": rel_path,
                "filename": md_path.name,
                "category": category,
                "size_bytes": len(content.encode("utf-8")),
            }

            # Surface depth: just path and category
            if depth == "surface":
                processed_docs.append(doc_data)
            else:
                # Deep/Full: extract structure and summary
                structure = extract_markdown_structure(content)
                summary = generate_markdown_summary(content, structure)

                doc_data.update(
                    {
                        "title": structure.get("title") or md_path.stem,
                        "structure": structure,
                        "summary": summary,
                        "content": content if depth == "full" else None,
                    }
                )
                processed_docs.append(doc_data)

            # Track categories
            if category not in categories:
                categories[category] = []
            categories[category].append(rel_path)

        except Exception as e:
            logger.warning(f"Failed to process {md_path}: {e}")
            continue

    # AI Enhancement (if enabled and enhance_level >= 2)
    if enhance_with_ai and ai_mode != "none" and processed_docs:
        logger.info("🤖 Enhancing documentation analysis with AI...")
        try:
            processed_docs = _enhance_docs_with_ai(processed_docs, ai_mode)
            logger.info("✅ AI documentation enhancement complete")
        except Exception as e:
            logger.warning(f"⚠️ AI enhancement failed: {e}")

    # Save processed docs to output
    docs_output_dir = output_dir / "documentation"
    docs_output_dir.mkdir(parents=True, exist_ok=True)

    # Copy files organized by category
    for doc in processed_docs:
        try:
            src_path = directory / doc["path"]
            category = doc["category"]
            category_dir = docs_output_dir / category
            category_dir.mkdir(parents=True, exist_ok=True)

            # Copy file to category folder
            dest_path = category_dir / doc["filename"]
            shutil.copy2(src_path, dest_path)
        except Exception as e:
            logger.debug(f"Failed to copy {doc['path']}: {e}")

    # Save documentation index
    index_data = {
        "total_files": len(processed_docs),
        "categories": categories,
        "files": processed_docs,
    }

    index_json = docs_output_dir / "documentation_index.json"
    with open(index_json, "w", encoding="utf-8") as f:
        json.dump(index_data, f, indent=2, default=str)

    logger.info(
        f"✅ Processed {len(processed_docs)} documentation files in {len(categories)} categories"
    )
    logger.info(f"📁 Saved to: {docs_output_dir}")

    return index_data
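
# Shape of documentation_index.json (illustrative, "deep" depth, values elided):
#   {
#     "total_files": 2,
#     "categories": {"overview": ["README.md"], "guides": ["docs/guides/setup.md"]},
#     "files": [{"path": "README.md", "filename": "README.md", "category": "overview",
#                "size_bytes": 1204, "title": "...", "structure": {...},
#                "summary": "...", "content": null}, ...]
#   }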


def _enhance_docs_with_ai(docs: list[dict], ai_mode: str) -> list[dict]:
    """
    Enhance documentation analysis with AI.

    Args:
        docs: List of processed document dictionaries
        ai_mode: AI mode ('api' or 'local')

    Returns:
        Enhanced document list
    """
    # Try API mode first
    if ai_mode in ("api", "auto"):
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if api_key:
            return _enhance_docs_api(docs, api_key)

    # Fall back to LOCAL mode
    if ai_mode in ("local", "auto"):
        return _enhance_docs_local(docs)

    return docs


def _enhance_docs_api(docs: list[dict], api_key: str) -> list[dict]:
    """Enhance docs using Claude API."""
    try:
        import anthropic

        client = anthropic.Anthropic(api_key=api_key)

        # Batch documents for efficiency
        batch_size = 10
        for i in range(0, len(docs), batch_size):
            batch = docs[i : i + batch_size]

            # Create prompt for batch
            docs_text = "\n\n".join(
                [
                    f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nSummary: {d.get('summary', 'N/A')}"
                    for d in batch
                    if d.get("summary")
                ]
            )

            if not docs_text:
                continue

            prompt = f"""Analyze these documentation files and provide:
1. A brief description of what each document covers
2. Key topics/concepts mentioned
3. How they relate to each other

Documents:
{docs_text}

Return JSON with format:
{{"enhancements": [{{"filename": "...", "description": "...", "key_topics": [...], "related_to": [...]}}]}}"""

            response = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}],
            )

            # Parse response and merge enhancements
            try:
                import re

                json_match = re.search(r"\{.*\}", response.content[0].text, re.DOTALL)
                if json_match:
                    enhancements = json.loads(json_match.group())
                    for enh in enhancements.get("enhancements", []):
                        for doc in batch:
                            if doc["filename"] == enh.get("filename"):
                                doc["ai_description"] = enh.get("description")
                                doc["ai_topics"] = enh.get("key_topics", [])
                                doc["ai_related"] = enh.get("related_to", [])
            except Exception:
                pass

    except Exception as e:
        logger.warning(f"API enhancement failed: {e}")

    return docs


def _enhance_docs_local(docs: list[dict]) -> list[dict]:
    """Enhance docs using Claude Code CLI (LOCAL mode)."""
    import subprocess

    # Prepare batch of docs for enhancement
    docs_with_summary = [d for d in docs if d.get("summary")]
    if not docs_with_summary:
        return docs

    docs_text = "\n\n".join(
        [
            f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nPath: {d['path']}\nSummary: {d.get('summary', 'N/A')}"
            for d in docs_with_summary[:20]  # Limit to 20 docs
        ]
    )

    prompt = f"""Analyze these documentation files from a codebase and provide insights.

For each document, provide:
1. A brief description of what it covers
2. Key topics/concepts
3. Related documents

Documents:
{docs_text}

Output JSON only:
{{"enhancements": [{{"filename": "...", "description": "...", "key_topics": ["..."], "related_to": ["..."]}}]}}"""

    try:
        # The prompt is passed inline via -p, so no temp file is needed
        result = subprocess.run(
            ["claude", "--dangerously-skip-permissions", "-p", prompt],
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode == 0 and result.stdout:
            import re

            json_match = re.search(r"\{.*\}", result.stdout, re.DOTALL)
            if json_match:
                enhancements = json.loads(json_match.group())
                for enh in enhancements.get("enhancements", []):
                    for doc in docs:
                        if doc["filename"] == enh.get("filename"):
                            doc["ai_description"] = enh.get("description")
                            doc["ai_topics"] = enh.get("key_topics", [])
                            doc["ai_related"] = enh.get("related_to", [])

    except Exception as e:
        logger.warning(f"LOCAL enhancement failed: {e}")

    return docs


def analyze_codebase(
    directory: Path,
    output_dir: Path,
    depth: str = "deep",
    languages: list[str] | None = None,
    file_patterns: list[str] | None = None,
    build_api_reference: bool = True,
    extract_comments: bool = True,
    build_dependency_graph: bool = True,
    detect_patterns: bool = True,
    extract_test_examples: bool = True,
    build_how_to_guides: bool = True,
    extract_config_patterns: bool = True,
    extract_docs: bool = True,
    enhance_level: int = 0,
) -> dict[str, Any]:
    """
    Analyze local codebase and extract code knowledge.

    Args:
        directory: Directory to analyze
        output_dir: Output directory for results
        depth: Analysis depth (surface, deep, full)
        languages: Optional list of languages to analyze
        file_patterns: Optional file patterns to include
        build_api_reference: Generate API reference markdown
        extract_comments: Extract inline comments
        build_dependency_graph: Generate dependency graph and detect circular dependencies
        detect_patterns: Detect design patterns (Singleton, Factory, Observer, etc.)
        extract_test_examples: Extract usage examples from test files
        build_how_to_guides: Build how-to guides from workflow examples (C3.3)
        extract_config_patterns: Extract configuration patterns from config files (C3.4)
        extract_docs: Extract and process markdown documentation files (default: True)
        enhance_level: AI enhancement level (0=off, 1=SKILL.md only, 2=+config+arch+docs, 3=full)

    Returns:
        Analysis results dictionary
    """
    # Determine AI enhancement settings based on level
    # Level 0: No AI enhancement
    # Level 1: SKILL.md only (handled in main.py)
    # Level 2: Architecture + Config AI enhancement
    # Level 3: Full AI enhancement (patterns, tests, config, architecture)
    enhance_patterns = enhance_level >= 3
    enhance_tests = enhance_level >= 3
    enhance_config = enhance_level >= 2
    enhance_architecture = enhance_level >= 2
    ai_mode = "auto" if enhance_level > 0 else "none"

    if enhance_level > 0:
        level_names = {1: "SKILL.md only", 2: "SKILL.md+Architecture+Config", 3: "full"}
        logger.info(
            f"🤖 AI Enhancement Level: {enhance_level} ({level_names.get(enhance_level, 'unknown')})"
        )

    # Resolve directory to absolute path to avoid relative_to() errors
    directory = Path(directory).resolve()

    logger.info(f"Analyzing codebase: {directory}")
    logger.info(f"Depth: {depth}")

    # Create output directory
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load .gitignore
    gitignore_spec = load_gitignore(directory)

    # Walk directory tree
    logger.info("Scanning directory tree...")
    files = walk_directory(directory, patterns=file_patterns, gitignore_spec=gitignore_spec)

    logger.info(f"Found {len(files)} source files")

    # Filter by language if specified
    if languages:
        language_set = set(languages)
        files = [f for f in files if detect_language(f) in language_set]
        logger.info(f"Filtered to {len(files)} files for languages: {', '.join(languages)}")

    # Initialize code analyzer
    analyzer = CodeAnalyzer(depth=depth)

    # Analyze each file
    results = {"files": []}
    analyzed_count = 0

    for file_path in files:
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            language = detect_language(file_path)

            if language == "Unknown":
                continue

            # Analyze file
            analysis = analyzer.analyze_file(str(file_path), content, language)

            # Only include files with actual analysis results
            if analysis and (analysis.get("classes") or analysis.get("functions")):
                results["files"].append(
                    {
                        "file": str(file_path.relative_to(directory)),
                        "language": language,
                        **analysis,
                    }
                )
                analyzed_count += 1

                if analyzed_count % 10 == 0:
                    logger.info(f"Analyzed {analyzed_count}/{len(files)} files...")

        except Exception as e:
            logger.warning(f"Error analyzing {file_path}: {e}")
            continue

    logger.info(f"✅ Successfully analyzed {analyzed_count} files")

    # Save results
    output_json = output_dir / "code_analysis.json"
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    logger.info(f"📁 Saved analysis to: {output_json}")
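
    # Shape of code_analysis.json (illustrative; the per-file keys beyond "file"
    # and "language" come from CodeAnalyzer.analyze_file and are assumed here):
    #   {"files": [{"file": "src/app.py", "language": "Python",
    #               "classes": [...], "functions": [...]}]}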

    # Build API reference if requested
    if build_api_reference and results["files"]:
        logger.info("Building API reference documentation...")
        builder = APIReferenceBuilder(results)
        api_output_dir = output_dir / "api_reference"
        generated_files = builder.build_reference(api_output_dir)
        logger.info(f"✅ Generated {len(generated_files)} API reference files")
        logger.info(f"📁 API reference: {api_output_dir}")

    # Build dependency graph if requested (C2.6)
    if build_dependency_graph:
        logger.info("Building dependency graph...")
        dep_analyzer = DependencyAnalyzer()

        # Analyze dependencies for all files
        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    # Use relative path from directory for better graph readability
                    rel_path = str(file_path.relative_to(directory))
                    dep_analyzer.analyze_file(rel_path, content, language)
            except Exception as e:
                logger.warning(f"Error analyzing dependencies for {file_path}: {e}")
                continue

        # Build the graph
        graph = dep_analyzer.build_graph()

        # Detect circular dependencies
        cycles = dep_analyzer.detect_cycles()
        if cycles:
            logger.warning(f"⚠️ Found {len(cycles)} circular dependencies:")
            for i, cycle in enumerate(cycles[:5], 1):  # Show first 5
                cycle_str = " → ".join(cycle) + f" → {cycle[0]}"
                logger.warning(f"  {i}. {cycle_str}")
            if len(cycles) > 5:
                logger.warning(f"  ... and {len(cycles) - 5} more")
        else:
            logger.info("✅ No circular dependencies found")

        # Save dependency graph data
        dep_output_dir = output_dir / "dependencies"
        dep_output_dir.mkdir(parents=True, exist_ok=True)

        # Export as JSON
        dep_json = dep_output_dir / "dependency_graph.json"
        with open(dep_json, "w", encoding="utf-8") as f:
            json.dump(dep_analyzer.export_json(), f, indent=2)
        logger.info(f"📁 Saved dependency graph: {dep_json}")

        # Export as Mermaid diagram
        mermaid_file = dep_output_dir / "dependency_graph.mmd"
        mermaid_file.write_text(dep_analyzer.export_mermaid())
        logger.info(f"📁 Saved Mermaid diagram: {mermaid_file}")

        # Save statistics
        stats = dep_analyzer.get_statistics()
        stats_file = dep_output_dir / "statistics.json"
        with open(stats_file, "w", encoding="utf-8") as f:
            json.dump(stats, f, indent=2)
        logger.info(
            f"📊 Statistics: {stats['total_files']} files, "
            f"{stats['total_dependencies']} dependencies, "
            f"{stats['circular_dependencies']} cycles"
        )

        # Try to export as DOT (requires pydot)
        try:
            dot_file = dep_output_dir / "dependency_graph.dot"
            dep_analyzer.export_dot(str(dot_file))
        except Exception:
            pass  # pydot not installed, skip DOT export

    # Detect design patterns if requested (C3.1)
    if detect_patterns:
        logger.info("Detecting design patterns...")
        from skill_seekers.cli.pattern_recognizer import PatternRecognizer

        pattern_recognizer = PatternRecognizer(depth=depth, enhance_with_ai=enhance_patterns)
        pattern_results = []

        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    report = pattern_recognizer.analyze_file(str(file_path), content, language)

                    if report.patterns:
                        pattern_results.append(report.to_dict())
            except Exception as e:
                logger.warning(f"Pattern detection failed for {file_path}: {e}")
                continue

        # Save pattern results
        if pattern_results:
            pattern_output = output_dir / "patterns"
            pattern_output.mkdir(parents=True, exist_ok=True)

            pattern_json = pattern_output / "detected_patterns.json"
            with open(pattern_json, "w", encoding="utf-8") as f:
                json.dump(pattern_results, f, indent=2)

            total_patterns = sum(len(r["patterns"]) for r in pattern_results)
            logger.info(f"✅ Detected {total_patterns} patterns in {len(pattern_results)} files")
            logger.info(f"📁 Saved to: {pattern_json}")
        else:
            logger.info("No design patterns detected")

    # Extract test examples if requested (C3.2)
    if extract_test_examples:
        logger.info("Extracting usage examples from test files...")
        from skill_seekers.cli.test_example_extractor import TestExampleExtractor

        # Create extractor
        test_extractor = TestExampleExtractor(
            min_confidence=0.5,
            max_per_file=10,
            languages=languages,
            enhance_with_ai=enhance_tests,
        )

        # Extract examples from directory
        try:
            example_report = test_extractor.extract_from_directory(directory, recursive=True)

            if example_report.total_examples > 0:
                # Save results
                examples_output = output_dir / "test_examples"
                examples_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                examples_json = examples_output / "test_examples.json"
                with open(examples_json, "w", encoding="utf-8") as f:
                    json.dump(example_report.to_dict(), f, indent=2)

                # Save as Markdown
                examples_md = examples_output / "test_examples.md"
                examples_md.write_text(example_report.to_markdown(), encoding="utf-8")

                logger.info(
                    f"✅ Extracted {example_report.total_examples} test examples "
                    f"({example_report.high_value_count} high-value)"
                )
                logger.info(f"📁 Saved to: {examples_output}")
            else:
                logger.info("No test examples extracted")

        except Exception as e:
            logger.warning(f"Test example extraction failed: {e}")
            example_report = None

    # Build how-to guides from workflow examples (C3.3)
    if build_how_to_guides and extract_test_examples:
        logger.info("Building how-to guides from workflow examples...")
        try:
            from skill_seekers.cli.how_to_guide_builder import HowToGuideBuilder

            # Create guide builder (uses same enhance level as test examples)
            guide_builder = HowToGuideBuilder(enhance_with_ai=enhance_tests)

            # Build guides from workflow examples
            tutorials_dir = output_dir / "tutorials"

            # Get workflow examples from the example_report if available
            if (
                "example_report" in locals()
                and example_report
                and example_report.total_examples > 0
            ):
                # Convert example_report to list of dicts for processing
                examples_list = example_report.to_dict().get("examples", [])

                guide_collection = guide_builder.build_guides_from_examples(
                    examples_list,
                    grouping_strategy="ai-tutorial-group",
                    output_dir=tutorials_dir,
                    enhance_with_ai=enhance_tests,
                    ai_mode=ai_mode,
                )

                if guide_collection and guide_collection.total_guides > 0:
                    # Save collection summary
                    collection_json = tutorials_dir / "guide_collection.json"
                    with open(collection_json, "w", encoding="utf-8") as f:
                        json.dump(guide_collection.to_dict(), f, indent=2)

                    logger.info(f"✅ Built {guide_collection.total_guides} how-to guides")
                    logger.info(f"📁 Saved to: {tutorials_dir}")
                else:
                    logger.info("No how-to guides generated (insufficient workflow examples)")
            else:
                logger.info("No workflow examples available for guide generation")

        except Exception as e:
            logger.warning(f"How-to guide building failed: {e}")

    # Extract configuration patterns (C3.4)
    if extract_config_patterns:
        logger.info("Extracting configuration patterns...")
        try:
            config_extractor = ConfigExtractor()

            # Extract config patterns from directory
            extraction_result = config_extractor.extract_from_directory(directory)

            if extraction_result.config_files:
                # Convert to dict for enhancement
                result_dict = config_extractor.to_dict(extraction_result)

                # AI Enhancement (if enabled - level 2+)
                if enhance_config and ai_mode != "none":
                    try:
                        from skill_seekers.cli.config_enhancer import ConfigEnhancer

                        logger.info(f"🤖 Enhancing config analysis with AI (mode: {ai_mode})...")
                        enhancer = ConfigEnhancer(mode=ai_mode)
                        result_dict = enhancer.enhance_config_result(result_dict)
                        logger.info("✅ AI enhancement complete")
                    except Exception as e:
                        logger.warning(f"⚠️ Config AI enhancement failed: {e}")

                # Save results
                config_output = output_dir / "config_patterns"
                config_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                config_json = config_output / "config_patterns.json"
                with open(config_json, "w", encoding="utf-8") as f:
                    json.dump(result_dict, f, indent=2)

                # Save as Markdown (basic - AI enhancements in JSON only for now)
                config_md = config_output / "config_patterns.md"
                config_md.write_text(extraction_result.to_markdown(), encoding="utf-8")

                # Count total settings across all files
                total_settings = sum(len(cf.settings) for cf in extraction_result.config_files)
                total_patterns = sum(len(cf.patterns) for cf in extraction_result.config_files)

                logger.info(
                    f"✅ Extracted {len(extraction_result.config_files)} config files "
                    f"with {total_settings} settings and {total_patterns} detected patterns"
                )

                if "ai_enhancements" in result_dict:
                    insights = result_dict["ai_enhancements"].get("overall_insights", {})
                    if insights.get("security_issues_found"):
                        logger.info(
                            f"🔐 Security issues found: {insights['security_issues_found']}"
                        )

                logger.info(f"📁 Saved to: {config_output}")
            else:
                logger.info("No configuration files found")

        except Exception as e:
            logger.warning(f"Config pattern extraction failed: {e}")

    # Detect architectural patterns (C3.7)
    # Always run this - it provides a high-level overview
    logger.info("Analyzing architectural patterns...")
    from skill_seekers.cli.architectural_pattern_detector import ArchitecturalPatternDetector

    arch_detector = ArchitecturalPatternDetector(enhance_with_ai=enhance_architecture)
    arch_report = arch_detector.analyze(directory, results["files"])

    if arch_report.patterns:
        arch_output = output_dir / "architecture"
        arch_output.mkdir(parents=True, exist_ok=True)

        # Save as JSON
        arch_json = arch_output / "architectural_patterns.json"
        with open(arch_json, "w", encoding="utf-8") as f:
            json.dump(arch_report.to_dict(), f, indent=2)

        logger.info(f"🏗️ Detected {len(arch_report.patterns)} architectural patterns")
        for pattern in arch_report.patterns:
            logger.info(f"  - {pattern.pattern_name} (confidence: {pattern.confidence:.2f})")
        logger.info(f"📁 Saved to: {arch_json}")
    else:
        logger.info("No clear architectural patterns detected")

    # Extract markdown documentation (C3.9)
    docs_data = None
    if extract_docs:
        logger.info("Extracting project documentation...")
        try:
            # Determine AI enhancement for docs (level 2+)
            enhance_docs_ai = enhance_level >= 2
            docs_data = process_markdown_docs(
                directory=directory,
                output_dir=output_dir,
                depth=depth,
                gitignore_spec=gitignore_spec,
                enhance_with_ai=enhance_docs_ai,
                ai_mode=ai_mode,
            )

            if docs_data and docs_data.get("total_files", 0) > 0:
                logger.info(
                    f"✅ Extracted {docs_data['total_files']} documentation files "
                    f"in {len(docs_data.get('categories', {}))} categories"
                )
            else:
                logger.info("No markdown documentation files found")
        except Exception as e:
            logger.warning(f"Documentation extraction failed: {e}")
            docs_data = None

    # Generate SKILL.md and references/ directory
    logger.info("Generating SKILL.md and references...")
    _generate_skill_md(
        output_dir=output_dir,
        directory=directory,
        results=results,
        depth=depth,
        build_api_reference=build_api_reference,
        build_dependency_graph=build_dependency_graph,
        detect_patterns=detect_patterns,
        extract_test_examples=extract_test_examples,
        extract_config_patterns=extract_config_patterns,
        extract_docs=extract_docs,
        docs_data=docs_data,
    )

    return results
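
# Usage sketch (illustrative paths and options):
#   results = analyze_codebase(
#       directory=Path("~/projects/myrepo").expanduser(),
#       output_dir=Path("output/codebase"),
#       depth="deep",
#       languages=["Python"],
#       enhance_level=0,
#   )
#   print(len(results["files"]), "files with extracted classes/functions")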


def _generate_skill_md(
    output_dir: Path,
    directory: Path,
    results: dict[str, Any],
    depth: str,
    build_api_reference: bool,
    build_dependency_graph: bool,
    detect_patterns: bool,
    extract_test_examples: bool,
    extract_config_patterns: bool,
    extract_docs: bool = True,
    docs_data: dict[str, Any] | None = None,
):
    """
    Generate rich SKILL.md from codebase analysis results.

    Creates a 300+ line skill file with:
    - Front matter (name, description)
    - Repository info (path, languages, file count)
    - When to Use section
    - Quick Reference (patterns, languages, stats)
    - Code Examples (from test files)
    - API Reference (from code analysis)
    - Architecture Overview
    - Configuration Patterns
    - Available References
    """
    repo_name = directory.name

    # Generate skill name (lowercase, hyphens only, max 64 chars)
    skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]

    # Generate description
    description = f"Local codebase analysis for {repo_name}"

    # Count files by language
    language_stats = _get_language_stats(results.get("files", []))
    total_files = len(results.get("files", []))

    # Start building content
    skill_content = f"""---
name: {skill_name}
description: {description}
---

# {repo_name} Codebase

## Description

Local codebase analysis and documentation generated from code analysis.

**Path:** `{directory}`
**Files Analyzed:** {total_files}
**Languages:** {", ".join(language_stats.keys())}
**Analysis Depth:** {depth}

## When to Use This Skill

Use this skill when you need to:
- Understand the codebase architecture and design patterns
- Find implementation examples and usage patterns
- Review API documentation extracted from code
- Check configuration patterns and best practices
- Explore test examples and real-world usage
- Navigate the codebase structure efficiently

## ⚡ Quick Reference

### Codebase Statistics

"""

    # Language breakdown
    skill_content += "**Languages:**\n"
    for lang, count in sorted(language_stats.items(), key=lambda x: x[1], reverse=True):
        percentage = (count / total_files * 100) if total_files > 0 else 0
        skill_content += f"- **{lang}**: {count} files ({percentage:.1f}%)\n"
    skill_content += "\n"

    # Analysis features performed
    skill_content += "**Analysis Performed:**\n"
    if build_api_reference:
        skill_content += "- ✅ API Reference (C2.5)\n"
    if build_dependency_graph:
        skill_content += "- ✅ Dependency Graph (C2.6)\n"
    if detect_patterns:
        skill_content += "- ✅ Design Patterns (C3.1)\n"
    if extract_test_examples:
        skill_content += "- ✅ Test Examples (C3.2)\n"
    if extract_config_patterns:
        skill_content += "- ✅ Configuration Patterns (C3.4)\n"
    skill_content += "- ✅ Architectural Analysis (C3.7)\n"
    if extract_docs:
        skill_content += "- ✅ Project Documentation (C3.9)\n"
    skill_content += "\n"

    # Add design patterns if available
    if detect_patterns:
        patterns_content = _format_patterns_section(output_dir)
        if patterns_content:
            skill_content += patterns_content

    # Add code examples if available
    if extract_test_examples:
        examples_content = _format_examples_section(output_dir)
        if examples_content:
            skill_content += examples_content

    # Add API reference if available
    if build_api_reference:
        api_content = _format_api_section(output_dir)
        if api_content:
            skill_content += api_content

    # Add architecture if available
    arch_content = _format_architecture_section(output_dir)
    if arch_content:
        skill_content += arch_content

    # Add configuration patterns if available
    if extract_config_patterns:
        config_content = _format_config_section(output_dir)
        if config_content:
            skill_content += config_content

    # Add project documentation if available
    if extract_docs and docs_data:
        docs_content = _format_documentation_section(output_dir, docs_data)
        if docs_content:
            skill_content += docs_content

    # Available references
    skill_content += "## 📚 Available References\n\n"
    skill_content += "This skill includes detailed reference documentation:\n\n"

    refs_added = False
    if build_api_reference and (output_dir / "api_reference").exists():
        skill_content += (
            "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
        )
        refs_added = True
    if build_dependency_graph and (output_dir / "dependencies").exists():
        skill_content += (
            "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
        )
        refs_added = True
    if detect_patterns and (output_dir / "patterns").exists():
        skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
        refs_added = True
    if extract_test_examples and (output_dir / "test_examples").exists():
        skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
        refs_added = True
    if extract_config_patterns and (output_dir / "config_patterns").exists():
        skill_content += (
            "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
        )
        refs_added = True
    if (output_dir / "architecture").exists():
        skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
        refs_added = True
    if extract_docs and (output_dir / "documentation").exists():
        skill_content += (
            "- **Documentation**: `references/documentation/` - Project documentation\n"
        )
        refs_added = True

    if not refs_added:
        skill_content += "No additional references generated (analysis features disabled).\n"

    skill_content += "\n"

    # Footer
    skill_content += "---\n\n"
    skill_content += "**Generated by Skill Seeker** | Codebase Analyzer with C3.x Analysis\n"

    # Write SKILL.md
    skill_path = output_dir / "SKILL.md"
    skill_path.write_text(skill_content, encoding="utf-8")

    line_count = len(skill_content.split("\n"))
    logger.info(f"✅ Generated SKILL.md: {skill_path} ({line_count} lines)")

    # Generate references/ directory structure
    _generate_references(output_dir)


def _get_language_stats(files: list[dict]) -> dict[str, int]:
    """Count files by language from analysis results."""
    stats = {}
    for file_data in files:
        # files is a list of dicts with 'language' key
        lang = file_data.get("language", "Unknown")
        if lang != "Unknown":
            stats[lang] = stats.get(lang, 0) + 1
    return stats


def _format_patterns_section(output_dir: Path) -> str:
    """Format design patterns section from patterns/detected_patterns.json."""
    patterns_file = output_dir / "patterns" / "detected_patterns.json"
    if not patterns_file.exists():
        return ""

    try:
        with open(patterns_file, encoding="utf-8") as f:
            patterns_data = json.load(f)
    except Exception:
        return ""

    if not patterns_data:
        return ""

    # Deduplicate patterns by class, keeping the highest-confidence hit
    pattern_counts = {}
    by_class = {}

    for pattern_file in patterns_data:
        for pattern in pattern_file.get("patterns", []):
            ptype = pattern.get("pattern_type", "Unknown")
            cls = pattern.get("class_name", "")
            confidence = pattern.get("confidence", 0)

            # Skip low confidence
            if confidence < 0.7:
                continue

            # Deduplicate by class
            key = f"{cls}:{ptype}"
            if key not in by_class or by_class[key]["confidence"] < confidence:
                by_class[key] = pattern

    # Count by type after deduplication, so per-type counts agree with the total
    for pattern in by_class.values():
        ptype = pattern.get("pattern_type", "Unknown")
        pattern_counts[ptype] = pattern_counts.get(ptype, 0) + 1

    if not pattern_counts:
        return ""

    content = "### 🎨 Design Patterns Detected\n\n"
    content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n"

    # Top 5 pattern types
    for ptype, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
        content += f"- **{ptype}**: {count} instances\n"

    content += f"\n*Total: {len(by_class)} high-confidence patterns*\n\n"
    content += "*See `references/patterns/` for complete pattern analysis*\n\n"
    return content
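
# Illustrative dedup: keys are "<class>:<pattern_type>", keeping the best hit --
#   {"class_name": "Config", "pattern_type": "Singleton", "confidence": 0.8}
#   {"class_name": "Config", "pattern_type": "Singleton", "confidence": 0.9}
# -> by_class["Config:Singleton"] holds the 0.9 entry; pattern_counts["Singleton"] == 1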


def _format_examples_section(output_dir: Path) -> str:
    """Format code examples section from test_examples/test_examples.json."""
    examples_file = output_dir / "test_examples" / "test_examples.json"
    if not examples_file.exists():
        return ""

    try:
        with open(examples_file, encoding="utf-8") as f:
            examples_data = json.load(f)
    except Exception:
        return ""

    examples = examples_data.get("examples", [])
    if not examples:
        return ""

    # Filter high-value examples (complexity > 0.7)
    high_value = [ex for ex in examples if ex.get("complexity_score", 0) > 0.7]

    if not high_value:
        # If no high complexity, take any examples
        high_value = examples[:10]

    content = "## 📝 Code Examples\n\n"
    content += "*High-quality examples extracted from test files (C3.2)*\n\n"

    # Top 10 examples
    for ex in sorted(high_value, key=lambda x: x.get("complexity_score", 0), reverse=True)[:10]:
        desc = ex.get("description", "Example")
        lang = ex.get("language", "python").lower()
        code = ex.get("code", "")
        complexity = ex.get("complexity_score", 0)

        content += f"**{desc}** (complexity: {complexity:.2f})\n\n"
        content += f"```{lang}\n{code}\n```\n\n"

    content += "*See `references/test_examples/` for all extracted examples*\n\n"
    return content


def _format_api_section(output_dir: Path) -> str:
    """Format API reference section."""
    api_dir = output_dir / "api_reference"
    if not api_dir.exists():
        return ""

    api_md = api_dir / "api_reference.md"
    if not api_md.exists():
        return ""

    try:
        api_content = api_md.read_text(encoding="utf-8")
    except Exception:
        return ""

    # Extract first section (up to 500 chars)
    preview = api_content[:500]
    if len(api_content) > 500:
        preview += "..."

    content = "## 🔧 API Reference\n\n"
    content += "*Extracted from codebase analysis (C2.5)*\n\n"
    content += preview + "\n\n"
    content += "*See `references/api_reference/` for complete API documentation*\n\n"
    return content


def _format_architecture_section(output_dir: Path) -> str:
    """Format architecture section from architecture/architectural_patterns.json."""
    arch_file = output_dir / "architecture" / "architectural_patterns.json"
    if not arch_file.exists():
        return ""

    try:
        with open(arch_file, encoding="utf-8") as f:
            arch_data = json.load(f)
    except Exception:
        return ""

    patterns = arch_data.get("patterns", [])
    if not patterns:
        return ""

    content = "## 🏗️ Architecture Overview\n\n"
    content += "*From C3.7 architectural analysis*\n\n"

    content += "**Detected Architectural Patterns:**\n\n"
    for pattern in patterns[:5]:
        name = pattern.get("pattern_name", "Unknown")
        confidence = pattern.get("confidence", 0)
        indicators = pattern.get("indicators", [])

        content += f"- **{name}** (confidence: {confidence:.2f})\n"
        if indicators:
            content += f"  - Indicators: {', '.join(indicators[:3])}\n"

    content += f"\n*Total: {len(patterns)} architectural patterns detected*\n\n"
    content += "*See `references/architecture/` for complete architectural analysis*\n\n"
    return content


def _format_config_section(output_dir: Path) -> str:
    """Format configuration patterns section."""
    config_file = output_dir / "config_patterns" / "config_patterns.json"
    if not config_file.exists():
        return ""

    try:
        with open(config_file, encoding="utf-8") as f:
            config_data = json.load(f)
    except Exception:
        return ""

    config_files = config_data.get("config_files", [])
    if not config_files:
        return ""

    total_settings = sum(len(cf.get("settings", [])) for cf in config_files)
    total_patterns = sum(len(cf.get("patterns", [])) for cf in config_files)

    content = "## ⚙️ Configuration Patterns\n\n"
    content += "*From C3.4 configuration analysis*\n\n"
    content += f"**Configuration Files Analyzed:** {len(config_files)}\n"
    content += f"**Total Settings:** {total_settings}\n"
    content += f"**Patterns Detected:** {total_patterns}\n\n"

    # List config file types found
    file_types = {}
    for cf in config_files:
        ctype = cf.get("config_type", "unknown")
        file_types[ctype] = file_types.get(ctype, 0) + 1

    if file_types:
        content += "**Configuration Types:**\n"
        for ctype, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
            content += f"- {ctype}: {count} files\n"
        content += "\n"

    content += "*See `references/config_patterns/` for detailed configuration analysis*\n\n"
    return content


def _format_documentation_section(_output_dir: Path, docs_data: dict[str, Any]) -> str:
    """Format project documentation section from extracted markdown files.

    Note: _output_dir is unused but kept for consistency with the other _format_*
    functions. Documentation data is provided via the docs_data parameter.
    """
    if not docs_data or docs_data.get("total_files", 0) == 0:
        return ""

    categories = docs_data.get("categories", {})
    files = docs_data.get("files", [])

    content = "## 📖 Project Documentation\n\n"
    content += "*Extracted from markdown files in the project (C3.9)*\n\n"
    content += f"**Total Documentation Files:** {docs_data['total_files']}\n"
    content += f"**Categories:** {len(categories)}\n\n"

    # List documents by category (most important first)
    priority_order = [
        "overview",
        "architecture",
        "guides",
        "workflows",
        "features",
        "api",
        "examples",
    ]

    # Sort categories by priority
    sorted_categories = []
    for cat in priority_order:
        if cat in categories:
            sorted_categories.append(cat)
    for cat in sorted(categories.keys()):
        if cat not in sorted_categories:
            sorted_categories.append(cat)

    for category in sorted_categories[:6]:  # Limit to 6 categories in SKILL.md
        cat_files = categories[category]
        content += f"### {category.title()}\n\n"

        # Get file details for this category
        cat_docs = [f for f in files if f.get("category") == category]

        for doc in cat_docs[:5]:  # Limit to 5 docs per category
            title = doc.get("title") or doc.get("filename", "Unknown")
            path = doc.get("path", "")

            # Add summary if available (deep/full depth)
            if doc.get("ai_description"):
                content += f"- **{title}**: {doc['ai_description']}\n"
            elif doc.get("summary"):
                # Extract first line from summary
                summary = doc["summary"].split("\n")[0]
                if len(summary) > 100:
                    summary = summary[:100] + "..."
                content += f"- **{title}**: {summary}\n"
            else:
                content += f"- **{title}** (`{path}`)\n"

        if len(cat_files) > 5:
            content += f"- *...and {len(cat_files) - 5} more*\n"

        content += "\n"

    # AI-enhanced topics if available
    all_topics = []
    for doc in files:
        all_topics.extend(doc.get("ai_topics", []))

    if all_topics:
        # Deduplicate and count
        from collections import Counter

        topic_counts = Counter(all_topics)
        top_topics = [t for t, _ in topic_counts.most_common(10)]
        content += f"**Key Topics:** {', '.join(top_topics)}\n\n"

    content += "*See `references/documentation/` for all project documentation*\n\n"
    return content


def _generate_references(output_dir: Path):
    """
    Generate references/ directory structure by copying analysis output.

    Creates a clean references/ directory that mirrors all analysis outputs
    (copies rather than symlinks, for portability).
    """
    references_dir = output_dir / "references"
    references_dir.mkdir(exist_ok=True)

    # Map analysis directories to reference names
    mappings = {
        "api_reference": "api_reference",
        "dependencies": "dependencies",
        "patterns": "patterns",
        "test_examples": "test_examples",
        "tutorials": "tutorials",
        "config_patterns": "config_patterns",
        "architecture": "architecture",
        "documentation": "documentation",
    }

    for source, target in mappings.items():
        source_dir = output_dir / source
        target_dir = references_dir / target

        if source_dir.exists() and source_dir.is_dir():
            # Copy directory to references/ (not symlink, for portability)
            if target_dir.exists():
                shutil.rmtree(target_dir)
            shutil.copytree(source_dir, target_dir)
            logger.debug(f"Copied {source} → references/{target}")

    logger.info(f"✅ Generated references directory: {references_dir}")


def main():
    """Command-line interface for codebase analysis."""
    parser = argparse.ArgumentParser(
        description="Analyze local codebases and extract code knowledge",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze current directory
  codebase-scraper --directory . --output output/codebase/

  # Deep analysis (API reference and dependency graph are built by default)
  codebase-scraper --directory /path/to/repo --depth deep

  # Analyze only Python and JavaScript
  codebase-scraper --directory . --languages Python,JavaScript

  # Use file patterns
  codebase-scraper --directory . --file-patterns "*.py,src/**/*.js"

  # Full analysis with all features (default)
  codebase-scraper --directory . --depth deep

  # Surface analysis (fast, skip all analysis features)
  codebase-scraper --directory . --depth surface --skip-api-reference --skip-dependency-graph --skip-patterns --skip-test-examples

  # Skip specific features
  codebase-scraper --directory . --skip-patterns --skip-test-examples
""",
    )

    parser.add_argument("--directory", required=True, help="Directory to analyze")
    parser.add_argument(
        "--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
    )
    parser.add_argument(
        "--depth",
        choices=["surface", "deep", "full"],
        default="deep",
        help=(
            "Analysis depth: "
            "surface (basic code structure, ~1-2 min), "
            "deep (code + patterns + tests, ~5-10 min, DEFAULT), "
            "full (everything + AI enhancement, ~20-60 min). "
            "💡 TIP: Use --quick or --comprehensive presets instead for better UX!"
        ),
    )
    parser.add_argument(
        "--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
    )
    parser.add_argument(
        "--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
    )
    parser.add_argument(
        "--skip-api-reference",
        action="store_true",
        default=False,
        help="Skip API reference markdown documentation generation (default: enabled)",
    )
    parser.add_argument(
        "--skip-dependency-graph",
        action="store_true",
        default=False,
        help="Skip dependency graph and circular dependency detection (default: enabled)",
    )
    parser.add_argument(
        "--skip-patterns",
        action="store_true",
        default=False,
        help="Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-test-examples",
        action="store_true",
        default=False,
        help="Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-how-to-guides",
        action="store_true",
        default=False,
        help="Skip how-to guide generation from workflow examples (default: enabled)",
    )
    parser.add_argument(
        "--skip-config-patterns",
        action="store_true",
        default=False,
        help="Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-docs",
        action="store_true",
        default=False,
        help="Skip project documentation extraction from markdown files (README, docs/, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="auto",
        help=(
            "AI enhancement mode for how-to guides: "
            "auto (auto-detect: API if ANTHROPIC_API_KEY set, else LOCAL), "
            "api (Claude API, requires ANTHROPIC_API_KEY), "
            "local (Claude Code Max, FREE, no API key), "
            "none (disable AI enhancement). "
            "💡 TIP: Use --enhance flag instead for simpler UX!"
        ),
    )
    parser.add_argument("--no-comments", action="store_true", help="Skip comment extraction")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    parser.add_argument(
        "--enhance-level",
        type=int,
        choices=[0, 1, 2, 3],
        default=0,
        help=(
            "AI enhancement level: "
            "0=off (default), "
            "1=SKILL.md only, "
            "2=SKILL.md+Architecture+Config, "
            "3=full (patterns, tests, config, architecture, SKILL.md)"
        ),
    )

    # Check for deprecated flags. These are no longer registered on the parser,
    # so parse_args() below will reject them; warn first to explain why.
    deprecated_flags = {
        "--build-api-reference": "--skip-api-reference",
        "--build-dependency-graph": "--skip-dependency-graph",
        "--detect-patterns": "--skip-patterns",
        "--extract-test-examples": "--skip-test-examples",
        "--build-how-to-guides": "--skip-how-to-guides",
        "--extract-config-patterns": "--skip-config-patterns",
    }

    for old_flag, new_flag in deprecated_flags.items():
        if old_flag in sys.argv:
            logger.warning(
                f"⚠️ DEPRECATED: {old_flag} is deprecated. "
                f"All features are now enabled by default. "
                f"Use {new_flag} to disable this feature."
            )

    args = parser.parse_args()

    # Handle presets (Phase 1 feature - NEW). The hasattr() guards make these
    # blocks no-ops unless --quick/--comprehensive have been registered on the
    # parser (e.g., by a wrapper entry point).
    if (
        hasattr(args, "quick")
        and args.quick
        and hasattr(args, "comprehensive")
        and args.comprehensive
    ):
        logger.error("❌ Cannot use --quick and --comprehensive together. Choose one.")
        return 1

    if hasattr(args, "quick") and args.quick:
        # Override depth and disable advanced features
        args.depth = "surface"
        args.skip_patterns = True
        args.skip_test_examples = True
        args.skip_how_to_guides = True
        args.skip_config_patterns = True
        args.ai_mode = "none"
        logger.info("⚡ Quick analysis mode: surface depth, basic features only (~1-2 min)")

    if hasattr(args, "comprehensive") and args.comprehensive:
        # Override depth and enable all features
        args.depth = "full"
        args.skip_patterns = False
        args.skip_test_examples = False
        args.skip_how_to_guides = False
        args.skip_config_patterns = False
        args.ai_mode = "auto"
        logger.info("🚀 Comprehensive analysis mode: all features + AI enhancement (~20-60 min)")

    # Set logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate directory
    directory = Path(args.directory)
    if not directory.exists():
        logger.error(f"Directory not found: {directory}")
        return 1

    if not directory.is_dir():
        logger.error(f"Not a directory: {directory}")
        return 1

    # Parse languages
    languages = None
    if args.languages:
        languages = [lang.strip() for lang in args.languages.split(",")]

    # Parse file patterns
    file_patterns = None
    if args.file_patterns:
        file_patterns = [p.strip() for p in args.file_patterns.split(",")]

    # Analyze codebase
    try:
        results = analyze_codebase(
            directory=directory,
            output_dir=Path(args.output),
            depth=args.depth,
            languages=languages,
            file_patterns=file_patterns,
            build_api_reference=not args.skip_api_reference,
            extract_comments=not args.no_comments,
            build_dependency_graph=not args.skip_dependency_graph,
            detect_patterns=not args.skip_patterns,
            extract_test_examples=not args.skip_test_examples,
            build_how_to_guides=not args.skip_how_to_guides,
            extract_config_patterns=not args.skip_config_patterns,
            extract_docs=not args.skip_docs,
            enhance_level=args.enhance_level,  # AI enhancement level (0-3)
        )

        # Print summary
        print(f"\n{'=' * 60}")
        print("CODEBASE ANALYSIS COMPLETE")
        print(f"{'=' * 60}")
        print(f"Files analyzed: {len(results['files'])}")
        print(f"Output directory: {args.output}")
        if not args.skip_api_reference:
            print(f"API reference: {Path(args.output) / 'api_reference'}")
        print(f"{'=' * 60}\n")

        return 0

    except KeyboardInterrupt:
        logger.error("\nAnalysis interrupted by user")
        return 130
    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        import traceback

        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())