run ruff
This commit is contained in:
@@ -13,21 +13,21 @@ Analysis modes:
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, List
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from skill_seekers.cli.github_fetcher import GitHubThreeStreamFetcher, ThreeStreamData
|
||||
from skill_seekers.cli.github_fetcher import GitHubThreeStreamFetcher
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnalysisResult:
|
||||
"""Unified analysis result from any codebase source."""
|
||||
code_analysis: Dict
|
||||
github_docs: Optional[Dict] = None
|
||||
github_insights: Optional[Dict] = None
|
||||
source_type: str = 'local' # 'local' or 'github'
|
||||
analysis_depth: str = 'basic' # 'basic' or 'c3x'
|
||||
|
||||
code_analysis: dict
|
||||
github_docs: dict | None = None
|
||||
github_insights: dict | None = None
|
||||
source_type: str = "local" # 'local' or 'github'
|
||||
analysis_depth: str = "basic" # 'basic' or 'c3x'
|
||||
|
||||
|
||||
class UnifiedCodebaseAnalyzer:
|
||||
@@ -59,21 +59,17 @@ class UnifiedCodebaseAnalyzer:
|
||||
)
|
||||
"""
|
||||
|
||||
def __init__(self, github_token: Optional[str] = None):
|
||||
def __init__(self, github_token: str | None = None):
|
||||
"""
|
||||
Initialize analyzer.
|
||||
|
||||
Args:
|
||||
github_token: Optional GitHub API token for higher rate limits
|
||||
"""
|
||||
self.github_token = github_token or os.getenv('GITHUB_TOKEN')
|
||||
self.github_token = github_token or os.getenv("GITHUB_TOKEN")
|
||||
|
||||
def analyze(
|
||||
self,
|
||||
source: str,
|
||||
depth: str = 'c3x',
|
||||
fetch_github_metadata: bool = True,
|
||||
output_dir: Optional[Path] = None
|
||||
self, source: str, depth: str = "c3x", fetch_github_metadata: bool = True, output_dir: Path | None = None
|
||||
) -> AnalysisResult:
|
||||
"""
|
||||
Analyze codebase with specified depth.
|
||||
@@ -92,18 +88,14 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
# Step 1: Acquire source
|
||||
if self.is_github_url(source):
|
||||
print(f"📦 Source type: GitHub repository")
|
||||
print("📦 Source type: GitHub repository")
|
||||
return self._analyze_github(source, depth, fetch_github_metadata, output_dir)
|
||||
else:
|
||||
print(f"📁 Source type: Local directory")
|
||||
print("📁 Source type: Local directory")
|
||||
return self._analyze_local(source, depth)
|
||||
|
||||
def _analyze_github(
|
||||
self,
|
||||
repo_url: str,
|
||||
depth: str,
|
||||
fetch_metadata: bool,
|
||||
output_dir: Optional[Path]
|
||||
self, repo_url: str, depth: str, fetch_metadata: bool, output_dir: Path | None
|
||||
) -> AnalysisResult:
|
||||
"""
|
||||
Analyze GitHub repository with three-stream fetcher.
|
||||
@@ -123,32 +115,28 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
# Analyze code with specified depth
|
||||
code_directory = three_streams.code_stream.directory
|
||||
if depth == 'basic':
|
||||
if depth == "basic":
|
||||
code_analysis = self.basic_analysis(code_directory)
|
||||
elif depth == 'c3x':
|
||||
elif depth == "c3x":
|
||||
code_analysis = self.c3x_analysis(code_directory)
|
||||
else:
|
||||
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
|
||||
|
||||
# Build result with all streams
|
||||
result = AnalysisResult(
|
||||
code_analysis=code_analysis,
|
||||
source_type='github',
|
||||
analysis_depth=depth
|
||||
)
|
||||
result = AnalysisResult(code_analysis=code_analysis, source_type="github", analysis_depth=depth)
|
||||
|
||||
# Add GitHub-specific data if available
|
||||
if fetch_metadata:
|
||||
result.github_docs = {
|
||||
'readme': three_streams.docs_stream.readme,
|
||||
'contributing': three_streams.docs_stream.contributing,
|
||||
'docs_files': three_streams.docs_stream.docs_files
|
||||
"readme": three_streams.docs_stream.readme,
|
||||
"contributing": three_streams.docs_stream.contributing,
|
||||
"docs_files": three_streams.docs_stream.docs_files,
|
||||
}
|
||||
result.github_insights = {
|
||||
'metadata': three_streams.insights_stream.metadata,
|
||||
'common_problems': three_streams.insights_stream.common_problems,
|
||||
'known_solutions': three_streams.insights_stream.known_solutions,
|
||||
'top_labels': three_streams.insights_stream.top_labels
|
||||
"metadata": three_streams.insights_stream.metadata,
|
||||
"common_problems": three_streams.insights_stream.common_problems,
|
||||
"known_solutions": three_streams.insights_stream.known_solutions,
|
||||
"top_labels": three_streams.insights_stream.top_labels,
|
||||
}
|
||||
|
||||
return result
|
||||
@@ -173,20 +161,16 @@ class UnifiedCodebaseAnalyzer:
|
||||
raise NotADirectoryError(f"Not a directory: {directory}")
|
||||
|
||||
# Analyze code with specified depth
|
||||
if depth == 'basic':
|
||||
if depth == "basic":
|
||||
code_analysis = self.basic_analysis(code_directory)
|
||||
elif depth == 'c3x':
|
||||
elif depth == "c3x":
|
||||
code_analysis = self.c3x_analysis(code_directory)
|
||||
else:
|
||||
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
|
||||
|
||||
return AnalysisResult(
|
||||
code_analysis=code_analysis,
|
||||
source_type='local',
|
||||
analysis_depth=depth
|
||||
)
|
||||
return AnalysisResult(code_analysis=code_analysis, source_type="local", analysis_depth=depth)
|
||||
|
||||
def basic_analysis(self, directory: Path) -> Dict:
|
||||
def basic_analysis(self, directory: Path) -> dict:
|
||||
"""
|
||||
Fast, shallow analysis (1-2 min).
|
||||
|
||||
@@ -205,19 +189,19 @@ class UnifiedCodebaseAnalyzer:
|
||||
print("📊 Running basic analysis (1-2 min)...")
|
||||
|
||||
analysis = {
|
||||
'directory': str(directory),
|
||||
'analysis_type': 'basic',
|
||||
'files': self.list_files(directory),
|
||||
'structure': self.get_directory_structure(directory),
|
||||
'imports': self.extract_imports(directory),
|
||||
'entry_points': self.find_entry_points(directory),
|
||||
'statistics': self.compute_statistics(directory)
|
||||
"directory": str(directory),
|
||||
"analysis_type": "basic",
|
||||
"files": self.list_files(directory),
|
||||
"structure": self.get_directory_structure(directory),
|
||||
"imports": self.extract_imports(directory),
|
||||
"entry_points": self.find_entry_points(directory),
|
||||
"statistics": self.compute_statistics(directory),
|
||||
}
|
||||
|
||||
print(f"✅ Basic analysis complete: {len(analysis['files'])} files analyzed")
|
||||
return analysis
|
||||
|
||||
def c3x_analysis(self, directory: Path) -> Dict:
|
||||
def c3x_analysis(self, directory: Path) -> dict:
|
||||
"""
|
||||
Deep C3.x analysis (20-60 min).
|
||||
|
||||
@@ -245,17 +229,18 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
try:
|
||||
# Import codebase analyzer
|
||||
from .codebase_scraper import analyze_codebase
|
||||
import tempfile
|
||||
|
||||
from .codebase_scraper import analyze_codebase
|
||||
|
||||
# Create temporary output directory for C3.x analysis
|
||||
temp_output = Path(tempfile.mkdtemp(prefix='c3x_analysis_'))
|
||||
temp_output = Path(tempfile.mkdtemp(prefix="c3x_analysis_"))
|
||||
|
||||
# Run full C3.x analysis
|
||||
analyze_codebase(
|
||||
directory=directory,
|
||||
output_dir=temp_output,
|
||||
depth='deep',
|
||||
depth="deep",
|
||||
languages=None, # All languages
|
||||
file_patterns=None, # All files
|
||||
build_api_reference=True,
|
||||
@@ -265,20 +250,16 @@ class UnifiedCodebaseAnalyzer:
|
||||
build_how_to_guides=True,
|
||||
extract_config_patterns=True,
|
||||
enhance_with_ai=False, # Disable AI for speed
|
||||
ai_mode='none'
|
||||
ai_mode="none",
|
||||
)
|
||||
|
||||
# Load C3.x results from output files
|
||||
c3x_data = self._load_c3x_results(temp_output)
|
||||
|
||||
# Merge with basic analysis
|
||||
c3x = {
|
||||
**basic,
|
||||
'analysis_type': 'c3x',
|
||||
**c3x_data
|
||||
}
|
||||
c3x = {**basic, "analysis_type": "c3x", **c3x_data}
|
||||
|
||||
print(f"✅ C3.x analysis complete!")
|
||||
print("✅ C3.x analysis complete!")
|
||||
print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected")
|
||||
print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted")
|
||||
print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated")
|
||||
@@ -289,24 +270,24 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ C3.x analysis failed: {e}")
|
||||
print(f" Falling back to basic analysis with placeholders")
|
||||
print(" Falling back to basic analysis with placeholders")
|
||||
|
||||
# Fall back to placeholders
|
||||
c3x = {
|
||||
**basic,
|
||||
'analysis_type': 'c3x',
|
||||
'c3_1_patterns': [],
|
||||
'c3_2_examples': [],
|
||||
'c3_2_examples_count': 0,
|
||||
'c3_3_guides': [],
|
||||
'c3_4_configs': [],
|
||||
'c3_7_architecture': [],
|
||||
'error': str(e)
|
||||
"analysis_type": "c3x",
|
||||
"c3_1_patterns": [],
|
||||
"c3_2_examples": [],
|
||||
"c3_2_examples_count": 0,
|
||||
"c3_3_guides": [],
|
||||
"c3_4_configs": [],
|
||||
"c3_7_architecture": [],
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return c3x
|
||||
|
||||
def _load_c3x_results(self, output_dir: Path) -> Dict:
|
||||
def _load_c3x_results(self, output_dir: Path) -> dict:
|
||||
"""
|
||||
Load C3.x analysis results from output directory.
|
||||
|
||||
@@ -321,65 +302,65 @@ class UnifiedCodebaseAnalyzer:
|
||||
c3x_data = {}
|
||||
|
||||
# C3.1: Design Patterns
|
||||
patterns_file = output_dir / 'patterns' / 'design_patterns.json'
|
||||
patterns_file = output_dir / "patterns" / "design_patterns.json"
|
||||
if patterns_file.exists():
|
||||
with open(patterns_file, 'r') as f:
|
||||
with open(patterns_file) as f:
|
||||
patterns_data = json.load(f)
|
||||
c3x_data['c3_1_patterns'] = patterns_data.get('patterns', [])
|
||||
c3x_data["c3_1_patterns"] = patterns_data.get("patterns", [])
|
||||
else:
|
||||
c3x_data['c3_1_patterns'] = []
|
||||
c3x_data["c3_1_patterns"] = []
|
||||
|
||||
# C3.2: Test Examples
|
||||
examples_file = output_dir / 'test_examples' / 'test_examples.json'
|
||||
examples_file = output_dir / "test_examples" / "test_examples.json"
|
||||
if examples_file.exists():
|
||||
with open(examples_file, 'r') as f:
|
||||
with open(examples_file) as f:
|
||||
examples_data = json.load(f)
|
||||
c3x_data['c3_2_examples'] = examples_data.get('examples', [])
|
||||
c3x_data['c3_2_examples_count'] = examples_data.get('total_examples', 0)
|
||||
c3x_data["c3_2_examples"] = examples_data.get("examples", [])
|
||||
c3x_data["c3_2_examples_count"] = examples_data.get("total_examples", 0)
|
||||
else:
|
||||
c3x_data['c3_2_examples'] = []
|
||||
c3x_data['c3_2_examples_count'] = 0
|
||||
c3x_data["c3_2_examples"] = []
|
||||
c3x_data["c3_2_examples_count"] = 0
|
||||
|
||||
# C3.3: How-to Guides
|
||||
guides_file = output_dir / 'tutorials' / 'guide_collection.json'
|
||||
guides_file = output_dir / "tutorials" / "guide_collection.json"
|
||||
if guides_file.exists():
|
||||
with open(guides_file, 'r') as f:
|
||||
with open(guides_file) as f:
|
||||
guides_data = json.load(f)
|
||||
c3x_data['c3_3_guides'] = guides_data.get('guides', [])
|
||||
c3x_data["c3_3_guides"] = guides_data.get("guides", [])
|
||||
else:
|
||||
c3x_data['c3_3_guides'] = []
|
||||
c3x_data["c3_3_guides"] = []
|
||||
|
||||
# C3.4: Config Patterns
|
||||
config_file = output_dir / 'config_patterns' / 'config_patterns.json'
|
||||
config_file = output_dir / "config_patterns" / "config_patterns.json"
|
||||
if config_file.exists():
|
||||
with open(config_file, 'r') as f:
|
||||
with open(config_file) as f:
|
||||
config_data = json.load(f)
|
||||
c3x_data['c3_4_configs'] = config_data.get('config_files', [])
|
||||
c3x_data["c3_4_configs"] = config_data.get("config_files", [])
|
||||
else:
|
||||
c3x_data['c3_4_configs'] = []
|
||||
c3x_data["c3_4_configs"] = []
|
||||
|
||||
# C3.7: Architecture
|
||||
arch_file = output_dir / 'architecture' / 'architectural_patterns.json'
|
||||
arch_file = output_dir / "architecture" / "architectural_patterns.json"
|
||||
if arch_file.exists():
|
||||
with open(arch_file, 'r') as f:
|
||||
with open(arch_file) as f:
|
||||
arch_data = json.load(f)
|
||||
c3x_data['c3_7_architecture'] = arch_data.get('patterns', [])
|
||||
c3x_data["c3_7_architecture"] = arch_data.get("patterns", [])
|
||||
else:
|
||||
c3x_data['c3_7_architecture'] = []
|
||||
c3x_data["c3_7_architecture"] = []
|
||||
|
||||
# Add dependency graph data
|
||||
dep_file = output_dir / 'dependencies' / 'dependency_graph.json'
|
||||
dep_file = output_dir / "dependencies" / "dependency_graph.json"
|
||||
if dep_file.exists():
|
||||
with open(dep_file, 'r') as f:
|
||||
with open(dep_file) as f:
|
||||
dep_data = json.load(f)
|
||||
c3x_data['dependency_graph'] = dep_data
|
||||
c3x_data["dependency_graph"] = dep_data
|
||||
|
||||
# Add API reference data
|
||||
api_file = output_dir / 'code_analysis.json'
|
||||
api_file = output_dir / "code_analysis.json"
|
||||
if api_file.exists():
|
||||
with open(api_file, 'r') as f:
|
||||
with open(api_file) as f:
|
||||
api_data = json.load(f)
|
||||
c3x_data['api_reference'] = api_data
|
||||
c3x_data["api_reference"] = api_data
|
||||
|
||||
return c3x_data
|
||||
|
||||
@@ -393,9 +374,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
Returns:
|
||||
True if GitHub URL, False otherwise
|
||||
"""
|
||||
return 'github.com' in source
|
||||
return "github.com" in source
|
||||
|
||||
def list_files(self, directory: Path) -> List[Dict]:
|
||||
def list_files(self, directory: Path) -> list[dict]:
|
||||
"""
|
||||
List all files in directory with metadata.
|
||||
|
||||
@@ -406,20 +387,22 @@ class UnifiedCodebaseAnalyzer:
|
||||
List of file info dicts
|
||||
"""
|
||||
files = []
|
||||
for file_path in directory.rglob('*'):
|
||||
for file_path in directory.rglob("*"):
|
||||
if file_path.is_file():
|
||||
try:
|
||||
files.append({
|
||||
'path': str(file_path.relative_to(directory)),
|
||||
'size': file_path.stat().st_size,
|
||||
'extension': file_path.suffix
|
||||
})
|
||||
files.append(
|
||||
{
|
||||
"path": str(file_path.relative_to(directory)),
|
||||
"size": file_path.stat().st_size,
|
||||
"extension": file_path.suffix,
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
# Skip files we can't access
|
||||
continue
|
||||
return files
|
||||
|
||||
def get_directory_structure(self, directory: Path) -> Dict:
|
||||
def get_directory_structure(self, directory: Path) -> dict:
|
||||
"""
|
||||
Get directory structure tree.
|
||||
|
||||
@@ -429,35 +412,24 @@ class UnifiedCodebaseAnalyzer:
|
||||
Returns:
|
||||
Dict representing directory structure
|
||||
"""
|
||||
structure = {
|
||||
'name': directory.name,
|
||||
'type': 'directory',
|
||||
'children': []
|
||||
}
|
||||
structure = {"name": directory.name, "type": "directory", "children": []}
|
||||
|
||||
try:
|
||||
for item in sorted(directory.iterdir()):
|
||||
if item.name.startswith('.'):
|
||||
if item.name.startswith("."):
|
||||
continue # Skip hidden files
|
||||
|
||||
if item.is_dir():
|
||||
# Only include immediate subdirectories
|
||||
structure['children'].append({
|
||||
'name': item.name,
|
||||
'type': 'directory'
|
||||
})
|
||||
structure["children"].append({"name": item.name, "type": "directory"})
|
||||
elif item.is_file():
|
||||
structure['children'].append({
|
||||
'name': item.name,
|
||||
'type': 'file',
|
||||
'extension': item.suffix
|
||||
})
|
||||
structure["children"].append({"name": item.name, "type": "file", "extension": item.suffix})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return structure
|
||||
|
||||
def extract_imports(self, directory: Path) -> Dict[str, List[str]]:
|
||||
def extract_imports(self, directory: Path) -> dict[str, list[str]]:
|
||||
"""
|
||||
Extract import statements from code files.
|
||||
|
||||
@@ -467,27 +439,23 @@ class UnifiedCodebaseAnalyzer:
|
||||
Returns:
|
||||
Dict mapping file extensions to import lists
|
||||
"""
|
||||
imports = {
|
||||
'.py': [],
|
||||
'.js': [],
|
||||
'.ts': []
|
||||
}
|
||||
imports = {".py": [], ".js": [], ".ts": []}
|
||||
|
||||
# Sample up to 10 files per extension
|
||||
for ext in imports.keys():
|
||||
files = list(directory.rglob(f'*{ext}'))[:10]
|
||||
for ext in imports:
|
||||
files = list(directory.rglob(f"*{ext}"))[:10]
|
||||
for file_path in files:
|
||||
try:
|
||||
content = file_path.read_text(encoding='utf-8')
|
||||
if ext == '.py':
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
if ext == ".py":
|
||||
# Extract Python imports
|
||||
for line in content.split('\n')[:50]: # Check first 50 lines
|
||||
if line.strip().startswith(('import ', 'from ')):
|
||||
for line in content.split("\n")[:50]: # Check first 50 lines
|
||||
if line.strip().startswith(("import ", "from ")):
|
||||
imports[ext].append(line.strip())
|
||||
elif ext in ['.js', '.ts']:
|
||||
elif ext in [".js", ".ts"]:
|
||||
# Extract JS/TS imports
|
||||
for line in content.split('\n')[:50]:
|
||||
if line.strip().startswith(('import ', 'require(')):
|
||||
for line in content.split("\n")[:50]:
|
||||
if line.strip().startswith(("import ", "require(")):
|
||||
imports[ext].append(line.strip())
|
||||
except Exception:
|
||||
continue
|
||||
@@ -495,7 +463,7 @@ class UnifiedCodebaseAnalyzer:
|
||||
# Remove empty lists
|
||||
return {k: v for k, v in imports.items() if v}
|
||||
|
||||
def find_entry_points(self, directory: Path) -> List[str]:
|
||||
def find_entry_points(self, directory: Path) -> list[str]:
|
||||
"""
|
||||
Find potential entry points (main files, setup files, etc.).
|
||||
|
||||
@@ -509,10 +477,20 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
# Common entry point patterns
|
||||
entry_patterns = [
|
||||
'main.py', '__main__.py', 'app.py', 'server.py',
|
||||
'index.js', 'index.ts', 'main.js', 'main.ts',
|
||||
'setup.py', 'pyproject.toml', 'package.json',
|
||||
'Makefile', 'docker-compose.yml', 'Dockerfile'
|
||||
"main.py",
|
||||
"__main__.py",
|
||||
"app.py",
|
||||
"server.py",
|
||||
"index.js",
|
||||
"index.ts",
|
||||
"main.js",
|
||||
"main.ts",
|
||||
"setup.py",
|
||||
"pyproject.toml",
|
||||
"package.json",
|
||||
"Makefile",
|
||||
"docker-compose.yml",
|
||||
"Dockerfile",
|
||||
]
|
||||
|
||||
for pattern in entry_patterns:
|
||||
@@ -525,7 +503,7 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
return entry_points
|
||||
|
||||
def compute_statistics(self, directory: Path) -> Dict:
|
||||
def compute_statistics(self, directory: Path) -> dict:
|
||||
"""
|
||||
Compute basic statistics about the codebase.
|
||||
|
||||
@@ -535,39 +513,34 @@ class UnifiedCodebaseAnalyzer:
|
||||
Returns:
|
||||
Dict with statistics
|
||||
"""
|
||||
stats = {
|
||||
'total_files': 0,
|
||||
'total_size_bytes': 0,
|
||||
'file_types': {},
|
||||
'languages': {}
|
||||
}
|
||||
stats = {"total_files": 0, "total_size_bytes": 0, "file_types": {}, "languages": {}}
|
||||
|
||||
for file_path in directory.rglob('*'):
|
||||
for file_path in directory.rglob("*"):
|
||||
if not file_path.is_file():
|
||||
continue
|
||||
|
||||
try:
|
||||
stats['total_files'] += 1
|
||||
stats['total_size_bytes'] += file_path.stat().st_size
|
||||
stats["total_files"] += 1
|
||||
stats["total_size_bytes"] += file_path.stat().st_size
|
||||
|
||||
ext = file_path.suffix
|
||||
if ext:
|
||||
stats['file_types'][ext] = stats['file_types'].get(ext, 0) + 1
|
||||
stats["file_types"][ext] = stats["file_types"].get(ext, 0) + 1
|
||||
|
||||
# Map extensions to languages
|
||||
language_map = {
|
||||
'.py': 'Python',
|
||||
'.js': 'JavaScript',
|
||||
'.ts': 'TypeScript',
|
||||
'.go': 'Go',
|
||||
'.rs': 'Rust',
|
||||
'.java': 'Java',
|
||||
'.rb': 'Ruby',
|
||||
'.php': 'PHP'
|
||||
".py": "Python",
|
||||
".js": "JavaScript",
|
||||
".ts": "TypeScript",
|
||||
".go": "Go",
|
||||
".rs": "Rust",
|
||||
".java": "Java",
|
||||
".rb": "Ruby",
|
||||
".php": "PHP",
|
||||
}
|
||||
if ext in language_map:
|
||||
lang = language_map[ext]
|
||||
stats['languages'][lang] = stats['languages'].get(lang, 0) + 1
|
||||
stats["languages"][lang] = stats["languages"].get(lang, 0) + 1
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user