Files
claude-skills-reference/engineering-team/senior-architect/scripts/project_architect.py
Alireza Rezvani 94224f2201 feat(senior-architect): Complete skill overhaul per Issue #48 (#88)
Addresses SkillzWave feedback and Anthropic best practices:

SKILL.md (343 lines):
- Third-person description with trigger phrases
- Added Table of Contents for navigation
- Concrete tool descriptions with usage examples
- Decision workflows: Database, Architecture Pattern, Monolith vs Microservices
- Removed marketing fluff, added actionable content

References (rewritten with real content):
- architecture_patterns.md: 9 patterns with trade-offs, code examples
  (Monolith, Modular Monolith, Microservices, Event-Driven, CQRS,
  Event Sourcing, Hexagonal, Clean Architecture, API Gateway)
- system_design_workflows.md: 6 step-by-step workflows
  (System Design Interview, Capacity Planning, API Design,
  Database Schema, Scalability Assessment, Migration Planning)
- tech_decision_guide.md: 7 decision frameworks with matrices
  (Database, Cache, Message Queue, Auth, Frontend, Cloud, API)

Scripts (fully functional, standard library only):
- architecture_diagram_generator.py: Mermaid + PlantUML + ASCII output
  Scans project structure, detects components, relationships
- dependency_analyzer.py: npm/pip/go/cargo support
  Circular dependency detection, coupling score calculation
- project_architect.py: Pattern detection (7 patterns)
  Layer violation detection, code quality metrics

All scripts tested and working.

Closes #48

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 10:29:14 +01:00

750 lines
28 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Project Architect
Analyzes project structure and detects:
- Architectural patterns (MVC, layered, hexagonal, microservices)
- Code organization issues (god classes, mixed concerns)
- Layer violations
- Missing architectural components
Provides architecture assessment and improvement recommendations.
"""
import os
import sys
import json
import argparse
import re
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
from collections import defaultdict
class PatternDetector:
    """Detects architectural patterns in a project.

    The detector looks at the project's top-level directories (and the names
    of the files beneath them), scores every signature in ``PATTERNS`` and
    maps each top-level directory onto a layer from ``LAYER_HIERARCHY``.
    """

    # Pattern signatures. The 'weight' entries are kept for backward
    # compatibility but are never written to: per-scan scores are stored on
    # the instance (``pattern_scores``) so detectors do not share state.
    PATTERNS = {
        'layered': {
            'indicators': ['controller', 'service', 'repository', 'dao', 'model', 'entity'],
            'structure': ['controllers', 'services', 'repositories', 'models'],
            'weight': 0,
        },
        'mvc': {
            'indicators': ['model', 'view', 'controller'],
            'structure': ['models', 'views', 'controllers'],
            'weight': 0,
        },
        'hexagonal': {
            'indicators': ['port', 'adapter', 'domain', 'infrastructure', 'application'],
            'structure': ['ports', 'adapters', 'domain', 'infrastructure'],
            'weight': 0,
        },
        'clean': {
            'indicators': ['entity', 'usecase', 'interface', 'framework', 'adapter'],
            'structure': ['entities', 'usecases', 'interfaces', 'frameworks'],
            'weight': 0,
        },
        'microservices': {
            'indicators': ['service', 'api', 'gateway', 'docker', 'kubernetes'],
            'structure': ['services', 'api-gateway', 'docker-compose'],
            'weight': 0,
        },
        'modular_monolith': {
            'indicators': ['module', 'feature', 'bounded'],
            'structure': ['modules', 'features'],
            'weight': 0,
        },
        'feature_based': {
            'indicators': ['feature', 'component', 'page'],
            'structure': ['features', 'components', 'pages'],
            'weight': 0,
        },
    }

    # Layer definitions for violation detection
    LAYER_HIERARCHY = {
        'presentation': ['controller', 'handler', 'view', 'page', 'component', 'ui', 'route'],
        'application': ['service', 'usecase', 'application', 'facade'],
        'domain': ['domain', 'entity', 'model', 'aggregate', 'valueobject'],
        'infrastructure': ['repository', 'dao', 'adapter', 'gateway', 'client', 'config'],
    }
    LAYER_ORDER = ['presentation', 'application', 'domain', 'infrastructure']

    # Directory names that are never treated as part of the project itself.
    IGNORE_DIRS = frozenset({
        '.git', 'node_modules', '__pycache__', '.venv', 'venv',
        'dist', 'build', '.next', 'coverage', '.pytest_cache',
    })

    def __init__(self, project_path: Path):
        """Create a detector for the project rooted at *project_path*."""
        self.project_path = project_path
        self.directories: Set[str] = set()
        self.files: Dict[str, List[str]] = defaultdict(list)  # dir -> files
        self.detected_pattern: Optional[str] = None
        self.confidence: float = 0
        self.layer_assignments: Dict[str, str] = {}  # dir -> layer
        self.pattern_scores: Dict[str, int] = {name: 0 for name in self.PATTERNS}

    def scan(self) -> Dict:
        """Scan the project and detect patterns.

        Returns:
            A dict with the keys 'detected_pattern', 'confidence',
            'directories' (sorted for stable output), 'layer_assignments'
            and 'pattern_scores'.
        """
        self._scan_structure()
        self._detect_pattern()
        self._assign_layers()
        return {
            'detected_pattern': self.detected_pattern,
            'confidence': self.confidence,
            'directories': sorted(self.directories),
            'layer_assignments': self.layer_assignments,
            'pattern_scores': dict(self.pattern_scores),
        }

    @staticmethod
    def _has_indicator(indicator: str, name: str) -> bool:
        """Return True when *indicator* occurs in *name* or its naive singular.

        A plain substring test misses '-ies' plurals ('repositories' does not
        contain 'repository'), so the name is also tried with 'ies' -> 'y'.
        """
        return indicator in name or indicator in name.replace('ies', 'y')

    def _scan_structure(self):
        """Collect top-level directories and the file names beneath them."""
        # Start from a clean slate so a repeated scan() does not accumulate
        # duplicate file names (which would inflate the pattern scores).
        self.directories = set()
        self.files = defaultdict(list)
        self.layer_assignments = {}

        for item in self.project_path.iterdir():
            if not item.is_dir():
                continue
            if item.name in self.IGNORE_DIRS or item.name.startswith('.'):
                continue
            key = item.name.lower()
            self.directories.add(key)
            # Scan files in directory
            try:
                for f in item.rglob('*'):
                    # Skip anything living under a nested ignored directory
                    # (e.g. frontend/node_modules/...).
                    parents = f.relative_to(item).parts[:-1]
                    if any(part in self.IGNORE_DIRS for part in parents):
                        continue
                    if f.is_file():
                        self.files[key].append(f.name.lower())
            except PermissionError:
                # Best effort: unreadable subtrees simply contribute no files.
                pass

    def _detect_pattern(self):
        """Score every pattern and record the best match and its confidence."""
        # Flattened once; it does not depend on the pattern being scored.
        all_files = [f for names in self.files.values() for f in names]
        scores: Dict[str, int] = {}

        for pattern, config in self.PATTERNS.items():
            # Exact structural directory names are the strongest signal.
            score = 2 * sum(
                1 for struct in config['structure']
                if struct.lower() in self.directories
            )
            for indicator in config['indicators']:
                # Indicator appearing in a directory name.
                score += sum(
                    1 for dir_name in self.directories
                    if self._has_indicator(indicator, dir_name)
                )
                # Indicator appearing in file names, capped per indicator.
                matching_files = sum(1 for f in all_files if indicator in f)
                score += min(matching_files // 5, 3)
            scores[pattern] = score

        self.pattern_scores = scores

        # max() keeps the first pattern on ties, i.e. PATTERNS order.
        best_name, best_score = max(scores.items(), key=lambda kv: kv[1])
        if best_score > 3:
            best_config = self.PATTERNS[best_name]
            max_possible = (len(best_config['structure']) * 2
                            + len(best_config['indicators']) * 2)
            self.detected_pattern = best_name
            self.confidence = min(100, int((best_score / max(max_possible, 1)) * 100))
        else:
            self.detected_pattern = 'unstructured'
            self.confidence = 0

    def _assign_layers(self):
        """Assign each scanned directory to the first matching layer."""
        for dir_name in self.directories:
            self.layer_assignments[dir_name] = next(
                (
                    layer
                    for layer, indicators in self.LAYER_HIERARCHY.items()
                    if any(self._has_indicator(ind, dir_name) for ind in indicators)
                ),
                'unknown',
            )
class CodeAnalyzer:
    """Analyzes code for architectural issues.

    Walks the project tree and records oversized files, files with many
    imports, oversized classes and directory naming inconsistencies.
    Findings are appended to ``issues``; aggregate numbers go to ``metrics``.
    """

    # Thresholds
    MAX_FILE_LINES = 500
    MAX_CLASS_LINES = 300
    MAX_FUNCTION_LINES = 50
    MAX_IMPORTS_PER_FILE = 30

    # Directory names whose contents are never analyzed.
    IGNORE_DIRS = frozenset({
        '.git', 'node_modules', '__pycache__', '.venv', 'venv',
        'dist', 'build', '.next', 'coverage',
    })

    # Simple class detection (Python, JS/TS and Java style headers).
    _CLASS_RE = re.compile(r'^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)')
    _PY_IMPORT_RE = re.compile(r'^(?:from|import)\s+', re.MULTILINE)
    _JS_IMPORT_RE = re.compile(r'^import\s+', re.MULTILINE)

    def __init__(self, project_path: Path, verbose: bool = False):
        """Create an analyzer for the project rooted at *project_path*."""
        self.project_path = project_path
        self.verbose = verbose
        self.issues: List[Dict] = []
        self.metrics: Dict = {}

    def analyze(self) -> Dict:
        """Run all checks and return ``{'issues': [...], 'metrics': {...}}``."""
        # Rebind (rather than clear) so results returned by an earlier call
        # stay intact and a repeated call does not report duplicates.
        self.issues = []
        self.metrics = {}
        self._analyze_file_sizes()
        self._analyze_imports()
        self._detect_god_classes()
        self._check_naming_conventions()
        return {
            'issues': self.issues,
            'metrics': self.metrics,
        }

    def _iter_source_files(self, extensions: List[str]) -> List[Tuple[Path, str]]:
        """Return ``(path, project-relative path string)`` for matching files.

        Ignored directories are matched against the path *relative to the
        project root*, so a project that itself lives under a directory
        called e.g. ``build`` is still analyzed.
        """
        found = []
        for ext in extensions:
            for file_path in self.project_path.rglob(f'*{ext}'):
                rel = file_path.relative_to(self.project_path)
                if any(part in self.IGNORE_DIRS for part in rel.parts):
                    continue
                found.append((file_path, str(rel)))
        return found

    @staticmethod
    def _read(file_path: Path) -> Optional[str]:
        """Read a file as text, returning None when it cannot be read."""
        try:
            return file_path.read_text(encoding='utf-8', errors='ignore')
        except OSError:
            # Best effort: unreadable entries (or directories whose name
            # happens to match the glob) are skipped.
            return None

    @staticmethod
    def _count_lines(content: str) -> int:
        """Count lines without charging an extra one for a trailing newline."""
        if not content:
            return 0
        return content.count('\n') + (0 if content.endswith('\n') else 1)

    def _analyze_file_sizes(self):
        """Check for oversized files and collect size metrics."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx', '.go', '.rs', '.java']
        large_files = []
        total_lines = 0
        file_count = 0

        for file_path, rel in self._iter_source_files(extensions):
            content = self._read(file_path)
            if content is None:
                continue
            lines = self._count_lines(content)
            total_lines += lines
            file_count += 1
            if lines > self.MAX_FILE_LINES:
                large_files.append({'path': rel, 'lines': lines})
                self.issues.append({
                    'type': 'large_file',
                    'severity': 'warning',
                    'file': rel,
                    'message': f"File has {lines} lines (threshold: {self.MAX_FILE_LINES})",
                    'suggestion': "Consider splitting into smaller, focused modules",
                })

        self.metrics['total_lines'] = total_lines
        self.metrics['file_count'] = file_count
        self.metrics['avg_file_lines'] = total_lines // file_count if file_count > 0 else 0
        self.metrics['large_files'] = large_files

    def _analyze_imports(self):
        """Flag files whose number of import statements exceeds the threshold."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx']
        high_import_files = []

        for file_path, rel in self._iter_source_files(extensions):
            content = self._read(file_path)
            if content is None:
                continue
            # Pick one counter per language: running both patterns on the
            # same text would count every `import x` line twice.
            if file_path.suffix.lower() == '.py':
                counter = self._PY_IMPORT_RE
            else:
                counter = self._JS_IMPORT_RE
            imports = len(counter.findall(content))
            if imports > self.MAX_IMPORTS_PER_FILE:
                high_import_files.append({'path': rel, 'imports': imports})
                self.issues.append({
                    'type': 'high_imports',
                    'severity': 'info',
                    'file': rel,
                    'message': f"File has {imports} imports (threshold: {self.MAX_IMPORTS_PER_FILE})",
                    'suggestion': "Consider if all imports are necessary or if the file has too many responsibilities",
                })

        self.metrics['high_import_files'] = high_import_files

    def _record_god_class(self, god_classes: List[Dict], rel: str,
                          class_name: str, class_lines: int) -> None:
        """Record one class as metric *and* issue when it exceeds the limit."""
        if class_lines <= self.MAX_CLASS_LINES:
            return
        god_classes.append({
            'file': rel,
            'class': class_name,
            'lines': class_lines,
        })
        self.issues.append({
            'type': 'god_class',
            'severity': 'warning',
            'file': rel,
            'message': f"Class '{class_name}' has ~{class_lines} lines (threshold: {self.MAX_CLASS_LINES})",
            'suggestion': "Consider applying Single Responsibility Principle and splitting into smaller classes",
        })

    def _detect_god_classes(self):
        """Detect potential god classes (oversized classes).

        A class is measured from its header line up to the next class header
        in the same file (or the end of the file), so the size is approximate.
        """
        extensions = ['.py', '.js', '.ts', '.java']
        god_classes: List[Dict] = []

        for file_path, rel in self._iter_source_files(extensions):
            content = self._read(file_path)
            if content is None:
                continue
            lines = content.split('\n')
            if lines and lines[-1] == '':
                lines.pop()  # trailing newline is not an extra line

            class_name = None
            class_start = 0
            for i, line in enumerate(lines):
                match = self._CLASS_RE.match(line)
                if not match:
                    continue
                if class_name is not None:
                    # A new header closes the previous class.
                    self._record_god_class(god_classes, rel, class_name, i - class_start)
                class_name = match.group(1)
                class_start = i
            if class_name is not None:
                # The last class runs to the end of the file.
                self._record_god_class(god_classes, rel, class_name, len(lines) - class_start)

        self.metrics['god_classes'] = god_classes

    def _check_naming_conventions(self):
        """Check for naming convention issues in directory names."""
        naming_issues = []

        # Check directory naming
        for dir_path in self.project_path.rglob('*'):
            if not dir_path.is_dir():
                continue
            rel = dir_path.relative_to(self.project_path)
            if any(part in self.IGNORE_DIRS for part in rel.parts):
                continue
            dir_name = dir_path.name
            # Flag names containing an uppercase letter and no '-' or '_'
            # separator (directories are expected to be kebab-case or snake_case).
            if re.search(r'[A-Z]', dir_name) and '-' not in dir_name and '_' not in dir_name:
                # Only the first three directory levels are checked; counting
                # path parts keeps this independent of the OS separator.
                if len(rel.parts) <= 3:
                    naming_issues.append({
                        'type': 'directory',
                        'path': str(rel),
                        'issue': 'PascalCase directory name',
                    })

        if naming_issues:
            self.issues.append({
                'type': 'naming_convention',
                'severity': 'info',
                'message': f"Found {len(naming_issues)} naming convention inconsistencies",
                'details': naming_issues[:5],  # Show first 5
            })
        self.metrics['naming_issues'] = naming_issues
class LayerViolationDetector:
    """Detects architectural layer violations.

    Source files are attributed to a layer through the top-level directory
    they live in (``layer_assignments``); each import whose first path
    segment names another assigned directory is checked against
    ``VALID_DEPENDENCIES``.
    """

    LAYER_ORDER = ['presentation', 'application', 'domain', 'infrastructure']

    # Valid dependency directions (key can depend on values)
    VALID_DEPENDENCIES = {
        'presentation': ['application', 'domain'],
        'application': ['domain', 'infrastructure'],
        'domain': [],  # Domain should not depend on other layers
        'infrastructure': ['domain'],
    }

    # Directory names whose contents are never analyzed.
    IGNORE_DIRS = frozenset({
        '.git', 'node_modules', '__pycache__', '.venv', 'venv',
        'dist', 'build', '.next', 'coverage',
    })

    def __init__(self, project_path: Path, layer_assignments: Dict[str, str]):
        """Create a detector.

        Args:
            project_path: Root directory of the project.
            layer_assignments: Mapping of lower-cased top-level directory
                name to layer name (or 'unknown').
        """
        self.project_path = project_path
        self.layer_assignments = layer_assignments
        self.violations: List[Dict] = []

    def detect(self) -> List[Dict]:
        """Detect layer violations and return them as a list of dicts."""
        # Rebind so a repeated call does not report every violation twice.
        self.violations = []
        self._analyze_imports()
        return self.violations

    def _analyze_imports(self):
        """Analyze imports of every source file for layer violations."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx']

        for ext in extensions:
            for file_path in self.project_path.rglob(f'*{ext}'):
                rel_path = file_path.relative_to(self.project_path)
                # Match ignored names against the project-relative path only,
                # so the location of the project itself does not matter.
                if any(part in self.IGNORE_DIRS for part in rel_path.parts):
                    continue
                # Files directly in the project root belong to no layer.
                if len(rel_path.parts) < 2:
                    continue
                source_layer = self.layer_assignments.get(rel_path.parts[0].lower())
                if not source_layer or source_layer == 'unknown':
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                except OSError:
                    # Best effort: unreadable files are skipped.
                    continue

                # Check each import for layer violations
                for imp in self._extract_imports(content, file_path.suffix):
                    target_dir = self._get_import_directory(imp)
                    if not target_dir:
                        continue
                    target_layer = self.layer_assignments.get(target_dir.lower())
                    if not target_layer or target_layer == 'unknown':
                        continue
                    if self._is_violation(source_layer, target_layer):
                        self.violations.append({
                            'type': 'layer_violation',
                            'severity': 'warning',
                            'file': str(rel_path),
                            'source_layer': source_layer,
                            'target_layer': target_layer,
                            'import': imp,
                            'message': f"{source_layer} layer should not depend on {target_layer} layer",
                        })

    def _extract_imports(self, content: str, suffix: Optional[str] = None) -> List[str]:
        """Extract imported module paths from source text.

        Args:
            content: Source text of one file.
            suffix: File extension such as '.py' or '.ts'. When given, only
                the patterns for that language are applied; this keeps the
                Python pattern from reading the binding name of a JS
                ``import name from '...'`` as a module. When omitted, both
                families of patterns are applied.

        Returns:
            Module paths in the order the patterns found them.
        """
        use_python = suffix is None or suffix.lower() == '.py'
        use_js = suffix is None or suffix.lower() != '.py'
        imports: List[str] = []
        if use_python:
            # Leading blanks are allowed so imports inside functions or
            # conditional blocks are seen too.
            imports.extend(re.findall(r'^[ \t]*(?:from|import)\s+([\w.]+)', content, re.MULTILINE))
        if use_js:
            # import 'x' / import('x') / require('x')
            imports.extend(re.findall(r'(?:import|require)\s*\(?[\'"]([^\'"\s]+)[\'"]', content))
            # import ... from 'x' / export ... from 'x'
            imports.extend(re.findall(r'\bfrom\s+[\'"]([^\'"\s]+)[\'"]', content))
        return imports

    def _get_import_directory(self, imp: str) -> Optional[str]:
        """Return the first path segment of an import, or None to skip it."""
        # Relative imports stay within the importing directory's own tree as
        # far as this check is concerned, so they are skipped.
        if imp.startswith('.'):
            return None
        # Drop a leading project-root alias before taking the first segment.
        for alias in ('@/', '~/'):
            if imp.startswith(alias):
                imp = imp[len(alias):]
                break
        head = imp.split('/')[0].split('.')[0]
        return head or None

    def _is_violation(self, source_layer: str, target_layer: str) -> bool:
        """Return True when *source_layer* may not depend on *target_layer*."""
        if source_layer == target_layer:
            return False
        return target_layer not in self.VALID_DEPENDENCIES.get(source_layer, [])
class ProjectArchitect:
    """Main class that orchestrates architecture analysis."""

    def __init__(self, project_path: Path, verbose: bool = False):
        """Create an architect for the project rooted at *project_path*.

        Args:
            project_path: Root directory of the project to analyze.
            verbose: When True, progress lines are printed during analysis.
        """
        self.project_path = project_path
        self.verbose = verbose

    def analyze(self) -> Dict:
        """Run full architecture analysis.

        Runs pattern detection, code analysis and layer-violation detection
        in that order (the layer check consumes the layer assignments
        produced by pattern detection) and merges the results.

        Returns:
            A report dict with the keys 'project_path', 'architecture',
            'structure', 'code_quality', 'layer_violations',
            'recommendations' and 'summary'.
        """
        if self.verbose:
            print(f"Analyzing project: {self.project_path}")

        # Pattern detection
        pattern_detector = PatternDetector(self.project_path)
        pattern_result = pattern_detector.scan()
        if self.verbose:
            print(f"Detected pattern: {pattern_result['detected_pattern']} "
                  f"(confidence: {pattern_result['confidence']}%)")

        # Code analysis
        code_analyzer = CodeAnalyzer(self.project_path, self.verbose)
        code_result = code_analyzer.analyze()
        if self.verbose:
            print(f"Found {len(code_result['issues'])} code issues")

        # Layer violation detection
        violation_detector = LayerViolationDetector(
            self.project_path,
            pattern_result['layer_assignments']
        )
        violations = violation_detector.detect()
        if self.verbose:
            print(f"Found {len(violations)} layer violations")

        # Generate recommendations
        recommendations = self._generate_recommendations(
            pattern_result, code_result, violations
        )

        return {
            'project_path': str(self.project_path),
            'architecture': {
                'detected_pattern': pattern_result['detected_pattern'],
                'confidence': pattern_result['confidence'],
                'layer_assignments': pattern_result['layer_assignments'],
                'pattern_scores': pattern_result['pattern_scores'],
            },
            'structure': {
                'directories': pattern_result['directories'],
            },
            'code_quality': {
                'metrics': code_result['metrics'],
                'issues': code_result['issues'],
            },
            'layer_violations': violations,
            'recommendations': recommendations,
            'summary': {
                'pattern': pattern_result['detected_pattern'],
                'confidence': pattern_result['confidence'],
                'total_issues': len(code_result['issues']) + len(violations),
                'code_issues': len(code_result['issues']),
                'layer_violations': len(violations),
            },
        }

    def _generate_recommendations(self, pattern_result: Dict, code_result: Dict,
                                  violations: List[Dict]) -> List[str]:
        """Generate actionable recommendations.

        Args:
            pattern_result: Needs 'detected_pattern', 'confidence' and
                'layer_assignments'.
            code_result: Needs 'metrics' (its 'god_classes' and 'large_files'
                entries are optional).
            violations: Layer violations; only the count is used.

        Returns:
            Recommendation sentences in a fixed order: pattern, layer
            violations, god classes, large files, missing layers.
        """
        recommendations = []

        # Pattern recommendations
        pattern = pattern_result['detected_pattern']
        confidence = pattern_result['confidence']
        if pattern == 'unstructured' or confidence < 30:
            recommendations.append(
                "Consider adopting a clear architectural pattern (Layered, Clean, or Hexagonal) "
                "to improve code organization and maintainability"
            )

        # Layer violation recommendations
        if violations:
            recommendations.append(
                f"Fix {len(violations)} layer violation(s) to maintain proper separation of concerns. "
                "Dependencies should flow from presentation → application → domain ← infrastructure"
            )

        # God class recommendations
        god_classes = code_result['metrics'].get('god_classes', [])
        if god_classes:
            recommendations.append(
                f"Split {len(god_classes)} large class(es) into smaller, focused classes "
                "following the Single Responsibility Principle"
            )

        # Large file recommendations
        large_files = code_result['metrics'].get('large_files', [])
        if large_files:
            recommendations.append(
                f"Consider refactoring {len(large_files)} large file(s) into smaller modules"
            )

        # Missing layer recommendations
        if pattern in ['layered', 'clean', 'hexagonal']:
            assigned_layers = set(pattern_result['layer_assignments'].values())
            expected_layers = {'presentation', 'application', 'domain', 'infrastructure'}
            # Sorted so the sentence is identical from run to run; joining a
            # set directly yields an arbitrary order.
            missing = sorted(expected_layers - assigned_layers)
            if missing:
                recommendations.append(
                    f"Consider adding missing architectural layer(s): {', '.join(missing)}"
                )

        return recommendations
def print_human_report(report: Dict):
    """Write the analysis report to stdout in a human-readable layout.

    Sections are printed in this order: header, architecture pattern (with
    layer assignments when there are any), summary, up to 10 code issues,
    up to 5 layer violations, recommendations, metrics.

    Args:
        report: Report dict; the keys 'project_path', 'architecture',
            'summary', 'code_quality', 'layer_violations' and
            'recommendations' are read.
    """
    banner = "=" * 60

    print("\n" + banner)
    print("ARCHITECTURE ASSESSMENT")
    print(banner)
    print(f"\nProject: {report['project_path']}")

    architecture = report['architecture']
    print("\n--- Architecture Pattern ---")
    print(f"Detected: {architecture['detected_pattern'].replace('_', ' ').title()}")
    print(f"Confidence: {architecture['confidence']}%")

    if architecture['layer_assignments']:
        print("\nLayer Assignments:")
        for dir_name, layer in sorted(architecture['layer_assignments'].items()):
            # Directories that could not be mapped to a layer get a '?' marker.
            status = "?" if layer == 'unknown' else "OK"
            print(f" {status} {dir_name:20} -> {layer}")

    totals = report['summary']
    print("\n--- Summary ---")
    print(f"Total issues: {totals['total_issues']}")
    print(f" Code issues: {totals['code_issues']}")
    print(f" Layer violations: {totals['layer_violations']}")

    code_issues = report['code_quality']['issues']
    if code_issues:
        print("\n--- Code Issues ---")
        for entry in code_issues[:10]:
            severity = entry['severity'].upper()
            print(f" [{severity}] {entry.get('file', 'N/A')}")
            print(f" {entry['message']}")
            if 'suggestion' in entry:
                print(f" Suggestion: {entry['suggestion']}")

    layer_violations = report['layer_violations']
    if layer_violations:
        print("\n--- Layer Violations ---")
        for violation in layer_violations[:5]:
            print(f" {violation['file']}")
            print(f" {violation['message']}")

    advice = report['recommendations']
    if advice:
        print("\n--- Recommendations ---")
        for number, text in enumerate(advice, 1):
            print(f" {number}. {text}")

    metrics = report['code_quality']['metrics']
    print("\n--- Metrics ---")
    print(f" Total lines: {metrics.get('total_lines', 'N/A')}")
    print(f" File count: {metrics.get('file_count', 'N/A')}")
    print(f" Avg lines/file: {metrics.get('avg_file_lines', 'N/A')}")
    print("\n" + banner)
def main():
    """Command-line entry point.

    Parses the arguments, validates the project path, runs the analysis and
    prints either a focused check result or the full report.

    Exit status:
        1 when the path does not exist or is not a directory;
        with ``--check pattern`` always 0;
        with ``--check layers`` 1 when violations were found, else 0;
        with ``--check code`` 1 when any issue has severity 'warning', else 0.
    """
    cli = argparse.ArgumentParser(
        description='Analyze project architecture and detect patterns and issues',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
%(prog)s ./my-project
%(prog)s ./my-project --verbose
%(prog)s ./my-project --output json
%(prog)s ./my-project --check layers
Detects:
- Architectural patterns (Layered, MVC, Hexagonal, Clean, Microservices)
- Code organization issues (large files, god classes)
- Layer violations (incorrect dependencies between layers)
- Missing architectural components
'''
    )
    cli.add_argument('project_path', help='Path to the project directory')
    cli.add_argument('--output', '-o', choices=['human', 'json'], default='human',
                     help='Output format (default: human)')
    cli.add_argument('--check', choices=['all', 'pattern', 'layers', 'code'], default='all',
                     help='What to check (default: all)')
    cli.add_argument('--verbose', '-v', action='store_true',
                     help='Enable verbose output')
    cli.add_argument('--save', '-s', help='Save report to file')
    opts = cli.parse_args()

    root = Path(opts.project_path).resolve()
    if not root.exists():
        print(f"Error: Project path does not exist: {root}", file=sys.stderr)
        sys.exit(1)
    if not root.is_dir():
        print(f"Error: Project path is not a directory: {root}", file=sys.stderr)
        sys.exit(1)

    # Run analysis (the full analysis runs regardless of --check).
    report = ProjectArchitect(root, verbose=opts.verbose).analyze()

    # Focused checks print a short result and terminate the process.
    if opts.check == 'pattern':
        detected = report['architecture']
        print(f"Pattern: {detected['detected_pattern']} (confidence: {detected['confidence']}%)")
        sys.exit(0)

    if opts.check == 'layers':
        found = report['layer_violations']
        if not found:
            print("No layer violations found.")
            sys.exit(0)
        print(f"Found {len(found)} layer violation(s):")
        for entry in found:
            print(f" {entry['file']}: {entry['message']}")
        sys.exit(1)

    if opts.check == 'code':
        problems = report['code_quality']['issues']
        if not problems:
            print("No code issues found.")
            sys.exit(0)
        print(f"Found {len(problems)} code issue(s):")
        for problem in problems[:10]:
            print(f" [{problem['severity'].upper()}] {problem['message']}")
        has_warning = any(p['severity'] == 'warning' for p in problems)
        sys.exit(1 if has_warning else 0)

    # Output report
    if opts.output != 'json':
        print_human_report(report)
        if opts.save:
            # The saved file is always JSON, even for human console output.
            Path(opts.save).write_text(json.dumps(report, indent=2))
            print(f"\nJSON report saved to {opts.save}")
        return

    serialized = json.dumps(report, indent=2)
    if opts.save:
        Path(opts.save).write_text(serialized)
        print(f"Report saved to {opts.save}")
    else:
        print(serialized)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()