#!/usr/bin/env python3
"""
Project Architect

Analyzes project structure and detects:
- Architectural patterns (MVC, layered, hexagonal, microservices)
- Code organization issues (god classes, mixed concerns)
- Layer violations
- Missing architectural components

Provides architecture assessment and improvement recommendations.
"""

import os
import sys
import json
import argparse
import re
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Set, Tuple, Optional
from collections import defaultdict


# Directory names that are never analyzed (VCS data, dependencies, build output).
IGNORE_DIRS = frozenset({
    '.git', 'node_modules', '__pycache__', '.venv', 'venv',
    'dist', 'build', '.next', 'coverage', '.pytest_cache',
})


def _is_ignored(path: Path, root: Path) -> bool:
    """Return True when *path* lies inside an ignored directory below *root*.

    Only the components relative to *root* are inspected, so a project that
    itself lives under e.g. ``/home/me/build/`` is not skipped wholesale.
    """
    try:
        parts = path.relative_to(root).parts
    except ValueError:
        # Not below root (should not happen for rglob results); fall back.
        parts = path.parts
    return any(part in IGNORE_DIRS for part in parts)


def _iter_source_files(root: Path, extensions: Iterable[str]) -> Iterator[Path]:
    """Yield non-ignored files below *root*, grouped by extension in order."""
    for ext in extensions:
        for file_path in root.rglob(f'*{ext}'):
            if _is_ignored(file_path, root) or not file_path.is_file():
                continue
            yield file_path


def _read_text(file_path: Path, verbose: bool = False) -> Optional[str]:
    """Read a file as UTF-8 (undecodable bytes dropped); None if unreadable."""
    try:
        return file_path.read_text(encoding='utf-8', errors='ignore')
    except OSError as exc:
        # Analysis is best-effort: an unreadable file is skipped, not fatal.
        if verbose:
            print(f"Skipping unreadable file {file_path}: {exc}", file=sys.stderr)
        return None


def _count_lines(content: str) -> int:
    """Count lines without charging an extra line for a trailing newline."""
    if not content:
        return 0
    return content.count('\n') + (0 if content.endswith('\n') else 1)


class PatternDetector:
    """Detects architectural patterns in a project."""

    # Pattern signatures. The 'weight' entries are kept for backward
    # compatibility only; computed scores live on the instance so that
    # detectors do not leak state into each other through this class dict.
    PATTERNS = {
        'layered': {
            'indicators': ['controller', 'service', 'repository', 'dao', 'model', 'entity'],
            'structure': ['controllers', 'services', 'repositories', 'models'],
            'weight': 0,
        },
        'mvc': {
            'indicators': ['model', 'view', 'controller'],
            'structure': ['models', 'views', 'controllers'],
            'weight': 0,
        },
        'hexagonal': {
            'indicators': ['port', 'adapter', 'domain', 'infrastructure', 'application'],
            'structure': ['ports', 'adapters', 'domain', 'infrastructure'],
            'weight': 0,
        },
        'clean': {
            'indicators': ['entity', 'usecase', 'interface', 'framework', 'adapter'],
            'structure': ['entities', 'usecases', 'interfaces', 'frameworks'],
            'weight': 0,
        },
        'microservices': {
            'indicators': ['service', 'api', 'gateway', 'docker', 'kubernetes'],
            'structure': ['services', 'api-gateway', 'docker-compose'],
            'weight': 0,
        },
        'modular_monolith': {
            'indicators': ['module', 'feature', 'bounded'],
            'structure': ['modules', 'features'],
            'weight': 0,
        },
        'feature_based': {
            'indicators': ['feature', 'component', 'page'],
            'structure': ['features', 'components', 'pages'],
            'weight': 0,
        },
    }

    # Layer definitions for violation detection. Matching is by substring, so
    # the irregular plurals are listed explicitly ('repository' is not a
    # substring of 'repositories', nor 'entity' of 'entities').
    LAYER_HIERARCHY = {
        'presentation': ['controller', 'handler', 'view', 'page', 'component', 'ui', 'route'],
        'application': ['service', 'usecase', 'application', 'facade'],
        'domain': ['domain', 'entity', 'entities', 'model', 'aggregate', 'valueobject'],
        'infrastructure': ['repository', 'repositories', 'dao', 'adapter', 'gateway',
                           'client', 'config'],
    }

    LAYER_ORDER = ['presentation', 'application', 'domain', 'infrastructure']

    def __init__(self, project_path: Path):
        self.project_path = project_path
        self.directories: Set[str] = set()
        self.files: Dict[str, List[str]] = defaultdict(list)  # dir -> files
        self.detected_pattern: Optional[str] = None
        self.confidence: float = 0
        self.layer_assignments: Dict[str, str] = {}  # dir -> layer
        self.pattern_scores: Dict[str, int] = {}  # pattern -> score

    def scan(self) -> Dict:
        """Scan the project and return the detected pattern and layer mapping.

        Returns:
            Dict with keys 'detected_pattern', 'confidence', 'directories'
            (sorted), 'layer_assignments' and 'pattern_scores'.
        """
        self._scan_structure()
        self._detect_pattern()
        self._assign_layers()

        return {
            'detected_pattern': self.detected_pattern,
            'confidence': self.confidence,
            'directories': sorted(self.directories),
            'layer_assignments': self.layer_assignments,
            'pattern_scores': dict(self.pattern_scores),
        }

    def _scan_structure(self):
        """Collect top-level directory names and the file names below them."""
        # Start clean so that calling scan() twice does not double-count.
        self.directories.clear()
        self.files.clear()

        for item in self.project_path.iterdir():
            if not item.is_dir():
                continue
            if item.name in IGNORE_DIRS or item.name.startswith('.'):
                continue

            key = item.name.lower()
            self.directories.add(key)
            try:
                for entry in item.rglob('*'):
                    if entry.is_file():
                        self.files[key].append(entry.name.lower())
            except PermissionError:
                # Unreadable subtrees simply contribute no file names.
                pass

    def _detect_pattern(self):
        """Score every known pattern and pick the best-scoring one."""
        # Loop-invariant: flatten the file names once, not once per pattern.
        all_files = [name for names in self.files.values() for name in names]

        self.pattern_scores = {}
        for pattern, config in self.PATTERNS.items():
            # Exact directory matches are the strongest signal.
            score = sum(
                2 for struct in config['structure']
                if struct.lower() in self.directories
            )
            # Indicator appearing anywhere in a directory name.
            score += sum(
                1
                for indicator in config['indicators']
                for dir_name in self.directories
                if indicator in dir_name
            )
            # Indicator appearing in file names; capped per indicator.
            for indicator in config['indicators']:
                matching_files = sum(1 for name in all_files if indicator in name)
                score += min(matching_files // 5, 3)

            self.pattern_scores[pattern] = score

        # max() keeps the first of equal scores, i.e. PATTERNS declaration order.
        best_name, best_score = max(self.pattern_scores.items(), key=lambda kv: kv[1])
        if best_score > 3:
            best_config = self.PATTERNS[best_name]
            max_possible = (len(best_config['structure']) * 2
                            + len(best_config['indicators']) * 2)
            self.detected_pattern = best_name
            self.confidence = min(100, int((best_score / max(max_possible, 1)) * 100))
        else:
            self.detected_pattern = 'unstructured'
            self.confidence = 0

    def _layer_for(self, dir_name: str) -> str:
        """Return the first layer with an indicator in *dir_name*, else 'unknown'."""
        for layer, indicators in self.LAYER_HIERARCHY.items():
            if any(indicator in dir_name for indicator in indicators):
                return layer
        return 'unknown'

    def _assign_layers(self):
        """Assign directories to architectural layers."""
        for dir_name in sorted(self.directories):
            self.layer_assignments[dir_name] = self._layer_for(dir_name)


class CodeAnalyzer:
    """Analyzes code for architectural issues."""

    # Thresholds
    MAX_FILE_LINES = 500
    MAX_CLASS_LINES = 300
    MAX_FUNCTION_LINES = 50
    MAX_IMPORTS_PER_FILE = 30

    # One import statement per matching line (Python `from`/`import`, JS `import`).
    _IMPORT_LINE_RE = re.compile(r'^(?:from|import)\s+', re.MULTILINE)
    # Simple class detection
    _CLASS_RE = re.compile(r'^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)')

    def __init__(self, project_path: Path, verbose: bool = False):
        self.project_path = project_path
        self.verbose = verbose
        self.issues: List[Dict] = []
        self.metrics: Dict = {}

    def analyze(self) -> Dict:
        """Run all code checks and return {'issues': [...], 'metrics': {...}}."""
        self._analyze_file_sizes()
        self._analyze_imports()
        self._detect_god_classes()
        self._check_naming_conventions()

        return {
            'issues': self.issues,
            'metrics': self.metrics,
        }

    def _relative(self, file_path: Path) -> str:
        """Return *file_path* relative to the project root as a string."""
        return str(file_path.relative_to(self.project_path))

    def _analyze_file_sizes(self):
        """Check for oversized files."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx', '.go', '.rs', '.java']
        large_files = []
        total_lines = 0
        file_count = 0

        for file_path in _iter_source_files(self.project_path, extensions):
            content = _read_text(file_path, self.verbose)
            if content is None:
                continue

            lines = _count_lines(content)
            total_lines += lines
            file_count += 1

            if lines > self.MAX_FILE_LINES:
                rel = self._relative(file_path)
                large_files.append({'path': rel, 'lines': lines})
                self.issues.append({
                    'type': 'large_file',
                    'severity': 'warning',
                    'file': rel,
                    'message': f"File has {lines} lines (threshold: {self.MAX_FILE_LINES})",
                    'suggestion': "Consider splitting into smaller, focused modules",
                })

        self.metrics['total_lines'] = total_lines
        self.metrics['file_count'] = file_count
        self.metrics['avg_file_lines'] = total_lines // file_count if file_count > 0 else 0
        self.metrics['large_files'] = large_files

    def _analyze_imports(self):
        """Flag files with more import statements than MAX_IMPORTS_PER_FILE."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx']
        high_import_files = []

        for file_path in _iter_source_files(self.project_path, extensions):
            content = _read_text(file_path, self.verbose)
            if content is None:
                continue

            # A single pattern covers Python and JS; running a separate JS
            # pattern as well would count every `import x` line twice.
            imports = len(self._IMPORT_LINE_RE.findall(content))
            if imports <= self.MAX_IMPORTS_PER_FILE:
                continue

            rel = self._relative(file_path)
            high_import_files.append({'path': rel, 'imports': imports})
            self.issues.append({
                'type': 'high_imports',
                'severity': 'info',
                'file': rel,
                'message': f"File has {imports} imports (threshold: {self.MAX_IMPORTS_PER_FILE})",
                'suggestion': "Consider if all imports are necessary or if the file has too many responsibilities",
            })

        self.metrics['high_import_files'] = high_import_files

    def _oversized_classes(self, lines: List[str]) -> List[Tuple[str, int]]:
        """Return (class name, approximate line span) for classes over the limit.

        A class is taken to span from its `class` line up to the next `class`
        line, or to the end of the file for the last one.
        """
        spans: List[Tuple[str, int]] = []
        current_name: Optional[str] = None
        current_start = 0

        for index, line in enumerate(lines):
            match = self._CLASS_RE.match(line)
            if not match:
                continue
            if current_name is not None:
                spans.append((current_name, index - current_start))
            current_name = match.group(1)
            current_start = index

        if current_name is not None:
            spans.append((current_name, len(lines) - current_start))

        return [(name, size) for name, size in spans if size > self.MAX_CLASS_LINES]

    def _detect_god_classes(self):
        """Detect potential god classes (oversized classes)."""
        extensions = ['.py', '.js', '.ts', '.java']
        god_classes = []

        for file_path in _iter_source_files(self.project_path, extensions):
            content = _read_text(file_path, self.verbose)
            if content is None:
                continue

            rel = self._relative(file_path)
            # Every oversized class is both recorded and reported, not only
            # the last class of the file.
            for class_name, class_lines in self._oversized_classes(content.split('\n')):
                god_classes.append({
                    'file': rel,
                    'class': class_name,
                    'lines': class_lines,
                })
                self.issues.append({
                    'type': 'god_class',
                    'severity': 'warning',
                    'file': rel,
                    'message': f"Class '{class_name}' has ~{class_lines} lines (threshold: {self.MAX_CLASS_LINES})",
                    'suggestion': "Consider applying Single Responsibility Principle and splitting into smaller classes",
                })

        self.metrics['god_classes'] = god_classes

    def _check_naming_conventions(self):
        """Check for naming convention issues."""
        naming_issues = []

        # Check directory naming
        for dir_path in self.project_path.rglob('*'):
            if not dir_path.is_dir() or _is_ignored(dir_path, self.project_path):
                continue

            dir_name = dir_path.name
            # Check for mixed case in directories (should be kebab-case or snake_case)
            if not re.search(r'[A-Z]', dir_name) or '-' in dir_name or '_' in dir_name:
                continue

            rel = dir_path.relative_to(self.project_path)
            # Only check top-level dirs; use path parts rather than splitting
            # on '/' so the depth test also works with Windows separators.
            if len(rel.parts) <= 3:
                naming_issues.append({
                    'type': 'directory',
                    'path': str(rel),
                    'issue': 'PascalCase directory name',
                })

        if naming_issues:
            self.issues.append({
                'type': 'naming_convention',
                'severity': 'info',
                'message': f"Found {len(naming_issues)} naming convention inconsistencies",
                'details': naming_issues[:5],  # Show first 5
            })

        self.metrics['naming_issues'] = naming_issues


class LayerViolationDetector:
    """Detects architectural layer violations."""

    LAYER_ORDER = ['presentation', 'application', 'domain', 'infrastructure']

    # Valid dependency directions (key can depend on values)
    VALID_DEPENDENCIES = {
        'presentation': ['application', 'domain'],
        'application': ['domain', 'infrastructure'],
        'domain': [],  # Domain should not depend on other layers
        'infrastructure': ['domain'],
    }

    # Python: `import pkg.mod` / `from pkg.mod import name`
    _PY_IMPORT_RE = re.compile(r'^(?:from|import)\s+([\w.]+)', re.MULTILINE)
    # JS/TS: `import 'x'`, `import('x')`, `require('x')`
    _JS_CALL_IMPORT_RE = re.compile(r'(?:import|require)\s*\(?[\'"]([^\'"\s]+)[\'"]')
    # JS/TS: `import a from 'x'`, `export { a } from 'x'`
    _JS_FROM_IMPORT_RE = re.compile(r'\bfrom\s*[\'"]([^\'"\s]+)[\'"]')

    # Path aliases that point at the project source root.
    _ROOT_ALIASES = ('@/', '~/')

    def __init__(self, project_path: Path, layer_assignments: Dict[str, str]):
        self.project_path = project_path
        self.layer_assignments = layer_assignments
        self.violations: List[Dict] = []

    def detect(self) -> List[Dict]:
        """Detect layer violations and return them as a list of dicts."""
        self._analyze_imports()
        return self.violations

    def _analyze_imports(self):
        """Analyze imports for layer violations."""
        extensions = ['.py', '.js', '.ts', '.jsx', '.tsx']

        for file_path in _iter_source_files(self.project_path, extensions):
            rel_path = file_path.relative_to(self.project_path)
            # Files directly in the project root belong to no layer directory.
            if len(rel_path.parts) < 2:
                continue

            source_layer = self.layer_assignments.get(rel_path.parts[0].lower())
            if not source_layer or source_layer == 'unknown':
                continue

            content = _read_text(file_path)
            if content is None:
                continue

            # Check each import for layer violations
            for imp in self._extract_imports(content):
                target_dir = self._get_import_directory(imp)
                if not target_dir:
                    continue

                target_layer = self.layer_assignments.get(target_dir.lower())
                if not target_layer or target_layer == 'unknown':
                    continue

                if self._is_violation(source_layer, target_layer):
                    self.violations.append({
                        'type': 'layer_violation',
                        'severity': 'warning',
                        'file': str(rel_path),
                        'source_layer': source_layer,
                        'target_layer': target_layer,
                        'import': imp,
                        'message': f"{source_layer} layer should not depend on {target_layer} layer",
                    })

    def _extract_imports(self, content: str) -> List[str]:
        """Extract imported module names/paths from Python or JS/TS source.

        Returns:
            Python matches first, then JS call-style imports, then JS
            `... from '...'` imports.
        """
        imports: List[str] = []
        imports.extend(self._PY_IMPORT_RE.findall(content))
        imports.extend(self._JS_CALL_IMPORT_RE.findall(content))
        # ES-module `import x from '...'` has a binding between the keyword
        # and the path, so the call-style pattern above cannot see it.
        imports.extend(self._JS_FROM_IMPORT_RE.findall(content))
        return imports

    def _get_import_directory(self, imp: str) -> Optional[str]:
        """Return the top-level directory/package of an import path.

        Relative imports (starting with '.') and empty paths yield None.
        """
        if imp.startswith('.'):
            return None  # Skip relative imports

        for alias in self._ROOT_ALIASES:
            if imp.startswith(alias):
                imp = imp[len(alias):]
                break

        top_level = imp.split('/')[0].split('.')[0]
        return top_level or None

    def _is_violation(self, source_layer: str, target_layer: str) -> bool:
        """Return True if *source_layer* may not depend on *target_layer*."""
        if source_layer == target_layer:
            return False
        return target_layer not in self.VALID_DEPENDENCIES.get(source_layer, [])


class ProjectArchitect:
    """Main class that orchestrates architecture analysis."""

    def __init__(self, project_path: Path, verbose: bool = False):
        self.project_path = project_path
        self.verbose = verbose

    def analyze(self) -> Dict:
        """Run full architecture analysis and return the report dict."""
        if self.verbose:
            print(f"Analyzing project: {self.project_path}")

        # Pattern detection
        pattern_result = PatternDetector(self.project_path).scan()
        if self.verbose:
            print(f"Detected pattern: {pattern_result['detected_pattern']} "
                  f"(confidence: {pattern_result['confidence']}%)")

        # Code analysis
        code_result = CodeAnalyzer(self.project_path, self.verbose).analyze()
        if self.verbose:
            print(f"Found {len(code_result['issues'])} code issues")

        # Layer violation detection
        violations = LayerViolationDetector(
            self.project_path,
            pattern_result['layer_assignments'],
        ).detect()
        if self.verbose:
            print(f"Found {len(violations)} layer violations")

        # Generate recommendations
        recommendations = self._generate_recommendations(
            pattern_result, code_result, violations
        )

        return {
            'project_path': str(self.project_path),
            'architecture': {
                'detected_pattern': pattern_result['detected_pattern'],
                'confidence': pattern_result['confidence'],
                'layer_assignments': pattern_result['layer_assignments'],
                'pattern_scores': pattern_result['pattern_scores'],
            },
            'structure': {
                'directories': pattern_result['directories'],
            },
            'code_quality': {
                'metrics': code_result['metrics'],
                'issues': code_result['issues'],
            },
            'layer_violations': violations,
            'recommendations': recommendations,
            'summary': {
                'pattern': pattern_result['detected_pattern'],
                'confidence': pattern_result['confidence'],
                'total_issues': len(code_result['issues']) + len(violations),
                'code_issues': len(code_result['issues']),
                'layer_violations': len(violations),
            },
        }

    def _generate_recommendations(self, pattern_result: Dict, code_result: Dict,
                                  violations: List[Dict]) -> List[str]:
        """Generate actionable recommendations from the three analysis results."""
        recommendations = []

        # Pattern recommendations
        pattern = pattern_result['detected_pattern']
        confidence = pattern_result['confidence']

        if pattern == 'unstructured' or confidence < 30:
            recommendations.append(
                "Consider adopting a clear architectural pattern (Layered, Clean, or Hexagonal) "
                "to improve code organization and maintainability"
            )

        # Layer violation recommendations
        if violations:
            recommendations.append(
                f"Fix {len(violations)} layer violation(s) to maintain proper separation of concerns. "
                "Dependencies should flow from presentation → application → domain ← infrastructure"
            )

        # God class recommendations
        god_classes = code_result['metrics'].get('god_classes', [])
        if god_classes:
            recommendations.append(
                f"Split {len(god_classes)} large class(es) into smaller, focused classes "
                "following the Single Responsibility Principle"
            )

        # Large file recommendations
        large_files = code_result['metrics'].get('large_files', [])
        if large_files:
            recommendations.append(
                f"Consider refactoring {len(large_files)} large file(s) into smaller modules"
            )

        # Missing layer recommendations
        if pattern in ['layered', 'clean', 'hexagonal']:
            assigned_layers = set(pattern_result['layer_assignments'].values())
            expected_layers = {'presentation', 'application', 'domain', 'infrastructure'}
            # Sorted so the message is identical from run to run.
            missing = sorted(expected_layers - assigned_layers)
            if missing:
                recommendations.append(
                    f"Consider adding missing architectural layer(s): {', '.join(missing)}"
                )

        return recommendations


def print_human_report(report: Dict):
    """Print the report produced by ProjectArchitect.analyze() to stdout."""
    rule = "=" * 60
    print("\n" + rule)
    print("ARCHITECTURE ASSESSMENT")
    print(rule)
    print(f"\nProject: {report['project_path']}")

    arch = report['architecture']
    print("\n--- Architecture Pattern ---")
    print(f"Detected: {arch['detected_pattern'].replace('_', ' ').title()}")
    print(f"Confidence: {arch['confidence']}%")

    if arch['layer_assignments']:
        print("\nLayer Assignments:")
        for dir_name, layer in sorted(arch['layer_assignments'].items()):
            status = "OK" if layer != 'unknown' else "?"
            print(f" {status} {dir_name:20} -> {layer}")

    summary = report['summary']
    print("\n--- Summary ---")
    print(f"Total issues: {summary['total_issues']}")
    print(f" Code issues: {summary['code_issues']}")
    print(f" Layer violations: {summary['layer_violations']}")

    code_issues = report['code_quality']['issues']
    if code_issues:
        print("\n--- Code Issues ---")
        for issue in code_issues[:10]:  # keep the console report short
            severity = issue['severity'].upper()
            print(f" [{severity}] {issue.get('file', 'N/A')}")
            print(f" {issue['message']}")
            if 'suggestion' in issue:
                print(f" Suggestion: {issue['suggestion']}")

    if report['layer_violations']:
        print("\n--- Layer Violations ---")
        for violation in report['layer_violations'][:5]:
            print(f" {violation['file']}")
            print(f" {violation['message']}")

    if report['recommendations']:
        print("\n--- Recommendations ---")
        for number, rec in enumerate(report['recommendations'], 1):
            print(f" {number}. {rec}")

    metrics = report['code_quality']['metrics']
    print("\n--- Metrics ---")
    print(f" Total lines: {metrics.get('total_lines', 'N/A')}")
    print(f" File count: {metrics.get('file_count', 'N/A')}")
    print(f" Avg lines/file: {metrics.get('avg_file_lines', 'N/A')}")

    print("\n" + rule)


def main():
    """Command-line entry point: parse arguments, analyze, print/save report."""
    parser = argparse.ArgumentParser(
        description='Analyze project architecture and detect patterns and issues',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  %(prog)s ./my-project
  %(prog)s ./my-project --verbose
  %(prog)s ./my-project --output json
  %(prog)s ./my-project --check layers

Detects:
  - Architectural patterns (Layered, MVC, Hexagonal, Clean, Microservices)
  - Code organization issues (large files, god classes)
  - Layer violations (incorrect dependencies between layers)
  - Missing architectural components
'''
    )

    parser.add_argument(
        'project_path',
        help='Path to the project directory'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['human', 'json'],
        default='human',
        help='Output format (default: human)'
    )
    parser.add_argument(
        '--check',
        choices=['all', 'pattern', 'layers', 'code'],
        default='all',
        help='What to check (default: all)'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    parser.add_argument(
        '--save', '-s',
        help='Save report to file'
    )

    args = parser.parse_args()

    project_path = Path(args.project_path).resolve()

    if not project_path.exists():
        print(f"Error: Project path does not exist: {project_path}", file=sys.stderr)
        sys.exit(1)

    if not project_path.is_dir():
        print(f"Error: Project path is not a directory: {project_path}", file=sys.stderr)
        sys.exit(1)

    # Run analysis
    architect = ProjectArchitect(project_path, verbose=args.verbose)
    report = architect.analyze()

    # Handle specific checks; each prints a short result and exits with a
    # status usable in CI (non-zero when problems were found).
    if args.check == 'pattern':
        arch = report['architecture']
        print(f"Pattern: {arch['detected_pattern']} (confidence: {arch['confidence']}%)")
        sys.exit(0)

    elif args.check == 'layers':
        violations = report['layer_violations']
        if violations:
            print(f"Found {len(violations)} layer violation(s):")
            for v in violations:
                print(f" {v['file']}: {v['message']}")
            sys.exit(1)
        else:
            print("No layer violations found.")
            sys.exit(0)

    elif args.check == 'code':
        issues = report['code_quality']['issues']
        if issues:
            print(f"Found {len(issues)} code issue(s):")
            for issue in issues[:10]:
                print(f" [{issue['severity'].upper()}] {issue['message']}")
            sys.exit(1 if any(i['severity'] == 'warning' for i in issues) else 0)
        else:
            print("No code issues found.")
            sys.exit(0)

    # Output report
    if args.output == 'json':
        output = json.dumps(report, indent=2)
        if args.save:
            Path(args.save).write_text(output, encoding='utf-8')
            print(f"Report saved to {args.save}")
        else:
            print(output)
    else:
        print_human_report(report)
        if args.save:
            Path(args.save).write_text(json.dumps(report, indent=2), encoding='utf-8')
            print(f"\nJSON report saved to {args.save}")


if __name__ == '__main__':
    main()