feat(C2.6): Add dependency graph analyzer with NetworkX

- Add NetworkX dependency to pyproject.toml
- Create dependency_analyzer.py with comprehensive functionality
- Support Python, JavaScript/TypeScript, and C++ import extraction
- Build directed graphs using NetworkX DiGraph
- Detect circular dependencies with NetworkX algorithms
- Export graphs in multiple formats (JSON, Mermaid, DOT)
- Add 24 comprehensive tests with 100% pass rate

Features:
- Python: AST-based import extraction (import, from, relative)
- JavaScript/TypeScript: ES6 and CommonJS parsing (import, require)
- C++: #include directive extraction (system and local headers)
- Graph statistics (total files, dependencies, cycles, components)
- Circular dependency detection and reporting
- Multiple export formats for visualization

Architecture:
- DependencyAnalyzer class with NetworkX integration
- DependencyInfo dataclass for tracking import relationships
- FileNode dataclass for graph nodes
- Language-specific extraction methods

Related research:
- NetworkX: Standard Python graph library for analysis
- pydeps: Python-specific analyzer (inspiration)
- madge: JavaScript dependency analyzer (reference)
- dependency-cruiser: Advanced JS/TS analyzer (reference)

Test coverage:
- 5 Python import tests
- 4 JavaScript/TypeScript import tests
- 3 C++ include tests
- 3 graph building tests
- 3 circular dependency detection tests
- 3 export format tests
- 3 edge case tests
This commit is contained in:
yusyus
2026-01-01 23:30:46 +03:00
parent eac1f4ef8e
commit aa6bc363d9
3 changed files with 768 additions and 0 deletions

View File

@@ -0,0 +1,414 @@
#!/usr/bin/env python3
"""
Dependency Graph Analyzer (C2.6)

Analyzes import/require/include statements to build dependency graphs.
Supports Python, JavaScript/TypeScript, and C++.

Features:
- Multi-language import extraction
- Dependency graph construction with NetworkX
- Circular dependency detection
- Graph export (JSON, DOT/GraphViz, Mermaid)

Usage:
    from dependency_analyzer import DependencyAnalyzer

    analyzer = DependencyAnalyzer()
    analyzer.analyze_file('src/main.py', content, 'Python')
    graph = analyzer.build_graph()
    cycles = analyzer.detect_cycles()
"""
import re
import ast
import logging
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional, Any
from dataclasses import dataclass, field
# NetworkX is treated as optional at import time: the flag below records
# whether it could be imported so that callers can report a helpful error
# later instead of failing while this module is being loaded.
try:
    import networkx as nx
except ImportError:
    NETWORKX_AVAILABLE = False
else:
    NETWORKX_AVAILABLE = True

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@dataclass
class DependencyInfo:
    """A single import/require/include statement found in a source file.

    Attributes:
        source_file: Path of the file that contains the statement.
        imported_module: Module, package or header name exactly as written
            in the statement (Python relative imports keep their leading
            dots).
        import_type: Kind of statement that produced the record.
        is_relative: True when the statement refers to something relative
            to the importing file rather than an installed/system module.
        line_number: Line on which the statement appears; defaults to 0
            when no line was recorded.
    """

    source_file: str
    imported_module: str
    import_type: str  # one of: 'import', 'from', 'require', 'include'
    is_relative: bool = False
    line_number: int = 0
@dataclass
class FileNode:
    """A source file participating in the dependency graph.

    Attributes:
        file_path: Path identifying the file; also used as its graph key.
        language: Programming language the file was analyzed as.
        dependencies: Names this file imports. A fresh list is created for
            every instance, so nodes never share state.
        imported_by: Paths of files that depend on this one. Starts empty;
            a fresh list is created for every instance.
    """

    file_path: str
    language: str
    dependencies: List[str] = field(default_factory=list)
    imported_by: List[str] = field(default_factory=list)
class DependencyAnalyzer:
    """
    Multi-language dependency analyzer using NetworkX.

    Analyzes import/require/include statements and builds dependency graphs
    with circular dependency detection.

    Typical flow: call ``analyze_file()`` once per source file, then
    ``build_graph()``, then query (``detect_cycles``, ``get_statistics``) or
    export (``export_json``, ``export_mermaid``, ``export_dot``).

    Attributes:
        graph: Directed graph; an edge ``A -> B`` means file A depends on B.
        file_dependencies: Raw DependencyInfo records per analyzed file path.
        file_nodes: FileNode per analyzed file path.

    Note:
        Annotations that mention ``nx`` or sibling classes are written as
        strings so that defining this class never evaluates them. Otherwise
        a missing NetworkX would surface as a NameError at import time
        instead of the explanatory ImportError raised by ``__init__``.
    """

    # Suffixes tried, in order, when matching an import name to an analyzed file.
    _RESOLVE_SUFFIXES = ('', '.py', '.js', '.ts', '.h', '.cpp')

    # ES6: import x from 'm' / import { x } from 'm' / import * as x from 'm' / import 'm'
    _JS_IMPORT_RE = re.compile(
        r"\bimport\s+(?:[\w\s{},*$]+\s+from\s+)?['\"]([^'\"]+)['\"]"
    )
    # CommonJS: require('m'), optionally with whitespace inside the parentheses.
    _JS_REQUIRE_RE = re.compile(r"\brequire\s*\(\s*['\"]([^'\"]+)['\"]\s*\)")
    # C/C++: group 1 = <system header>, group 2 = "local header". The space
    # after '#' and after 'include' is optional, and delimiters must match.
    _CPP_INCLUDE_RE = re.compile(r'#[ \t]*include[ \t]*(?:<([^>\n]+)>|"([^"\n]+)")')
    # Python-style relative module: leading dots plus an optional dotted name.
    _PY_RELATIVE_RE = re.compile(r'(\.+)([A-Za-z_][\w.]*)?')
    # Labels made only of these characters are emitted unquoted in Mermaid.
    _MERMAID_SAFE_LABEL_RE = re.compile(r'[\w.\- ]+')

    def __init__(self):
        """
        Initialize dependency analyzer.

        Raises:
            ImportError: If NetworkX is not installed.
        """
        if not NETWORKX_AVAILABLE:
            raise ImportError(
                "NetworkX is required for dependency analysis. "
                "Install with: pip install networkx"
            )
        self.graph = nx.DiGraph()  # Directed graph for dependencies
        self.file_dependencies: Dict[str, List[DependencyInfo]] = {}
        self.file_nodes: Dict[str, FileNode] = {}

    def analyze_file(self, file_path: str, content: str, language: str) -> "List[DependencyInfo]":
        """
        Extract dependencies from a source file and register the file.

        Analyzing the same ``file_path`` again replaces its earlier results.

        Args:
            file_path: Path to source file (used as the graph node key)
            content: File content
            language: Programming language (Python, JavaScript, TypeScript, C++)

        Returns:
            List of DependencyInfo objects (empty for unsupported languages)
        """
        extractors = {
            'Python': self._extract_python_imports,
            'JavaScript': self._extract_js_imports,
            'TypeScript': self._extract_js_imports,
            'C++': self._extract_cpp_includes,
        }
        extractor = extractors.get(language)
        if extractor is None:
            logger.warning(f"Unsupported language: {language}")
            deps = []
        else:
            deps = extractor(content, file_path)

        self.file_dependencies[file_path] = deps
        # The file is registered even when nothing was extracted, so that it
        # still appears as a node in the graph.
        self.file_nodes[file_path] = FileNode(
            file_path=file_path,
            language=language,
            dependencies=[dep.imported_module for dep in deps],
        )
        return deps

    @staticmethod
    def _line_number(content: str, offset: int) -> int:
        """Return the 1-based line number of character ``offset`` in ``content``."""
        return content.count('\n', 0, offset) + 1

    def _extract_python_imports(self, content: str, file_path: str) -> "List[DependencyInfo]":
        """
        Extract Python import statements using AST.

        Handles:
        - import module
        - import module as alias
        - from module import name
        - from . import relative

        Relative imports keep their leading dots in ``imported_module``
        (for example ``from ..pkg import x`` is recorded as ``..pkg``).

        Args:
            content: Python source code
            file_path: Path recorded as the source of each dependency

        Returns:
            List of DependencyInfo objects; empty if the source cannot be parsed
        """
        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError covers source containing NUL bytes on interpreters
            # that do not report that case as a SyntaxError.
            logger.warning(f"Syntax error in {file_path}, skipping import extraction")
            return []

        deps = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                deps.extend(
                    DependencyInfo(
                        source_file=file_path,
                        imported_module=alias.name,
                        import_type='import',
                        is_relative=False,
                        line_number=node.lineno,
                    )
                    for alias in node.names
                )
            elif isinstance(node, ast.ImportFrom):
                level = node.level or 0
                deps.append(DependencyInfo(
                    source_file=file_path,
                    # "from . import x" has no module name, only dots.
                    imported_module='.' * level + (node.module or ''),
                    import_type='from',
                    is_relative=level > 0,
                    line_number=node.lineno,
                ))
        return deps

    def _extract_js_imports(self, content: str, file_path: str) -> "List[DependencyInfo]":
        """
        Extract JavaScript/TypeScript import statements.

        Handles:
        - import x from 'module'
        - import { x } from 'module'
        - import * as x from 'module'
        - const x = require('module')
        - require('module')

        Matching is regex-based, so statements inside comments or string
        literals are reported as well. All ES6 imports are listed before
        all CommonJS requires.

        Args:
            content: JavaScript/TypeScript source code
            file_path: Path recorded as the source of each dependency

        Returns:
            List of DependencyInfo objects
        """
        deps = []
        for pattern, import_type in (
            (self._JS_IMPORT_RE, 'import'),
            (self._JS_REQUIRE_RE, 'require'),
        ):
            for match in pattern.finditer(content):
                module = match.group(1)
                deps.append(DependencyInfo(
                    source_file=file_path,
                    imported_module=module,
                    import_type=import_type,
                    # Paths starting with '.' or '/' refer to files, not packages.
                    is_relative=module.startswith(('.', '/')),
                    line_number=self._line_number(content, match.start()),
                ))
        return deps

    def _extract_cpp_includes(self, content: str, file_path: str) -> "List[DependencyInfo]":
        """
        Extract C++ #include directives.

        Handles:
        - #include "local/header.h"
        - #include <system/header.h>
        - the same forms without a space (``#include<vector>``) or with
          whitespace after the ``#``

        Args:
            content: C++ source code
            file_path: Path recorded as the source of each dependency

        Returns:
            List of DependencyInfo objects
        """
        deps = []
        for match in self._CPP_INCLUDE_RE.finditer(content):
            system_header, local_header = match.groups()
            deps.append(DependencyInfo(
                source_file=file_path,
                imported_module=local_header if local_header is not None else system_header,
                import_type='include',
                # Headers with "" are usually local, <> are system headers
                is_relative=local_header is not None,
                line_number=self._line_number(content, match.start()),
            ))
        return deps

    def build_graph(self) -> "nx.DiGraph":
        """
        Build dependency graph from analyzed files.

        The graph is rebuilt from scratch on every call, and each node's
        ``imported_by`` list is reset first, so calling this repeatedly does
        not accumulate duplicate entries. Imports that do not resolve to an
        analyzed file (third-party or system modules) produce no edge.

        Returns:
            NetworkX DiGraph with file dependencies
        """
        self.graph.clear()

        # Add all file nodes
        for file_path, node in self.file_nodes.items():
            self.graph.add_node(file_path, language=node.language)
            node.imported_by = []

        # Add dependency edges
        for file_path, deps in self.file_dependencies.items():
            for dep in deps:
                target = self._resolve_import(file_path, dep.imported_module, dep.is_relative)
                if not target or target not in self.file_nodes:
                    continue
                # If a file imports the same target more than once, the edge
                # keeps the attributes of the last such import.
                self.graph.add_edge(
                    file_path,
                    target,
                    import_type=dep.import_type,
                    line_number=dep.line_number,
                )
                importers = self.file_nodes[target].imported_by
                if file_path not in importers:
                    importers.append(file_path)
        return self.graph

    def _resolve_import(self, source_file: str, imported_module: str, is_relative: bool) -> Optional[str]:
        """
        Resolve import statement to an analyzed file path.

        Resolution is heuristic and happens in two stages:

        1. The import name itself, bare and then with each known suffix
           (.py, .js, .ts, .h, .cpp), is looked up among analyzed files.
        2. Path-based candidates are tried: relative imports are resolved
           against the directory of ``source_file``, and dotted Python
           module names are mapped to paths (``pkg.mod`` -> ``pkg/mod``).
           Package entry points (``__init__``, ``index``) are also tried.
           This stage never resolves a file to itself.

        Stage 2 assumes analyzed paths are normalized and '/'-separated.
        Language-specific search paths (node_modules, sys.path, include
        directories) are not consulted.

        Args:
            source_file: Path of the importing file
            imported_module: Module name as written in the import statement
            is_relative: Whether the import is relative to ``source_file``

        Returns:
            Matching key of ``self.file_nodes``, or None if nothing matches
        """
        for suffix in self._RESOLVE_SUFFIXES:
            candidate = imported_module + suffix
            if candidate in self.file_nodes:
                return candidate

        for base in self._candidate_bases(source_file, imported_module, is_relative):
            for suffix in self._RESOLVE_SUFFIXES:
                candidate = base + suffix
                if candidate != source_file and candidate in self.file_nodes:
                    return candidate
        return None

    def _candidate_bases(self, source_file: str, imported_module: str, is_relative: bool) -> List[str]:
        """Return extension-less paths an import may refer to, most specific first."""
        import posixpath

        source_dir = posixpath.dirname(source_file)
        targets = []
        if is_relative:
            py_relative = self._PY_RELATIVE_RE.fullmatch(imported_module)
            if py_relative:
                # N leading dots: start at the importing file's directory and
                # climb N-1 levels, then follow the dotted name downwards.
                climb = ['..'] * (len(py_relative.group(1)) - 1)
                parts = (py_relative.group(2) or '').split('.')
                targets.append(posixpath.join(source_dir, *climb, *parts))
            else:
                # './utils', '../lib/x' or a quoted C++ header next to the source.
                targets.append(posixpath.join(source_dir, imported_module))
        elif '/' not in imported_module:
            source_node = self.file_nodes.get(source_file)
            if getattr(source_node, 'language', None) == 'Python':
                targets.append(imported_module.replace('.', '/'))

        bases = []
        for target in targets:
            # '' = the module file itself; the others are package entry points.
            for tail in ('', '__init__', 'index'):
                base = posixpath.normpath(posixpath.join(target, tail))
                if base not in bases:
                    bases.append(base)
        return bases

    def detect_cycles(self) -> List[List[str]]:
        """
        Detect circular dependencies in the graph.

        Every cycle found is also logged as a warning. Detection is
        best-effort: if it fails, the error is logged and an empty list is
        returned.

        Returns:
            List of cycles, where each cycle is a list of file paths
        """
        try:
            cycles = list(nx.simple_cycles(self.graph))
            if cycles:
                logger.warning(f"Found {len(cycles)} circular dependencies")
                for cycle in cycles:
                    logger.warning(f" Cycle: {' -> '.join(cycle)} -> {cycle[0]}")
            return cycles
        except Exception as e:
            logger.error(f"Error detecting cycles: {e}")
            return []

    def get_strongly_connected_components(self) -> List[Set[str]]:
        """
        Get strongly connected components (groups of mutually dependent files).

        Returns:
            List of sets, each containing file paths in a component
        """
        return list(nx.strongly_connected_components(self.graph))

    def export_dot(self, output_path: str) -> None:
        """
        Export graph as GraphViz DOT format.

        If pydot is not installed, a warning is logged and nothing is written.

        Args:
            output_path: Path to save .dot file
        """
        try:
            from networkx.drawing.nx_pydot import write_dot
            write_dot(self.graph, output_path)
            logger.info(f"Exported graph to DOT format: {output_path}")
        except ImportError:
            logger.warning("pydot not installed - cannot export to DOT format")
            logger.warning("Install with: pip install pydot")

    def export_json(self) -> Dict[str, Any]:
        """
        Export graph as JSON structure.

        Returns:
            Dictionary with a 'nodes' list (file, language) and an 'edges'
            list (source, target, import_type, line_number)
        """
        nodes = [
            {'file': node, 'language': data.get('language', 'Unknown')}
            for node, data in self.graph.nodes(data=True)
        ]
        edges = [
            {
                'source': source,
                'target': target,
                'import_type': data.get('import_type', 'unknown'),
                'line_number': data.get('line_number', 0),
            }
            for source, target, data in self.graph.edges(data=True)
        ]
        return {'nodes': nodes, 'edges': edges}

    @classmethod
    def _mermaid_label(cls, label: str) -> str:
        """Return ``label`` as Mermaid node text, quoting it when it needs escaping."""
        if cls._MERMAID_SAFE_LABEL_RE.fullmatch(label):
            return label
        # Brackets, parentheses etc. would terminate or corrupt the node
        # definition, so wrap the text in quotes and encode inner quotes.
        return '"' + label.replace('"', '#quot;') + '"'

    def export_mermaid(self) -> str:
        """
        Export graph as Mermaid diagram format.

        Nodes are labelled with the file name only (not the full path).
        Labels containing characters that are special in Mermaid, such as
        ``[id].tsx``, are quoted; ordinary file names are emitted unchanged.

        Returns:
            Mermaid diagram as string
        """
        lines = ['graph TD']

        # Short synthetic ids keep the diagram valid for any file path.
        node_ids = {}
        for index, node in enumerate(self.graph.nodes()):
            node_id = f"N{index}"
            node_ids[node] = node_id
            label = self._mermaid_label(Path(node).name)
            lines.append(f" {node_id}[{label}]")

        # Add edges
        for source, target in self.graph.edges():
            lines.append(f" {node_ids[source]} --> {node_ids[target]}")
        return '\n'.join(lines)

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get graph statistics.

        Computing the statistics runs cycle detection, which logs a warning
        for every cycle found.

        Returns:
            Dictionary with file/dependency counts, number of cycles and
            strongly connected components, average dependencies per file
            (0 for an empty graph), and counts of files with no outgoing
            or no incoming dependencies
        """
        node_count = self.graph.number_of_nodes()
        edge_count = self.graph.number_of_edges()
        return {
            'total_files': node_count,
            'total_dependencies': edge_count,
            'circular_dependencies': len(self.detect_cycles()),
            'strongly_connected_components': len(self.get_strongly_connected_components()),
            'avg_dependencies_per_file': edge_count / node_count if node_count > 0 else 0,
            'files_with_no_dependencies': sum(
                1 for node in self.graph.nodes() if self.graph.out_degree(node) == 0
            ),
            'files_not_imported': sum(
                1 for node in self.graph.nodes() if self.graph.in_degree(node) == 0
            ),
        }