- Remove trailing whitespace from lines 1510, 1519, 1522, 1527, 1535, 1548, 1552, 1563, 1568, 1578 - Fixes remaining ruff W293 linting errors Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1819 lines
64 KiB
Python
1819 lines
64 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Code Analyzer for GitHub Repositories
|
|
|
|
Extracts code signatures at configurable depth levels:
|
|
- surface: File tree only (existing behavior)
|
|
- deep: Parse files for signatures, parameters, types
|
|
- full: Complete AST analysis (future enhancement)
|
|
|
|
Supports 9 programming languages with language-specific parsers:
|
|
- Python (AST-based, production quality)
|
|
- JavaScript/TypeScript (regex-based)
|
|
- C/C++ (regex-based)
|
|
- C# (regex-based, inspired by Microsoft C# spec)
|
|
- Go (regex-based, Go language spec)
|
|
- Rust (regex-based, Rust reference)
|
|
- Java (regex-based, Oracle Java spec)
|
|
- Ruby (regex-based, Ruby documentation)
|
|
- PHP (regex-based, PHP reference)
|
|
|
|
Note: Regex-based parsers are simplified implementations. For production use,
|
|
consider using dedicated parsers (tree-sitter, language-specific AST libraries).
|
|
"""
|
|
|
|
import ast
import contextlib
import logging
import re
from dataclasses import asdict, dataclass, field
from typing import Any
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class Parameter:
|
|
"""Represents a function parameter."""
|
|
|
|
name: str
|
|
type_hint: str | None = None
|
|
default: str | None = None
|
|
|
|
|
|
@dataclass
class FunctionSignature:
    """Represents a function/method signature."""

    name: str  # function/method name
    parameters: list[Parameter]  # declared parameters, in order
    return_type: str | None = None  # return annotation text, if any
    docstring: str | None = None  # docstring, if any
    line_number: int | None = None  # 1-based line of the definition
    is_async: bool = False  # declared with `async def`
    is_method: bool = False  # defined inside a class body
    # A default factory gives every instance its own fresh list. The old
    # `decorators: list[str] = None` both mistyped the field and relied on
    # __post_init__ to patch the None in.
    decorators: list[str] = field(default_factory=list)

    def __post_init__(self):
        # Kept for backward compatibility: callers may still pass
        # decorators=None explicitly and get an empty list.
        if self.decorators is None:
            self.decorators = []
|
|
|
|
|
|
@dataclass
|
|
class ClassSignature:
|
|
"""Represents a class signature."""
|
|
|
|
name: str
|
|
base_classes: list[str]
|
|
methods: list[FunctionSignature]
|
|
docstring: str | None = None
|
|
line_number: int | None = None
|
|
|
|
|
|
class CodeAnalyzer:
|
|
"""
|
|
Analyzes code at different depth levels.
|
|
"""
|
|
|
|
def __init__(self, depth: str = "surface"):
|
|
"""
|
|
Initialize code analyzer.
|
|
|
|
Args:
|
|
depth: Analysis depth ('surface', 'deep', 'full')
|
|
"""
|
|
self.depth = depth
|
|
|
|
def analyze_file(self, file_path: str, content: str, language: str) -> dict[str, Any]:
    """
    Analyze a single file based on depth level.

    Args:
        file_path: Path to file in repository
        content: File content as string
        language: Programming language (Python, JavaScript, C#, Go, Rust, Java, Ruby, PHP, etc.)

    Returns:
        Dict containing extracted signatures
    """
    if self.depth == "surface":
        return {}  # Surface level doesn't analyze individual files

    logger.debug(f"Analyzing {file_path} (language: {language}, depth: {self.depth})")

    # Dispatch table (language -> parser method name). JavaScript/TypeScript
    # share one parser, as do C and C++. The method is resolved lazily via
    # getattr so a missing parser surfaces inside the try, exactly like the
    # old if/elif chain did.
    parsers = {
        "Python": "_analyze_python",
        # GDScript has Godot-specific syntax, use dedicated parser
        "GDScript": "_analyze_gdscript",
        "GodotScene": "_analyze_godot_scene",
        "GodotResource": "_analyze_godot_resource",
        "GodotShader": "_analyze_godot_shader",
        "JavaScript": "_analyze_javascript",
        "TypeScript": "_analyze_javascript",
        "C": "_analyze_cpp",
        "C++": "_analyze_cpp",
        "C#": "_analyze_csharp",
        "Go": "_analyze_go",
        "Rust": "_analyze_rust",
        "Java": "_analyze_java",
        "Ruby": "_analyze_ruby",
        "PHP": "_analyze_php",
    }

    try:
        parser_name = parsers.get(language)
        if parser_name is None:
            logger.debug(f"No analyzer for language: {language}")
            return {}
        return getattr(self, parser_name)(content, file_path)
    except Exception as e:
        logger.warning(f"Error analyzing {file_path}: {e}")
        return {}
|
|
|
|
def _analyze_python(self, content: str, file_path: str) -> dict[str, Any]:
    """Analyze Python file using AST.

    Args:
        file_path: Path used only for log messages.
        content: Python source text.

    Returns:
        {"classes": [...], "functions": [...], "comments": [...]} with
        dataclass signatures converted to dicts, or {} on a syntax error.
    """
    try:
        tree = ast.parse(content)
    except SyntaxError as e:
        logger.debug(f"Syntax error in {file_path}: {e}")
        return {}

    classes = []
    functions = []

    # Precompute which nodes sit directly in a class body so the per-function
    # method check is O(1). The previous version re-walked the whole tree for
    # every function node (quadratic) to answer the same question.
    class_member_ids = set()
    for parent in ast.walk(tree):
        if isinstance(parent, ast.ClassDef):
            class_member_ids.update(id(child) for child in parent.body)

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            class_sig = self._extract_python_class(node)
            classes.append(asdict(class_sig))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Only functions that are not direct members of a class body are
            # recorded here; methods are collected via their class.
            if id(node) not in class_member_ids:
                func_sig = self._extract_python_function(node)
                functions.append(asdict(func_sig))

    # Extract comments
    comments = self._extract_python_comments(content)

    return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _extract_python_class(self, node: ast.ClassDef) -> ClassSignature:
|
|
"""Extract class signature from AST node."""
|
|
# Extract base classes
|
|
bases = []
|
|
for base in node.bases:
|
|
if isinstance(base, ast.Name):
|
|
bases.append(base.id)
|
|
elif isinstance(base, ast.Attribute):
|
|
bases.append(
|
|
f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
|
|
)
|
|
|
|
# Extract methods
|
|
methods = []
|
|
for item in node.body:
|
|
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
method_sig = self._extract_python_function(item, is_method=True)
|
|
methods.append(method_sig)
|
|
|
|
# Extract docstring
|
|
docstring = ast.get_docstring(node)
|
|
|
|
return ClassSignature(
|
|
name=node.name,
|
|
base_classes=bases,
|
|
methods=methods,
|
|
docstring=docstring,
|
|
line_number=node.lineno,
|
|
)
|
|
|
|
def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
|
|
"""Extract function signature from AST node."""
|
|
# Extract parameters
|
|
params = []
|
|
for arg in node.args.args:
|
|
param_type = None
|
|
if arg.annotation:
|
|
param_type = ast.unparse(arg.annotation) if hasattr(ast, "unparse") else None
|
|
|
|
params.append(Parameter(name=arg.arg, type_hint=param_type))
|
|
|
|
# Extract defaults
|
|
defaults = node.args.defaults
|
|
if defaults:
|
|
# Defaults are aligned to the end of params
|
|
num_no_default = len(params) - len(defaults)
|
|
for i, default in enumerate(defaults):
|
|
param_idx = num_no_default + i
|
|
if param_idx < len(params):
|
|
try:
|
|
params[param_idx].default = (
|
|
ast.unparse(default) if hasattr(ast, "unparse") else str(default)
|
|
)
|
|
except Exception:
|
|
params[param_idx].default = "..."
|
|
|
|
# Extract return type
|
|
return_type = None
|
|
if node.returns:
|
|
with contextlib.suppress(Exception):
|
|
return_type = ast.unparse(node.returns) if hasattr(ast, "unparse") else None
|
|
|
|
# Extract decorators
|
|
decorators = []
|
|
for decorator in node.decorator_list:
|
|
try:
|
|
if hasattr(ast, "unparse"):
|
|
decorators.append(ast.unparse(decorator))
|
|
elif isinstance(decorator, ast.Name):
|
|
decorators.append(decorator.id)
|
|
except Exception:
|
|
pass
|
|
|
|
# Extract docstring
|
|
docstring = ast.get_docstring(node)
|
|
|
|
return FunctionSignature(
|
|
name=node.name,
|
|
parameters=params,
|
|
return_type=return_type,
|
|
docstring=docstring,
|
|
line_number=node.lineno,
|
|
is_async=isinstance(node, ast.AsyncFunctionDef),
|
|
is_method=is_method,
|
|
decorators=decorators,
|
|
)
|
|
|
|
def _analyze_javascript(self, content: str, _file_path: str) -> dict[str, Any]:
    """
    Analyze JavaScript/TypeScript file using regex patterns.

    Note: This is a simplified approach. For production, consider using
    a proper JS/TS parser like esprima or ts-morph.

    Returns:
        {"classes": [...], "functions": [...], "comments": [...]}.
    """
    classes = []
    functions = []

    # Extract class definitions
    class_pattern = r"class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{"
    for match in re.finditer(class_pattern, content):
        class_name = match.group(1)
        base_class = match.group(2) if match.group(2) else None

        # Find the end of the class body by counting brace nesting, the same
        # way the C#/Java analyzers in this file do. (Bug fix: previously the
        # FIRST '}' after the header was used, which is the closing brace of
        # the first method, so only the leading slice of the class body was
        # scanned for methods.)
        class_block_start = match.end()
        brace_count = 1
        class_block_end = class_block_start
        for i, char in enumerate(content[class_block_start:], class_block_start):
            if char == "{":
                brace_count += 1
            elif char == "}":
                brace_count -= 1
                if brace_count == 0:
                    class_block_end = i
                    break

        if class_block_end > class_block_start:
            class_body = content[class_block_start:class_block_end]
            methods = self._extract_js_methods(class_body)
        else:
            methods = []

        classes.append(
            {
                "name": class_name,
                "base_classes": [base_class] if base_class else [],
                "methods": methods,
                "docstring": None,
                "line_number": content[: match.start()].count("\n") + 1,
            }
        )

    # Extract top-level functions
    func_pattern = r"(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)"
    for match in re.finditer(func_pattern, content):
        func_name = match.group(1)
        params_str = match.group(2)
        is_async = "async" in match.group(0)

        params = self._parse_js_parameters(params_str)

        functions.append(
            {
                "name": func_name,
                "parameters": params,
                "return_type": None,  # JS doesn't have type annotations (unless TS)
                "docstring": None,
                "line_number": content[: match.start()].count("\n") + 1,
                "is_async": is_async,
                "is_method": False,
                "decorators": [],
            }
        )

    # Extract arrow functions assigned to const/let
    arrow_pattern = r"(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*=>"
    for match in re.finditer(arrow_pattern, content):
        func_name = match.group(1)
        params_str = match.group(2)
        is_async = "async" in match.group(0)

        params = self._parse_js_parameters(params_str)

        functions.append(
            {
                "name": func_name,
                "parameters": params,
                "return_type": None,
                "docstring": None,
                "line_number": content[: match.start()].count("\n") + 1,
                "is_async": is_async,
                "is_method": False,
                "decorators": [],
            }
        )

    # Extract comments
    comments = self._extract_js_comments(content)

    return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _extract_js_methods(self, class_body: str) -> list[dict]:
|
|
"""Extract method signatures from class body."""
|
|
methods = []
|
|
|
|
# Match method definitions
|
|
method_pattern = r"(?:async\s+)?(\w+)\s*\(([^)]*)\)"
|
|
for match in re.finditer(method_pattern, class_body):
|
|
method_name = match.group(1)
|
|
params_str = match.group(2)
|
|
is_async = "async" in match.group(0)
|
|
|
|
# Skip constructor keyword detection
|
|
if method_name in ["if", "for", "while", "switch"]:
|
|
continue
|
|
|
|
params = self._parse_js_parameters(params_str)
|
|
|
|
methods.append(
|
|
{
|
|
"name": method_name,
|
|
"parameters": params,
|
|
"return_type": None,
|
|
"docstring": None,
|
|
"line_number": None,
|
|
"is_async": is_async,
|
|
"is_method": True,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
return methods
|
|
|
|
def _parse_js_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse JavaScript parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma (simplified - doesn't handle complex default values)
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Check for default value
|
|
if "=" in param:
|
|
name, default = param.split("=", 1)
|
|
name = name.strip()
|
|
default = default.strip()
|
|
else:
|
|
name = param
|
|
default = None
|
|
|
|
# Check for type annotation (TypeScript)
|
|
type_hint = None
|
|
if ":" in name:
|
|
name, type_hint = name.split(":", 1)
|
|
name = name.strip()
|
|
type_hint = type_hint.strip()
|
|
|
|
params.append({"name": name, "type_hint": type_hint, "default": default})
|
|
|
|
return params
|
|
|
|
def _analyze_cpp(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze C/C++ header file using regex patterns.
|
|
|
|
Note: This is a simplified approach focusing on header files.
|
|
For production, consider using libclang or similar.
|
|
"""
|
|
classes = []
|
|
functions = []
|
|
|
|
# Extract class definitions (simplified - doesn't handle nested classes)
|
|
class_pattern = r"class\s+(\w+)(?:\s*:\s*public\s+(\w+))?\s*\{"
|
|
for match in re.finditer(class_pattern, content):
|
|
class_name = match.group(1)
|
|
base_class = match.group(2) if match.group(2) else None
|
|
|
|
classes.append(
|
|
{
|
|
"name": class_name,
|
|
"base_classes": [base_class] if base_class else [],
|
|
"methods": [], # Simplified - would need to parse class body
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
}
|
|
)
|
|
|
|
# Extract function declarations
|
|
func_pattern = r"(\w+(?:\s*\*|\s*&)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
for match in re.finditer(func_pattern, content):
|
|
return_type = match.group(1).strip()
|
|
func_name = match.group(2)
|
|
params_str = match.group(3)
|
|
|
|
# Skip common keywords
|
|
if func_name in ["if", "for", "while", "switch", "return"]:
|
|
continue
|
|
|
|
params = self._parse_cpp_parameters(params_str)
|
|
|
|
functions.append(
|
|
{
|
|
"name": func_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
"is_async": False,
|
|
"is_method": False,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
# Extract comments
|
|
comments = self._extract_cpp_comments(content)
|
|
|
|
return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _parse_cpp_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse C++ parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip() or params_str.strip() == "void":
|
|
return params
|
|
|
|
# Split by comma (simplified)
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Check for default value
|
|
default = None
|
|
if "=" in param:
|
|
param, default = param.rsplit("=", 1)
|
|
param = param.strip()
|
|
default = default.strip()
|
|
|
|
# Extract type and name (simplified)
|
|
# Format: "type name" or "type* name" or "type& name"
|
|
parts = param.split()
|
|
if len(parts) >= 2:
|
|
param_type = " ".join(parts[:-1])
|
|
param_name = parts[-1]
|
|
else:
|
|
param_type = param
|
|
param_name = "unknown"
|
|
|
|
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
|
|
return params
|
|
|
|
def _extract_python_comments(self, content: str) -> list[dict]:
|
|
"""
|
|
Extract Python comments (# style).
|
|
|
|
Returns list of comment dictionaries with line number, text, and type.
|
|
"""
|
|
comments = []
|
|
|
|
for i, line in enumerate(content.splitlines(), 1):
|
|
stripped = line.strip()
|
|
|
|
# Skip shebang and encoding declarations
|
|
if stripped.startswith("#!") or stripped.startswith("#") and "coding" in stripped:
|
|
continue
|
|
|
|
# Extract regular comments
|
|
if stripped.startswith("#"):
|
|
comment_text = stripped[1:].strip()
|
|
comments.append({"line": i, "text": comment_text, "type": "inline"})
|
|
|
|
return comments
|
|
|
|
def _extract_js_comments(self, content: str) -> list[dict]:
|
|
"""
|
|
Extract JavaScript/TypeScript comments (// and /* */ styles).
|
|
|
|
Returns list of comment dictionaries with line number, text, and type.
|
|
"""
|
|
comments = []
|
|
|
|
# Extract single-line comments (//)
|
|
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
line_num = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
|
|
# Extract multi-line comments (/* */)
|
|
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
start_line = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
|
|
return comments
|
|
|
|
def _extract_cpp_comments(self, content: str) -> list[dict]:
    """
    Extract C/C++ comments (// and /* */ styles, same as JavaScript).

    Returns list of comment dictionaries with line number, text, and type.
    """
    # C and C++ share JavaScript's comment grammar, so delegate wholesale.
    return self._extract_js_comments(content)
|
|
|
|
def _analyze_csharp(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze C# file using regex patterns.
|
|
|
|
Note: This is a simplified regex-based approach. For production use with Unity/ASP.NET,
|
|
consider using tree-sitter-c-sharp or Roslyn via pythonnet for more accurate parsing.
|
|
|
|
Regex patterns inspired by C# language specification:
|
|
https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/
|
|
"""
|
|
classes = []
|
|
functions = []
|
|
|
|
# Extract class definitions
|
|
# Matches: [modifiers] class ClassName [: BaseClass] [, Interface]
|
|
class_pattern = r"(?:public|private|internal|protected)?\s*(?:static|abstract|sealed)?\s*class\s+(\w+)(?:\s*:\s*([\w\s,<>]+))?\s*\{"
|
|
for match in re.finditer(class_pattern, content):
|
|
class_name = match.group(1)
|
|
bases_str = match.group(2) if match.group(2) else ""
|
|
|
|
# Parse base classes and interfaces
|
|
base_classes = []
|
|
if bases_str:
|
|
base_classes = [b.strip() for b in bases_str.split(",")]
|
|
|
|
# Try to extract methods (simplified)
|
|
class_block_start = match.end()
|
|
# Find matching closing brace (simplified - doesn't handle nested classes perfectly)
|
|
brace_count = 1
|
|
class_block_end = class_block_start
|
|
for i, char in enumerate(content[class_block_start:], class_block_start):
|
|
if char == "{":
|
|
brace_count += 1
|
|
elif char == "}":
|
|
brace_count -= 1
|
|
if brace_count == 0:
|
|
class_block_end = i
|
|
break
|
|
|
|
if class_block_end > class_block_start:
|
|
class_body = content[class_block_start:class_block_end]
|
|
methods = self._extract_csharp_methods(class_body)
|
|
else:
|
|
methods = []
|
|
|
|
classes.append(
|
|
{
|
|
"name": class_name,
|
|
"base_classes": base_classes,
|
|
"methods": methods,
|
|
"docstring": None, # Would need to extract XML doc comments
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
}
|
|
)
|
|
|
|
# Extract top-level functions/methods
|
|
# Matches: [modifiers] [async] ReturnType MethodName(params)
|
|
func_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
for match in re.finditer(func_pattern, content):
|
|
return_type = match.group(1).strip()
|
|
func_name = match.group(2)
|
|
params_str = match.group(3)
|
|
is_async = "async" in match.group(0)
|
|
|
|
# Skip common keywords
|
|
if func_name in ["if", "for", "while", "switch", "return", "using", "namespace"]:
|
|
continue
|
|
|
|
params = self._parse_csharp_parameters(params_str)
|
|
|
|
functions.append(
|
|
{
|
|
"name": func_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
"is_async": is_async,
|
|
"is_method": False,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
# Extract comments
|
|
comments = self._extract_csharp_comments(content)
|
|
|
|
return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _extract_csharp_methods(self, class_body: str) -> list[dict]:
|
|
"""Extract C# method signatures from class body."""
|
|
methods = []
|
|
|
|
# Match method definitions
|
|
method_pattern = r"(?:public|private|internal|protected)?\s*(?:static|virtual|override|abstract)?\s*(?:async\s+)?(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
for match in re.finditer(method_pattern, class_body):
|
|
return_type = match.group(1).strip()
|
|
method_name = match.group(2)
|
|
params_str = match.group(3)
|
|
is_async = "async" in match.group(0)
|
|
|
|
# Skip keywords
|
|
if method_name in ["if", "for", "while", "switch", "get", "set"]:
|
|
continue
|
|
|
|
params = self._parse_csharp_parameters(params_str)
|
|
|
|
methods.append(
|
|
{
|
|
"name": method_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": None,
|
|
"is_async": is_async,
|
|
"is_method": True,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
return methods
|
|
|
|
def _parse_csharp_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse C# parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma (simplified)
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Check for default value
|
|
default = None
|
|
if "=" in param:
|
|
param, default = param.split("=", 1)
|
|
param = param.strip()
|
|
default = default.strip()
|
|
|
|
# Parse: [ref/out] Type name
|
|
parts = param.split()
|
|
if len(parts) >= 2:
|
|
# Remove ref/out modifiers
|
|
if parts[0] in ["ref", "out", "in", "params"]:
|
|
parts = parts[1:]
|
|
|
|
if len(parts) >= 2:
|
|
param_type = parts[0]
|
|
param_name = parts[1]
|
|
else:
|
|
param_type = parts[0]
|
|
param_name = "unknown"
|
|
else:
|
|
param_type = None
|
|
param_name = param
|
|
|
|
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
|
|
return params
|
|
|
|
def _extract_csharp_comments(self, content: str) -> list[dict]:
|
|
"""Extract C# comments (// and /* */ and /// XML docs)."""
|
|
comments = []
|
|
|
|
# Single-line comments (//)
|
|
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
line_num = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
# Distinguish XML doc comments (///)
|
|
comment_type = "doc" if match.group(1).startswith("/") else "inline"
|
|
|
|
comments.append(
|
|
{"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
|
|
)
|
|
|
|
# Multi-line comments (/* */)
|
|
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
start_line = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
|
|
return comments
|
|
|
|
def _analyze_go(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Go file using regex patterns.
|
|
|
|
Note: This is a simplified regex-based approach. For production,
|
|
consider using go/parser from the Go standard library via subprocess.
|
|
|
|
Regex patterns based on Go language specification:
|
|
https://go.dev/ref/spec
|
|
"""
|
|
classes = [] # Go doesn't have classes, but we'll extract structs
|
|
functions = []
|
|
|
|
# Extract struct definitions (Go's equivalent of classes)
|
|
struct_pattern = r"type\s+(\w+)\s+struct\s*\{"
|
|
for match in re.finditer(struct_pattern, content):
|
|
struct_name = match.group(1)
|
|
|
|
classes.append(
|
|
{
|
|
"name": struct_name,
|
|
"base_classes": [], # Go uses embedding, not inheritance
|
|
"methods": [], # Methods extracted separately
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
}
|
|
)
|
|
|
|
# Extract function definitions
|
|
# Matches: func [receiver] name(params) [returns]
|
|
func_pattern = r"func\s+(?:\((\w+)\s+\*?(\w+)\)\s+)?(\w+)\s*\(([^)]*)\)(?:\s+\(([^)]+)\)|(?:\s+(\w+(?:\[.*?\])?(?:,\s*\w+)*)))?"
|
|
for match in re.finditer(func_pattern, content):
|
|
_receiver_var = match.group(1)
|
|
receiver_type = match.group(2)
|
|
func_name = match.group(3)
|
|
params_str = match.group(4)
|
|
returns_multi = match.group(5) # Multiple returns in parentheses
|
|
returns_single = match.group(6) # Single return without parentheses
|
|
|
|
# Determine if it's a method (has receiver)
|
|
is_method = bool(receiver_type)
|
|
|
|
# Parse return type
|
|
return_type = None
|
|
if returns_multi:
|
|
return_type = f"({returns_multi})"
|
|
elif returns_single:
|
|
return_type = returns_single
|
|
|
|
params = self._parse_go_parameters(params_str)
|
|
|
|
functions.append(
|
|
{
|
|
"name": func_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
"is_async": False, # Go uses goroutines differently
|
|
"is_method": is_method,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
# Extract comments
|
|
comments = self._extract_go_comments(content)
|
|
|
|
return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _parse_go_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse Go parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Go format: name type or name1, name2 type
|
|
# Simplified parsing
|
|
parts = param.split()
|
|
if len(parts) >= 2:
|
|
# Last part is type
|
|
param_type = parts[-1]
|
|
param_name = " ".join(parts[:-1])
|
|
else:
|
|
param_type = param
|
|
param_name = "unknown"
|
|
|
|
params.append(
|
|
{
|
|
"name": param_name,
|
|
"type_hint": param_type,
|
|
"default": None, # Go doesn't support default parameters
|
|
}
|
|
)
|
|
|
|
return params
|
|
|
|
def _extract_go_comments(self, content: str) -> list[dict]:
    """Extract Go comments (// and /* */ styles)."""
    # Go shares C-style comment syntax, so delegate to the JS extractor.
    return self._extract_js_comments(content)
|
|
|
|
def _analyze_rust(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Rust file using regex patterns.
|
|
|
|
Note: This is a simplified regex-based approach. For production,
|
|
consider using syn crate via subprocess or tree-sitter-rust.
|
|
|
|
Regex patterns based on Rust language reference:
|
|
https://doc.rust-lang.org/reference/
|
|
"""
|
|
classes = [] # Rust uses structs/enums/traits
|
|
functions = []
|
|
|
|
# Extract struct definitions
|
|
struct_pattern = r"(?:pub\s+)?struct\s+(\w+)(?:<[^>]+>)?\s*\{"
|
|
for match in re.finditer(struct_pattern, content):
|
|
struct_name = match.group(1)
|
|
|
|
classes.append(
|
|
{
|
|
"name": struct_name,
|
|
"base_classes": [], # Rust uses traits, not inheritance
|
|
"methods": [],
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
}
|
|
)
|
|
|
|
# Extract function definitions
|
|
# Matches: [pub] [async] [unsafe] [const] fn name<generics>(params) -> ReturnType
|
|
func_pattern = r"(?:pub\s+)?(?:async\s+)?(?:unsafe\s+)?(?:const\s+)?fn\s+(\w+)(?:<[^>]+>)?\s*\(([^)]*)\)(?:\s*->\s*([^{;]+))?"
|
|
for match in re.finditer(func_pattern, content):
|
|
func_name = match.group(1)
|
|
params_str = match.group(2)
|
|
return_type = match.group(3).strip() if match.group(3) else None
|
|
is_async = "async" in match.group(0)
|
|
|
|
params = self._parse_rust_parameters(params_str)
|
|
|
|
functions.append(
|
|
{
|
|
"name": func_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
"is_async": is_async,
|
|
"is_method": False,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
# Extract comments
|
|
comments = self._extract_rust_comments(content)
|
|
|
|
return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _parse_rust_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse Rust parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Rust format: name: type or &self
|
|
if ":" in param:
|
|
name, param_type = param.split(":", 1)
|
|
name = name.strip()
|
|
param_type = param_type.strip()
|
|
else:
|
|
# Handle &self, &mut self, self
|
|
name = param
|
|
param_type = None
|
|
|
|
params.append(
|
|
{
|
|
"name": name,
|
|
"type_hint": param_type,
|
|
"default": None, # Rust doesn't support default parameters
|
|
}
|
|
)
|
|
|
|
return params
|
|
|
|
def _extract_rust_comments(self, content: str) -> list[dict]:
|
|
"""Extract Rust comments (// and /* */ and /// doc comments)."""
|
|
comments = []
|
|
|
|
# Single-line comments (//)
|
|
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
line_num = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
# Distinguish doc comments (/// or //!)
|
|
if comment_text.startswith("/") or comment_text.startswith("!"):
|
|
comment_type = "doc"
|
|
comment_text = comment_text.lstrip("/!").strip()
|
|
else:
|
|
comment_type = "inline"
|
|
|
|
comments.append({"line": line_num, "text": comment_text, "type": comment_type})
|
|
|
|
# Multi-line comments (/* */)
|
|
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
|
start_line = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": start_line, "text": comment_text, "type": "block"})
|
|
|
|
return comments
|
|
|
|
def _analyze_java(self, content: str, _file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Java file using regex patterns.
|
|
|
|
Note: This is a simplified regex-based approach. For production,
|
|
consider using Eclipse JDT or JavaParser library.
|
|
|
|
Regex patterns based on Java language specification:
|
|
https://docs.oracle.com/javase/specs/
|
|
"""
|
|
classes = []
|
|
functions = []
|
|
|
|
# Extract class definitions
|
|
# Matches: [modifiers] class ClassName [extends Base] [implements Interfaces]
|
|
class_pattern = r"(?:public|private|protected)?\s*(?:static|final|abstract)?\s*class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
|
|
for match in re.finditer(class_pattern, content):
|
|
class_name = match.group(1)
|
|
base_class = match.group(2)
|
|
interfaces_str = match.group(3)
|
|
|
|
base_classes = []
|
|
if base_class:
|
|
base_classes.append(base_class)
|
|
if interfaces_str:
|
|
base_classes.extend([i.strip() for i in interfaces_str.split(",")])
|
|
|
|
# Extract methods (simplified)
|
|
class_block_start = match.end()
|
|
brace_count = 1
|
|
class_block_end = class_block_start
|
|
for i, char in enumerate(content[class_block_start:], class_block_start):
|
|
if char == "{":
|
|
brace_count += 1
|
|
elif char == "}":
|
|
brace_count -= 1
|
|
if brace_count == 0:
|
|
class_block_end = i
|
|
break
|
|
|
|
if class_block_end > class_block_start:
|
|
class_body = content[class_block_start:class_block_end]
|
|
methods = self._extract_java_methods(class_body)
|
|
else:
|
|
methods = []
|
|
|
|
classes.append(
|
|
{
|
|
"name": class_name,
|
|
"base_classes": base_classes,
|
|
"methods": methods,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
}
|
|
)
|
|
|
|
# Extract top-level functions (rare in Java, but static methods)
|
|
func_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
|
|
for match in re.finditer(func_pattern, content):
|
|
return_type = match.group(1).strip()
|
|
func_name = match.group(2)
|
|
params_str = match.group(3)
|
|
|
|
# Skip keywords
|
|
if func_name in ["if", "for", "while", "switch", "return", "class", "void"]:
|
|
continue
|
|
|
|
params = self._parse_java_parameters(params_str)
|
|
|
|
functions.append(
|
|
{
|
|
"name": func_name,
|
|
"parameters": params,
|
|
"return_type": return_type,
|
|
"docstring": None,
|
|
"line_number": content[: match.start()].count("\n") + 1,
|
|
"is_async": False,
|
|
"is_method": False,
|
|
"decorators": [],
|
|
}
|
|
)
|
|
|
|
# Extract comments
|
|
comments = self._extract_java_comments(content)
|
|
|
|
return {"classes": classes, "functions": functions, "comments": comments}
|
|
|
|
def _extract_java_methods(self, class_body: str) -> list[dict]:
    """Extract Java method signatures from a class body.

    Uses a simplified regex (modifiers, return type, name, parameter
    list) and therefore produces some false positives; known ones are
    filtered below. Returns one dict per method with the same keys as
    the other language extractors (``name``, ``parameters``,
    ``return_type``, ...). ``line_number`` is always None because only
    the class body, not the whole file, is available here.
    """
    methods = []

    # The loose pattern also matches statements such as
    # ``return compute(x);`` or ``new Foo(bar)`` — there group(1)
    # captures the keyword as a "return type". Filter those out in
    # addition to the control-flow keywords the original name check
    # already skipped.
    keyword_names = {"if", "for", "while", "switch"}
    keyword_return_types = {"return", "new", "throw", "else", "case", "break", "continue", "do"}

    method_pattern = r"(?:public|private|protected)?\s*(?:static|final|synchronized)?\s*(\w+(?:<[\w\s,]+>)?)\s+(\w+)\s*\(([^)]*)\)"
    for match in re.finditer(method_pattern, class_body):
        return_type = match.group(1).strip()
        method_name = match.group(2)
        params_str = match.group(3)

        # Skip keywords misparsed as method names or return types
        if method_name in keyword_names or return_type in keyword_return_types:
            continue

        params = self._parse_java_parameters(params_str)

        methods.append(
            {
                "name": method_name,
                "parameters": params,
                "return_type": return_type,
                "docstring": None,
                "line_number": None,
                "is_async": False,
                "is_method": True,
                "decorators": [],
            }
        )

    return methods
|
|
|
|
def _parse_java_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse Java parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Java format: Type name or final Type name
|
|
parts = param.split()
|
|
if len(parts) >= 2:
|
|
# Remove 'final' if present
|
|
if parts[0] == "final":
|
|
parts = parts[1:]
|
|
|
|
if len(parts) >= 2:
|
|
param_type = parts[0]
|
|
param_name = parts[1]
|
|
else:
|
|
param_type = parts[0]
|
|
param_name = "unknown"
|
|
else:
|
|
param_type = param
|
|
param_name = "unknown"
|
|
|
|
params.append(
|
|
{
|
|
"name": param_name,
|
|
"type_hint": param_type,
|
|
"default": None, # Java doesn't support default parameters
|
|
}
|
|
)
|
|
|
|
return params
|
|
|
|
def _extract_java_comments(self, content: str) -> list[dict]:
|
|
"""Extract Java comments (// and /* */ and /** JavaDoc */)."""
|
|
comments = []
|
|
|
|
# Single-line comments (//)
|
|
for match in re.finditer(r"//(.+)$", content, re.MULTILINE):
|
|
line_num = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
|
|
# Multi-line and JavaDoc comments (/* */ and /** */)
|
|
for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
|
|
start_line = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
# Distinguish JavaDoc (starts with **)
|
|
comment_type = "doc" if match.group(0).startswith("/**") else "block"
|
|
|
|
comments.append({"line": start_line, "text": comment_text, "type": comment_type})
|
|
|
|
return comments
|
|
|
|
def _analyze_ruby(self, content: str, _file_path: str) -> dict[str, Any]:
    """
    Analyze Ruby file using regex patterns.

    Note: This is a simplified regex-based approach. For production,
    consider using parser gem or tree-sitter-ruby.

    Regex patterns based on Ruby language documentation:
    https://ruby-doc.org/
    """
    found_classes: list[dict] = []
    found_functions: list[dict] = []

    # class Name < Base
    for m in re.finditer(r"class\s+(\w+)(?:\s*<\s*(\w+))?\s*$", content, re.MULTILINE):
        parent = m.group(2)
        found_classes.append(
            {
                "name": m.group(1),
                "base_classes": [parent] if parent else [],
                "methods": [],  # Would need to parse class body
                "docstring": None,
                "line_number": content[: m.start()].count("\n") + 1,
            }
        )

    # def name(params) or def self.name(params); trailing ?/! is legal in Ruby
    for m in re.finditer(r"def\s+(?:self\.)?(\w+[?!]?)\s*(?:\(([^)]*)\))?", content):
        found_functions.append(
            {
                "name": m.group(1),
                "parameters": self._parse_ruby_parameters(m.group(2) or ""),
                "return_type": None,  # Ruby has no type annotations (usually)
                "docstring": None,
                "line_number": content[: m.start()].count("\n") + 1,
                "is_async": False,
                "is_method": False,
                "decorators": [],
            }
        )

    return {
        "classes": found_classes,
        "functions": found_functions,
        "comments": self._extract_ruby_comments(content),
    }
|
|
|
|
def _parse_ruby_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse Ruby parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Check for default value
|
|
default = None
|
|
if "=" in param:
|
|
name, default = param.split("=", 1)
|
|
name = name.strip()
|
|
default = default.strip()
|
|
else:
|
|
name = param
|
|
|
|
# Ruby doesn't have type hints in method signatures
|
|
params.append({"name": name, "type_hint": None, "default": default})
|
|
|
|
return params
|
|
|
|
def _extract_ruby_comments(self, content: str) -> list[dict]:
|
|
"""Extract Ruby comments (# style)."""
|
|
comments = []
|
|
|
|
for i, line in enumerate(content.splitlines(), 1):
|
|
stripped = line.strip()
|
|
|
|
# Ruby comments start with #
|
|
if stripped.startswith("#"):
|
|
comment_text = stripped[1:].strip()
|
|
comments.append({"line": i, "text": comment_text, "type": "inline"})
|
|
|
|
return comments
|
|
|
|
def _analyze_php(self, content: str, _file_path: str) -> dict[str, Any]:
    """
    Analyze PHP file using regex patterns.

    Note: This is a simplified regex-based approach. For production,
    consider using nikic/PHP-Parser via subprocess or tree-sitter-php.

    Regex patterns based on PHP language reference:
    https://www.php.net/manual/en/langref.php
    """
    php_classes: list[dict] = []
    php_functions: list[dict] = []

    # class Foo extends Bar implements Baz, Qux {
    class_pattern = r"(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?\s*\{"
    for m in re.finditer(class_pattern, content):
        parents: list[str] = []
        if m.group(2):
            parents.append(m.group(2))
        if m.group(3):
            parents.extend(part.strip() for part in m.group(3).split(","))

        # Walk the braces to find the end of the class body; the regex
        # already consumed the opening '{', so the count starts at 1.
        body_start = m.end()
        depth = 1
        body_end = body_start
        for pos, ch in enumerate(content[body_start:], body_start):
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    body_end = pos
                    break

        methods = (
            self._extract_php_methods(content[body_start:body_end])
            if body_end > body_start
            else []
        )

        php_classes.append(
            {
                "name": m.group(1),
                "base_classes": parents,
                "methods": methods,
                "docstring": None,
                "line_number": content[: m.start()].count("\n") + 1,
            }
        )

    # function name(params): ReturnType
    func_pattern = r"function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
    for m in re.finditer(func_pattern, content):
        php_functions.append(
            {
                "name": m.group(1),
                "parameters": self._parse_php_parameters(m.group(2)),
                "return_type": m.group(3),
                "docstring": None,
                "line_number": content[: m.start()].count("\n") + 1,
                "is_async": False,
                "is_method": False,
                "decorators": [],
            }
        )

    return {
        "classes": php_classes,
        "functions": php_functions,
        "comments": self._extract_php_comments(content),
    }
|
|
|
|
def _extract_php_methods(self, class_body: str) -> list[dict]:
    """Extract PHP method signatures from class body."""
    collected: list[dict] = []

    # [visibility] [static|final] function name(params)[: ReturnType]
    pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
    for m in re.finditer(pattern, class_body):
        collected.append(
            {
                "name": m.group(1),
                "parameters": self._parse_php_parameters(m.group(2)),
                "return_type": m.group(3),
                "docstring": None,
                "line_number": None,
                "is_async": False,
                "is_method": True,
                "decorators": [],
            }
        )

    return collected
|
|
|
|
def _parse_php_parameters(self, params_str: str) -> list[dict]:
|
|
"""Parse PHP parameter string."""
|
|
params = []
|
|
|
|
if not params_str.strip():
|
|
return params
|
|
|
|
# Split by comma
|
|
param_list = [p.strip() for p in params_str.split(",")]
|
|
|
|
for param in param_list:
|
|
if not param:
|
|
continue
|
|
|
|
# Check for default value
|
|
default = None
|
|
if "=" in param:
|
|
param, default = param.split("=", 1)
|
|
param = param.strip()
|
|
default = default.strip()
|
|
|
|
# PHP format: Type $name or just $name
|
|
parts = param.split()
|
|
if len(parts) >= 2:
|
|
param_type = parts[0]
|
|
param_name = parts[1]
|
|
else:
|
|
param_type = None
|
|
param_name = parts[0] if parts else "unknown"
|
|
|
|
# Remove $ from variable name
|
|
if param_name.startswith("$"):
|
|
param_name = param_name[1:]
|
|
|
|
params.append({"name": param_name, "type_hint": param_type, "default": default})
|
|
|
|
return params
|
|
|
|
def _extract_php_comments(self, content: str) -> list[dict]:
|
|
"""Extract PHP comments (// and /* */ and # and /** PHPDoc */)."""
|
|
comments = []
|
|
|
|
# Single-line comments (// and #)
|
|
for match in re.finditer(r"(?://|#)(.+)$", content, re.MULTILINE):
|
|
line_num = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
comments.append({"line": line_num, "text": comment_text, "type": "inline"})
|
|
|
|
# Multi-line and PHPDoc comments (/* */ and /** */)
|
|
for match in re.finditer(r"/\*\*?(.+?)\*/", content, re.DOTALL):
|
|
start_line = content[: match.start()].count("\n") + 1
|
|
comment_text = match.group(1).strip()
|
|
|
|
# Distinguish PHPDoc (starts with **)
|
|
comment_type = "doc" if match.group(0).startswith("/**") else "block"
|
|
|
|
comments.append({"line": start_line, "text": comment_text, "type": comment_type})
|
|
|
|
return comments
|
|
|
|
|
|
def _analyze_godot_scene(self, content: str, file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Godot .tscn scene file.
|
|
|
|
Extracts:
|
|
- Node hierarchy
|
|
- Script attachments
|
|
- External resource dependencies
|
|
- Scene metadata
|
|
"""
|
|
nodes = []
|
|
resources = []
|
|
scripts = []
|
|
|
|
# Extract external resources
|
|
for match in re.finditer(r'\[ext_resource.*?type="(.+?)".*?path="(.+?)".*?id="(.+?)"\]', content):
|
|
res_type, path, res_id = match.groups()
|
|
resources.append({
|
|
"type": res_type,
|
|
"path": path,
|
|
"id": res_id
|
|
})
|
|
|
|
# Track scripts separately
|
|
if res_type == "Script":
|
|
scripts.append({
|
|
"path": path,
|
|
"id": res_id
|
|
})
|
|
|
|
# Extract nodes
|
|
for match in re.finditer(r'\[node name="(.+?)".*?type="(.+?)".*?\]', content):
|
|
node_name, node_type = match.groups()
|
|
|
|
# Check if node has a script attached
|
|
script_match = re.search(rf'\[node name="{re.escape(node_name)}".*?script = ExtResource\("(.+?)"\)', content, re.DOTALL)
|
|
attached_script = script_match.group(1) if script_match else None
|
|
|
|
nodes.append({
|
|
"name": node_name,
|
|
"type": node_type,
|
|
"script": attached_script
|
|
})
|
|
|
|
return {
|
|
"file": file_path,
|
|
"nodes": nodes,
|
|
"scripts": scripts,
|
|
"resources": resources,
|
|
"scene_metadata": {
|
|
"node_count": len(nodes),
|
|
"script_count": len(scripts),
|
|
"resource_count": len(resources)
|
|
}
|
|
}
|
|
|
|
def _analyze_godot_resource(self, content: str, file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Godot .tres resource file.
|
|
|
|
Extracts:
|
|
- Resource type and class
|
|
- Script reference
|
|
- Properties and values
|
|
- External dependencies
|
|
"""
|
|
properties = []
|
|
resources = []
|
|
resource_type = None
|
|
script_class = None
|
|
script_path = None
|
|
|
|
# Extract resource header
|
|
header_match = re.search(r'\[gd_resource type="(.+?)"(?:\s+script_class="(.+?)")?\s+', content)
|
|
if header_match:
|
|
resource_type = header_match.group(1)
|
|
script_class = header_match.group(2)
|
|
|
|
# Extract external resources
|
|
for match in re.finditer(r'\[ext_resource.*?type="(.+?)".*?path="(.+?)".*?id="(.+?)"\]', content):
|
|
res_type, path, res_id = match.groups()
|
|
resources.append({
|
|
"type": res_type,
|
|
"path": path,
|
|
"id": res_id
|
|
})
|
|
|
|
if res_type == "Script":
|
|
script_path = path
|
|
|
|
# Extract properties from [resource] section
|
|
resource_section = re.search(r'\[resource\](.*?)(?:\n\[|$)', content, re.DOTALL)
|
|
if resource_section:
|
|
prop_text = resource_section.group(1)
|
|
|
|
for line in prop_text.strip().split('\n'):
|
|
if '=' in line:
|
|
key, value = line.split('=', 1)
|
|
properties.append({
|
|
"name": key.strip(),
|
|
"value": value.strip()
|
|
})
|
|
|
|
return {
|
|
"file": file_path,
|
|
"resource_type": resource_type,
|
|
"script_class": script_class,
|
|
"script_path": script_path,
|
|
"properties": properties,
|
|
"resources": resources,
|
|
"resource_metadata": {
|
|
"property_count": len(properties),
|
|
"dependency_count": len(resources)
|
|
}
|
|
}
|
|
|
|
def _analyze_godot_shader(self, content: str, file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze Godot .gdshader shader file.
|
|
|
|
Extracts:
|
|
- Shader type (spatial, canvas_item, particles, etc.)
|
|
- Uniforms (parameters)
|
|
- Functions
|
|
- Varying variables
|
|
"""
|
|
uniforms = []
|
|
functions = []
|
|
varyings = []
|
|
shader_type = None
|
|
|
|
# Extract shader type
|
|
type_match = re.search(r'shader_type\s+(\w+)', content)
|
|
if type_match:
|
|
shader_type = type_match.group(1)
|
|
|
|
# Extract uniforms
|
|
for match in re.finditer(r'uniform\s+(\w+)\s+(\w+)(?:\s*:\s*(.+?))?(?:\s*=\s*(.+?))?;', content):
|
|
uniform_type, name, hint, default = match.groups()
|
|
uniforms.append({
|
|
"name": name,
|
|
"type": uniform_type,
|
|
"hint": hint,
|
|
"default": default
|
|
})
|
|
|
|
# Extract varying variables
|
|
for match in re.finditer(r'varying\s+(\w+)\s+(\w+)', content):
|
|
var_type, name = match.groups()
|
|
varyings.append({
|
|
"name": name,
|
|
"type": var_type
|
|
})
|
|
|
|
# Extract functions
|
|
for match in re.finditer(r'void\s+(\w+)\s*\(([^)]*)\)', content):
|
|
func_name, params = match.groups()
|
|
functions.append({
|
|
"name": func_name,
|
|
"parameters": params.strip() if params else ""
|
|
})
|
|
|
|
return {
|
|
"file": file_path,
|
|
"shader_type": shader_type,
|
|
"uniforms": uniforms,
|
|
"varyings": varyings,
|
|
"functions": functions,
|
|
"shader_metadata": {
|
|
"uniform_count": len(uniforms),
|
|
"function_count": len(functions)
|
|
}
|
|
}
|
|
|
|
def _analyze_gdscript(self, content: str, file_path: str) -> dict[str, Any]:
|
|
"""
|
|
Analyze GDScript file using regex (Godot-specific syntax).
|
|
|
|
GDScript has Python-like syntax but with Godot-specific keywords:
|
|
- class_name MyClass extends Node
|
|
- func _ready(): (functions)
|
|
- signal my_signal(param)
|
|
- @export var speed: float = 100.0
|
|
- @onready var sprite = $Sprite2D
|
|
"""
|
|
classes = []
|
|
functions = []
|
|
signals = []
|
|
exports = []
|
|
|
|
# Extract class definition
|
|
class_match = re.search(r'class_name\s+(\w+)(?:\s+extends\s+(\w+))?', content)
|
|
if class_match:
|
|
class_name = class_match.group(1)
|
|
extends = class_match.group(2)
|
|
classes.append({
|
|
"name": class_name,
|
|
"bases": [extends] if extends else [],
|
|
"methods": [],
|
|
"line_number": content[: class_match.start()].count("\n") + 1
|
|
})
|
|
|
|
# Extract functions
|
|
for match in re.finditer(r'func\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*(\w+))?:', content):
|
|
func_name, params, return_type = match.groups()
|
|
|
|
# Parse parameters
|
|
param_list = []
|
|
if params.strip():
|
|
for param in params.split(','):
|
|
param = param.strip()
|
|
if ':' in param:
|
|
# param_name: Type = default
|
|
parts = param.split(':')
|
|
name = parts[0].strip()
|
|
type_and_default = parts[1].strip()
|
|
|
|
param_type = type_and_default.split('=')[0].strip() if '=' in type_and_default else type_and_default
|
|
default = type_and_default.split('=')[1].strip() if '=' in type_and_default else None
|
|
|
|
param_list.append({
|
|
"name": name,
|
|
"type_hint": param_type,
|
|
"default": default
|
|
})
|
|
else:
|
|
param_list.append({
|
|
"name": param,
|
|
"type_hint": None,
|
|
"default": None
|
|
})
|
|
|
|
functions.append({
|
|
"name": func_name,
|
|
"parameters": param_list,
|
|
"return_type": return_type,
|
|
"line_number": content[: match.start()].count("\n") + 1
|
|
})
|
|
|
|
# Extract signals with documentation
|
|
signal_connections = []
|
|
signal_emissions = []
|
|
|
|
for match in re.finditer(r'signal\s+(\w+)(?:\(([^)]*)\))?', content):
|
|
signal_name, params = match.groups()
|
|
line_number = content[: match.start()].count("\n") + 1
|
|
|
|
# Extract documentation comment above signal (## or #)
|
|
doc_comment = None
|
|
lines = content[:match.start()].split('\n')
|
|
if len(lines) >= 2:
|
|
prev_line = lines[-1].strip()
|
|
if prev_line.startswith('##') or prev_line.startswith('#'):
|
|
doc_comment = prev_line.lstrip('#').strip()
|
|
|
|
signals.append({
|
|
"name": signal_name,
|
|
"parameters": params if params else "",
|
|
"line_number": line_number,
|
|
"documentation": doc_comment
|
|
})
|
|
|
|
# Extract signal connections (.connect() calls)
|
|
for match in re.finditer(r'(\w+(?:\.\w+)*)\.connect\(([^)]+)\)', content):
|
|
signal_path, handler = match.groups()
|
|
signal_connections.append({
|
|
"signal": signal_path,
|
|
"handler": handler.strip(),
|
|
"line_number": content[: match.start()].count("\n") + 1
|
|
})
|
|
|
|
# Extract signal emissions (.emit() calls)
|
|
for match in re.finditer(r'(\w+(?:\.\w+)*)\.emit\(([^)]*)\)', content):
|
|
signal_path, args = match.groups()
|
|
signal_emissions.append({
|
|
"signal": signal_path,
|
|
"arguments": args.strip() if args else "",
|
|
"line_number": content[: match.start()].count("\n") + 1
|
|
})
|
|
|
|
# Extract @export variables
|
|
for match in re.finditer(r'@export(?:\(([^)]+)\))?\s+var\s+(\w+)(?:\s*:\s*(\w+))?(?:\s*=\s*(.+?))?(?:\n|$)', content):
|
|
hint, var_name, var_type, default = match.groups()
|
|
exports.append({
|
|
"name": var_name,
|
|
"type": var_type,
|
|
"default": default,
|
|
"export_hint": hint,
|
|
"line_number": content[: match.start()].count("\n") + 1
|
|
})
|
|
|
|
# Detect test framework
|
|
test_framework = None
|
|
test_functions = []
|
|
|
|
# GUT (Godot Unit Test) - extends "res://addons/gut/test.gd" or extends GutTest
|
|
if re.search(r'extends\s+["\']?res://addons/gut/test\.gd["\']?', content) or \
|
|
re.search(r'extends\s+GutTest', content):
|
|
test_framework = "GUT"
|
|
|
|
# Extract test functions (test_* functions)
|
|
for func in functions:
|
|
if func["name"].startswith("test_"):
|
|
test_functions.append(func)
|
|
|
|
# gdUnit4 - @suite class annotation
|
|
elif re.search(r'@suite', content):
|
|
test_framework = "gdUnit4"
|
|
|
|
# Extract test functions (@test annotated or test_* prefix)
|
|
for i, func in enumerate(functions):
|
|
# Check for @test annotation above function
|
|
func_line = func["line_number"]
|
|
lines = content.split('\n')
|
|
if func_line > 1:
|
|
prev_line = lines[func_line - 2].strip()
|
|
if prev_line.startswith('@test'):
|
|
test_functions.append(func)
|
|
elif func["name"].startswith("test_"):
|
|
test_functions.append(func)
|
|
|
|
# WAT (WizAds Test) - less common
|
|
elif re.search(r'extends\s+WAT\.Test', content):
|
|
test_framework = "WAT"
|
|
for func in functions:
|
|
if func["name"].startswith("test_"):
|
|
test_functions.append(func)
|
|
|
|
result = {
|
|
"file": file_path,
|
|
"classes": classes,
|
|
"functions": functions,
|
|
"signals": signals,
|
|
"exports": exports,
|
|
"signal_connections": signal_connections,
|
|
"signal_emissions": signal_emissions,
|
|
}
|
|
|
|
# Add test framework info if detected
|
|
if test_framework:
|
|
result["test_framework"] = test_framework
|
|
result["test_functions"] = test_functions
|
|
|
|
return result
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke-test the analyzer against a small Python snippet.
    sample_source = '''
class Node2D:
    """Base class for 2D nodes."""

    def move_local_x(self, delta: float, snap: bool = False) -> None:
        """Move node along local X axis."""
        pass

    async def tween_position(self, target: tuple, duration: float = 1.0):
        """Animate position to target."""
        pass

def create_sprite(texture: str) -> Node2D:
    """Create a new sprite node."""
    return Node2D()
'''

    analysis = CodeAnalyzer(depth="deep").analyze_file("test.py", sample_source, "Python")

    print("Analysis Result:")
    print(f"Classes: {len(analysis.get('classes', []))}")
    print(f"Functions: {len(analysis.get('functions', []))}")

    if analysis.get("classes"):
        first_cls = analysis["classes"][0]
        print(f"\nClass: {first_cls['name']}")
        print(f"  Methods: {len(first_cls['methods'])}")
        for method in first_cls["methods"]:
            rendered = []
            for p in method["parameters"]:
                piece = f"{p['name']}: {p['type_hint']}"
                if p.get("default"):
                    piece += f" = {p['default']}"
                rendered.append(piece)
            print(f"    {method['name']}({', '.join(rendered)}) -> {method['return_type']}")
|
|