feat: Add 6 new languages to codebase analysis system (C#, Go, Rust, Java, Ruby, PHP)

Expands language support from 3 to 9 languages across entire codebase scraping system.

**New Languages Added:**
- C# (Unity/.NET support) - classes, methods, properties, async/await, XML docs
- Go - structs, functions, methods with receivers, multiple return values
- Rust - structs, functions, async functions, impl blocks
- Java - classes, methods, inheritance, interfaces, generics
- Ruby - classes, methods, inheritance, predicate methods
- PHP - classes, methods, namespaces, inheritance

**Code Analysis (code_analyzer.py):**
- Added 6 new language analyzers (~1000 lines)
- Regex-based parsers inspired by official language specs
- Extract classes, functions, and signatures; detect async functions
- Comprehensive comment extraction for all languages

**Dependency Analysis (dependency_analyzer.py):**
- Added 6 new import extractors (~300 lines)
- C#: using statements, static using, aliases
- Go: import blocks, aliases
- Rust: use statements, brace-grouped imports (e.g. `use std::{io, fs};`), crate/super paths
- Java: import statements, static imports, wildcards
- Ruby: require, require_relative, load
- PHP: require/include, namespace use

**File Extensions (codebase_scraper.py):**
- Added mappings: .cs, .go, .rs, .java, .rb, .php

**Test Coverage:**
- Added 24 new tests for 6 languages (4 tests each)
- Added 19 dependency analyzer tests
- Added 6 language detection tests
- Total: 118 tests, 100% passing

**Credits:**
- Regex patterns based on official language specifications:
  - Microsoft C# Language Specification
  - Go Language Specification
  - Rust Language Reference
  - Oracle Java Language Specification
  - Ruby Documentation
  - PHP Language Reference
- NetworkX for graph algorithms

**Issues Resolved:**
- Closes #166 (C# support request)
- Closes #140 (E1.7 MCP tool scrape_codebase)

**Test Results:**
- test_code_analyzer.py: 54 tests passing
- test_dependency_analyzer.py: 43 tests passing
- test_codebase_scraper.py: 21 tests passing
- Total execution: ~0.41s

🚀 Generated with Claude Code
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-02 21:28:21 +03:00
parent 0511486677
commit 3408315f40
6 changed files with 1978 additions and 14 deletions

View File

@@ -477,11 +477,12 @@ def calculate(x: int, y: int) -> int:
"""Test that unknown language returns empty dict."""
analyzer = CodeAnalyzer(depth='deep')
code = '''
func main() {
fmt.Println("Hello, Go!")
import Foundation
func greet(name: String) {
print("Hello, \\(name)!")
}
'''
result = analyzer.analyze_file('test.go', code, 'Go')
result = analyzer.analyze_file('test.swift', code, 'Swift')
# Unknown language should return empty dict
self.assertEqual(result, {})
@@ -741,6 +742,459 @@ def incomplete_func():
self.assertTrue(any('NOTE' in text for text in comment_texts))
class TestCSharpParsing(unittest.TestCase):
    """Run CodeAnalyzer over small C# snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_csharp_class_extraction(self):
        """One class is found and its base class is listed."""
        snippet = '''
using System;
public class PlayerController : MonoBehaviour
{
private float speed = 5f;
}
'''
        analysis = self.analyzer.analyze_file('test.cs', snippet, 'C#')

        self.assertIn('classes', analysis)
        found_classes = analysis['classes']
        self.assertEqual(len(found_classes), 1)

        klass = found_classes[0]
        self.assertEqual(klass['name'], 'PlayerController')
        self.assertIn('MonoBehaviour', klass['base_classes'])

    def test_csharp_method_extraction(self):
        """A two-argument method is reported with name, parameters and return type."""
        snippet = '''
public class Calculator
{
public int Add(int a, int b)
{
return a + b;
}
}
'''
        analysis = self.analyzer.analyze_file('test.cs', snippet, 'C#')

        self.assertIn('functions', analysis)
        found_functions = analysis['functions']
        self.assertEqual(len(found_functions), 1)

        add_method = found_functions[0]
        self.assertEqual(add_method['name'], 'Add')
        self.assertEqual(len(add_method['parameters']), 2)
        self.assertEqual(add_method['return_type'], 'int')

    def test_csharp_property_extraction(self):
        """A class containing auto-properties is still reported as a class."""
        snippet = '''
public class Player
{
public int Health { get; set; } = 100;
private string Name { get; }
}
'''
        analysis = self.analyzer.analyze_file('test.cs', snippet, 'C#')

        # Only the enclosing class is asserted here; the properties
        # themselves are not inspected by this test.
        self.assertIn('classes', analysis)
        klass = analysis['classes'][0]
        self.assertEqual(klass['name'], 'Player')

    def test_csharp_async_method(self):
        """An ``async`` method is flagged through its ``is_async`` entry."""
        snippet = '''
public class DataLoader
{
public async Task<string> LoadDataAsync()
{
await Task.Delay(100);
return "data";
}
}
'''
        analysis = self.analyzer.analyze_file('test.cs', snippet, 'C#')

        self.assertIn('functions', analysis)
        loader = analysis['functions'][0]
        self.assertEqual(loader['name'], 'LoadDataAsync')
        self.assertTrue(loader['is_async'])
class TestGoParsing(unittest.TestCase):
    """Run CodeAnalyzer over small Go snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_go_function_extraction(self):
        """A plain function is reported with its name and return type."""
        snippet = '''
package main
func Add(a int, b int) int {
return a + b
}
'''
        analysis = self.analyzer.analyze_file('test.go', snippet, 'Go')

        self.assertIn('functions', analysis)
        found_functions = analysis['functions']
        self.assertEqual(len(found_functions), 1)

        add_func = found_functions[0]
        self.assertEqual(add_func['name'], 'Add')
        self.assertEqual(add_func['return_type'], 'int')

    def test_go_method_with_receiver(self):
        """A method declared with a pointer receiver appears among the functions."""
        snippet = '''
package main
type Person struct {
Name string
}
func (p *Person) Greet() string {
return "Hello " + p.Name
}
'''
        analysis = self.analyzer.analyze_file('test.go', snippet, 'Go')

        self.assertIn('functions', analysis)

        # Pick the first entry named 'Greet'; stays None when absent.
        greet = None
        for candidate in analysis['functions']:
            if candidate['name'] == 'Greet':
                greet = candidate
                break

        self.assertIsNotNone(greet)
        self.assertEqual(greet['return_type'], 'string')

    def test_go_struct_extraction(self):
        """A struct type is reported under the 'classes' key."""
        snippet = '''
package main
type Rectangle struct {
Width float64
Height float64
}
'''
        analysis = self.analyzer.analyze_file('test.go', snippet, 'Go')

        self.assertIn('classes', analysis)
        found_structs = analysis['classes']
        self.assertEqual(len(found_structs), 1)

        rectangle = found_structs[0]
        self.assertEqual(rectangle['name'], 'Rectangle')

    def test_go_multiple_return_values(self):
        """A function returning a tuple of values is still found by name."""
        snippet = '''
func Divide(a, b float64) (float64, error) {
if b == 0 {
return 0, errors.New("division by zero")
}
return a / b, nil
}
'''
        analysis = self.analyzer.analyze_file('test.go', snippet, 'Go')

        self.assertIn('functions', analysis)
        divide = analysis['functions'][0]
        self.assertEqual(divide['name'], 'Divide')
class TestRustParsing(unittest.TestCase):
    """Run CodeAnalyzer over small Rust snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_rust_function_extraction(self):
        """A public function is reported with its name and return type."""
        snippet = '''
pub fn add(a: i32, b: i32) -> i32 {
a + b
}
'''
        analysis = self.analyzer.analyze_file('test.rs', snippet, 'Rust')

        self.assertIn('functions', analysis)
        found_functions = analysis['functions']
        self.assertEqual(len(found_functions), 1)

        add_fn = found_functions[0]
        self.assertEqual(add_fn['name'], 'add')
        self.assertEqual(add_fn['return_type'], 'i32')

    def test_rust_struct_extraction(self):
        """A public struct is reported under the 'classes' key."""
        snippet = '''
pub struct Point {
x: f64,
y: f64,
}
'''
        analysis = self.analyzer.analyze_file('test.rs', snippet, 'Rust')

        self.assertIn('classes', analysis)
        found_structs = analysis['classes']
        self.assertEqual(len(found_structs), 1)

        point = found_structs[0]
        self.assertEqual(point['name'], 'Point')

    def test_rust_async_function(self):
        """An ``async fn`` is flagged through its ``is_async`` entry."""
        snippet = '''
pub async fn fetch_data() -> Result<String, Error> {
Ok("data".to_string())
}
'''
        analysis = self.analyzer.analyze_file('test.rs', snippet, 'Rust')

        self.assertIn('functions', analysis)
        fetch = analysis['functions'][0]
        self.assertEqual(fetch['name'], 'fetch_data')
        self.assertTrue(fetch['is_async'])

    def test_rust_impl_block(self):
        """A struct plus impl block yields both 'classes' and 'functions' keys."""
        snippet = '''
struct Circle {
radius: f64,
}
impl Circle {
pub fn area(&self) -> f64 {
std::f64::consts::PI * self.radius * self.radius
}
}
'''
        analysis = self.analyzer.analyze_file('test.rs', snippet, 'Rust')

        self.assertIn('classes', analysis)
        self.assertIn('functions', analysis)
class TestJavaParsing(unittest.TestCase):
    """Run CodeAnalyzer over small Java snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_java_class_extraction(self):
        """A class using ``extends`` lists the superclass in its base classes."""
        snippet = '''
public class ArrayList extends AbstractList implements List {
private int size;
}
'''
        analysis = self.analyzer.analyze_file('test.java', snippet, 'Java')

        self.assertIn('classes', analysis)
        found_classes = analysis['classes']
        self.assertEqual(len(found_classes), 1)

        klass = found_classes[0]
        self.assertEqual(klass['name'], 'ArrayList')
        self.assertIn('AbstractList', klass['base_classes'])

    def test_java_method_extraction(self):
        """A static method is reported with its name and return type."""
        snippet = '''
public class Calculator {
public static int multiply(int a, int b) {
return a * b;
}
}
'''
        analysis = self.analyzer.analyze_file('test.java', snippet, 'Java')

        self.assertIn('functions', analysis)
        found_methods = analysis['functions']
        self.assertEqual(len(found_methods), 1)

        multiply = found_methods[0]
        self.assertEqual(multiply['name'], 'multiply')
        self.assertEqual(multiply['return_type'], 'int')

    def test_java_interface_implementation(self):
        """A class that only uses ``implements`` is still found by name."""
        snippet = '''
public class MyHandler implements EventHandler, Runnable {
public void run() {}
}
'''
        analysis = self.analyzer.analyze_file('test.java', snippet, 'Java')

        self.assertIn('classes', analysis)
        klass = analysis['classes'][0]
        self.assertEqual(klass['name'], 'MyHandler')

    def test_java_generic_class(self):
        """A generic class yields both 'classes' and 'functions' keys."""
        snippet = '''
public class Box<T> {
private T value;
public T getValue() {
return value;
}
}
'''
        analysis = self.analyzer.analyze_file('test.java', snippet, 'Java')

        self.assertIn('classes', analysis)
        self.assertIn('functions', analysis)
class TestRubyParsing(unittest.TestCase):
    """Run CodeAnalyzer over small Ruby snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_ruby_class_extraction(self):
        """A single class definition is found by name."""
        snippet = '''
class Person
def initialize(name)
@name = name
end
end
'''
        analysis = self.analyzer.analyze_file('test.rb', snippet, 'Ruby')

        self.assertIn('classes', analysis)
        found_classes = analysis['classes']
        self.assertEqual(len(found_classes), 1)

        klass = found_classes[0]
        self.assertEqual(klass['name'], 'Person')

    def test_ruby_method_extraction(self):
        """A top-level ``def`` is reported as exactly one function."""
        snippet = '''
def greet(name)
puts "Hello, #{name}!"
end
'''
        analysis = self.analyzer.analyze_file('test.rb', snippet, 'Ruby')

        self.assertIn('functions', analysis)
        found_methods = analysis['functions']
        self.assertEqual(len(found_methods), 1)

        greet = found_methods[0]
        self.assertEqual(greet['name'], 'greet')

    def test_ruby_class_inheritance(self):
        """``class Dog < Animal`` lists Animal among the base classes."""
        snippet = '''
class Dog < Animal
def bark
puts "Woof!"
end
end
'''
        analysis = self.analyzer.analyze_file('test.rb', snippet, 'Ruby')

        self.assertIn('classes', analysis)
        klass = analysis['classes'][0]
        self.assertEqual(klass['name'], 'Dog')
        self.assertIn('Animal', klass['base_classes'])

    def test_ruby_predicate_methods(self):
        """A method name ending in ``?`` keeps the question mark."""
        snippet = '''
def empty?
@items.length == 0
end
'''
        analysis = self.analyzer.analyze_file('test.rb', snippet, 'Ruby')

        self.assertIn('functions', analysis)
        predicate = analysis['functions'][0]
        self.assertEqual(predicate['name'], 'empty?')
class TestPHPParsing(unittest.TestCase):
    """Run CodeAnalyzer over small PHP snippets and check the extracted items."""

    def setUp(self):
        # A fresh deep-mode analyzer for every test method.
        self.analyzer = CodeAnalyzer(depth='deep')

    def test_php_class_extraction(self):
        """A single class inside ``<?php ... ?>`` is found by name."""
        snippet = '''
<?php
class User {
private $name;
public function getName() {
return $this->name;
}
}
?>
'''
        analysis = self.analyzer.analyze_file('test.php', snippet, 'PHP')

        self.assertIn('classes', analysis)
        found_classes = analysis['classes']
        self.assertEqual(len(found_classes), 1)

        klass = found_classes[0]
        self.assertEqual(klass['name'], 'User')

    def test_php_method_extraction(self):
        """A free function is reported as exactly one function."""
        snippet = '''
<?php
function calculate($a, $b) {
return $a + $b;
}
?>
'''
        analysis = self.analyzer.analyze_file('test.php', snippet, 'PHP')

        self.assertIn('functions', analysis)
        found_functions = analysis['functions']
        self.assertEqual(len(found_functions), 1)

        calculate = found_functions[0]
        self.assertEqual(calculate['name'], 'calculate')

    def test_php_class_inheritance(self):
        """A class using ``extends`` lists the parent among its base classes."""
        snippet = '''
<?php
class Rectangle extends Shape implements Drawable {
public function draw() {
// Implementation
}
}
?>
'''
        analysis = self.analyzer.analyze_file('test.php', snippet, 'PHP')

        self.assertIn('classes', analysis)
        klass = analysis['classes'][0]
        self.assertEqual(klass['name'], 'Rectangle')
        self.assertIn('Shape', klass['base_classes'])

    def test_php_namespace(self):
        """A class declared after a ``namespace`` line is found by its short name."""
        snippet = '''
<?php
namespace App\\Models;
class Product {
public function getPrice() {
return 99.99;
}
}
?>
'''
        analysis = self.analyzer.analyze_file('test.php', snippet, 'PHP')

        self.assertIn('classes', analysis)
        klass = analysis['classes'][0]
        self.assertEqual(klass['name'], 'Product')
if __name__ == '__main__':
    # Direct execution: hand control to unittest with per-test (verbose) reporting.
    unittest.main(verbosity=2)

View File

@@ -51,9 +51,33 @@ class TestLanguageDetection(unittest.TestCase):
self.assertEqual(detect_language(Path('test.h')), 'C++')
self.assertEqual(detect_language(Path('test.hpp')), 'C++')
def test_csharp_detection(self):
    """A '.cs' path is detected as C#."""
    detected = detect_language(Path('test.cs'))
    self.assertEqual(detected, 'C#')
def test_go_detection(self):
    """A '.go' path is detected as Go."""
    detected = detect_language(Path('test.go'))
    self.assertEqual(detected, 'Go')
def test_rust_detection(self):
    """A '.rs' path is detected as Rust."""
    detected = detect_language(Path('test.rs'))
    self.assertEqual(detected, 'Rust')
def test_java_detection(self):
    """A '.java' path is detected as Java."""
    detected = detect_language(Path('test.java'))
    self.assertEqual(detected, 'Java')
def test_ruby_detection(self):
    """A '.rb' path is detected as Ruby."""
    detected = detect_language(Path('test.rb'))
    self.assertEqual(detected, 'Ruby')
def test_php_detection(self):
    """A '.php' path is detected as PHP."""
    detected = detect_language(Path('test.php'))
    self.assertEqual(detected, 'PHP')
def test_unknown_language(self):
    """Extensions with no language mapping are reported as 'Unknown'."""
    # 'test.go' is deliberately not checked here: test_go_detection expects
    # '.go' to resolve to 'Go', so asserting 'Unknown' for it would contradict
    # that test.
    self.assertEqual(detect_language(Path('test.swift')), 'Unknown')
    self.assertEqual(detect_language(Path('test.txt')), 'Unknown')

View File

@@ -320,6 +320,239 @@ class TestGraphExport(unittest.TestCase):
self.assertEqual(stats['total_files'], 4)
class TestCSharpImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for C# ``using`` lines."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_simple_using(self):
        """Two plain ``using`` lines give two non-relative 'using' dependencies."""
        source = "using System;\nusing System.Collections.Generic;"
        found = self.analyzer.analyze_file('test.cs', source, 'C#')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'System')
        self.assertEqual(first.import_type, 'using')
        self.assertFalse(first.is_relative)

    def test_using_alias(self):
        """An aliased ``using`` reports the target namespace, not the alias."""
        source = "using Project = PC.MyCompany.Project;"
        found = self.analyzer.analyze_file('test.cs', source, 'C#')

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].imported_module, 'PC.MyCompany.Project')

    def test_using_static(self):
        """``using static`` reports the imported type's full name."""
        source = "using static System.Math;"
        found = self.analyzer.analyze_file('test.cs', source, 'C#')

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].imported_module, 'System.Math')
class TestGoImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for Go ``import`` forms."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_simple_import(self):
        """Two single-line imports give two non-relative 'import' dependencies."""
        source = 'import "fmt"\nimport "os"'
        found = self.analyzer.analyze_file('test.go', source, 'Go')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'fmt')
        self.assertEqual(first.import_type, 'import')
        self.assertFalse(first.is_relative)

    def test_import_with_alias(self):
        """An aliased import reports the package path, not the alias."""
        source = 'import f "fmt"'
        found = self.analyzer.analyze_file('test.go', source, 'Go')

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].imported_module, 'fmt')

    def test_multi_import_block(self):
        """A parenthesised import block yields one dependency per package."""
        source = '''import (
"fmt"
"os"
"io"
)'''
        found = self.analyzer.analyze_file('test.go', source, 'Go')

        self.assertEqual(len(found), 3)
        module_names = [entry.imported_module for entry in found]
        for expected in ('fmt', 'os', 'io'):
            self.assertIn(expected, module_names)
class TestRustImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for Rust ``use`` forms."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_simple_use(self):
        """Two ``use`` lines give two non-relative 'use' dependencies."""
        source = "use std::collections::HashMap;\nuse std::io;"
        found = self.analyzer.analyze_file('test.rs', source, 'Rust')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'std::collections::HashMap')
        self.assertEqual(first.import_type, 'use')
        self.assertFalse(first.is_relative)

    def test_use_crate(self):
        """A ``crate::`` path is kept verbatim and is not marked relative."""
        source = "use crate::module::Item;"
        found = self.analyzer.analyze_file('test.rs', source, 'Rust')

        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.imported_module, 'crate::module::Item')
        self.assertFalse(only.is_relative)

    def test_use_super(self):
        """A ``super::`` path is marked relative."""
        source = "use super::sibling;"
        found = self.analyzer.analyze_file('test.rs', source, 'Rust')

        self.assertEqual(len(found), 1)
        self.assertTrue(found[0].is_relative)

    def test_use_curly_braces(self):
        """A brace-grouped ``use`` expands to one dependency per listed item."""
        source = "use std::{io, fs};"
        found = self.analyzer.analyze_file('test.rs', source, 'Rust')

        self.assertEqual(len(found), 2)
        module_names = [entry.imported_module for entry in found]
        for expected in ('std::io', 'std::fs'):
            self.assertIn(expected, module_names)
class TestJavaImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for Java ``import`` forms."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_simple_import(self):
        """Two imports give two non-relative 'import' dependencies."""
        source = "import java.util.List;\nimport java.io.File;"
        found = self.analyzer.analyze_file('test.java', source, 'Java')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'java.util.List')
        self.assertEqual(first.import_type, 'import')
        self.assertFalse(first.is_relative)

    def test_wildcard_import(self):
        """A wildcard import keeps the trailing ``.*`` in the module name."""
        source = "import java.util.*;"
        found = self.analyzer.analyze_file('test.java', source, 'Java')

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].imported_module, 'java.util.*')

    def test_static_import(self):
        """``import static`` reports the fully qualified member name."""
        source = "import static java.lang.Math.PI;"
        found = self.analyzer.analyze_file('test.java', source, 'Java')

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].imported_module, 'java.lang.Math.PI')
class TestRubyImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for Ruby require/load."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_simple_require(self):
        """Two ``require`` lines give two non-relative 'require' dependencies."""
        source = "require 'json'\nrequire 'net/http'"
        found = self.analyzer.analyze_file('test.rb', source, 'Ruby')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'json')
        self.assertEqual(first.import_type, 'require')
        self.assertFalse(first.is_relative)

    def test_require_relative(self):
        """``require_relative`` lines are typed as such and marked relative."""
        source = "require_relative 'helper'\nrequire_relative '../utils'"
        found = self.analyzer.analyze_file('test.rb', source, 'Ruby')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'helper')
        self.assertEqual(first.import_type, 'require_relative')
        self.assertTrue(first.is_relative)

    def test_load_statement(self):
        """A ``load`` line is typed 'load' and marked relative."""
        source = "load 'script.rb'"
        found = self.analyzer.analyze_file('test.rb', source, 'Ruby')

        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertEqual(only.import_type, 'load')
        self.assertTrue(only.is_relative)
class TestPHPImportExtraction(unittest.TestCase):
    """Check the dependencies DependencyAnalyzer reports for PHP require/include/use."""

    def setUp(self):
        # Skip the whole case when the analyzer module could not be imported.
        if not ANALYZER_AVAILABLE:
            self.skipTest("dependency_analyzer not available")
        self.analyzer = DependencyAnalyzer()

    def test_require_statement(self):
        """``require`` and ``require_once`` give two dependencies; the first is a relative 'require'."""
        source = "<?php\nrequire 'config.php';\nrequire_once 'database.php';"
        found = self.analyzer.analyze_file('test.php', source, 'PHP')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'config.php')
        self.assertEqual(first.import_type, 'require')
        self.assertTrue(first.is_relative)

    def test_include_statement(self):
        """``include`` and ``include_once`` give two dependencies; the first is typed 'include'."""
        source = "<?php\ninclude 'header.php';\ninclude_once 'footer.php';"
        found = self.analyzer.analyze_file('test.php', source, 'PHP')

        self.assertEqual(len(found), 2)
        self.assertEqual(found[0].import_type, 'include')

    def test_namespace_use(self):
        """Namespace ``use`` lines give non-relative 'use' dependencies."""
        source = "<?php\nuse App\\Models\\User;\nuse Illuminate\\Support\\Facades\\DB;"
        found = self.analyzer.analyze_file('test.php', source, 'PHP')

        self.assertEqual(len(found), 2)
        first = found[0]
        self.assertEqual(first.imported_module, 'App\\Models\\User')
        self.assertEqual(first.import_type, 'use')
        self.assertFalse(first.is_relative)
class TestEdgeCases(unittest.TestCase):
"""Tests for edge cases and error handling."""
@@ -336,8 +569,8 @@ class TestEdgeCases(unittest.TestCase):
def test_unsupported_language(self):
    """An unsupported language yields no dependencies.

    AWK stands in for an unsupported language. The earlier Go sample
    was dropped: its ``code``/``deps`` values were overwritten before
    any assertion, and this file's Go import tests exercise 'Go' as a
    handled language.
    """
    code = "BEGIN { print $0 }"
    deps = self.analyzer.analyze_file('test.awk', code, 'AWK')
    self.assertEqual(len(deps), 0)