Expands language support from 3 to 9 languages across entire codebase scraping system. **New Languages Added:** - C# (Unity/.NET support) - classes, methods, properties, async/await, XML docs - Go - structs, functions, methods with receivers, multiple return values - Rust - structs, functions, async functions, impl blocks - Java - classes, methods, inheritance, interfaces, generics - Ruby - classes, methods, inheritance, predicate methods - PHP - classes, methods, namespaces, inheritance **Code Analysis (code_analyzer.py):** - Added 6 new language analyzers (~1000 lines) - Regex-based parsers inspired by official language specs - Extract classes, functions, signatures, async detection - Comprehensive comment extraction for all languages **Dependency Analysis (dependency_analyzer.py):** - Added 6 new import extractors (~300 lines) - C#: using statements, static using, aliases - Go: import blocks, aliases - Rust: use statements, curly braces, crate/super - Java: import statements, static imports, wildcards - Ruby: require, require_relative, load - PHP: require/include, namespace use **File Extensions (codebase_scraper.py):** - Added mappings: .cs, .go, .rs, .java, .rb, .php **Test Coverage:** - Added 24 new tests for 6 languages (4 tests each) - Added 19 dependency analyzer tests - Added 6 language detection tests - Total: 118 tests, 100% passing ✅ **Credits:** - Regex patterns based on official language specifications: - Microsoft C# Language Specification - Go Language Specification - Rust Language Reference - Oracle Java Language Specification - Ruby Documentation - PHP Language Reference - NetworkX for graph algorithms **Issues Resolved:** - Closes #166 (C# support request) - Closes #140 (E1.7 MCP tool scrape_codebase) **Test Results:** - test_code_analyzer.py: 54 tests passing - test_dependency_analyzer.py: 43 tests passing - test_codebase_scraper.py: 21 tests passing - Total execution: ~0.41s 🚀 Generated with Claude Code Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
207 lines
6.7 KiB
Python
207 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tests for codebase_scraper.py - Standalone codebase analysis CLI.
|
|
|
|
Test Coverage:
|
|
- Language detection
|
|
- Directory exclusion
|
|
- File walking
|
|
- .gitignore loading
|
|
"""
|
|
|
|
import unittest
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
import sys
|
|
import os
|
|
|
|
# Add src to path
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
|
|
from skill_seekers.cli.codebase_scraper import (
|
|
detect_language,
|
|
should_exclude_dir,
|
|
walk_directory,
|
|
load_gitignore,
|
|
DEFAULT_EXCLUDED_DIRS
|
|
)
|
|
|
|
|
|
class TestLanguageDetection(unittest.TestCase):
|
|
"""Tests for language detection from file extensions"""
|
|
|
|
def test_python_detection(self):
|
|
"""Test Python file detection."""
|
|
self.assertEqual(detect_language(Path('test.py')), 'Python')
|
|
|
|
def test_javascript_detection(self):
|
|
"""Test JavaScript file detection."""
|
|
self.assertEqual(detect_language(Path('test.js')), 'JavaScript')
|
|
self.assertEqual(detect_language(Path('test.jsx')), 'JavaScript')
|
|
|
|
def test_typescript_detection(self):
|
|
"""Test TypeScript file detection."""
|
|
self.assertEqual(detect_language(Path('test.ts')), 'TypeScript')
|
|
self.assertEqual(detect_language(Path('test.tsx')), 'TypeScript')
|
|
|
|
def test_cpp_detection(self):
|
|
"""Test C++ file detection."""
|
|
self.assertEqual(detect_language(Path('test.cpp')), 'C++')
|
|
self.assertEqual(detect_language(Path('test.h')), 'C++')
|
|
self.assertEqual(detect_language(Path('test.hpp')), 'C++')
|
|
|
|
def test_csharp_detection(self):
|
|
"""Test C# file detection."""
|
|
self.assertEqual(detect_language(Path('test.cs')), 'C#')
|
|
|
|
def test_go_detection(self):
|
|
"""Test Go file detection."""
|
|
self.assertEqual(detect_language(Path('test.go')), 'Go')
|
|
|
|
def test_rust_detection(self):
|
|
"""Test Rust file detection."""
|
|
self.assertEqual(detect_language(Path('test.rs')), 'Rust')
|
|
|
|
def test_java_detection(self):
|
|
"""Test Java file detection."""
|
|
self.assertEqual(detect_language(Path('test.java')), 'Java')
|
|
|
|
def test_ruby_detection(self):
|
|
"""Test Ruby file detection."""
|
|
self.assertEqual(detect_language(Path('test.rb')), 'Ruby')
|
|
|
|
def test_php_detection(self):
|
|
"""Test PHP file detection."""
|
|
self.assertEqual(detect_language(Path('test.php')), 'PHP')
|
|
|
|
def test_unknown_language(self):
|
|
"""Test unknown file extension."""
|
|
self.assertEqual(detect_language(Path('test.swift')), 'Unknown')
|
|
self.assertEqual(detect_language(Path('test.txt')), 'Unknown')
|
|
|
|
|
|
class TestDirectoryExclusion(unittest.TestCase):
|
|
"""Tests for directory exclusion logic"""
|
|
|
|
def test_node_modules_excluded(self):
|
|
"""Test that node_modules is excluded."""
|
|
self.assertTrue(should_exclude_dir('node_modules', DEFAULT_EXCLUDED_DIRS))
|
|
|
|
def test_venv_excluded(self):
|
|
"""Test that venv is excluded."""
|
|
self.assertTrue(should_exclude_dir('venv', DEFAULT_EXCLUDED_DIRS))
|
|
|
|
def test_git_excluded(self):
|
|
"""Test that .git is excluded."""
|
|
self.assertTrue(should_exclude_dir('.git', DEFAULT_EXCLUDED_DIRS))
|
|
|
|
def test_normal_dir_not_excluded(self):
|
|
"""Test that normal directories are not excluded."""
|
|
self.assertFalse(should_exclude_dir('src', DEFAULT_EXCLUDED_DIRS))
|
|
self.assertFalse(should_exclude_dir('tests', DEFAULT_EXCLUDED_DIRS))
|
|
|
|
|
|
class TestDirectoryWalking(unittest.TestCase):
|
|
"""Tests for directory walking functionality"""
|
|
|
|
def setUp(self):
|
|
"""Set up test environment"""
|
|
self.temp_dir = tempfile.mkdtemp()
|
|
self.root = Path(self.temp_dir)
|
|
|
|
def tearDown(self):
|
|
"""Clean up test environment"""
|
|
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
|
|
def test_walk_empty_directory(self):
|
|
"""Test walking empty directory."""
|
|
files = walk_directory(self.root)
|
|
self.assertEqual(len(files), 0)
|
|
|
|
def test_walk_with_python_files(self):
|
|
"""Test walking directory with Python files."""
|
|
# Create test files
|
|
(self.root / 'test1.py').write_text('print("test")')
|
|
(self.root / 'test2.py').write_text('print("test2")')
|
|
(self.root / 'readme.txt').write_text('readme')
|
|
|
|
files = walk_directory(self.root)
|
|
|
|
# Should only find Python files
|
|
self.assertEqual(len(files), 2)
|
|
self.assertTrue(all(f.suffix == '.py' for f in files))
|
|
|
|
def test_walk_excludes_node_modules(self):
|
|
"""Test that node_modules directory is excluded."""
|
|
# Create test files
|
|
(self.root / 'test.py').write_text('test')
|
|
|
|
# Create node_modules with files
|
|
node_modules = self.root / 'node_modules'
|
|
node_modules.mkdir()
|
|
(node_modules / 'package.js').write_text('test')
|
|
|
|
files = walk_directory(self.root)
|
|
|
|
# Should only find root test.py, not package.js
|
|
self.assertEqual(len(files), 1)
|
|
self.assertEqual(files[0].name, 'test.py')
|
|
|
|
def test_walk_with_subdirectories(self):
|
|
"""Test walking nested directory structure."""
|
|
# Create nested structure
|
|
src_dir = self.root / 'src'
|
|
src_dir.mkdir()
|
|
(src_dir / 'module.py').write_text('test')
|
|
|
|
tests_dir = self.root / 'tests'
|
|
tests_dir.mkdir()
|
|
(tests_dir / 'test_module.py').write_text('test')
|
|
|
|
files = walk_directory(self.root)
|
|
|
|
# Should find both files
|
|
self.assertEqual(len(files), 2)
|
|
filenames = [f.name for f in files]
|
|
self.assertIn('module.py', filenames)
|
|
self.assertIn('test_module.py', filenames)
|
|
|
|
|
|
class TestGitignoreLoading(unittest.TestCase):
|
|
"""Tests for .gitignore loading"""
|
|
|
|
def setUp(self):
|
|
"""Set up test environment"""
|
|
self.temp_dir = tempfile.mkdtemp()
|
|
self.root = Path(self.temp_dir)
|
|
|
|
def tearDown(self):
|
|
"""Clean up test environment"""
|
|
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
|
|
def test_no_gitignore(self):
|
|
"""Test behavior when no .gitignore exists."""
|
|
spec = load_gitignore(self.root)
|
|
# Should return None when no .gitignore found
|
|
self.assertIsNone(spec)
|
|
|
|
def test_load_gitignore(self):
|
|
"""Test loading valid .gitignore file."""
|
|
# Create .gitignore
|
|
gitignore_path = self.root / '.gitignore'
|
|
gitignore_path.write_text('*.log\ntemp/\n')
|
|
|
|
spec = load_gitignore(self.root)
|
|
|
|
# Should successfully load pathspec (if pathspec is installed)
|
|
# If pathspec is not installed, spec will be None
|
|
if spec is not None:
|
|
# Verify it's a PathSpec object
|
|
self.assertIsNotNone(spec)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# Run tests with verbose output
|
|
unittest.main(verbosity=2)
|