#!/usr/bin/env python3
"""
Tests for codebase_scraper.py - Standalone codebase analysis CLI.

Test Coverage:
- Language detection
- Directory exclusion
- File walking
- .gitignore loading
"""

import os
import shutil
import sys
import tempfile
import unittest
from pathlib import Path

# Add src to path so the package can be imported without being installed
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from skill_seekers.cli.codebase_scraper import (
    DEFAULT_EXCLUDED_DIRS,
    detect_language,
    load_gitignore,
    should_exclude_dir,
    walk_directory,
)


class TestLanguageDetection(unittest.TestCase):
    """Tests for language detection from file extensions"""

    def test_python_detection(self):
        """Test Python file detection."""
        self.assertEqual(detect_language(Path("test.py")), "Python")

    def test_javascript_detection(self):
        """Test JavaScript file detection."""
        self.assertEqual(detect_language(Path("test.js")), "JavaScript")
        self.assertEqual(detect_language(Path("test.jsx")), "JavaScript")

    def test_typescript_detection(self):
        """Test TypeScript file detection."""
        self.assertEqual(detect_language(Path("test.ts")), "TypeScript")
        self.assertEqual(detect_language(Path("test.tsx")), "TypeScript")

    def test_cpp_detection(self):
        """Test C++ file detection."""
        self.assertEqual(detect_language(Path("test.cpp")), "C++")
        self.assertEqual(detect_language(Path("test.h")), "C++")
        self.assertEqual(detect_language(Path("test.hpp")), "C++")

    def test_csharp_detection(self):
        """Test C# file detection."""
        self.assertEqual(detect_language(Path("test.cs")), "C#")

    def test_go_detection(self):
        """Test Go file detection."""
        self.assertEqual(detect_language(Path("test.go")), "Go")

    def test_rust_detection(self):
        """Test Rust file detection."""
        self.assertEqual(detect_language(Path("test.rs")), "Rust")

    def test_java_detection(self):
        """Test Java file detection."""
        self.assertEqual(detect_language(Path("test.java")), "Java")

    def test_ruby_detection(self):
        """Test Ruby file detection."""
        self.assertEqual(detect_language(Path("test.rb")), "Ruby")

    def test_php_detection(self):
        """Test PHP file detection."""
        self.assertEqual(detect_language(Path("test.php")), "PHP")

    def test_unknown_language(self):
        """Test unknown file extension."""
        self.assertEqual(detect_language(Path("test.swift")), "Unknown")
        self.assertEqual(detect_language(Path("test.txt")), "Unknown")


class TestDirectoryExclusion(unittest.TestCase):
    """Tests for directory exclusion logic"""

    def test_node_modules_excluded(self):
        """Test that node_modules is excluded."""
        self.assertTrue(should_exclude_dir("node_modules", DEFAULT_EXCLUDED_DIRS))

    def test_venv_excluded(self):
        """Test that venv is excluded."""
        self.assertTrue(should_exclude_dir("venv", DEFAULT_EXCLUDED_DIRS))

    def test_git_excluded(self):
        """Test that .git is excluded."""
        self.assertTrue(should_exclude_dir(".git", DEFAULT_EXCLUDED_DIRS))

    def test_normal_dir_not_excluded(self):
        """Test that normal directories are not excluded."""
        self.assertFalse(should_exclude_dir("src", DEFAULT_EXCLUDED_DIRS))
        self.assertFalse(should_exclude_dir("tests", DEFAULT_EXCLUDED_DIRS))


class TestDirectoryWalking(unittest.TestCase):
    """Tests for directory walking functionality"""

    def setUp(self):
        """Set up test environment"""
        self.temp_dir = tempfile.mkdtemp()
        # Register cleanup immediately so the directory is removed even if
        # a later setUp step raises (tearDown is skipped in that case).
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.root = Path(self.temp_dir)

    def test_walk_empty_directory(self):
        """Test walking empty directory."""
        files = walk_directory(self.root)
        self.assertEqual(len(files), 0)

    def test_walk_with_python_files(self):
        """Test walking directory with Python files."""
        # Create test files
        (self.root / "test1.py").write_text('print("test")')
        (self.root / "test2.py").write_text('print("test2")')
        (self.root / "readme.txt").write_text("readme")

        files = walk_directory(self.root)

        # Should only find Python files
        self.assertEqual(len(files), 2)
        self.assertTrue(all(f.suffix == ".py" for f in files))

    def test_walk_excludes_node_modules(self):
        """Test that node_modules directory is excluded."""
        # Create test files
        (self.root / "test.py").write_text("test")

        # Create node_modules with files
        node_modules = self.root / "node_modules"
        node_modules.mkdir()
        (node_modules / "package.js").write_text("test")

        files = walk_directory(self.root)

        # Should only find root test.py, not package.js
        self.assertEqual(len(files), 1)
        self.assertEqual(files[0].name, "test.py")

    def test_walk_with_subdirectories(self):
        """Test walking nested directory structure."""
        # Create nested structure
        src_dir = self.root / "src"
        src_dir.mkdir()
        (src_dir / "module.py").write_text("test")

        tests_dir = self.root / "tests"
        tests_dir.mkdir()
        (tests_dir / "test_module.py").write_text("test")

        files = walk_directory(self.root)

        # Should find both files
        self.assertEqual(len(files), 2)
        filenames = [f.name for f in files]
        self.assertIn("module.py", filenames)
        self.assertIn("test_module.py", filenames)


class TestGitignoreLoading(unittest.TestCase):
    """Tests for .gitignore loading"""

    def setUp(self):
        """Set up test environment"""
        self.temp_dir = tempfile.mkdtemp()
        # Register cleanup immediately so the directory is removed even if
        # a later setUp step raises (tearDown is skipped in that case).
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.root = Path(self.temp_dir)

    def test_no_gitignore(self):
        """Test behavior when no .gitignore exists."""
        spec = load_gitignore(self.root)

        # Should return None when no .gitignore found
        self.assertIsNone(spec)

    def test_load_gitignore(self):
        """Test loading valid .gitignore file."""
        # Create .gitignore
        gitignore_path = self.root / ".gitignore"
        gitignore_path.write_text("*.log\ntemp/\n")

        spec = load_gitignore(self.root)

        # load_gitignore is expected to return None when the optional
        # pathspec dependency is missing. Report that as a skip rather than
        # a silent pass so the gap in coverage is visible in the test output.
        if spec is None:
            self.skipTest("load_gitignore returned None (pathspec not installed?)")

        # Verify the loaded spec actually honours the patterns written above.
        # NOTE(review): assumes the returned object is a pathspec.PathSpec
        # exposing match_file() - confirm against load_gitignore.
        self.assertTrue(spec.match_file("debug.log"))
        self.assertTrue(spec.match_file("temp/cache.bin"))
        self.assertFalse(spec.match_file("main.py"))


if __name__ == "__main__":
    # Run tests with verbose output
    unittest.main(verbosity=2)