feat(C2.1): Add .gitignore support to github_scraper for local repos
- Add pathspec import with graceful fallback
- Add gitignore_spec attribute to GitHubScraper class
- Implement _load_gitignore() method to parse .gitignore files
- Update should_exclude_dir() to check .gitignore rules
- Load .gitignore automatically in local repository mode
- Handle directory patterns with and without trailing slash
- Add 4 comprehensive tests for .gitignore functionality

Closes #63 - C2.1 File Tree Walker with .gitignore support complete

Features:
- Loads .gitignore from local repository root
- Respects .gitignore patterns for directory exclusion
- Falls back gracefully when pathspec not installed
- Works alongside existing hard-coded exclusions
- Only active in local_repo_path mode (not GitHub API mode)

Test coverage:
- test_load_gitignore_exists: .gitignore parsing
- test_load_gitignore_missing: Missing .gitignore handling
- test_should_exclude_dir_with_gitignore: .gitignore exclusion
- test_should_exclude_dir_default_exclusions: Existing exclusions still work

Integration:
- github_scraper.py now has same .gitignore support as codebase_scraper.py
- Both tools use pathspec library for consistent behavior
- Enables proper repository analysis respecting project .gitignore rules
This commit is contained in:
@@ -962,6 +962,100 @@ class TestSymlinkHandling(unittest.TestCase):
|
||||
self.assertIn('Major update', scraper.extracted_data['changelog'])
|
||||
|
||||
|
||||
class TestGitignoreSupport(unittest.TestCase):
    """Test .gitignore support in github_scraper (C2.1)"""

    def setUp(self):
        """Set up test environment"""
        if not PYGITHUB_AVAILABLE:
            self.skipTest("PyGithub not installed")
        from skill_seekers.cli.github_scraper import GitHubScraper
        self.GitHubScraper = GitHubScraper

        # Each test gets its own empty directory standing in for a local repo.
        self.temp_dir = tempfile.mkdtemp()
        self.repo_path = Path(self.temp_dir)

    def tearDown(self):
        """Clean up test environment"""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @staticmethod
    def _pathspec_available():
        """Return True when the optional ``pathspec`` package can be imported."""
        try:
            import pathspec  # noqa: F401
        except ImportError:
            return False
        return True

    def _make_scraper(self):
        """Build a GitHubScraper in local-repo mode pointed at ``self.repo_path``.

        ``Github`` is patched in the scraper module for the duration of the
        constructor call, so a mock is used instead of the real client class.
        """
        config = {
            'repo': 'test/repo',
            'local_repo_path': str(self.repo_path)
        }
        with patch('skill_seekers.cli.github_scraper.Github'):
            return self.GitHubScraper(config)

    def test_load_gitignore_exists(self):
        """Test loading existing .gitignore file."""
        # Create .gitignore
        gitignore_path = self.repo_path / '.gitignore'
        gitignore_path.write_text('*.log\ntemp/\n__pycache__/')

        scraper = self._make_scraper()

        # Branch on whether pathspec is importable rather than on
        # hasattr(scraper, 'gitignore_spec'): the previous else-branch read the
        # very attribute hasattr had just reported missing, so it could only
        # raise AttributeError instead of checking the fallback.
        if self._pathspec_available():
            # pathspec is installed
            self.assertIsNotNone(scraper.gitignore_spec)
        else:
            # pathspec not installed
            self.assertIsNone(scraper.gitignore_spec)

    def test_load_gitignore_missing(self):
        """Test behavior when no .gitignore exists."""
        scraper = self._make_scraper()

        # Should be None when no .gitignore found
        self.assertIsNone(scraper.gitignore_spec)

    def test_should_exclude_dir_with_gitignore(self):
        """Test directory exclusion with .gitignore rules."""
        # Create .gitignore
        gitignore_path = self.repo_path / '.gitignore'
        gitignore_path.write_text('temp/\nbuild/\n*.egg-info')

        scraper = self._make_scraper()

        # Non-excluded dir should pass; checked first so it runs whether or
        # not the .gitignore rules could be loaded.
        self.assertFalse(scraper.should_exclude_dir('src', 'src'))

        # Report a skip instead of silently passing when no rules were loaded
        # (e.g. pathspec is not installed).
        if not scraper.gitignore_spec:
            self.skipTest("gitignore rules not loaded (pathspec unavailable?)")

        # Test .gitignore exclusion
        self.assertTrue(scraper.should_exclude_dir('temp', 'temp'))
        self.assertTrue(scraper.should_exclude_dir('build', 'build'))

    def test_should_exclude_dir_default_exclusions(self):
        """Test that default exclusions still work."""
        scraper = self._make_scraper()

        # Default exclusions should still work
        for excluded in ('node_modules', 'venv', '__pycache__'):
            with self.subTest(directory=excluded):
                self.assertTrue(scraper.should_exclude_dir(excluded))

        # Normal directories should not be excluded
        for kept in ('src', 'tests'):
            with self.subTest(directory=kept):
                self.assertFalse(scraper.should_exclude_dir(kept))
|
||||
|
||||
|
||||
class TestErrorHandling(unittest.TestCase):
|
||||
"""Test error handling and edge cases"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user