Adds 7 additional test cases for Issue #203 (configurable EXCLUDED_DIRS).

Test coverage additions:
- Local repository integration (2 tests)
  * exclude_dirs with local_repo_path
  * Replace mode with local_repo_path
- Logging verification (3 tests)
  * INFO level for extend mode
  * WARNING level for replace mode
  * No logging for default mode
- Type handling (2 tests)
  * Tuple support for exclude_dirs
  * Set support for exclude_dirs_additional

Total test coverage:
- 19 tests for the exclude_dirs feature (all passing)
- 427 total tests passing (up from 420)
- 54% code coverage for github_scraper.py

All tests pass with no failures. 32 skipped tests are expected:
- 3 macOS-specific tests (platform limitation)
- 29 MCP tests (pass individually, skip in full suite due to pytest quirk)

Closes #203
376 lines
14 KiB
Python
376 lines
14 KiB
Python
"""Tests for configurable directory exclusions in GitHub scraper.
|
|
|
|
Tests Issue #203: Make EXCLUDED_DIRS configurable
|
|
"""
|
|
|
|
import unittest
|
|
from unittest.mock import patch, Mock
|
|
from skill_seekers.cli.github_scraper import GitHubScraper, EXCLUDED_DIRS
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsDefaults(unittest.TestCase):
    """Backward-compatibility checks for a config without exclusion options.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    @staticmethod
    def _default_scraper():
        """Return a scraper built from a config that only names the repo."""
        return GitHubScraper({'repo': 'owner/repo'})

    def test_defaults_when_no_config(self, mock_github):
        """The module-level defaults apply when nothing is configured."""
        subject = self._default_scraper()

        # Expect exactly the default EXCLUDED_DIRS
        self.assertEqual(subject.excluded_dirs, EXCLUDED_DIRS)

    def test_defaults_exclude_common_dirs(self, mock_github):
        """Well-known tooling directories are skipped; source dirs are kept."""
        subject = self._default_scraper()

        # Directories that the defaults are expected to exclude
        self.assertTrue(subject.should_exclude_dir('venv'))
        self.assertTrue(subject.should_exclude_dir('node_modules'))
        self.assertTrue(subject.should_exclude_dir('__pycache__'))
        self.assertTrue(subject.should_exclude_dir('.git'))
        self.assertTrue(subject.should_exclude_dir('build'))

        # Ordinary project directories must pass through
        self.assertFalse(subject.should_exclude_dir('src'))
        self.assertFalse(subject.should_exclude_dir('tests'))
        self.assertFalse(subject.should_exclude_dir('docs'))

    def test_dot_directories_always_excluded(self, mock_github):
        """Any directory whose name starts with '.' is excluded."""
        subject = self._default_scraper()

        # These are expected to be excluded even if absent from EXCLUDED_DIRS
        self.assertTrue(subject.should_exclude_dir('.hidden'))
        self.assertTrue(subject.should_exclude_dir('.cache'))
        self.assertTrue(subject.should_exclude_dir('.vscode'))
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsAdditional(unittest.TestCase):
    """Extend mode: ``exclude_dirs_additional`` is layered on the defaults.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    @staticmethod
    def _extended_scraper(extra_dirs):
        """Return a scraper whose config supplies ``exclude_dirs_additional``."""
        return GitHubScraper({
            'repo': 'owner/repo',
            'exclude_dirs_additional': extra_dirs,
        })

    def test_extend_with_additional_dirs(self, mock_github):
        """Custom entries are merged with the default exclusions."""
        subject = self._extended_scraper(['proprietary', 'vendor', 'third_party'])

        # Defaults survive ...
        self.assertIn('venv', subject.excluded_dirs)
        self.assertIn('node_modules', subject.excluded_dirs)
        # ... and the three custom entries are present as well
        self.assertIn('proprietary', subject.excluded_dirs)
        self.assertIn('vendor', subject.excluded_dirs)
        self.assertIn('third_party', subject.excluded_dirs)

        # Size check: defaults plus the three new names
        expected_size = len(EXCLUDED_DIRS) + 3
        self.assertEqual(len(subject.excluded_dirs), expected_size)

    def test_extend_excludes_additional_dirs(self, mock_github):
        """Directories added in extend mode are really filtered out."""
        subject = self._extended_scraper(['legacy', 'deprecated'])

        # Newly added names
        self.assertTrue(subject.should_exclude_dir('legacy'))
        self.assertTrue(subject.should_exclude_dir('deprecated'))

        # Default names keep working
        self.assertTrue(subject.should_exclude_dir('venv'))
        self.assertTrue(subject.should_exclude_dir('node_modules'))

        # Regular directories are untouched
        self.assertFalse(subject.should_exclude_dir('src'))

    def test_extend_with_empty_list(self, mock_github):
        """An empty additional list leaves the defaults unchanged."""
        subject = self._extended_scraper([])

        # Nothing added, so the result equals the defaults
        self.assertEqual(subject.excluded_dirs, EXCLUDED_DIRS)
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsReplace(unittest.TestCase):
    """Replace mode: ``exclude_dirs`` substitutes the default exclusions.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    @staticmethod
    def _replacing_scraper(only_dirs):
        """Return a scraper whose config supplies ``exclude_dirs``."""
        return GitHubScraper({
            'repo': 'owner/repo',
            'exclude_dirs': only_dirs,
        })

    def test_replace_with_custom_list(self, mock_github):
        """The configured list becomes the complete exclusion set."""
        subject = self._replacing_scraper(['node_modules', 'custom_vendor'])

        # Exactly the two configured names, nothing inherited
        self.assertEqual(subject.excluded_dirs, {'node_modules', 'custom_vendor'})
        self.assertEqual(len(subject.excluded_dirs), 2)

    def test_replace_excludes_only_specified_dirs(self, mock_github):
        """Only listed directories are filtered once defaults are replaced."""
        subject = self._replacing_scraper(['node_modules', '.git'])

        # Listed names are excluded
        self.assertTrue(subject.should_exclude_dir('node_modules'))
        # '.git' would be caught by the dot-prefix rule regardless
        self.assertTrue(subject.should_exclude_dir('.git'))

        # Former defaults that were not listed are now allowed
        self.assertFalse(subject.should_exclude_dir('venv'))
        self.assertFalse(subject.should_exclude_dir('__pycache__'))
        self.assertFalse(subject.should_exclude_dir('build'))

        # Regular directories remain allowed
        self.assertFalse(subject.should_exclude_dir('src'))

    def test_replace_with_empty_list(self, mock_github):
        """An empty replace list excludes nothing except dot-prefixed dirs."""
        subject = self._replacing_scraper([])

        # The explicit exclusion set is empty
        self.assertEqual(subject.excluded_dirs, set())

        # Former defaults are no longer filtered
        self.assertFalse(subject.should_exclude_dir('venv'))
        self.assertFalse(subject.should_exclude_dir('node_modules'))
        self.assertFalse(subject.should_exclude_dir('build'))

        # Dot-prefixed names are handled by separate logic and stay excluded
        self.assertTrue(subject.should_exclude_dir('.git'))
        self.assertTrue(subject.should_exclude_dir('.hidden'))
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsPrecedence(unittest.TestCase):
    """Which option wins when both exclusion keys appear in one config.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    def test_replace_takes_precedence_over_additional(self, mock_github):
        """``exclude_dirs`` overrides ``exclude_dirs_additional``."""
        both_options = {
            'repo': 'owner/repo',
            'exclude_dirs': ['only', 'these'],       # replace mode
            'exclude_dirs_additional': ['ignored'],  # expected to have no effect
        }
        subject = GitHubScraper(both_options)

        # Replace mode is in force; the additional list is dropped
        self.assertEqual(subject.excluded_dirs, {'only', 'these'})
        self.assertNotIn('ignored', subject.excluded_dirs)
        # The defaults are dropped too
        self.assertNotIn('venv', subject.excluded_dirs)
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsEdgeCases(unittest.TestCase):
    """Corner cases: duplicate entries and case sensitivity.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    @staticmethod
    def _scraper_with(**exclusion_options):
        """Return a scraper for 'owner/repo' plus the given config entries."""
        return GitHubScraper({'repo': 'owner/repo', **exclusion_options})

    def test_duplicate_exclusions_in_additional(self, mock_github):
        """Repeated names in the additional list are collapsed."""
        # 'venv' is listed twice here and is also one of the defaults
        subject = self._scraper_with(
            exclude_dirs_additional=['venv', 'custom', 'venv'],
        )

        # Both names are present ...
        self.assertIn('venv', subject.excluded_dirs)
        self.assertIn('custom', subject.excluded_dirs)

        # ... but only 'custom' enlarges the set beyond the defaults
        expected_size = len(EXCLUDED_DIRS) + 1
        self.assertEqual(len(subject.excluded_dirs), expected_size)

    def test_case_sensitive_exclusions(self, mock_github):
        """Directory names are compared with exact case."""
        subject = self._scraper_with(exclude_dirs=['Venv', 'NODE_MODULES'])

        # Exact spelling matches
        self.assertTrue(subject.should_exclude_dir('Venv'))
        self.assertTrue(subject.should_exclude_dir('NODE_MODULES'))
        # Same names in a different case do not match
        self.assertFalse(subject.should_exclude_dir('venv'))
        self.assertFalse(subject.should_exclude_dir('node_modules'))
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsWithLocalRepo(unittest.TestCase):
    """Exclusion options combined with a ``local_repo_path`` config entry.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    @staticmethod
    def _local_scraper(**exclusion_options):
        """Return a scraper configured with a local repo path plus extras."""
        return GitHubScraper({
            'repo': 'owner/repo',
            'local_repo_path': '/tmp/test/repo',
            **exclusion_options,
        })

    def test_exclude_dirs_with_local_repo_path(self, mock_github):
        """Extend mode still applies when a local repo path is set."""
        subject = self._local_scraper(
            exclude_dirs_additional=['proprietary', 'internal'],
        )

        # Defaults and custom names are both in the set
        self.assertIn('venv', subject.excluded_dirs)
        self.assertIn('proprietary', subject.excluded_dirs)
        self.assertIn('internal', subject.excluded_dirs)

        # And they are all reported as excluded
        self.assertTrue(subject.should_exclude_dir('proprietary'))
        self.assertTrue(subject.should_exclude_dir('internal'))
        self.assertTrue(subject.should_exclude_dir('venv'))

    def test_replace_mode_with_local_repo_path(self, mock_github):
        """Replace mode still applies when a local repo path is set."""
        subject = self._local_scraper(exclude_dirs=['only_this'])

        # The single configured name is the whole exclusion set
        self.assertEqual(subject.excluded_dirs, {'only_this'})
        self.assertTrue(subject.should_exclude_dir('only_this'))
        self.assertFalse(subject.should_exclude_dir('venv'))
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsLogging(unittest.TestCase):
    """Log messages emitted while the exclusion config is processed.

    ``Github`` is patched at class level and the module ``logger`` per
    method, so each test receives ``(mock_logger, mock_github)`` in that
    order, the same order as stacking both decorators on the method.
    """

    @staticmethod
    def _rendered(mock_method):
        """Return every recorded call of ``mock_method`` as a string."""
        return list(map(str, mock_method.call_args_list))

    @patch('skill_seekers.cli.github_scraper.logger')
    def test_extend_mode_logs_info(self, mock_logger, mock_github):
        """Extend mode reports the number of added exclusions at INFO level."""
        # Built only for the log output produced during construction
        _unused = GitHubScraper({
            'repo': 'owner/repo',
            'exclude_dirs_additional': ['custom1', 'custom2'],
        })

        # At least one INFO call must mention the two added exclusions
        info_lines = self._rendered(mock_logger.info)
        self.assertTrue(any('Added 2 custom directory exclusions' in line for line in info_lines))

    @patch('skill_seekers.cli.github_scraper.logger')
    def test_replace_mode_logs_warning(self, mock_logger, mock_github):
        """Replace mode warns that the defaults have been overridden."""
        # Built only for the log output produced during construction
        _unused = GitHubScraper({
            'repo': 'owner/repo',
            'exclude_dirs': ['only', 'these'],
        })

        # At least one WARNING call must carry both phrases
        warning_lines = self._rendered(mock_logger.warning)
        self.assertTrue(any('Using custom directory exclusions' in line and 'defaults overridden' in line for line in warning_lines))

    @patch('skill_seekers.cli.github_scraper.logger')
    def test_no_config_no_logging(self, mock_logger, mock_github):
        """Default mode emits no exclusion-related INFO or WARNING message."""
        # Built only for the log output produced during construction
        _unused = GitHubScraper({'repo': 'owner/repo'})

        info_lines = self._rendered(mock_logger.info)
        warning_lines = self._rendered(mock_logger.warning)

        # Keep only the messages that talk about directory exclusions
        matching_info = [line for line in info_lines if 'directory exclusion' in line]
        matching_warnings = [line for line in warning_lines if 'directory exclusion' in line]

        self.assertEqual(len(matching_info), 0)
        self.assertEqual(len(matching_warnings), 0)
|
|
|
|
|
|
@patch('skill_seekers.cli.github_scraper.Github')
class TestExcludedDirsTypeHandling(unittest.TestCase):
    """Non-list containers supplied for the exclusion options.

    The class-level patch swaps the ``Github`` name in the scraper module
    for a mock during every test method; the mock arrives as ``mock_github``.
    """

    def test_exclude_dirs_with_tuple(self, mock_github):
        """A tuple given as ``exclude_dirs`` yields the equivalent set."""
        tuple_config = {
            'repo': 'owner/repo',
            'exclude_dirs': ('node_modules', 'build'),  # tuple, not list
        }
        subject = GitHubScraper(tuple_config)

        # The tuple's members make up the exclusion set
        self.assertEqual(subject.excluded_dirs, {'node_modules', 'build'})

    def test_exclude_dirs_additional_with_set(self, mock_github):
        """A set given as ``exclude_dirs_additional`` extends the defaults."""
        set_config = {
            'repo': 'owner/repo',
            'exclude_dirs_additional': {'custom1', 'custom2'},  # set, not list
        }
        subject = GitHubScraper(set_config)

        # Custom members were added
        self.assertIn('custom1', subject.excluded_dirs)
        self.assertIn('custom2', subject.excluded_dirs)
        # Defaults are still included
        self.assertIn('venv', subject.excluded_dirs)
|
|
|
|
|
|
# Run the suite through unittest when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|