Files
skill-seekers-reference/tests/test_excluded_dirs_config.py
yusyus 596b219599 fix: Resolve remaining 188 linting errors (249 total fixed)
Second batch of comprehensive linting fixes:

Unused Arguments/Variables (136 errors):
- ARG002/ARG001 (91 errors): Prefixed unused method/function arguments with '_'
  - Interface methods in adaptors (base.py, gemini.py, markdown.py)
  - AST analyzer methods maintaining signatures (code_analyzer.py)
  - Test fixtures and hooks (conftest.py)
  - Added noqa: ARG001/ARG002 for pytest hooks requiring exact names
- F841 (45 errors): Prefixed unused local variables with '_'
  - Tuple unpacking where some values aren't needed
  - Variables assigned but not referenced

Loop & Boolean Quality (28 errors):
- B007 (18 errors): Prefixed unused loop control variables with '_'
  - enumerate() loops where index not used
  - for-in loops where loop variable not referenced
- E712 (10 errors): Simplified boolean comparisons
  - Changed '== True' to direct boolean check
  - Changed '== False' to 'not' expression
  - Improved test readability

Code Quality (24 errors):
- SIM201 (4 errors): Already fixed in previous commit
- SIM118 (2 errors): Already fixed in previous commit
- E741 (4 errors): Already fixed in previous commit
- Config manager loop variable fix (1 error)

All Tests Passing:
- test_scraper_features.py: 42 passed
- test_integration.py: 51 passed
- test_architecture_scenarios.py: 11 passed
- test_real_world_fastmcp.py: 19 passed, 1 skipped

Note: Some SIM errors (nested if, multiple with) remain unfixed as they
would require non-trivial refactoring. Focus was on functional correctness.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-17 23:02:11 +03:00

351 lines
14 KiB
Python

"""Tests for configurable directory exclusions in GitHub scraper.
Tests Issue #203: Make EXCLUDED_DIRS configurable
"""
import unittest
from unittest.mock import patch
from skill_seekers.cli.github_scraper import EXCLUDED_DIRS, GitHubScraper
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsDefaults(unittest.TestCase):
    """Exclusion behaviour when the config carries no exclusion options.

    The class-level ``patch`` swaps the ``Github`` name in the scraper
    module for a mock and hands that mock to every test method.
    """

    def test_defaults_when_no_config(self, _mock_github):
        """A config holding only ``repo`` yields the default exclusion set."""
        scraper = GitHubScraper({"repo": "owner/repo"})

        self.assertEqual(scraper.excluded_dirs, EXCLUDED_DIRS)

    def test_defaults_exclude_common_dirs(self, _mock_github):
        """Well-known tooling directories are rejected, ordinary ones are kept."""
        scraper = GitHubScraper({"repo": "owner/repo"})

        # Directories that must be skipped under the default configuration.
        for skipped in ("venv", "node_modules", "__pycache__", ".git", "build"):
            self.assertTrue(scraper.should_exclude_dir(skipped))

        # Ordinary project directories must remain visible.
        for kept in ("src", "tests", "docs"):
            self.assertFalse(scraper.should_exclude_dir(kept))

    def test_dot_directories_always_excluded(self, _mock_github):
        """Dot-prefixed directory names are rejected by ``should_exclude_dir``."""
        scraper = GitHubScraper({"repo": "owner/repo"})

        # These names are expected to be excluded even if absent from EXCLUDED_DIRS.
        for hidden in (".hidden", ".cache", ".vscode"):
            self.assertTrue(scraper.should_exclude_dir(hidden))
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsAdditional(unittest.TestCase):
    """Extend mode: ``exclude_dirs_additional`` entries are added to the defaults.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_extend_with_additional_dirs(self, _mock_github):
        """Custom names appear alongside the defaults and grow the set by three."""
        scraper = GitHubScraper(
            {
                "repo": "owner/repo",
                "exclude_dirs_additional": ["proprietary", "vendor", "third_party"],
            }
        )

        # Two defaults first, then the three custom entries.
        for expected in ("venv", "node_modules", "proprietary", "vendor", "third_party"):
            self.assertIn(expected, scraper.excluded_dirs)

        # Exactly three names were added on top of the defaults.
        self.assertEqual(len(scraper.excluded_dirs), len(EXCLUDED_DIRS) + 3)

    def test_extend_excludes_additional_dirs(self, _mock_github):
        """``should_exclude_dir`` honours both the custom and the default names."""
        scraper = GitHubScraper(
            {"repo": "owner/repo", "exclude_dirs_additional": ["legacy", "deprecated"]}
        )

        # Custom entries, followed by defaults that must still apply.
        for skipped in ("legacy", "deprecated", "venv", "node_modules"):
            self.assertTrue(scraper.should_exclude_dir(skipped))

        # An ordinary directory stays included.
        self.assertFalse(scraper.should_exclude_dir("src"))

    def test_extend_with_empty_list(self, _mock_github):
        """An empty additional list leaves the exclusion set equal to the defaults."""
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs_additional": []})

        self.assertEqual(scraper.excluded_dirs, EXCLUDED_DIRS)
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsReplace(unittest.TestCase):
    """Replace mode: ``exclude_dirs`` supplies the entire exclusion set.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_replace_with_custom_list(self, _mock_github):
        """The exclusion set holds exactly the two configured names."""
        scraper = GitHubScraper(
            {"repo": "owner/repo", "exclude_dirs": ["node_modules", "custom_vendor"]}
        )

        self.assertEqual(scraper.excluded_dirs, {"node_modules", "custom_vendor"})
        self.assertEqual(len(scraper.excluded_dirs), 2)

    def test_replace_excludes_only_specified_dirs(self, _mock_github):
        """Only listed names are rejected; former defaults are let through."""
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs": ["node_modules", ".git"]})

        # Listed names (".git" would be excluded anyway because of its dot prefix).
        for listed in ("node_modules", ".git"):
            self.assertTrue(scraper.should_exclude_dir(listed))

        # Defaults missing from the custom list, plus one ordinary directory.
        for unlisted in ("venv", "__pycache__", "build", "src"):
            self.assertFalse(scraper.should_exclude_dir(unlisted))

    def test_replace_with_empty_list(self, _mock_github):
        """An empty replace list excludes nothing except dot-prefixed names."""
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs": []})

        # No explicit exclusions remain.
        self.assertEqual(scraper.excluded_dirs, set())

        # Names that are defaults elsewhere are now allowed.
        for allowed in ("venv", "node_modules", "build"):
            self.assertFalse(scraper.should_exclude_dir(allowed))

        # Dot-prefixed names are still rejected (handled by separate logic).
        for hidden in (".git", ".hidden"):
            self.assertTrue(scraper.should_exclude_dir(hidden))
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsPrecedence(unittest.TestCase):
    """Behaviour when both ``exclude_dirs`` and ``exclude_dirs_additional`` are set.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_replace_takes_precedence_over_additional(self, _mock_github):
        """``exclude_dirs`` wins; the additional list and the defaults are dropped."""
        both_options = {
            "repo": "owner/repo",
            "exclude_dirs": ["only", "these"],  # replace mode
            "exclude_dirs_additional": ["ignored"],  # expected to have no effect
        }
        scraper = GitHubScraper(both_options)

        self.assertEqual(scraper.excluded_dirs, {"only", "these"})

        # Neither the additional entry nor a default name may leak in.
        for absent in ("ignored", "venv"):
            self.assertNotIn(absent, scraper.excluded_dirs)
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsEdgeCases(unittest.TestCase):
    """Edge cases: duplicated entries and case sensitivity.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_duplicate_exclusions_in_additional(self, _mock_github):
        """Repeated names collapse, so only one new entry is counted."""
        # "venv" is listed twice here and is also a default name.
        repeated = ["venv", "custom", "venv"]
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs_additional": repeated})

        for expected in ("venv", "custom"):
            self.assertIn(expected, scraper.excluded_dirs)

        # Only "custom" is genuinely new, hence defaults + 1.
        expected_size = len(EXCLUDED_DIRS) + 1
        self.assertEqual(len(scraper.excluded_dirs), expected_size)

    def test_case_sensitive_exclusions(self, _mock_github):
        """Matching is exact: a different letter case does not match."""
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs": ["Venv", "NODE_MODULES"]})

        # Names spelled exactly as configured are rejected.
        for exact in ("Venv", "NODE_MODULES"):
            self.assertTrue(scraper.should_exclude_dir(exact))

        # The lower-case spellings are different names and pass through.
        for other_case in ("venv", "node_modules"):
            self.assertFalse(scraper.should_exclude_dir(other_case))
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsWithLocalRepo(unittest.TestCase):
    """Exclusion options combined with a ``local_repo_path`` config entry.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_exclude_dirs_with_local_repo_path(self, _mock_github):
        """Extend mode still merges custom names when a local path is given."""
        scraper = GitHubScraper(
            {
                "repo": "owner/repo",
                "local_repo_path": "/tmp/test/repo",
                "exclude_dirs_additional": ["proprietary", "internal"],
            }
        )

        # Membership: one default plus both custom entries.
        for expected in ("venv", "proprietary", "internal"):
            self.assertIn(expected, scraper.excluded_dirs)

        # The predicate agrees with the set contents.
        for skipped in ("proprietary", "internal", "venv"):
            self.assertTrue(scraper.should_exclude_dir(skipped))

    def test_replace_mode_with_local_repo_path(self, _mock_github):
        """Replace mode still overrides the defaults when a local path is given."""
        scraper = GitHubScraper(
            {
                "repo": "owner/repo",
                "local_repo_path": "/tmp/test/repo",
                "exclude_dirs": ["only_this"],
            }
        )

        # The single configured name is the whole exclusion set.
        self.assertEqual(scraper.excluded_dirs, {"only_this"})
        self.assertTrue(scraper.should_exclude_dir("only_this"))
        self.assertFalse(scraper.should_exclude_dir("venv"))
@patch("skill_seekers.cli.github_scraper.Github")
@patch("skill_seekers.cli.github_scraper.logger")
class TestExcludedDirsLogging(unittest.TestCase):
    """Log messages emitted while the exclusion configuration is processed.

    Both class-level patches apply to every test method. The decorator
    nearest the class (``logger``) provides the first mock argument and the
    outer one (``Github``) provides the second.
    """

    def test_extend_mode_logs_info(self, mock_logger, _mock_github):
        """Extend mode reports the number of added exclusions via ``logger.info``."""
        _ = GitHubScraper({"repo": "owner/repo", "exclude_dirs_additional": ["custom1", "custom2"]})

        # Render every recorded info() call as text and search for the message.
        rendered = map(str, mock_logger.info.call_args_list)
        self.assertTrue(any("Added 2 custom directory exclusions" in text for text in rendered))

    def test_replace_mode_logs_warning(self, mock_logger, _mock_github):
        """Replace mode announces the override via ``logger.warning``."""
        _ = GitHubScraper({"repo": "owner/repo", "exclude_dirs": ["only", "these"]})

        def mentions_override(text):
            # Both fragments must occur within the same rendered call.
            return "Using custom directory exclusions" in text and "defaults overridden" in text

        rendered = map(str, mock_logger.warning.call_args_list)
        self.assertTrue(any(mentions_override(text) for text in rendered))

    def test_no_config_no_logging(self, mock_logger, _mock_github):
        """Without exclusion options, no exclusion message is logged at either level."""
        _ = GitHubScraper({"repo": "owner/repo"})

        # Inspect info() first, then warning(), for exclusion-related text.
        for log_method in (mock_logger.info, mock_logger.warning):
            related = [
                text
                for text in map(str, log_method.call_args_list)
                if "directory exclusion" in text
            ]
            self.assertEqual(len(related), 0)
@patch("skill_seekers.cli.github_scraper.Github")
class TestExcludedDirsTypeHandling(unittest.TestCase):
    """Non-list containers supplied for the exclusion options.

    The class-level ``patch`` supplies the mocked ``Github`` to each test.
    """

    def test_exclude_dirs_with_tuple(self, _mock_github):
        """A tuple passed as ``exclude_dirs`` produces the equivalent set."""
        as_tuple = ("node_modules", "build")  # tuple rather than list
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs": as_tuple})

        self.assertEqual(scraper.excluded_dirs, {"node_modules", "build"})

    def test_exclude_dirs_additional_with_set(self, _mock_github):
        """A set passed as ``exclude_dirs_additional`` is merged with the defaults."""
        as_set = {"custom1", "custom2"}  # set rather than list
        scraper = GitHubScraper({"repo": "owner/repo", "exclude_dirs_additional": as_set})

        # Both custom names are present, and a default name is still there.
        for expected in ("custom1", "custom2", "venv"):
            self.assertIn(expected, scraper.excluded_dirs)
# Run the unittest runner when this module is executed directly.
if __name__ == "__main__":
    unittest.main()