"""Tests for configurable directory exclusions in GitHub scraper. Tests Issue #203: Make EXCLUDED_DIRS configurable """ import unittest from unittest.mock import patch from skill_seekers.cli.github_scraper import EXCLUDED_DIRS, GitHubScraper class TestExcludedDirsDefaults(unittest.TestCase): """Test default EXCLUDED_DIRS behavior (backward compatibility).""" @patch("skill_seekers.cli.github_scraper.Github") def test_defaults_when_no_config(self, _mock_github): """Test that default exclusions are used when no config provided.""" config = {"repo": "owner/repo"} scraper = GitHubScraper(config) # Should use default EXCLUDED_DIRS self.assertEqual(scraper.excluded_dirs, EXCLUDED_DIRS) @patch("skill_seekers.cli.github_scraper.Github") def test_defaults_exclude_common_dirs(self, _mock_github): """Test that default exclusions work correctly.""" config = {"repo": "owner/repo"} scraper = GitHubScraper(config) # Test common directories are excluded self.assertTrue(scraper.should_exclude_dir("venv")) self.assertTrue(scraper.should_exclude_dir("node_modules")) self.assertTrue(scraper.should_exclude_dir("__pycache__")) self.assertTrue(scraper.should_exclude_dir(".git")) self.assertTrue(scraper.should_exclude_dir("build")) # Test normal directories are not excluded self.assertFalse(scraper.should_exclude_dir("src")) self.assertFalse(scraper.should_exclude_dir("tests")) self.assertFalse(scraper.should_exclude_dir("docs")) @patch("skill_seekers.cli.github_scraper.Github") def test_dot_directories_always_excluded(self, _mock_github): """Test that directories starting with '.' are always excluded.""" config = {"repo": "owner/repo"} scraper = GitHubScraper(config) # Dot directories should be excluded (even if not in EXCLUDED_DIRS) self.assertTrue(scraper.should_exclude_dir(".hidden")) self.assertTrue(scraper.should_exclude_dir(".cache")) self.assertTrue(scraper.should_exclude_dir(".vscode")) class TestExcludedDirsAdditional(unittest.TestCase): """Test exclude_dirs_additional (extend mode).""" @patch("skill_seekers.cli.github_scraper.Github") def test_extend_with_additional_dirs(self, _mock_github): """Test adding custom exclusions to defaults.""" config = { "repo": "owner/repo", "exclude_dirs_additional": ["proprietary", "vendor", "third_party"], } scraper = GitHubScraper(config) # Should include both defaults and additional self.assertIn("venv", scraper.excluded_dirs) # Default self.assertIn("node_modules", scraper.excluded_dirs) # Default self.assertIn("proprietary", scraper.excluded_dirs) # Additional self.assertIn("vendor", scraper.excluded_dirs) # Additional self.assertIn("third_party", scraper.excluded_dirs) # Additional # Verify total count self.assertEqual(len(scraper.excluded_dirs), len(EXCLUDED_DIRS) + 3) @patch("skill_seekers.cli.github_scraper.Github") def test_extend_excludes_additional_dirs(self, _mock_github): """Test that additional directories are actually excluded.""" config = {"repo": "owner/repo", "exclude_dirs_additional": ["legacy", "deprecated"]} scraper = GitHubScraper(config) # Additional dirs should be excluded self.assertTrue(scraper.should_exclude_dir("legacy")) self.assertTrue(scraper.should_exclude_dir("deprecated")) # Default dirs still excluded self.assertTrue(scraper.should_exclude_dir("venv")) self.assertTrue(scraper.should_exclude_dir("node_modules")) # Normal dirs not excluded self.assertFalse(scraper.should_exclude_dir("src")) @patch("skill_seekers.cli.github_scraper.Github") def test_extend_with_empty_list(self, _mock_github): """Test that empty additional list works correctly.""" config = {"repo": "owner/repo", "exclude_dirs_additional": []} scraper = GitHubScraper(config) # Should just have defaults self.assertEqual(scraper.excluded_dirs, EXCLUDED_DIRS) class TestExcludedDirsReplace(unittest.TestCase): """Test exclude_dirs (replace mode).""" @patch("skill_seekers.cli.github_scraper.Github") def test_replace_with_custom_list(self, _mock_github): """Test replacing default exclusions entirely.""" config = {"repo": "owner/repo", "exclude_dirs": ["node_modules", "custom_vendor"]} scraper = GitHubScraper(config) # Should ONLY have specified dirs self.assertEqual(scraper.excluded_dirs, {"node_modules", "custom_vendor"}) self.assertEqual(len(scraper.excluded_dirs), 2) @patch("skill_seekers.cli.github_scraper.Github") def test_replace_excludes_only_specified_dirs(self, _mock_github): """Test that only specified directories are excluded in replace mode.""" config = {"repo": "owner/repo", "exclude_dirs": ["node_modules", ".git"]} scraper = GitHubScraper(config) # Specified dirs should be excluded self.assertTrue(scraper.should_exclude_dir("node_modules")) # Note: .git would be excluded anyway due to dot prefix self.assertTrue(scraper.should_exclude_dir(".git")) # Default dirs NOT in our list should NOT be excluded self.assertFalse(scraper.should_exclude_dir("venv")) self.assertFalse(scraper.should_exclude_dir("__pycache__")) self.assertFalse(scraper.should_exclude_dir("build")) # Normal dirs still not excluded self.assertFalse(scraper.should_exclude_dir("src")) @patch("skill_seekers.cli.github_scraper.Github") def test_replace_with_empty_list(self, _mock_github): """Test that empty replace list allows all directories (except dot-prefixed).""" config = {"repo": "owner/repo", "exclude_dirs": []} scraper = GitHubScraper(config) # No explicit exclusions self.assertEqual(scraper.excluded_dirs, set()) # Nothing explicitly excluded self.assertFalse(scraper.should_exclude_dir("venv")) self.assertFalse(scraper.should_exclude_dir("node_modules")) self.assertFalse(scraper.should_exclude_dir("build")) # But dot dirs still excluded (different logic) self.assertTrue(scraper.should_exclude_dir(".git")) self.assertTrue(scraper.should_exclude_dir(".hidden")) class TestExcludedDirsPrecedence(unittest.TestCase): """Test precedence when both options provided.""" @patch("skill_seekers.cli.github_scraper.Github") def test_replace_takes_precedence_over_additional(self, _mock_github): """Test that exclude_dirs takes precedence over exclude_dirs_additional.""" config = { "repo": "owner/repo", "exclude_dirs": ["only", "these"], # Replace mode "exclude_dirs_additional": ["ignored"], # Should be ignored } scraper = GitHubScraper(config) # Should use replace mode (exclude_dirs), ignore additional self.assertEqual(scraper.excluded_dirs, {"only", "these"}) self.assertNotIn("ignored", scraper.excluded_dirs) self.assertNotIn("venv", scraper.excluded_dirs) # Defaults also ignored class TestExcludedDirsEdgeCases(unittest.TestCase): """Test edge cases and error handling.""" @patch("skill_seekers.cli.github_scraper.Github") def test_duplicate_exclusions_in_additional(self, _mock_github): """Test that duplicates in additional list are handled (set deduplication).""" config = { "repo": "owner/repo", "exclude_dirs_additional": [ "venv", "custom", "venv", ], # venv is duplicate (default + listed) } scraper = GitHubScraper(config) # Should deduplicate automatically (using set) self.assertIn("venv", scraper.excluded_dirs) self.assertIn("custom", scraper.excluded_dirs) # Count should account for deduplication self.assertEqual( len(scraper.excluded_dirs), len(EXCLUDED_DIRS) + 1, # Only 'custom' is truly additional ) @patch("skill_seekers.cli.github_scraper.Github") def test_case_sensitive_exclusions(self, _mock_github): """Test that exclusions are case-sensitive.""" config = {"repo": "owner/repo", "exclude_dirs": ["Venv", "NODE_MODULES"]} scraper = GitHubScraper(config) # Case-sensitive matching self.assertTrue(scraper.should_exclude_dir("Venv")) self.assertTrue(scraper.should_exclude_dir("NODE_MODULES")) self.assertFalse(scraper.should_exclude_dir("venv")) # Different case self.assertFalse(scraper.should_exclude_dir("node_modules")) # Different case class TestExcludedDirsWithLocalRepo(unittest.TestCase): """Test exclude_dirs integration with local_repo_path.""" @patch("skill_seekers.cli.github_scraper.Github") def test_exclude_dirs_with_local_repo_path(self, _mock_github): """Test that exclude_dirs works when local_repo_path is provided.""" config = { "repo": "owner/repo", "local_repo_path": "/tmp/test/repo", "exclude_dirs_additional": ["proprietary", "internal"], } scraper = GitHubScraper(config) # Should have both defaults and additional self.assertIn("venv", scraper.excluded_dirs) self.assertIn("proprietary", scraper.excluded_dirs) self.assertIn("internal", scraper.excluded_dirs) # Test exclusion works self.assertTrue(scraper.should_exclude_dir("proprietary")) self.assertTrue(scraper.should_exclude_dir("internal")) self.assertTrue(scraper.should_exclude_dir("venv")) @patch("skill_seekers.cli.github_scraper.Github") def test_replace_mode_with_local_repo_path(self, _mock_github): """Test that replace mode works with local_repo_path.""" config = { "repo": "owner/repo", "local_repo_path": "/tmp/test/repo", "exclude_dirs": ["only_this"], } scraper = GitHubScraper(config) # Should ONLY have specified dir self.assertEqual(scraper.excluded_dirs, {"only_this"}) self.assertTrue(scraper.should_exclude_dir("only_this")) self.assertFalse(scraper.should_exclude_dir("venv")) class TestExcludedDirsLogging(unittest.TestCase): """Test logging output for exclude_dirs configuration.""" @patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.logger") def test_extend_mode_logs_info(self, mock_logger, _mock_github): """Test that extend mode logs INFO level message.""" config = {"repo": "owner/repo", "exclude_dirs_additional": ["custom1", "custom2"]} _scraper = GitHubScraper(config) # Should have logged INFO message # Check that info was called with a message about adding custom exclusions info_calls = [str(call) for call in mock_logger.info.call_args_list] self.assertTrue(any("Added 2 custom directory exclusions" in call for call in info_calls)) @patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.logger") def test_replace_mode_logs_warning(self, mock_logger, _mock_github): """Test that replace mode logs WARNING level message.""" config = {"repo": "owner/repo", "exclude_dirs": ["only", "these"]} _scraper = GitHubScraper(config) # Should have logged WARNING message warning_calls = [str(call) for call in mock_logger.warning.call_args_list] self.assertTrue( any( "Using custom directory exclusions" in call and "defaults overridden" in call for call in warning_calls ) ) @patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.logger") def test_no_config_no_logging(self, mock_logger, _mock_github): """Test that default mode doesn't log exclude_dirs messages.""" config = {"repo": "owner/repo"} _scraper = GitHubScraper(config) # Should NOT have logged any exclude_dirs messages info_calls = [str(call) for call in mock_logger.info.call_args_list] warning_calls = [str(call) for call in mock_logger.warning.call_args_list] # Filter for exclude_dirs related messages exclude_info = [c for c in info_calls if "directory exclusion" in c] exclude_warnings = [c for c in warning_calls if "directory exclusion" in c] self.assertEqual(len(exclude_info), 0) self.assertEqual(len(exclude_warnings), 0) class TestExcludedDirsTypeHandling(unittest.TestCase): """Test type handling for exclude_dirs configuration.""" @patch("skill_seekers.cli.github_scraper.Github") def test_exclude_dirs_with_tuple(self, _mock_github): """Test that tuples are converted to sets correctly.""" config = { "repo": "owner/repo", "exclude_dirs": ("node_modules", "build"), # Tuple instead of list } scraper = GitHubScraper(config) # Should work with tuples (set() accepts tuples) self.assertEqual(scraper.excluded_dirs, {"node_modules", "build"}) @patch("skill_seekers.cli.github_scraper.Github") def test_exclude_dirs_additional_with_set(self, _mock_github): """Test that sets work correctly for exclude_dirs_additional.""" config = { "repo": "owner/repo", "exclude_dirs_additional": {"custom1", "custom2"}, # Set instead of list } scraper = GitHubScraper(config) # Should work with sets self.assertIn("custom1", scraper.excluded_dirs) self.assertIn("custom2", scraper.excluded_dirs) self.assertIn("venv", scraper.excluded_dirs) # Defaults still there if __name__ == "__main__": unittest.main()