# Patch summary
#
# Adds game-engine awareness (Unity, Unreal Engine, Godot) and a "local"
# source type to the skill_seekers CLI.
#
# architectural_pattern_detector.py
#   Inserts "Unity", "Unreal" and "Godot" entries at the top of
#   FRAMEWORK_MARKERS, ahead of the existing web-framework entries, and labels
#   the two groups with comments.
#
# codebase_scraper.py / config_extractor.py / github_scraper.py
#   Appends the same 13 directory names to DEFAULT_EXCLUDED_DIRS, SKIP_DIRS and
#   EXCLUDED_DIRS respectively: Library, Temp, Logs, UserSettings,
#   MemoryCaptures, Recordings (Unity); Intermediate, Saved, DerivedDataCache
#   (Unreal Engine); .godot, .import (Godot); tmp, .tmp (misc).
#   Also adds group-heading comments; in github_scraper.py the former trailing
#   inline comments become standalone heading lines above each group.
#
# config_validator.py
#   Adds "local" to VALID_SOURCE_TYPES, dispatches that type to the new
#   _validate_local_source(), which:
#     * raises ValueError when the 'path' field is missing;
#     * logs a warning (does not raise) when the path does not exist;
#     * raises ValueError when the path exists but is not a directory;
#     * raises ValueError when an optional 'analysis_depth' / 'ai_mode' value
#       is not in self.VALID_DEPTH_LEVELS / self.VALID_AI_MODES.
#
# NOTE(review): VALID_DEPTH_LEVELS and VALID_AI_MODES are not defined anywhere
#   in this patch -- confirm they already exist on ConfigValidator.
# NOTE(review): "Library", "Temp", "Logs" and "Saved" are generic names; how
#   these sets are matched is not visible here -- confirm that excluding them
#   does not hide legitimate directories in non-game projects.
# NOTE(review): ".godot" is both a Godot framework marker and an excluded
#   directory; whether detection can still see it is not visible here.
# NOTE(review): indentation, inline-comment spacing and blank context lines
#   below were restored from a whitespace-collapsed copy (blank lines inferred
#   from the @@ hunk counts) -- verify against the target files before applying.
#
diff --git a/src/skill_seekers/cli/architectural_pattern_detector.py b/src/skill_seekers/cli/architectural_pattern_detector.py
index 4aaa01b..2041d91 100644
--- a/src/skill_seekers/cli/architectural_pattern_detector.py
+++ b/src/skill_seekers/cli/architectural_pattern_detector.py
@@ -88,6 +88,11 @@ class ArchitecturalPatternDetector:
 
     # Framework detection patterns
     FRAMEWORK_MARKERS = {
+        # Game Engines (checked first to avoid false positives)
+        "Unity": ["Assembly-CSharp", "UnityEngine", "Assets", ".unity", "ProjectSettings"],
+        "Unreal": ["Source/", ".uproject", "Config/DefaultEngine.ini", "Binaries/", "Content/"],
+        "Godot": ["project.godot", ".godot", "scenes/", ".tscn", ".gd"],
+        # Web Frameworks
         "Django": ["django", "manage.py", "settings.py", "urls.py"],
         "Flask": ["flask", "app.py", "wsgi.py"],
         "Spring": ["springframework", "@Controller", "@Service", "@Repository"],
diff --git a/src/skill_seekers/cli/codebase_scraper.py b/src/skill_seekers/cli/codebase_scraper.py
index 6779a55..6527098 100644
--- a/src/skill_seekers/cli/codebase_scraper.py
+++ b/src/skill_seekers/cli/codebase_scraper.py
@@ -124,6 +124,7 @@ FOLDER_CATEGORIES = {
 
 # Default directories to exclude
 DEFAULT_EXCLUDED_DIRS = {
+    # Python/Node
     "node_modules",
     "venv",
     "__pycache__",
@@ -141,10 +142,28 @@ DEFAULT_EXCLUDED_DIRS = {
     ".coverage",
     ".eggs",
     "*.egg-info",
+    # IDE
     ".idea",
     ".vscode",
     ".vs",
     "__pypackages__",
+    # Unity (critical - contains massive build cache)
+    "Library",
+    "Temp",
+    "Logs",
+    "UserSettings",
+    "MemoryCaptures",
+    "Recordings",
+    # Unreal Engine
+    "Intermediate",
+    "Saved",
+    "DerivedDataCache",
+    # Godot
+    ".godot",
+    ".import",
+    # Misc
+    "tmp",
+    ".tmp",
 }
 
 
diff --git a/src/skill_seekers/cli/config_extractor.py b/src/skill_seekers/cli/config_extractor.py
index 688dde2..876c2a1 100644
--- a/src/skill_seekers/cli/config_extractor.py
+++ b/src/skill_seekers/cli/config_extractor.py
@@ -222,6 +222,7 @@ class ConfigFileDetector:
 
     # Directories to skip
     SKIP_DIRS = {
+        # Python/Node
         "node_modules",
         "venv",
         "env",
@@ -237,6 +238,23 @@ class ConfigFileDetector:
         "coverage",
         ".eggs",
         "*.egg-info",
+        # Unity (critical - contains massive build cache)
+        "Library",
+        "Temp",
+        "Logs",
+        "UserSettings",
+        "MemoryCaptures",
+        "Recordings",
+        # Unreal Engine
+        "Intermediate",
+        "Saved",
+        "DerivedDataCache",
+        # Godot
+        ".godot",
+        ".import",
+        # Misc
+        "tmp",
+        ".tmp",
     }
 
     def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
diff --git a/src/skill_seekers/cli/config_validator.py b/src/skill_seekers/cli/config_validator.py
index 87e8b2b..b156ad9 100644
--- a/src/skill_seekers/cli/config_validator.py
+++ b/src/skill_seekers/cli/config_validator.py
@@ -25,7 +25,7 @@ class ConfigValidator:
     """
 
     # Valid source types
-    VALID_SOURCE_TYPES = {"documentation", "github", "pdf"}
+    VALID_SOURCE_TYPES = {"documentation", "github", "pdf", "local"}
 
     # Valid merge modes
     VALID_MERGE_MODES = {"rule-based", "claude-enhanced"}
@@ -143,6 +143,8 @@ class ConfigValidator:
             self._validate_github_source(source, index)
         elif source_type == "pdf":
             self._validate_pdf_source(source, index)
+        elif source_type == "local":
+            self._validate_local_source(source, index)
 
     def _validate_documentation_source(self, source: dict[str, Any], index: int):
         """Validate documentation source configuration."""
@@ -209,6 +211,34 @@ class ConfigValidator:
         if not Path(pdf_path).exists():
             logger.warning(f"Source {index} (pdf): File not found: {pdf_path}")
 
+    def _validate_local_source(self, source: dict[str, Any], index: int):
+        """Validate local codebase source configuration."""
+        if "path" not in source:
+            raise ValueError(f"Source {index} (local): Missing required field 'path'")
+
+        # Check if directory exists
+        local_path = source["path"]
+        if not Path(local_path).exists():
+            logger.warning(f"Source {index} (local): Directory not found: {local_path}")
+        elif not Path(local_path).is_dir():
+            raise ValueError(f"Source {index} (local): Path is not a directory: {local_path}")
+
+        # Validate analysis_depth if provided
+        if "analysis_depth" in source:
+            depth = source["analysis_depth"]
+            if depth not in self.VALID_DEPTH_LEVELS:
+                raise ValueError(
+                    f"Source {index} (local): Invalid analysis_depth '{depth}'. Must be one of {self.VALID_DEPTH_LEVELS}"
+                )
+
+        # Validate ai_mode if provided
+        if "ai_mode" in source:
+            ai_mode = source["ai_mode"]
+            if ai_mode not in self.VALID_AI_MODES:
+                raise ValueError(
+                    f"Source {index} (local): Invalid ai_mode '{ai_mode}'. Must be one of {self.VALID_AI_MODES}"
+                )
+
     def _validate_legacy(self) -> bool:
         """
         Validate legacy config format (backward compatibility).
diff --git a/src/skill_seekers/cli/github_scraper.py b/src/skill_seekers/cli/github_scraper.py
index aed0ec9..fa9d5ab 100644
--- a/src/skill_seekers/cli/github_scraper.py
+++ b/src/skill_seekers/cli/github_scraper.py
@@ -53,25 +53,49 @@ except ImportError:
 
 # Directories to exclude from local repository analysis
 EXCLUDED_DIRS = {
+    # Virtual environments
     "venv",
     "env",
     ".venv",
-    ".env",  # Virtual environments
+    ".env",
+    # Dependencies and caches
     "node_modules",
     "__pycache__",
-    ".pytest_cache",  # Dependencies and caches
+    ".pytest_cache",
+    # Version control
     ".git",
     ".svn",
-    ".hg",  # Version control
+    ".hg",
+    # Build artifacts
     "build",
     "dist",
-    "*.egg-info",  # Build artifacts
+    "*.egg-info",
+    # Coverage reports
     "htmlcov",
-    ".coverage",  # Coverage reports
+    ".coverage",
+    # Testing environments
     ".tox",
-    ".nox",  # Testing environments
+    ".nox",
+    # Linter caches
     ".mypy_cache",
-    ".ruff_cache",  # Linter caches
+    ".ruff_cache",
+    # Unity (critical - contains massive build cache)
+    "Library",
+    "Temp",
+    "Logs",
+    "UserSettings",
+    "MemoryCaptures",
+    "Recordings",
+    # Unreal Engine
+    "Intermediate",
+    "Saved",
+    "DerivedDataCache",
+    # Godot
+    ".godot",
+    ".import",
+    # Misc
+    "tmp",
+    ".tmp",
 }
 
 