fix: Fix local repo extraction limitations (code analyzer, exclusions, enhancement)
This commit fixes three critical limitations discovered during local repository skill extraction testing:

**Fix 1: Code Analyzer Import Issue**
- Changed unified_scraper.py to use absolute package imports instead of bare sibling-module imports (previously described as "relative" imports)
- Fixed: `from github_scraper import` → `from skill_seekers.cli.github_scraper import`
- Fixed: `from pdf_scraper import` → `from skill_seekers.cli.pdf_scraper import`
- Result: CodeAnalyzer is now available during extraction, so deep analysis works

**Fix 2: Unity Library Exclusions**
- Updated should_exclude_dir() to accept and check full directory paths
- Updated _extract_file_tree_local() to pass both the directory name and the full path
- Added passing of the exclusion config from unified_scraper to github_scraper
- Result: exclude_dirs_additional now works (297 files excluded in test)

**Fix 3: AI Enhancement for Single Sources**
- Changed read_reference_files() to use rglob() for recursive search
- Now finds reference files in subdirectories (e.g., references/github/README.md)
- Result: AI enhancement works with unified skills that have nested references

**Test Results:**
- Code Analyzer: ✅ Working (deep analysis running)
- Unity Exclusions: ✅ Working (297 files excluded from 679)
- AI Enhancement: ✅ Working (finds and reads nested references)

**Files Changed:**
- src/skill_seekers/cli/unified_scraper.py (Fix 1 & 2)
- src/skill_seekers/cli/github_scraper.py (Fix 2)
- src/skill_seekers/cli/utils.py (Fix 3)

**Test Artifacts:**
- configs/deck_deck_go_local.json (test configuration)
- docs/LOCAL_REPO_TEST_RESULTS.md (comprehensive test report)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -23,10 +23,10 @@ from typing import Dict, List, Any, Optional
|
||||
|
||||
# Import validators and scrapers
|
||||
try:
|
||||
from config_validator import ConfigValidator, validate_config
|
||||
from conflict_detector import ConflictDetector
|
||||
from merge_sources import RuleBasedMerger, ClaudeEnhancedMerger
|
||||
from unified_skill_builder import UnifiedSkillBuilder
|
||||
from skill_seekers.cli.config_validator import ConfigValidator, validate_config
|
||||
from skill_seekers.cli.conflict_detector import ConflictDetector
|
||||
from skill_seekers.cli.merge_sources import RuleBasedMerger, ClaudeEnhancedMerger
|
||||
from skill_seekers.cli.unified_skill_builder import UnifiedSkillBuilder
|
||||
except ImportError as e:
|
||||
print(f"Error importing modules: {e}")
|
||||
print("Make sure you're running from the project root directory")
|
||||
@@ -168,10 +168,8 @@ class UnifiedScraper:
|
||||
|
||||
def _scrape_github(self, source: Dict[str, Any]):
|
||||
"""Scrape GitHub repository."""
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
try:
|
||||
from github_scraper import GitHubScraper
|
||||
from skill_seekers.cli.github_scraper import GitHubScraper
|
||||
except ImportError:
|
||||
logger.error("github_scraper.py not found")
|
||||
return
|
||||
@@ -191,6 +189,12 @@ class UnifiedScraper:
|
||||
'local_repo_path': source.get('local_repo_path') # Pass local_repo_path from config
|
||||
}
|
||||
|
||||
# Pass directory exclusions if specified (optional)
|
||||
if 'exclude_dirs' in source:
|
||||
github_config['exclude_dirs'] = source['exclude_dirs']
|
||||
if 'exclude_dirs_additional' in source:
|
||||
github_config['exclude_dirs_additional'] = source['exclude_dirs_additional']
|
||||
|
||||
# Scrape
|
||||
logger.info(f"Scraping GitHub repository: {source['repo']}")
|
||||
scraper = GitHubScraper(github_config)
|
||||
@@ -210,10 +214,8 @@ class UnifiedScraper:
|
||||
|
||||
def _scrape_pdf(self, source: Dict[str, Any]):
|
||||
"""Scrape PDF document."""
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
try:
|
||||
from pdf_scraper import PDFToSkillConverter
|
||||
from skill_seekers.cli.pdf_scraper import PDFToSkillConverter
|
||||
except ImportError:
|
||||
logger.error("pdf_scraper.py not found")
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user