fix: resolve 15 bugs and gaps in video scraper pipeline

- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -79,7 +79,14 @@ class UnifiedScraper:
|
||||
}
|
||||
|
||||
# Track source index for unique naming (multi-source support)
|
||||
self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "word": 0, "video": 0, "local": 0}
|
||||
self._source_counters = {
|
||||
"documentation": 0,
|
||||
"github": 0,
|
||||
"pdf": 0,
|
||||
"word": 0,
|
||||
"video": 0,
|
||||
"local": 0,
|
||||
}
|
||||
|
||||
# Output paths - cleaner organization
|
||||
self.name = self.config["name"]
|
||||
@@ -583,8 +590,12 @@ class UnifiedScraper:
|
||||
"""Scrape video source (YouTube, local file, etc.)."""
|
||||
try:
|
||||
from skill_seekers.cli.video_scraper import VideoToSkillConverter
|
||||
except ImportError:
|
||||
logger.error("video_scraper.py not found")
|
||||
except ImportError as e:
|
||||
logger.error(
|
||||
f"Video scraper dependencies not installed: {e}\n"
|
||||
" Install with: pip install skill-seekers[video]\n"
|
||||
" For visual extraction (frame analysis, OCR): pip install skill-seekers[video-full]"
|
||||
)
|
||||
return
|
||||
|
||||
# Multi-source support: Get unique index for this video source
|
||||
@@ -630,8 +641,7 @@ class UnifiedScraper:
|
||||
logger.info("✅ Video: Standalone SKILL.md created")
|
||||
|
||||
logger.info(
|
||||
f"✅ Video: {len(result.videos)} videos, "
|
||||
f"{result.total_segments} segments extracted"
|
||||
f"✅ Video: {len(result.videos)} videos, {result.total_segments} segments extracted"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process video source: {e}")
|
||||
|
||||
Reference in New Issue
Block a user