fix: resolve 15 bugs and gaps in video scraper pipeline

- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 12:39:21 +03:00
parent 066e19674a
commit 12bc29ab36
13 changed files with 171 additions and 33 deletions
--- a/src/skill_seekers/mcp/tools/scraping_tools.py
+++ b/src/skill_seekers/mcp/tools/scraping_tools.py
@@ -372,6 +372,12 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
            - description (str, optional): Skill description
            - languages (str, optional): Language preferences (comma-separated)
            - from_json (str, optional): Build from extracted JSON file
+            - visual (bool, optional): Enable visual frame extraction (default: False)
+            - whisper_model (str, optional): Whisper model size (default: base)
+            - visual_interval (float, optional): Seconds between frame captures (default: 5.0)
+            - visual_min_gap (float, optional): Minimum seconds between kept frames (default: 2.0)
+            - visual_similarity (float, optional): Similarity threshold to skip duplicate frames (default: 0.95)
+            - vision_ocr (bool, optional): Use vision model for OCR on frames (default: False)

    Returns:
        List[TextContent]: Tool execution results
@@ -383,6 +389,12 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
    description = args.get("description")
    languages = args.get("languages")
    from_json = args.get("from_json")
+    visual = args.get("visual", False)
+    whisper_model = args.get("whisper_model")
+    visual_interval = args.get("visual_interval")
+    visual_min_gap = args.get("visual_min_gap")
+    visual_similarity = args.get("visual_similarity")
+    vision_ocr = args.get("vision_ocr", False)

    # Build command
    cmd = [sys.executable, str(CLI_DIR / "video_scraper.py")]
@@ -415,6 +427,20 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
            )
        ]

+    # Visual extraction parameters
+    if visual:
+        cmd.append("--visual")
+    if whisper_model:
+        cmd.extend(["--whisper-model", whisper_model])
+    if visual_interval is not None:
+        cmd.extend(["--visual-interval", str(visual_interval)])
+    if visual_min_gap is not None:
+        cmd.extend(["--visual-min-gap", str(visual_min_gap)])
+    if visual_similarity is not None:
+        cmd.extend(["--visual-similarity", str(visual_similarity)])
+    if vision_ocr:
+        cmd.append("--vision-ocr")
+
    # Run video_scraper.py with streaming
    timeout = 600  # 10 minutes for video extraction