feat: add video tutorial scraping pipeline with per-panel OCR and AI enhancement

Add a complete video tutorial extraction system that converts YouTube videos and local video files into AI-consumable skills. The pipeline extracts transcripts, performs visual OCR on each code editor panel independently, tracks code evolution across frames, and generates structured SKILL.md output.

Key features:
- Video metadata extraction (YouTube, local files, playlists)
- Multi-source transcript extraction (YouTube API, yt-dlp, Whisper fallback)
- Chapter-based and time-window segmentation
- Visual extraction: keyframe detection, frame classification, panel detection
- Per-panel sub-section OCR (each IDE panel OCR'd independently)
- Parallel OCR with ThreadPoolExecutor for multi-panel frames
- Narrow panel filtering (300px min width) to skip UI chrome
- Text block tracking with spatial panel position matching
- Code timeline with edit tracking across frames
- Audio-visual alignment (code + narrator pairs)
- Video-specific AI enhancement prompt for OCR denoising and code reconstruction
- video-tutorial.yaml workflow with 4 stages (OCR cleanup, language detection, tutorial synthesis, skill polish)
- CLI integration: skill-seekers video --url/--video-file/--playlist
- MCP tool: scrape_video for automation
- 161 tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 23:10:19 +03:00
parent 3bad7cf365
commit 62071c4aa9
32 changed files with 15090 additions and 9 deletions
--- a/src/skill_seekers/mcp/server_fastmcp.py
+++ b/src/skill_seekers/mcp/server_fastmcp.py
@@ -98,6 +98,7 @@ try:
        scrape_docs_impl,
        scrape_github_impl,
        scrape_pdf_impl,
+        scrape_video_impl,
        # Splitting tools
        split_config_impl,
        submit_config_impl,
@@ -420,6 +421,55 @@ async def scrape_pdf(
    return str(result)


+@safe_tool_decorator(
+    description="Extract transcripts and metadata from videos (YouTube, Vimeo, local files) and build Claude skill."
+)
+async def scrape_video(
+    url: str | None = None,
+    video_file: str | None = None,
+    playlist: str | None = None,
+    name: str | None = None,
+    description: str | None = None,
+    languages: str | None = None,
+    from_json: str | None = None,
+) -> str:
+    """
+    Forward a video scraping request to ``scrape_video_impl``.
+
+    Only truthy arguments are passed on; unset options are left out.
+
+    Args:
+        url: Video URL (YouTube, Vimeo)
+        video_file: Path to a local video file
+        playlist: Playlist URL
+        name: Name for the generated skill
+        description: Description for the generated skill
+        languages: Comma-separated transcript language preferences
+        from_json: Extracted JSON file to build from
+
+    Returns:
+        The ``text`` of the first result item when the result is a non-empty
+        list (``str()`` of that item if it has no ``text``), else ``str(result)``.
+    """
+    candidates = {
+        "url": url,
+        "video_file": video_file,
+        "playlist": playlist,
+        "name": name,
+        "description": description,
+        "languages": languages,
+        "from_json": from_json,
+    }
+    # Keep the declaration order while dropping every falsy (unset) option.
+    args = {key: value for key, value in candidates.items() if value}
+
+    outcome = await scrape_video_impl(args)
+    if not (isinstance(outcome, list) and outcome):
+        return str(outcome)
+    first = outcome[0]
+    return first.text if hasattr(first, "text") else str(first)
+
+
@safe_tool_decorator(
    description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation and dependency graphs."
 )
--- a/src/skill_seekers/mcp/tools/__init__.py
+++ b/src/skill_seekers/mcp/tools/__init__.py
@@ -63,6 +63,9 @@ from .scraping_tools import (
 from .scraping_tools import (
    scrape_pdf_tool as scrape_pdf_impl,
 )
+from .scraping_tools import (
+    scrape_video_tool as scrape_video_impl,
+)
 from .source_tools import (
    add_config_source_tool as add_config_source_impl,
 )
@@ -123,6 +126,7 @@ __all__ = [
    "scrape_docs_impl",
    "scrape_github_impl",
    "scrape_pdf_impl",
+    "scrape_video_impl",
    "scrape_codebase_impl",
    "detect_patterns_impl",
    "extract_test_examples_impl",
--- a/src/skill_seekers/mcp/tools/scraping_tools.py
+++ b/src/skill_seekers/mcp/tools/scraping_tools.py
@@ -356,6 +356,81 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
        return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]


+async def scrape_video_tool(args: dict) -> list[TextContent]:
+    """
+    Scrape video content (YouTube, local files) and build Claude skill.
+
+    Builds a ``video_scraper.py`` command line from ``args`` and runs it
+    through ``run_subprocess_with_streaming``.
+
+    Exactly one source is used, chosen in this priority order: ``from_json``,
+    ``url``, ``video_file``, ``playlist``. ``name``, ``description`` and
+    ``languages`` are forwarded for every scraping source (URL, local file,
+    playlist) so none of them is silently dropped; ``from_json`` is passed
+    on its own.
+
+    Args:
+        args: Dictionary containing:
+            - url (str, optional): Video URL (YouTube, Vimeo)
+            - video_file (str, optional): Local video file path
+            - playlist (str, optional): Playlist URL
+            - name (str, optional): Skill name
+            - description (str, optional): Skill description
+            - languages (str, optional): Language preferences (comma-separated)
+            - from_json (str, optional): Build from extracted JSON file
+
+    Returns:
+        List[TextContent]: A single text item holding the progress banner and
+        the subprocess stdout; on a non-zero exit code the stderr is appended
+        under an error marker. If no source is given, an error message is
+        returned without running anything.
+    """
+    # Source selectors in priority order, paired with their CLI flag.
+    sources = (
+        ("from_json", "--from-json"),
+        ("url", "--url"),
+        ("video_file", "--video-file"),
+        ("playlist", "--playlist"),
+    )
+    # Options shared by every scraping source.
+    shared_options = (
+        ("name", "--name"),
+        ("description", "--description"),
+        ("languages", "--languages"),
+    )
+
+    selected = next(((key, flag) for key, flag in sources if args.get(key)), None)
+    if selected is None:
+        return [
+            TextContent(
+                type="text",
+                text="❌ Error: Must specify --url, --video-file, --playlist, or --from-json",
+            )
+        ]
+
+    source_key, source_flag = selected
+    cmd = [sys.executable, str(CLI_DIR / "video_scraper.py"), source_flag, args[source_key]]
+    # --from-json is passed on its own, exactly as before.
+    if source_key != "from_json":
+        for key, flag in shared_options:
+            value = args.get(key)
+            if value:
+                cmd.extend([flag, value])
+
+    # Run video_scraper.py with streaming
+    timeout = 600  # 10 minutes for video extraction
+
+    progress_msg = "🎬 Scraping video content...\n"
+    progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
+
+    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
+
+    output = progress_msg + stdout
+
+    if returncode == 0:
+        return [TextContent(type="text", text=output)]
+    return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
+
 async def scrape_github_tool(args: dict) -> list[TextContent]:
    """
    Scrape GitHub repository and build Claude skill.