feat: add video tutorial scraping pipeline with per-panel OCR and AI enhancement

Add complete video tutorial extraction system that converts YouTube videos and local video files into AI-consumable skills. The pipeline extracts transcripts, performs visual OCR on code editor panels independently, tracks code evolution across frames, and generates structured SKILL.md output. Key features: - Video metadata extraction (YouTube, local files, playlists) - Multi-source transcript extraction (YouTube API, yt-dlp, Whisper fallback) - Chapter-based and time-window segmentation - Visual extraction: keyframe detection, frame classification, panel detection - Per-panel sub-section OCR (each IDE panel OCR'd independently) - Parallel OCR with ThreadPoolExecutor for multi-panel frames - Narrow panel filtering (300px min width) to skip UI chrome - Text block tracking with spatial panel position matching - Code timeline with edit tracking across frames - Audio-visual alignment (code + narrator pairs) - Video-specific AI enhancement prompt for OCR denoising and code reconstruction - video-tutorial.yaml workflow with 4 stages (OCR cleanup, language detection, tutorial synthesis, skill polish) - CLI integration: skill-seekers video --url/--video-file/--playlist - MCP tool: scrape_video for automation - 161 tests passing Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 23:10:19 +03:00
parent 3bad7cf365
commit 62071c4aa9
32 changed files with 15090 additions and 9 deletions
--- a/src/skill_seekers/cli/create_command.py
+++ b/src/skill_seekers/cli/create_command.py
@@ -133,6 +133,8 @@ class CreateCommand:
            return self._route_pdf()
        elif self.source_info.type == "word":
            return self._route_word()
+        elif self.source_info.type == "video":
+            return self._route_video()
        elif self.source_info.type == "config":
            return self._route_config()
        else:
@@ -345,6 +347,55 @@ class CreateCommand:
        finally:
            sys.argv = original_argv

+    def _route_video(self) -> int:
+        """Route to video scraper (video_scraper.py)."""
+        from skill_seekers.cli import video_scraper
+
+        # Reconstruct argv for video_scraper
+        argv = ["video_scraper"]
+
+        # Add video source (URL or file)
+        parsed = self.source_info.parsed
+        if parsed.get("source_kind") == "file":
+            argv.extend(["--video-file", parsed["file_path"]])
+        elif parsed.get("url"):
+            url = parsed["url"]
+            # Detect playlist vs single video
+            if "playlist" in url.lower():
+                argv.extend(["--playlist", url])
+            else:
+                argv.extend(["--url", url])
+
+        # Add universal arguments
+        self._add_common_args(argv)
+
+        # Add video-specific arguments
+        video_langs = getattr(self.args, "video_languages", None) or getattr(self.args, "languages", None)
+        if video_langs:
+            argv.extend(["--languages", video_langs])
+        if getattr(self.args, "visual", False):
+            argv.append("--visual")
+        if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base":
+            argv.extend(["--whisper-model", self.args.whisper_model])
+        vi = getattr(self.args, "visual_interval", None)
+        if vi is not None and vi != 0.7:
+            argv.extend(["--visual-interval", str(vi)])
+        vmg = getattr(self.args, "visual_min_gap", None)
+        if vmg is not None and vmg != 0.5:
+            argv.extend(["--visual-min-gap", str(vmg)])
+        vs = getattr(self.args, "visual_similarity", None)
+        if vs is not None and vs != 3.0:
+            argv.extend(["--visual-similarity", str(vs)])
+
+        # Call video_scraper with modified argv
+        logger.debug(f"Calling video_scraper with argv: {argv}")
+        original_argv = sys.argv
+        try:
+            sys.argv = argv
+            return video_scraper.main()
+        finally:
+            sys.argv = original_argv
+
    def _route_config(self) -> int:
        """Route to unified scraper for config files (unified_scraper.py)."""
        from skill_seekers.cli import unified_scraper
@@ -468,6 +519,8 @@ Examples:
  Local:    skill-seekers create ./my-project -p comprehensive
  PDF:      skill-seekers create tutorial.pdf --ocr
  DOCX:     skill-seekers create document.docx
+  Video:    skill-seekers create https://youtube.com/watch?v=...
+  Video:    skill-seekers create recording.mp4
  Config:   skill-seekers create configs/react.json

 Source Auto-Detection:
@@ -476,6 +529,8 @@ Source Auto-Detection:
  • ./path → local codebase
  • file.pdf → PDF extraction
  • file.docx → Word document extraction
+  • youtube.com/... → Video transcript extraction
+  • file.mp4 → Video file extraction
  • file.json → multi-source config

 Progressive Help (13 → 120+ flags):
@@ -483,6 +538,7 @@ Progressive Help (13 → 120+ flags):
  --help-github    GitHub repository options
  --help-local     Local codebase analysis
  --help-pdf       PDF extraction options
+  --help-video     Video extraction options
  --help-advanced  Rare/advanced options
  --help-all       All options + compatibility

@@ -513,6 +569,9 @@ Common Workflows:
    parser.add_argument(
        "--help-word", action="store_true", help=argparse.SUPPRESS, dest="_help_word"
    )
+    parser.add_argument(
+        "--help-video", action="store_true", help=argparse.SUPPRESS, dest="_help_video"
+    )
    parser.add_argument(
        "--help-config", action="store_true", help=argparse.SUPPRESS, dest="_help_config"
    )
@@ -571,6 +630,15 @@ Common Workflows:
        add_create_arguments(parser_word, mode="word")
        parser_word.print_help()
        return 0
+    elif args._help_video:
+        parser_video = argparse.ArgumentParser(
+            prog="skill-seekers create",
+            description="Create skill from video (YouTube, Vimeo, local files)",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+        )
+        add_create_arguments(parser_video, mode="video")
+        parser_video.print_help()
+        return 0
    elif args._help_config:
        parser_config = argparse.ArgumentParser(
            prog="skill-seekers create",