feat: add video tutorial scraping pipeline with per-panel OCR and AI enhancement

Add a complete video tutorial extraction system that converts YouTube videos and local video files into AI-consumable skills. The pipeline extracts transcripts, performs visual OCR on each code editor panel independently, tracks code evolution across frames, and generates structured SKILL.md output.

Key features:
- Video metadata extraction (YouTube, local files, playlists)
- Multi-source transcript extraction (YouTube API, yt-dlp, Whisper fallback)
- Chapter-based and time-window segmentation
- Visual extraction: keyframe detection, frame classification, panel detection
- Per-panel sub-section OCR (each IDE panel OCR'd independently)
- Parallel OCR with ThreadPoolExecutor for multi-panel frames
- Narrow panel filtering (300px min width) to skip UI chrome
- Text block tracking with spatial panel position matching
- Code timeline with edit tracking across frames
- Audio-visual alignment (code + narrator pairs)
- Video-specific AI enhancement prompt for OCR denoising and code reconstruction
- video-tutorial.yaml workflow with 4 stages (OCR cleanup, language detection, tutorial synthesis, skill polish)
- CLI integration: skill-seekers video --url/--video-file/--playlist
- MCP tool: scrape_video for automation
- 161 tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 23:10:19 +03:00
parent 3bad7cf365
commit 62071c4aa9
32 changed files with 15090 additions and 9 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -115,6 +115,22 @@ docx = [
    "python-docx>=1.1.0",
 ]

+# Video processing, lightweight: YouTube transcripts + metadata only (Whisper and visual extraction live in video-full)
+video = [
+    "yt-dlp>=2024.12.0",
+    "youtube-transcript-api>=1.2.0",
+]
+
+# Video processing, full: the lightweight video deps + Whisper transcription + visual extraction (scene detection, OCR)
+video-full = [
+    "yt-dlp>=2024.12.0",
+    "youtube-transcript-api>=1.2.0",
+    "faster-whisper>=1.0.0",
+    "scenedetect[opencv-headless]>=0.6.4",  # headless extra: the plain "opencv" extra pulls opencv-python, which clashes with opencv-python-headless below (both ship cv2)
+    "easyocr>=1.7.0",
+    "opencv-python-headless>=4.9.0",
+]
+
 # RAG vector database upload support
 chroma = [
    "chromadb>=0.4.0",
@@ -154,6 +170,8 @@ embedding = [
 all = [
    "mammoth>=1.6.0",
    "python-docx>=1.1.0",
+    "yt-dlp>=2024.12.0",
+    "youtube-transcript-api>=1.2.0",
    "mcp>=1.25,<2",
    "httpx>=0.28.1",
    "httpx-sse>=0.4.3",
@@ -195,6 +213,7 @@ skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
 skill-seekers-github = "skill_seekers.cli.github_scraper:main"
 skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
 skill-seekers-word = "skill_seekers.cli.word_scraper:main"
+skill-seekers-video = "skill_seekers.cli.video_scraper:main"
 skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
 skill-seekers-enhance = "skill_seekers.cli.enhance_command:main"
 skill-seekers-enhance-status = "skill_seekers.cli.enhance_status:main"