feat: add video tutorial scraping pipeline with per-panel OCR and AI enhancement

Add complete video tutorial extraction system that converts YouTube videos
and local video files into AI-consumable skills. The pipeline extracts
transcripts, performs visual OCR on code editor panels independently,
tracks code evolution across frames, and generates structured SKILL.md output.

Key features:
- Video metadata extraction (YouTube, local files, playlists)
- Multi-source transcript extraction (YouTube API, yt-dlp, Whisper fallback)
- Chapter-based and time-window segmentation
- Visual extraction: keyframe detection, frame classification, panel detection
- Per-panel sub-section OCR (each IDE panel OCR'd independently)
- Parallel OCR with ThreadPoolExecutor for multi-panel frames
- Narrow panel filtering (300px min width) to skip UI chrome
- Text block tracking with spatial panel position matching
- Code timeline with edit tracking across frames
- Audio-visual alignment (code + narrator pairs)
- Video-specific AI enhancement prompt for OCR denoising and code reconstruction
- video-tutorial.yaml workflow with 4 stages (OCR cleanup, language detection,
  tutorial synthesis, skill polish)
- CLI integration: skill-seekers video --url/--video-file/--playlist
- MCP tool: scrape_video for automation
- 161 tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Author: YusufKaraaslanSpyke
Date: 2026-02-27 23:10:19 +03:00
parent 3bad7cf365
commit 62071c4aa9
32 changed files with 15090 additions and 9 deletions

View File

@@ -63,6 +63,9 @@ from .scraping_tools import (
from .scraping_tools import (
scrape_pdf_tool as scrape_pdf_impl,
)
from .scraping_tools import (
scrape_video_tool as scrape_video_impl,
)
from .source_tools import (
add_config_source_tool as add_config_source_impl,
)
@@ -123,6 +126,7 @@ __all__ = [
"scrape_docs_impl",
"scrape_github_impl",
"scrape_pdf_impl",
"scrape_video_impl",
"scrape_codebase_impl",
"detect_patterns_impl",
"extract_test_examples_impl",

View File

@@ -356,6 +356,81 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
async def scrape_video_tool(args: dict) -> list[TextContent]:
    """
    Scrape video content (YouTube, local files) and build Claude skill.

    Extracts transcripts, metadata, and optionally visual content from videos
    to create skills. Delegates the actual work to ``video_scraper.py`` run
    as a subprocess with streamed output.

    Args:
        args: Dictionary containing:
            - url (str, optional): Video URL (YouTube, Vimeo)
            - video_file (str, optional): Local video file path
            - playlist (str, optional): Playlist URL
            - name (str, optional): Skill name
            - description (str, optional): Skill description
            - languages (str, optional): Language preferences (comma-separated)
            - from_json (str, optional): Build from extracted JSON file

    Returns:
        List[TextContent]: Tool execution results
    """
    url = args.get("url")
    video_file = args.get("video_file")
    playlist = args.get("playlist")
    name = args.get("name")
    description = args.get("description")
    languages = args.get("languages")
    from_json = args.get("from_json")

    # Build command. Exactly one source option is required; precedence is
    # from_json > url > video_file > playlist, mirroring the CLI contract.
    cmd = [sys.executable, str(CLI_DIR / "video_scraper.py")]

    if from_json:
        cmd.extend(["--from-json", from_json])
    elif url:
        cmd.extend(["--url", url])
    elif video_file:
        cmd.extend(["--video-file", video_file])
    elif playlist:
        cmd.extend(["--playlist", playlist])
    else:
        return [
            TextContent(
                type="text",
                text="❌ Error: Must specify --url, --video-file, --playlist, or --from-json",
            )
        ]

    # Common skill-metadata options. These were previously duplicated in each
    # source branch and inconsistently applied (--languages was dropped for
    # --video-file/--playlist, --description for --playlist) even though the
    # docstring documents them as general; apply them uniformly for every
    # extraction source. --from-json rebuilds from an already-extracted file,
    # so metadata options are intentionally not forwarded there (unchanged
    # from the original behavior).
    if not from_json:
        if name:
            cmd.extend(["--name", name])
        if description:
            cmd.extend(["--description", description])
        if languages:
            cmd.extend(["--languages", languages])

    # Run video_scraper.py with streaming so long extractions show progress.
    timeout = 600  # 10 minutes for video extraction
    progress_msg = "🎬 Scraping video content...\n"
    progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"

    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
    output = progress_msg + stdout

    if returncode == 0:
        return [TextContent(type="text", text=output)]
    else:
        # Surface stderr alongside captured stdout so failures are diagnosable.
        return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
async def scrape_github_tool(args: dict) -> list[TextContent]:
"""
Scrape GitHub repository and build Claude skill.