feat: add video tutorial scraping pipeline with per-panel OCR and AI enhancement
Add a complete video tutorial extraction system that converts YouTube videos and local video files into AI-consumable skills. The pipeline extracts transcripts, performs visual OCR on code editor panels independently, tracks code evolution across frames, and generates structured SKILL.md output.

Key features:
- Video metadata extraction (YouTube, local files, playlists)
- Multi-source transcript extraction (YouTube API, yt-dlp, Whisper fallback)
- Chapter-based and time-window segmentation
- Visual extraction: keyframe detection, frame classification, panel detection
- Per-panel sub-section OCR (each IDE panel OCR'd independently)
- Parallel OCR with ThreadPoolExecutor for multi-panel frames
- Narrow panel filtering (300px min width) to skip UI chrome
- Text block tracking with spatial panel position matching
- Code timeline with edit tracking across frames
- Audio-visual alignment (code + narrator pairs)
- Video-specific AI enhancement prompt for OCR denoising and code reconstruction
- video-tutorial.yaml workflow with 4 stages (OCR cleanup, language detection, tutorial synthesis, skill polish)
- CLI integration: skill-seekers video --url/--video-file/--playlist
- MCP tool: scrape_video for automation
- 161 tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -98,6 +98,7 @@ try:
|
||||
scrape_docs_impl,
|
||||
scrape_github_impl,
|
||||
scrape_pdf_impl,
|
||||
scrape_video_impl,
|
||||
# Splitting tools
|
||||
split_config_impl,
|
||||
submit_config_impl,
|
||||
@@ -420,6 +421,55 @@ async def scrape_pdf(
|
||||
return str(result)
|
||||
|
||||
|
||||
@safe_tool_decorator(
    description="Extract transcripts and metadata from videos (YouTube, Vimeo, local files) and build Claude skill."
)
async def scrape_video(
    url: str | None = None,
    video_file: str | None = None,
    playlist: str | None = None,
    name: str | None = None,
    description: str | None = None,
    languages: str | None = None,
    from_json: str | None = None,
) -> str:
    """
    Scrape video content and build Claude skill.

    Collects the supplied options into a dictionary, hands it to
    ``scrape_video_impl`` and flattens the result to a plain string.

    Args:
        url: Video URL (YouTube, Vimeo)
        video_file: Local video file path
        playlist: Playlist URL
        name: Skill name
        description: Skill description
        languages: Transcript language preferences (comma-separated)
        from_json: Build from extracted JSON file

    Returns:
        Video scraping results with file paths.
    """
    supplied = {
        "url": url,
        "video_file": video_file,
        "playlist": playlist,
        "name": name,
        "description": description,
        "languages": languages,
        "from_json": from_json,
    }
    # Only forward options that were actually given; None and empty strings
    # are omitted so the implementation sees them as "not provided".
    args = {key: value for key, value in supplied.items() if value}

    result = await scrape_video_impl(args)

    # Anything other than a non-empty list is stringified as a whole.
    if not (isinstance(result, list) and result):
        return str(result)

    # Otherwise only the first item is reported, preferring its ``text``.
    first = result[0]
    return first.text if hasattr(first, "text") else str(first)
|
||||
|
||||
|
||||
@safe_tool_decorator(
|
||||
description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation and dependency graphs."
|
||||
)
|
||||
|
||||
@@ -63,6 +63,9 @@ from .scraping_tools import (
|
||||
from .scraping_tools import (
|
||||
scrape_pdf_tool as scrape_pdf_impl,
|
||||
)
|
||||
from .scraping_tools import (
|
||||
scrape_video_tool as scrape_video_impl,
|
||||
)
|
||||
from .source_tools import (
|
||||
add_config_source_tool as add_config_source_impl,
|
||||
)
|
||||
@@ -123,6 +126,7 @@ __all__ = [
|
||||
"scrape_docs_impl",
|
||||
"scrape_github_impl",
|
||||
"scrape_pdf_impl",
|
||||
"scrape_video_impl",
|
||||
"scrape_codebase_impl",
|
||||
"detect_patterns_impl",
|
||||
"extract_test_examples_impl",
|
||||
|
||||
@@ -356,6 +356,81 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
|
||||
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
|
||||
|
||||
|
||||
async def scrape_video_tool(args: dict) -> list[TextContent]:
    """
    Scrape video content (YouTube, local files) and build Claude skill.

    Builds a ``video_scraper.py`` command line from ``args``, runs it through
    ``run_subprocess_with_streaming`` and returns the captured output.

    Exactly one input source is used. When several are supplied, the first
    one present in this order wins: ``from_json``, ``url``, ``video_file``,
    ``playlist``.

    Args:
        args: Dictionary containing:
            - url (str, optional): Video URL (YouTube, Vimeo)
            - video_file (str, optional): Local video file path
            - playlist (str, optional): Playlist URL
            - name (str, optional): Skill name
            - description (str, optional): Skill description
            - languages (str, optional): Language preferences (comma-separated)
            - from_json (str, optional): Build from extracted JSON file

    Returns:
        List[TextContent]: A single text item holding the progress header and
        the subprocess stdout; on a non-zero exit code the stderr is appended
        under an error marker. If no input source is given, a single error
        item is returned without running anything.
    """
    # Input sources in priority order, paired with their CLI flag.
    source_flags = (
        ("from_json", "--from-json"),
        ("url", "--url"),
        ("video_file", "--video-file"),
        ("playlist", "--playlist"),
    )
    # Options shared by every live source (url / video_file / playlist).
    # Previously these were copied per branch and forwarded inconsistently
    # (e.g. languages only with --url, description not with --playlist), so
    # caller-supplied values were silently dropped for some sources.
    # NOTE(review): assumes video_scraper.py accepts these flags regardless of
    # which source flag is used - confirm against the CLI's argument parser.
    option_flags = (
        ("name", "--name"),
        ("description", "--description"),
        ("languages", "--languages"),
    )

    selected = next(
        ((key, flag) for key, flag in source_flags if args.get(key)),
        None,
    )
    if selected is None:
        return [
            TextContent(
                type="text",
                text="❌ Error: Must specify --url, --video-file, --playlist, or --from-json",
            )
        ]
    source_key, source_flag = selected

    # Build command
    cmd = [
        sys.executable,
        str(CLI_DIR / "video_scraper.py"),
        source_flag,
        args[source_key],
    ]

    # --from-json is still passed on its own, exactly as before.
    if source_key != "from_json":
        for key, flag in option_flags:
            value = args.get(key)
            if value:
                cmd.extend([flag, value])

    # Run video_scraper.py with streaming
    timeout = 600  # 10 minutes for video extraction

    progress_msg = "🎬 Scraping video content...\n"
    progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"

    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)

    output = progress_msg + stdout

    if returncode != 0:
        return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
    return [TextContent(type="text", text=output)]
|
||||
|
||||
|
||||
async def scrape_github_tool(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
Scrape GitHub repository and build Claude skill.
|
||||
|
||||
Reference in New Issue
Block a user