fix: resolve 15 bugs and gaps in video scraper pipeline
- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -52,7 +52,6 @@ dependencies = [
|
|||||||
"anthropic>=0.76.0", # Required for AI enhancement (core feature)
|
"anthropic>=0.76.0", # Required for AI enhancement (core feature)
|
||||||
"PyMuPDF>=1.24.14",
|
"PyMuPDF>=1.24.14",
|
||||||
"Pillow>=11.0.0",
|
"Pillow>=11.0.0",
|
||||||
"pytesseract>=0.3.13",
|
|
||||||
"pydantic>=2.12.3",
|
"pydantic>=2.12.3",
|
||||||
"pydantic-settings>=2.11.0",
|
"pydantic-settings>=2.11.0",
|
||||||
"python-dotenv>=1.1.1",
|
"python-dotenv>=1.1.1",
|
||||||
@@ -129,6 +128,7 @@ video-full = [
|
|||||||
"scenedetect[opencv]>=0.6.4",
|
"scenedetect[opencv]>=0.6.4",
|
||||||
"easyocr>=1.7.0",
|
"easyocr>=1.7.0",
|
||||||
"opencv-python-headless>=4.9.0",
|
"opencv-python-headless>=4.9.0",
|
||||||
|
"pytesseract>=0.3.13",
|
||||||
]
|
]
|
||||||
|
|
||||||
# RAG vector database upload support
|
# RAG vector database upload support
|
||||||
@@ -172,6 +172,8 @@ embedding = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
# All optional dependencies combined (dev dependencies now in [dependency-groups])
|
# All optional dependencies combined (dev dependencies now in [dependency-groups])
|
||||||
|
# Note: video-full deps (opencv, easyocr, faster-whisper) excluded due to heavy
|
||||||
|
# native dependencies. Install separately: pip install skill-seekers[video-full]
|
||||||
all = [
|
all = [
|
||||||
"mammoth>=1.6.0",
|
"mammoth>=1.6.0",
|
||||||
"python-docx>=1.1.0",
|
"python-docx>=1.1.0",
|
||||||
|
|||||||
@@ -488,6 +488,13 @@ VIDEO_ARGUMENTS: dict[str, dict[str, Any]] = {
|
|||||||
"metavar": "THRESH",
|
"metavar": "THRESH",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"vision_ocr": {
|
||||||
|
"flags": ("--vision-ocr",),
|
||||||
|
"kwargs": {
|
||||||
|
"action": "store_true",
|
||||||
|
"help": "Use Claude Vision API as fallback for low-confidence code frames (requires ANTHROPIC_API_KEY, ~$0.004/frame)",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
# Multi-source config specific (from unified_scraper.py)
|
# Multi-source config specific (from unified_scraper.py)
|
||||||
|
|||||||
@@ -360,8 +360,12 @@ class CreateCommand:
|
|||||||
|
|
||||||
# Add video source (URL or file)
|
# Add video source (URL or file)
|
||||||
parsed = self.source_info.parsed
|
parsed = self.source_info.parsed
|
||||||
|
video_playlist = getattr(self.args, "video_playlist", None)
|
||||||
if parsed.get("source_kind") == "file":
|
if parsed.get("source_kind") == "file":
|
||||||
argv.extend(["--video-file", parsed["file_path"]])
|
argv.extend(["--video-file", parsed["file_path"]])
|
||||||
|
elif video_playlist:
|
||||||
|
# Explicit --video-playlist flag takes precedence
|
||||||
|
argv.extend(["--playlist", video_playlist])
|
||||||
elif parsed.get("url"):
|
elif parsed.get("url"):
|
||||||
url = parsed["url"]
|
url = parsed["url"]
|
||||||
# Detect playlist vs single video
|
# Detect playlist vs single video
|
||||||
@@ -374,11 +378,15 @@ class CreateCommand:
|
|||||||
self._add_common_args(argv)
|
self._add_common_args(argv)
|
||||||
|
|
||||||
# Add video-specific arguments
|
# Add video-specific arguments
|
||||||
video_langs = getattr(self.args, "video_languages", None) or getattr(self.args, "languages", None)
|
video_langs = getattr(self.args, "video_languages", None) or getattr(
|
||||||
|
self.args, "languages", None
|
||||||
|
)
|
||||||
if video_langs:
|
if video_langs:
|
||||||
argv.extend(["--languages", video_langs])
|
argv.extend(["--languages", video_langs])
|
||||||
if getattr(self.args, "visual", False):
|
if getattr(self.args, "visual", False):
|
||||||
argv.append("--visual")
|
argv.append("--visual")
|
||||||
|
if getattr(self.args, "vision_ocr", False):
|
||||||
|
argv.append("--vision-ocr")
|
||||||
if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base":
|
if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base":
|
||||||
argv.extend(["--whisper-model", self.args.whisper_model])
|
argv.extend(["--whisper-model", self.args.whisper_model])
|
||||||
vi = getattr(self.args, "visual_interval", None)
|
vi = getattr(self.args, "visual_interval", None)
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ Commands:
|
|||||||
scrape Scrape documentation website
|
scrape Scrape documentation website
|
||||||
github Scrape GitHub repository
|
github Scrape GitHub repository
|
||||||
pdf Extract from PDF file
|
pdf Extract from PDF file
|
||||||
|
word Extract from Word (.docx) file
|
||||||
|
video Extract from video (YouTube or local)
|
||||||
unified Multi-source scraping (docs + GitHub + PDF)
|
unified Multi-source scraping (docs + GitHub + PDF)
|
||||||
analyze Analyze local codebase and extract code knowledge
|
analyze Analyze local codebase and extract code knowledge
|
||||||
enhance AI-powered enhancement (auto: API or LOCAL mode)
|
enhance AI-powered enhancement (auto: API or LOCAL mode)
|
||||||
|
|||||||
@@ -79,7 +79,14 @@ class UnifiedScraper:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Track source index for unique naming (multi-source support)
|
# Track source index for unique naming (multi-source support)
|
||||||
self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "word": 0, "video": 0, "local": 0}
|
self._source_counters = {
|
||||||
|
"documentation": 0,
|
||||||
|
"github": 0,
|
||||||
|
"pdf": 0,
|
||||||
|
"word": 0,
|
||||||
|
"video": 0,
|
||||||
|
"local": 0,
|
||||||
|
}
|
||||||
|
|
||||||
# Output paths - cleaner organization
|
# Output paths - cleaner organization
|
||||||
self.name = self.config["name"]
|
self.name = self.config["name"]
|
||||||
@@ -583,8 +590,12 @@ class UnifiedScraper:
|
|||||||
"""Scrape video source (YouTube, local file, etc.)."""
|
"""Scrape video source (YouTube, local file, etc.)."""
|
||||||
try:
|
try:
|
||||||
from skill_seekers.cli.video_scraper import VideoToSkillConverter
|
from skill_seekers.cli.video_scraper import VideoToSkillConverter
|
||||||
except ImportError:
|
except ImportError as e:
|
||||||
logger.error("video_scraper.py not found")
|
logger.error(
|
||||||
|
f"Video scraper dependencies not installed: {e}\n"
|
||||||
|
" Install with: pip install skill-seekers[video]\n"
|
||||||
|
" For visual extraction (frame analysis, OCR): pip install skill-seekers[video-full]"
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Multi-source support: Get unique index for this video source
|
# Multi-source support: Get unique index for this video source
|
||||||
@@ -630,8 +641,7 @@ class UnifiedScraper:
|
|||||||
logger.info("✅ Video: Standalone SKILL.md created")
|
logger.info("✅ Video: Standalone SKILL.md created")
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"✅ Video: {len(result.videos)} videos, "
|
f"✅ Video: {len(result.videos)} videos, {result.total_segments} segments extracted"
|
||||||
f"{result.total_segments} segments extracted"
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to process video source: {e}")
|
logger.error(f"Failed to process video source: {e}")
|
||||||
|
|||||||
@@ -222,6 +222,7 @@ class FrameSubSection:
|
|||||||
ocr_regions: list[OCRRegion] = field(default_factory=list)
|
ocr_regions: list[OCRRegion] = field(default_factory=list)
|
||||||
ocr_confidence: float = 0.0
|
ocr_confidence: float = 0.0
|
||||||
panel_id: str = "" # e.g. "panel_0_0" (row_col)
|
panel_id: str = "" # e.g. "panel_0_0" (row_col)
|
||||||
|
_vision_used: bool = False # Whether Vision API was used for OCR
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -469,7 +469,12 @@ class VideoToSkillConverter:
|
|||||||
|
|
||||||
# Generate reference files for each video
|
# Generate reference files for each video
|
||||||
for video in self.result.videos:
|
for video in self.result.videos:
|
||||||
ref_filename = f"video_{_sanitize_filename(video.title)}.md"
|
sanitized = (
|
||||||
|
_sanitize_filename(video.title)
|
||||||
|
or video.video_id
|
||||||
|
or f"video_{hash(video.title) % 10000:04d}"
|
||||||
|
)
|
||||||
|
ref_filename = f"video_{sanitized}.md"
|
||||||
ref_path = os.path.join(refs_dir, ref_filename)
|
ref_path = os.path.join(refs_dir, ref_filename)
|
||||||
ref_content = self._generate_reference_md(video)
|
ref_content = self._generate_reference_md(video)
|
||||||
with open(ref_path, "w", encoding="utf-8") as f:
|
with open(ref_path, "w", encoding="utf-8") as f:
|
||||||
@@ -750,7 +755,12 @@ class VideoToSkillConverter:
|
|||||||
preview += "..."
|
preview += "..."
|
||||||
lines.append(f"{preview}\n")
|
lines.append(f"{preview}\n")
|
||||||
|
|
||||||
ref_filename = f"video_{_sanitize_filename(video.title)}.md"
|
sanitized = (
|
||||||
|
_sanitize_filename(video.title)
|
||||||
|
or video.video_id
|
||||||
|
or f"video_{hash(video.title) % 10000:04d}"
|
||||||
|
)
|
||||||
|
ref_filename = f"video_{sanitized}.md"
|
||||||
lines.append(
|
lines.append(
|
||||||
f"> Full transcript: [references/{ref_filename}](references/{ref_filename})\n"
|
f"> Full transcript: [references/{ref_filename}](references/{ref_filename})\n"
|
||||||
)
|
)
|
||||||
@@ -766,7 +776,12 @@ class VideoToSkillConverter:
|
|||||||
# References
|
# References
|
||||||
lines.append("## References\n")
|
lines.append("## References\n")
|
||||||
for video in self.result.videos:
|
for video in self.result.videos:
|
||||||
ref_filename = f"video_{_sanitize_filename(video.title)}.md"
|
sanitized = (
|
||||||
|
_sanitize_filename(video.title)
|
||||||
|
or video.video_id
|
||||||
|
or f"video_{hash(video.title) % 10000:04d}"
|
||||||
|
)
|
||||||
|
ref_filename = f"video_{sanitized}.md"
|
||||||
lines.append(f"- [{video.title}](references/{ref_filename})")
|
lines.append(f"- [{video.title}](references/{ref_filename})")
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
@@ -940,11 +955,25 @@ def _run_video_enhancement(skill_dir: str, enhance_level: int, args) -> None:
|
|||||||
if api_key:
|
if api_key:
|
||||||
enhance_cmd.extend(["--api-key", api_key])
|
enhance_cmd.extend(["--api-key", api_key])
|
||||||
|
|
||||||
result = subprocess.run(enhance_cmd, check=True)
|
logger.info(
|
||||||
if result.returncode == 0:
|
"Starting video skill enhancement (this may take 10+ minutes "
|
||||||
logger.info("✅ Video skill enhancement complete!")
|
"for large videos with AI enhancement)..."
|
||||||
except subprocess.CalledProcessError:
|
)
|
||||||
logger.warning("⚠ Enhancement failed, but skill was still built")
|
subprocess.run(enhance_cmd, check=True, timeout=1800)
|
||||||
|
logger.info("Video skill enhancement complete!")
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
logger.warning(
|
||||||
|
"⚠ Enhancement timed out after 30 minutes. "
|
||||||
|
"The skill was still built without enhancement. "
|
||||||
|
"You can retry manually with:\n"
|
||||||
|
f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}"
|
||||||
|
)
|
||||||
|
except subprocess.CalledProcessError as exc:
|
||||||
|
logger.warning(
|
||||||
|
f"⚠ Enhancement failed (exit code {exc.returncode}), "
|
||||||
|
"but skill was still built. You can retry manually with:\n"
|
||||||
|
f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}"
|
||||||
|
)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.warning("⚠ skill-seekers-enhance not found. Run manually:")
|
logger.warning("⚠ skill-seekers-enhance not found. Run manually:")
|
||||||
logger.info(f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}")
|
logger.info(f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}")
|
||||||
|
|||||||
@@ -70,10 +70,36 @@ def extract_youtube_transcript(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
ytt_api = YouTubeTranscriptApi()
|
ytt_api = YouTubeTranscriptApi()
|
||||||
transcript = ytt_api.fetch(video_id, languages=languages)
|
|
||||||
|
# Use list_transcripts to detect whether the transcript is auto-generated
|
||||||
|
source = TranscriptSource.YOUTUBE_MANUAL
|
||||||
|
try:
|
||||||
|
transcript_list = ytt_api.list(video_id)
|
||||||
|
# Prefer manually created transcripts; fall back to auto-generated
|
||||||
|
try:
|
||||||
|
transcript_entry = transcript_list.find_manually_created_transcript(languages)
|
||||||
|
source = TranscriptSource.YOUTUBE_MANUAL
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
transcript_entry = transcript_list.find_generated_transcript(languages)
|
||||||
|
source = TranscriptSource.YOUTUBE_AUTO
|
||||||
|
except Exception:
|
||||||
|
# Fall back to any available transcript
|
||||||
|
transcript_entry = transcript_list.find_transcript(languages)
|
||||||
|
source = (
|
||||||
|
TranscriptSource.YOUTUBE_AUTO
|
||||||
|
if transcript_entry.is_generated
|
||||||
|
else TranscriptSource.YOUTUBE_MANUAL
|
||||||
|
)
|
||||||
|
transcript = transcript_entry.fetch()
|
||||||
|
except Exception:
|
||||||
|
# Fall back to direct fetch if list fails (older API versions)
|
||||||
|
transcript = ytt_api.fetch(video_id, languages=languages)
|
||||||
|
# Check is_generated on the FetchedTranscript if available
|
||||||
|
if getattr(transcript, "is_generated", False):
|
||||||
|
source = TranscriptSource.YOUTUBE_AUTO
|
||||||
|
|
||||||
segments = []
|
segments = []
|
||||||
source = TranscriptSource.YOUTUBE_MANUAL
|
|
||||||
for snippet in transcript.snippets:
|
for snippet in transcript.snippets:
|
||||||
text = snippet.text.strip()
|
text = snippet.text.strip()
|
||||||
if not text:
|
if not text:
|
||||||
|
|||||||
@@ -1864,7 +1864,7 @@ def _ocr_single_panel(
|
|||||||
panel_id=f"panel_{row}_{col}",
|
panel_id=f"panel_{row}_{col}",
|
||||||
)
|
)
|
||||||
# Stash vision_used flag for the caller to count
|
# Stash vision_used flag for the caller to count
|
||||||
ss._vision_used = vision_used # type: ignore[attr-defined]
|
ss._vision_used = vision_used
|
||||||
return ss
|
return ss
|
||||||
|
|
||||||
|
|
||||||
@@ -1918,7 +1918,7 @@ def extract_visual_data(
|
|||||||
cap = cv2.VideoCapture(video_path)
|
cap = cv2.VideoCapture(video_path)
|
||||||
if not cap.isOpened():
|
if not cap.isOpened():
|
||||||
logger.error(f"Cannot open video: {video_path}")
|
logger.error(f"Cannot open video: {video_path}")
|
||||||
return [], []
|
return [], [], None
|
||||||
|
|
||||||
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
||||||
total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
||||||
@@ -2003,7 +2003,7 @@ def extract_visual_data(
|
|||||||
for fut in concurrent.futures.as_completed(futures):
|
for fut in concurrent.futures.as_completed(futures):
|
||||||
ss = fut.result()
|
ss = fut.result()
|
||||||
if ss is not None:
|
if ss is not None:
|
||||||
if getattr(ss, "_vision_used", False):
|
if ss._vision_used:
|
||||||
vision_api_frames += 1
|
vision_api_frames += 1
|
||||||
sub_sections.append(ss)
|
sub_sections.append(ss)
|
||||||
else:
|
else:
|
||||||
@@ -2018,7 +2018,7 @@ def extract_visual_data(
|
|||||||
use_vision_api,
|
use_vision_api,
|
||||||
)
|
)
|
||||||
if ss is not None:
|
if ss is not None:
|
||||||
if getattr(ss, "_vision_used", False):
|
if ss._vision_used:
|
||||||
vision_api_frames += 1
|
vision_api_frames += 1
|
||||||
sub_sections.append(ss)
|
sub_sections.append(ss)
|
||||||
|
|
||||||
|
|||||||
@@ -3,20 +3,21 @@
|
|||||||
Skill Seeker MCP Server (FastMCP Implementation)
|
Skill Seeker MCP Server (FastMCP Implementation)
|
||||||
|
|
||||||
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
|
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
|
||||||
Provides 25 tools for generating Claude AI skills from documentation.
|
Provides 33 tools for generating Claude AI skills from documentation.
|
||||||
|
|
||||||
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
|
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
|
||||||
All tool implementations are delegated to modular tool files in tools/ directory.
|
All tool implementations are delegated to modular tool files in tools/ directory.
|
||||||
|
|
||||||
**Architecture:**
|
**Architecture:**
|
||||||
- FastMCP server with decorator-based tool registration
|
- FastMCP server with decorator-based tool registration
|
||||||
- 25 tools organized into 6 categories:
|
- 33 tools organized into 7 categories:
|
||||||
* Config tools (3): generate_config, list_configs, validate_config
|
* Config tools (3): generate_config, list_configs, validate_config
|
||||||
* Scraping tools (8): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_codebase, detect_patterns, extract_test_examples, build_how_to_guides, extract_config_patterns
|
* Scraping tools (10): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_video, scrape_codebase, detect_patterns, extract_test_examples, build_how_to_guides, extract_config_patterns
|
||||||
* Packaging tools (4): package_skill, upload_skill, enhance_skill, install_skill
|
* Packaging tools (4): package_skill, upload_skill, enhance_skill, install_skill
|
||||||
* Splitting tools (2): split_config, generate_router
|
* Splitting tools (2): split_config, generate_router
|
||||||
* Source tools (4): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
|
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
|
||||||
* Vector Database tools (4): export_to_weaviate, export_to_chroma, export_to_faiss, export_to_qdrant
|
* Vector Database tools (4): export_to_weaviate, export_to_chroma, export_to_faiss, export_to_qdrant
|
||||||
|
* Workflow tools (5): list_workflows, get_workflow, create_workflow, update_workflow, delete_workflow
|
||||||
|
|
||||||
**Usage:**
|
**Usage:**
|
||||||
# Stdio transport (default, backward compatible)
|
# Stdio transport (default, backward compatible)
|
||||||
@@ -140,6 +141,7 @@ except ImportError:
|
|||||||
scrape_docs_impl,
|
scrape_docs_impl,
|
||||||
scrape_github_impl,
|
scrape_github_impl,
|
||||||
scrape_pdf_impl,
|
scrape_pdf_impl,
|
||||||
|
scrape_video_impl,
|
||||||
split_config_impl,
|
split_config_impl,
|
||||||
submit_config_impl,
|
submit_config_impl,
|
||||||
upload_skill_impl,
|
upload_skill_impl,
|
||||||
@@ -250,7 +252,7 @@ async def validate_config(config_path: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# SCRAPING TOOLS (4 tools)
|
# SCRAPING TOOLS (10 tools)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
@@ -432,6 +434,12 @@ async def scrape_video(
|
|||||||
description: str | None = None,
|
description: str | None = None,
|
||||||
languages: str | None = None,
|
languages: str | None = None,
|
||||||
from_json: str | None = None,
|
from_json: str | None = None,
|
||||||
|
visual: bool = False,
|
||||||
|
whisper_model: str | None = None,
|
||||||
|
visual_interval: float | None = None,
|
||||||
|
visual_min_gap: float | None = None,
|
||||||
|
visual_similarity: float | None = None,
|
||||||
|
vision_ocr: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Scrape video content and build Claude skill.
|
Scrape video content and build Claude skill.
|
||||||
@@ -444,6 +452,12 @@ async def scrape_video(
|
|||||||
description: Skill description
|
description: Skill description
|
||||||
languages: Transcript language preferences (comma-separated)
|
languages: Transcript language preferences (comma-separated)
|
||||||
from_json: Build from extracted JSON file
|
from_json: Build from extracted JSON file
|
||||||
|
visual: Enable visual frame extraction (requires video-full extras)
|
||||||
|
whisper_model: Whisper model size for local transcription (e.g., base, small, medium, large)
|
||||||
|
visual_interval: Seconds between frame captures (default: 5.0)
|
||||||
|
visual_min_gap: Minimum seconds between kept frames (default: 2.0)
|
||||||
|
visual_similarity: Similarity threshold to skip duplicate frames 0.0-1.0 (default: 0.95)
|
||||||
|
vision_ocr: Use vision model for OCR on extracted frames
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Video scraping results with file paths.
|
Video scraping results with file paths.
|
||||||
@@ -463,6 +477,18 @@ async def scrape_video(
|
|||||||
args["languages"] = languages
|
args["languages"] = languages
|
||||||
if from_json:
|
if from_json:
|
||||||
args["from_json"] = from_json
|
args["from_json"] = from_json
|
||||||
|
if visual:
|
||||||
|
args["visual"] = visual
|
||||||
|
if whisper_model:
|
||||||
|
args["whisper_model"] = whisper_model
|
||||||
|
if visual_interval is not None:
|
||||||
|
args["visual_interval"] = visual_interval
|
||||||
|
if visual_min_gap is not None:
|
||||||
|
args["visual_min_gap"] = visual_min_gap
|
||||||
|
if visual_similarity is not None:
|
||||||
|
args["visual_similarity"] = visual_similarity
|
||||||
|
if vision_ocr:
|
||||||
|
args["vision_ocr"] = vision_ocr
|
||||||
|
|
||||||
result = await scrape_video_impl(args)
|
result = await scrape_video_impl(args)
|
||||||
if isinstance(result, list) and result:
|
if isinstance(result, list) and result:
|
||||||
|
|||||||
@@ -372,6 +372,12 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
|
|||||||
- description (str, optional): Skill description
|
- description (str, optional): Skill description
|
||||||
- languages (str, optional): Language preferences (comma-separated)
|
- languages (str, optional): Language preferences (comma-separated)
|
||||||
- from_json (str, optional): Build from extracted JSON file
|
- from_json (str, optional): Build from extracted JSON file
|
||||||
|
- visual (bool, optional): Enable visual frame extraction (default: False)
|
||||||
|
- whisper_model (str, optional): Whisper model size (default: base)
|
||||||
|
- visual_interval (float, optional): Seconds between frame captures (default: 5.0)
|
||||||
|
- visual_min_gap (float, optional): Minimum seconds between kept frames (default: 2.0)
|
||||||
|
- visual_similarity (float, optional): Similarity threshold to skip duplicate frames (default: 0.95)
|
||||||
|
- vision_ocr (bool, optional): Use vision model for OCR on frames (default: False)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[TextContent]: Tool execution results
|
List[TextContent]: Tool execution results
|
||||||
@@ -383,6 +389,12 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
|
|||||||
description = args.get("description")
|
description = args.get("description")
|
||||||
languages = args.get("languages")
|
languages = args.get("languages")
|
||||||
from_json = args.get("from_json")
|
from_json = args.get("from_json")
|
||||||
|
visual = args.get("visual", False)
|
||||||
|
whisper_model = args.get("whisper_model")
|
||||||
|
visual_interval = args.get("visual_interval")
|
||||||
|
visual_min_gap = args.get("visual_min_gap")
|
||||||
|
visual_similarity = args.get("visual_similarity")
|
||||||
|
vision_ocr = args.get("vision_ocr", False)
|
||||||
|
|
||||||
# Build command
|
# Build command
|
||||||
cmd = [sys.executable, str(CLI_DIR / "video_scraper.py")]
|
cmd = [sys.executable, str(CLI_DIR / "video_scraper.py")]
|
||||||
@@ -415,6 +427,20 @@ async def scrape_video_tool(args: dict) -> list[TextContent]:
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Visual extraction parameters
|
||||||
|
if visual:
|
||||||
|
cmd.append("--visual")
|
||||||
|
if whisper_model:
|
||||||
|
cmd.extend(["--whisper-model", whisper_model])
|
||||||
|
if visual_interval is not None:
|
||||||
|
cmd.extend(["--visual-interval", str(visual_interval)])
|
||||||
|
if visual_min_gap is not None:
|
||||||
|
cmd.extend(["--visual-min-gap", str(visual_min_gap)])
|
||||||
|
if visual_similarity is not None:
|
||||||
|
cmd.extend(["--visual-similarity", str(visual_similarity)])
|
||||||
|
if vision_ocr:
|
||||||
|
cmd.append("--vision-ocr")
|
||||||
|
|
||||||
# Run video_scraper.py with streaming
|
# Run video_scraper.py with streaming
|
||||||
timeout = 600 # 10 minutes for video extraction
|
timeout = 600 # 10 minutes for video extraction
|
||||||
|
|
||||||
|
|||||||
@@ -24,12 +24,12 @@ class TestParserRegistry:
|
|||||||
|
|
||||||
def test_all_parsers_registered(self):
|
def test_all_parsers_registered(self):
|
||||||
"""Test that all parsers are registered."""
|
"""Test that all parsers are registered."""
|
||||||
assert len(PARSERS) == 22, f"Expected 22 parsers, got {len(PARSERS)}"
|
assert len(PARSERS) == 23, f"Expected 23 parsers, got {len(PARSERS)}"
|
||||||
|
|
||||||
def test_get_parser_names(self):
|
def test_get_parser_names(self):
|
||||||
"""Test getting list of parser names."""
|
"""Test getting list of parser names."""
|
||||||
names = get_parser_names()
|
names = get_parser_names()
|
||||||
assert len(names) == 22
|
assert len(names) == 23
|
||||||
assert "scrape" in names
|
assert "scrape" in names
|
||||||
assert "github" in names
|
assert "github" in names
|
||||||
assert "package" in names
|
assert "package" in names
|
||||||
@@ -37,6 +37,7 @@ class TestParserRegistry:
|
|||||||
assert "analyze" in names
|
assert "analyze" in names
|
||||||
assert "config" in names
|
assert "config" in names
|
||||||
assert "workflows" in names
|
assert "workflows" in names
|
||||||
|
assert "video" in names
|
||||||
|
|
||||||
def test_all_parsers_are_subcommand_parsers(self):
|
def test_all_parsers_are_subcommand_parsers(self):
|
||||||
"""Test that all parsers inherit from SubcommandParser."""
|
"""Test that all parsers inherit from SubcommandParser."""
|
||||||
@@ -242,9 +243,9 @@ class TestBackwardCompatibility:
|
|||||||
assert cmd in names, f"Command '{cmd}' not found in parser registry!"
|
assert cmd in names, f"Command '{cmd}' not found in parser registry!"
|
||||||
|
|
||||||
def test_command_count_matches(self):
|
def test_command_count_matches(self):
|
||||||
"""Test that we have exactly 22 commands (includes new create, workflows, and word commands)."""
|
"""Test that we have exactly 23 commands (includes create, workflows, word, and video commands)."""
|
||||||
assert len(PARSERS) == 22
|
assert len(PARSERS) == 23
|
||||||
assert len(get_parser_names()) == 22
|
assert len(get_parser_names()) == 23
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
4
uv.lock
generated
4
uv.lock
generated
@@ -5983,7 +5983,6 @@ dependencies = [
|
|||||||
{ name = "pygithub" },
|
{ name = "pygithub" },
|
||||||
{ name = "pygments" },
|
{ name = "pygments" },
|
||||||
{ name = "pymupdf" },
|
{ name = "pymupdf" },
|
||||||
{ name = "pytesseract" },
|
|
||||||
{ name = "python-dotenv" },
|
{ name = "python-dotenv" },
|
||||||
{ name = "pyyaml" },
|
{ name = "pyyaml" },
|
||||||
{ name = "requests" },
|
{ name = "requests" },
|
||||||
@@ -6084,6 +6083,7 @@ video-full = [
|
|||||||
{ name = "easyocr" },
|
{ name = "easyocr" },
|
||||||
{ name = "faster-whisper" },
|
{ name = "faster-whisper" },
|
||||||
{ name = "opencv-python-headless" },
|
{ name = "opencv-python-headless" },
|
||||||
|
{ name = "pytesseract" },
|
||||||
{ name = "scenedetect", extra = ["opencv"] },
|
{ name = "scenedetect", extra = ["opencv"] },
|
||||||
{ name = "youtube-transcript-api" },
|
{ name = "youtube-transcript-api" },
|
||||||
{ name = "yt-dlp" },
|
{ name = "yt-dlp" },
|
||||||
@@ -6164,7 +6164,7 @@ requires-dist = [
|
|||||||
{ name = "pygithub", specifier = ">=2.5.0" },
|
{ name = "pygithub", specifier = ">=2.5.0" },
|
||||||
{ name = "pygments", specifier = ">=2.19.2" },
|
{ name = "pygments", specifier = ">=2.19.2" },
|
||||||
{ name = "pymupdf", specifier = ">=1.24.14" },
|
{ name = "pymupdf", specifier = ">=1.24.14" },
|
||||||
{ name = "pytesseract", specifier = ">=0.3.13" },
|
{ name = "pytesseract", marker = "extra == 'video-full'", specifier = ">=0.3.13" },
|
||||||
{ name = "python-docx", marker = "extra == 'all'", specifier = ">=1.1.0" },
|
{ name = "python-docx", marker = "extra == 'all'", specifier = ">=1.1.0" },
|
||||||
{ name = "python-docx", marker = "extra == 'docx'", specifier = ">=1.1.0" },
|
{ name = "python-docx", marker = "extra == 'docx'", specifier = ">=1.1.0" },
|
||||||
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
||||||
|
|||||||
Reference in New Issue
Block a user