fix: resolve 15 bugs and gaps in video scraper pipeline
- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -360,8 +360,12 @@ class CreateCommand:
|
||||
|
||||
# Add video source (URL or file)
|
||||
parsed = self.source_info.parsed
|
||||
video_playlist = getattr(self.args, "video_playlist", None)
|
||||
if parsed.get("source_kind") == "file":
|
||||
argv.extend(["--video-file", parsed["file_path"]])
|
||||
elif video_playlist:
|
||||
# Explicit --video-playlist flag takes precedence
|
||||
argv.extend(["--playlist", video_playlist])
|
||||
elif parsed.get("url"):
|
||||
url = parsed["url"]
|
||||
# Detect playlist vs single video
|
||||
@@ -374,11 +378,15 @@ class CreateCommand:
|
||||
self._add_common_args(argv)
|
||||
|
||||
# Add video-specific arguments
|
||||
video_langs = getattr(self.args, "video_languages", None) or getattr(self.args, "languages", None)
|
||||
video_langs = getattr(self.args, "video_languages", None) or getattr(
|
||||
self.args, "languages", None
|
||||
)
|
||||
if video_langs:
|
||||
argv.extend(["--languages", video_langs])
|
||||
if getattr(self.args, "visual", False):
|
||||
argv.append("--visual")
|
||||
if getattr(self.args, "vision_ocr", False):
|
||||
argv.append("--vision-ocr")
|
||||
if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base":
|
||||
argv.extend(["--whisper-model", self.args.whisper_model])
|
||||
vi = getattr(self.args, "visual_interval", None)
|
||||
|
||||
Reference in New Issue
Block a user