fix: resolve 15 bugs and gaps in video scraper pipeline
- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
4
uv.lock
generated
4
uv.lock
generated
@@ -5983,7 +5983,6 @@ dependencies = [
|
||||
{ name = "pygithub" },
|
||||
{ name = "pygments" },
|
||||
{ name = "pymupdf" },
|
||||
{ name = "pytesseract" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "pyyaml" },
|
||||
{ name = "requests" },
|
||||
@@ -6084,6 +6083,7 @@ video-full = [
|
||||
{ name = "easyocr" },
|
||||
{ name = "faster-whisper" },
|
||||
{ name = "opencv-python-headless" },
|
||||
{ name = "pytesseract" },
|
||||
{ name = "scenedetect", extra = ["opencv"] },
|
||||
{ name = "youtube-transcript-api" },
|
||||
{ name = "yt-dlp" },
|
||||
@@ -6164,7 +6164,7 @@ requires-dist = [
|
||||
{ name = "pygithub", specifier = ">=2.5.0" },
|
||||
{ name = "pygments", specifier = ">=2.19.2" },
|
||||
{ name = "pymupdf", specifier = ">=1.24.14" },
|
||||
{ name = "pytesseract", specifier = ">=0.3.13" },
|
||||
{ name = "pytesseract", marker = "extra == 'video-full'", specifier = ">=0.3.13" },
|
||||
{ name = "python-docx", marker = "extra == 'all'", specifier = ">=1.1.0" },
|
||||
{ name = "python-docx", marker = "extra == 'docx'", specifier = ">=1.1.0" },
|
||||
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
||||
|
||||
Reference in New Issue
Block a user