fix: resolve 15 bugs and gaps in video scraper pipeline

- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1864,7 +1864,7 @@ def _ocr_single_panel(
|
||||
panel_id=f"panel_{row}_{col}",
|
||||
)
|
||||
# Stash vision_used flag for the caller to count
|
||||
ss._vision_used = vision_used # type: ignore[attr-defined]
|
||||
ss._vision_used = vision_used
|
||||
return ss
|
||||
|
||||
|
||||
@@ -1918,7 +1918,7 @@ def extract_visual_data(
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
logger.error(f"Cannot open video: {video_path}")
|
||||
return [], []
|
||||
return [], [], None
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
||||
total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
|
||||
@@ -2003,7 +2003,7 @@ def extract_visual_data(
|
||||
for fut in concurrent.futures.as_completed(futures):
|
||||
ss = fut.result()
|
||||
if ss is not None:
|
||||
if getattr(ss, "_vision_used", False):
|
||||
if ss._vision_used:
|
||||
vision_api_frames += 1
|
||||
sub_sections.append(ss)
|
||||
else:
|
||||
@@ -2018,7 +2018,7 @@ def extract_visual_data(
|
||||
use_vision_api,
|
||||
)
|
||||
if ss is not None:
|
||||
if getattr(ss, "_vision_used", False):
|
||||
if ss._vision_used:
|
||||
vision_api_frames += 1
|
||||
sub_sections.append(ss)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user