diff --git a/docs/plans/video/00_VIDEO_SOURCE_OVERVIEW.md b/docs/plans/video/00_VIDEO_SOURCE_OVERVIEW.md new file mode 100644 index 0000000..48628c4 --- /dev/null +++ b/docs/plans/video/00_VIDEO_SOURCE_OVERVIEW.md @@ -0,0 +1,261 @@ +# Video Source Support — Master Plan + +**Date:** February 27, 2026 +**Feature ID:** V1.0 +**Status:** Planning +**Priority:** High +**Estimated Complexity:** Large (multi-sprint feature) + +--- + +## Table of Contents + +1. [Executive Summary](#executive-summary) +2. [Motivation & Goals](#motivation--goals) +3. [Scope](#scope) +4. [Plan Documents Index](#plan-documents-index) +5. [High-Level Architecture](#high-level-architecture) +6. [Implementation Phases](#implementation-phases) +7. [Dependencies](#dependencies) +8. [Risk Assessment](#risk-assessment) +9. [Success Criteria](#success-criteria) + +--- + +## Executive Summary + +Add **video** as a first-class source type in Skill Seekers, alongside web documentation, GitHub repositories, PDF files, and Word documents. Videos contain a massive amount of knowledge — conference talks, official tutorials, live coding sessions, architecture walkthroughs — that is currently inaccessible to our pipeline. + +The video source will use a **3-stream parallel extraction** model: + +| Stream | What | Tool | +|--------|------|------| +| **ASR** (Automatic Speech Recognition) | Spoken words → timestamped text | youtube-transcript-api + faster-whisper | +| **OCR** (Optical Character Recognition) | On-screen code/slides/diagrams → text | PySceneDetect + OpenCV + easyocr | +| **Metadata** | Title, chapters, tags, description | yt-dlp Python API | + +These three streams are **aligned on a shared timeline** and merged into structured `VideoSegment` objects — the fundamental output unit. Segments are then categorized, converted to reference markdown files, and integrated into SKILL.md just like any other source. + +--- + +## Motivation & Goals + +### Why Video? + +1. 
**Knowledge density** — A 30-minute conference talk can contain the equivalent of a 5,000-word blog post, plus live code demos that never appear in written docs. +2. **Official tutorials** — Many frameworks (React, Flutter, Unity, Godot) have official video tutorials that are the canonical learning resource. +3. **Code walkthroughs** — Screen-recorded coding sessions show real patterns, debugging workflows, and architecture decisions that written docs miss. +4. **Conference talks** — JSConf, PyCon, GopherCon, etc. contain deep technical insights from framework authors. +5. **Completeness** — Skill Seekers aims to be the **universal** documentation preprocessor. Video is the last major content type we don't support. + +### Goals + +- **G1:** Extract structured, time-aligned knowledge from YouTube videos, playlists, channels, and local video files. +- **G2:** Integrate video as a first-class source in the unified config system (multiple video sources per skill, alongside docs/github/pdf). +- **G3:** Auto-detect video sources in the `create` command (YouTube URLs, video file extensions). +- **G4:** Support two tiers: lightweight (transcript + metadata only) and full (+ visual extraction with OCR). +- **G5:** Produce output that is indistinguishable in quality from other source types — properly categorized reference files integrated into SKILL.md. +- **G6:** Make visual extraction (Whisper, OCR) available as optional add-on dependencies, keeping core install lightweight. + +### Non-Goals (explicitly out of scope for V1.0) + +- Real-time / live stream processing +- Video generation or editing +- Speaker diarization (identifying who said what) — future enhancement +- Automatic video discovery (e.g., "find all React tutorials on YouTube") — future enhancement +- DRM-protected or paywalled video content (Udemy, Coursera, etc.) 
+- Audio-only podcasts (similar pipeline but separate feature) + +--- + +## Scope + +### Supported Video Sources + +| Source | Input Format | Example | +|--------|-------------|---------| +| YouTube single video | URL | `https://youtube.com/watch?v=abc123` | +| YouTube short URL | URL | `https://youtu.be/abc123` | +| YouTube playlist | URL | `https://youtube.com/playlist?list=PLxxx` | +| YouTube channel | URL | `https://youtube.com/@channelname` | +| Vimeo video | URL | `https://vimeo.com/123456` | +| Local video file | Path | `./tutorials/intro.mp4` | +| Local video directory | Path | `./recordings/` (batch) | + +### Supported Video Formats (local files) + +| Format | Extension | Notes | +|--------|-----------|-------| +| MP4 | `.mp4` | Most common, universal | +| Matroska | `.mkv` | Common for screen recordings | +| WebM | `.webm` | Web-native, YouTube's format | +| AVI | `.avi` | Legacy but still used | +| QuickTime | `.mov` | macOS screen recordings | +| Flash Video | `.flv` | Legacy, rare | +| MPEG Transport | `.ts` | Streaming recordings | +| Windows Media | `.wmv` | Windows screen recordings | + +### Supported Languages (transcript) + +All languages supported by: +- YouTube's caption system (100+ languages) +- faster-whisper / OpenAI Whisper (99 languages) + +--- + +## Plan Documents Index + +| Document | Content | +|----------|---------| +| [`01_VIDEO_RESEARCH.md`](./01_VIDEO_RESEARCH.md) | Library research, benchmarks, industry standards | +| [`02_VIDEO_DATA_MODELS.md`](./02_VIDEO_DATA_MODELS.md) | All data classes, type definitions, JSON schemas | +| [`03_VIDEO_PIPELINE.md`](./03_VIDEO_PIPELINE.md) | Processing pipeline (6 phases), algorithms, edge cases | +| [`04_VIDEO_INTEGRATION.md`](./04_VIDEO_INTEGRATION.md) | CLI, config, source detection, unified scraper integration | +| [`05_VIDEO_OUTPUT.md`](./05_VIDEO_OUTPUT.md) | Output structure, SKILL.md integration, reference file format | +| [`06_VIDEO_TESTING.md`](./06_VIDEO_TESTING.md) | Test strategy, 
mocking, fixtures, CI considerations | +| [`07_VIDEO_DEPENDENCIES.md`](./07_VIDEO_DEPENDENCIES.md) | Dependency tiers, optional installs, system requirements | + +--- + +## High-Level Architecture + +``` + ┌──────────────────────┐ + │ User Input │ + │ │ + │ YouTube URL │ + │ Playlist URL │ + │ Local .mp4 file │ + │ Unified config JSON │ + └──────────┬───────────┘ + │ + ┌──────────▼───────────┐ + │ Source Detector │ + │ (source_detector.py) │ + │ type="video" │ + └──────────┬───────────┘ + │ + ┌──────────▼───────────┐ + │ Video Scraper │ + │ (video_scraper.py) │ + │ Main orchestrator │ + └──────────┬───────────┘ + │ + ┌────────────────────┼────────────────────┐ + │ │ │ + ┌──────────▼──────┐ ┌──────────▼──────┐ ┌──────────▼──────┐ + │ Stream 1: ASR │ │ Stream 2: OCR │ │ Stream 3: Meta │ + │ │ │ (optional) │ │ │ + │ youtube-trans- │ │ PySceneDetect │ │ yt-dlp │ + │ cript-api │ │ OpenCV │ │ extract_info() │ + │ faster-whisper │ │ easyocr │ │ │ + └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ + │ │ │ + │ Timestamped │ Keyframes + │ Chapters, + │ transcript │ OCR text │ tags, desc + │ │ │ + └────────────────────┼────────────────────┘ + │ + ┌──────────▼───────────┐ + │ Segmenter & │ + │ Aligner │ + │ (video_segmenter.py)│ + │ │ + │ Align 3 streams │ + │ on shared timeline │ + └──────────┬───────────┘ + │ + list[VideoSegment] + │ + ┌──────────▼───────────┐ + │ Output Generator │ + │ │ + │ ├ references/*.md │ + │ ├ video_data/*.json │ + │ └ SKILL.md section │ + └──────────────────────┘ +``` + +--- + +## Implementation Phases + +### Phase 1: Foundation (Core Pipeline) +- `video_models.py` — All data classes +- `video_scraper.py` — Main orchestrator +- `video_transcript.py` — YouTube captions + Whisper fallback +- Source detector update — YouTube URL patterns, video file extensions +- Basic metadata extraction via yt-dlp +- Output: timestamped transcript as reference markdown + +### Phase 2: Segmentation & Structure +- `video_segmenter.py` — Chapter-aware 
segmentation +- Semantic segmentation fallback (when no chapters) +- Time-window fallback (configurable interval) +- Segment categorization (reuse smart_categorize patterns) + +### Phase 3: Visual Extraction +- `video_visual.py` — Frame extraction + scene detection +- Frame classification (code/slide/terminal/diagram/other) +- OCR on classified frames (easyocr) +- Timeline alignment with ASR transcript + +### Phase 4: Integration +- Unified config support (`"type": "video"`) +- `create` command routing +- CLI parser + arguments +- Unified scraper integration (video alongside docs/github/pdf) +- SKILL.md section generation + +### Phase 5: Quality & Polish +- AI enhancement for video content (summarization, topic extraction) +- RAG-optimized chunking for video segments +- MCP tools (scrape_video, export_video) +- Comprehensive test suite + +--- + +## Dependencies + +### Core (always required for video) +``` +yt-dlp>=2024.12.0 +youtube-transcript-api>=1.2.0 +``` + +### Full (for visual extraction + local file transcription) +``` +faster-whisper>=1.0.0 +scenedetect[opencv]>=0.6.4 +easyocr>=1.7.0 +opencv-python-headless>=4.9.0 +``` + +### System Requirements (for full mode) +- FFmpeg (required by yt-dlp for audio extraction; faster-whisper decodes audio through PyAV, which bundles the FFmpeg libraries) +- GPU (optional but recommended for Whisper and easyocr) + +--- + +## Risk Assessment + +| Risk | Likelihood | Impact | Mitigation | +|------|-----------|--------|------------| +| YouTube API changes break scraping | Medium | High | yt-dlp actively maintained, abstract behind our API | +| Whisper models are large (~1.5GB) | Certain | Medium | Optional dependency, offer multiple model sizes | +| OCR accuracy on code is low | Medium | Medium | Combine OCR with transcript context, use confidence scoring | +| Video download is slow | High | Medium | Stream audio only, don't download full video for transcript | +| Auto-generated captions are noisy | High | Medium | Confidence filtering, AI cleanup in enhancement phase | +| Copyright / 
ToS concerns | Low | High | Document that user is responsible for content rights | +| CI tests can't download videos | Certain | Medium | Mock all network calls, use fixture transcripts | + +--- + +## Success Criteria + +1. **Functional:** `skill-seekers create https://youtube.com/watch?v=xxx` produces a skill with video content integrated into SKILL.md. +2. **Multi-source:** Video sources work alongside docs/github/pdf in unified configs. +3. **Quality:** Video-derived reference files are categorized and structured (not raw transcript dumps). +4. **Performance:** Transcript-only mode processes a 30-minute video in < 30 seconds. +5. **Tests:** Full test suite with mocked network calls, 100% of video pipeline covered. +6. **Tiered deps:** `pip install skill-seekers[video]` works without pulling Whisper/OpenCV. diff --git a/docs/plans/video/01_VIDEO_RESEARCH.md b/docs/plans/video/01_VIDEO_RESEARCH.md new file mode 100644 index 0000000..7dfcd3d --- /dev/null +++ b/docs/plans/video/01_VIDEO_RESEARCH.md @@ -0,0 +1,591 @@ +# Video Source — Library Research & Industry Standards + +**Date:** February 27, 2026 +**Document:** 01 of 07 +**Status:** Complete + +--- + +## Table of Contents + +1. [Industry Standards & Approaches](#industry-standards--approaches) +2. [Library Comparison Matrix](#library-comparison-matrix) +3. [Detailed Library Analysis](#detailed-library-analysis) +4. [Architecture Patterns from Industry](#architecture-patterns-from-industry) +5. [Benchmarks & Performance Data](#benchmarks--performance-data) +6. 
[Recommendations](#recommendations) + +--- + +## Industry Standards & Approaches + +### How the Industry Processes Video for AI/RAG + +Based on research from NVIDIA, LlamaIndex, Ragie, and open-source projects, the industry has converged on a **3-stream parallel extraction** model: + +#### The 3-Stream Model + +``` +Video Input + │ + ├──→ Stream 1: ASR (Automatic Speech Recognition) + │ Extract spoken words with timestamps + │ Tools: Whisper, YouTube captions API + │ Output: [{text, start, end, confidence}, ...] + │ + ├──→ Stream 2: OCR (Optical Character Recognition) + │ Extract visual text (code, slides, diagrams) + │ Tools: OpenCV + scene detection + OCR engine + │ Output: [{text, timestamp, frame_type, bbox}, ...] + │ + └──→ Stream 3: Metadata + Extract structural info (chapters, tags, description) + Tools: yt-dlp, platform APIs + Output: {title, chapters, tags, description, ...} +``` + +**Key insight (from NVIDIA's multimodal RAG blog):** Ground everything to text first. Align all streams on a shared timeline, then merge into unified text segments. This makes the output compatible with any text-based RAG pipeline without requiring multimodal embeddings. 
+ +#### Reference Implementations + +| Project | Approach | Strengths | Weaknesses | +|---------|----------|-----------|------------| +| [video-analyzer](https://github.com/byjlw/video-analyzer) | Whisper + OpenCV + LLM analysis | Full pipeline, LLM summaries | No chapter support, no YouTube integration | +| [LlamaIndex MultiModal RAG](https://www.llamaindex.ai/blog/multimodal-rag-for-advanced-video-processing-with-llamaindex-lancedb-33be4804822e) | Frame extraction + CLIP + LanceDB | Vector search over frames | Heavy (requires GPU), no ASR | +| [VideoRAG](https://video-rag.github.io/) | Graph-based reasoning + multimodal retrieval | Multi-hour video support | Research project, not production-ready | +| [Ragie Multimodal RAG](https://www.ragie.ai/blog/how-we-built-multimodal-rag-for-audio-and-video) | faster-whisper large-v3-turbo + OCR + object detection | Production-grade, 3-stream | Proprietary, not open-source | + +#### Industry Best Practices + +1. **Audio-only download** — Never download full video when you only need audio. Extract audio stream with FFmpeg (`-vn` flag). This is 10-50x smaller. +2. **Prefer existing captions** — YouTube manual captions are higher quality than any ASR model. Only fall back to Whisper when captions unavailable. +3. **Chapter-based segmentation** — YouTube chapters provide natural content boundaries. Use them as primary segmentation, fall back to time-window or semantic splitting. +4. **Confidence filtering** — Auto-generated captions and OCR output include confidence scores. Filter low-confidence content rather than including everything. +5. **Parallel extraction** — Run ASR and OCR in parallel (they're independent). Merge after both complete. +6. **Tiered processing** — Offer fast/light mode (transcript only) and deep mode (+ visual). Let users choose based on their compute budget. 
+ +--- + +## Library Comparison Matrix + +### Metadata & Download + +| Library | Purpose | Install Size | Actively Maintained | Python API | License | +|---------|---------|-------------|-------------------|------------|---------| +| **yt-dlp** | Metadata + subtitles + download | ~15MB | Yes (weekly releases) | Yes (`YoutubeDL` class) | Unlicense | +| pytube | YouTube download | ~1MB | Inconsistent | Yes | MIT | +| youtube-dl | Download (original) | ~10MB | Stale | Yes | Unlicense | +| pafy | YouTube metadata | ~50KB | Dead (2021) | Yes | LGPL | + +**Winner: yt-dlp** — De-facto standard, actively maintained, comprehensive Python API, supports 1000+ sites (not just YouTube). + +### Transcript Extraction (YouTube) + +| Library | Purpose | Requires Download | Speed | Accuracy | License | +|---------|---------|-------------------|-------|----------|---------| +| **youtube-transcript-api** | YouTube captions | No | Very fast (<1s) | Depends on caption source | MIT | +| yt-dlp subtitles | Download subtitle files | Yes (subtitle only) | Fast (~2s) | Same as above | Unlicense | + +**Winner: youtube-transcript-api** — Fastest, no download needed, returns structured JSON with timestamps directly. Falls back to yt-dlp for non-YouTube platforms. 
+ +### Speech-to-Text (ASR) + +| Library | Speed (30 min audio) | Word Timestamps | Model Sizes | GPU Required | Language Support | License | +|---------|---------------------|----------------|-------------|-------------|-----------------|---------| +| **faster-whisper** | ~2-4 min (GPU), ~8-15 min (CPU) | Yes (`word_timestamps=True`) | tiny (39M) → large-v3 (1.5B) | No (but recommended) | 99 languages | MIT | +| openai-whisper | ~5-10 min (GPU), ~20-40 min (CPU) | Yes | Same models | Recommended | 99 languages | MIT | +| whisper-timestamped | Same as openai-whisper | Yes (more accurate) | Same models | Recommended | 99 languages | MIT | +| whisperx | ~2-3 min (GPU) | Yes (best accuracy via wav2vec2) | Same + wav2vec2 | Yes (required) | 99 languages | BSD | +| stable-ts | Same as openai-whisper | Yes (stabilized) | Same models | Recommended | 99 languages | MIT | +| Google Speech-to-Text | Real-time | Yes | Cloud | No | 125+ languages | Proprietary | +| AssemblyAI | Real-time | Yes | Cloud | No | 100+ languages | Proprietary | + +**Winner: faster-whisper** — 4x faster than OpenAI Whisper via CTranslate2 optimization, MIT license, word-level timestamps, works without GPU (just slower), actively maintained. We may consider whisperx as a future upgrade for speaker diarization. 
+ +### Scene Detection & Frame Extraction + +| Library | Purpose | Algorithm | Speed | License | +|---------|---------|-----------|-------|---------| +| **PySceneDetect** | Scene boundary detection | ContentDetector, ThresholdDetector, AdaptiveDetector | Fast | BSD | +| opencv-python-headless | Frame extraction, image processing | Manual (absdiff, histogram) | Fast | Apache 2.0 | +| Filmstrip | Keyframe extraction | Scene detection + selection | Medium | MIT | +| video-keyframe-detector | Keyframe extraction | Peak estimation from frame diff | Fast | MIT | +| decord | GPU-accelerated frame extraction | Direct frame access | Very fast | Apache 2.0 | + +**Winner: PySceneDetect + opencv-python-headless** — PySceneDetect handles intelligent boundary detection, OpenCV handles frame extraction and image processing. Both are well-maintained and BSD/Apache licensed. + +### OCR (Optical Character Recognition) + +| Library | Languages | GPU Support | Accuracy on Code | Speed | Install Size | License | +|---------|-----------|------------|-------------------|-------|-------------|---------| +| **easyocr** | 80+ | Yes (PyTorch) | Good | Medium | ~150MB + models | Apache 2.0 | +| pytesseract | 100+ | No | Medium | Fast | ~30MB + Tesseract | Apache 2.0 | +| PaddleOCR | 80+ | Yes (PaddlePaddle) | Very Good | Fast | ~200MB + models | Apache 2.0 | +| TrOCR (HuggingFace) | Multilingual | Yes | Good | Slow | ~500MB | MIT | +| docTR | 10+ | Yes (TF/PyTorch) | Good | Medium | ~100MB | Apache 2.0 | + +**Winner: easyocr** — Best balance of accuracy (especially on code/terminal text), GPU support, language coverage, and ease of use. PaddleOCR is a close second but has heavier dependencies (PaddlePaddle framework). + +--- + +## Detailed Library Analysis + +### 1. 
yt-dlp (Metadata & Download Engine) + +**What it provides:** +- Video metadata (title, description, duration, upload date, channel, tags, categories) +- Chapter information (title, start_time, end_time for each chapter) +- Subtitle/caption download (all available languages, all formats) +- Thumbnail URLs +- View/like counts +- Playlist information (title, entries, ordering) +- Audio-only extraction (no full video download needed) +- Supports 1000+ video sites (YouTube, Vimeo, Dailymotion, etc.) + +**Python API usage:** + +```python +from yt_dlp import YoutubeDL + +def extract_video_metadata(url: str) -> dict: + """Extract metadata without downloading.""" + opts = { + 'quiet': True, + 'no_warnings': True, + 'extract_flat': False, # Full extraction + } + with YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=False) + return info +``` + +**Key fields in `info_dict`:** + +```python +{ + 'id': 'dQw4w9WgXcQ', # Video ID + 'title': 'Video Title', # Full title + 'description': '...', # Full description text + 'duration': 1832, # Duration in seconds + 'upload_date': '20260115', # YYYYMMDD format + 'uploader': 'Channel Name', # Channel/uploader name + 'uploader_id': '@channelname', # Channel ID + 'uploader_url': 'https://...', # Channel URL + 'channel_follower_count': 150000, # Subscriber count + 'view_count': 5000000, # View count + 'like_count': 120000, # Like count + 'comment_count': 8500, # Comment count + 'tags': ['react', 'hooks', ...], # Video tags + 'categories': ['Education'], # YouTube categories + 'language': 'en', # Primary language + 'subtitles': { # Manual captions + 'en': [{'ext': 'vtt', 'url': '...'}], + }, + 'automatic_captions': { # Auto-generated captions + 'en': [{'ext': 'vtt', 'url': '...'}], + }, + 'chapters': [ # Chapter markers + {'title': 'Intro', 'start_time': 0, 'end_time': 45}, + {'title': 'Setup', 'start_time': 45, 'end_time': 180}, + {'title': 'First Component', 'start_time': 180, 'end_time': 420}, + ], + 'thumbnail': 
'https://...', # Best thumbnail URL + 'thumbnails': [...], # All thumbnail variants + 'webpage_url': 'https://...', # Canonical URL + 'formats': [...], # Available formats + 'requested_formats': [...], # Selected format info +} +``` + +**Playlist extraction:** + +```python +def extract_playlist(url: str) -> list[dict]: + """Extract all videos from a playlist.""" + opts = { + 'quiet': True, + 'extract_flat': True, # Don't extract each video yet + } + with YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=False) + # info['entries'] contains all video entries + return info.get('entries', []) +``` + +**Audio-only download (for Whisper):** + +```python +def download_audio(url: str, output_dir: str) -> str: + """Download audio stream only (no video).""" + opts = { + 'format': 'bestaudio/best', + 'postprocessors': [{ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'wav', + 'preferredquality': '16', # Quality/bitrate hint, not a sample rate (faster-whisper resamples to 16 kHz on load) + }], + 'outtmpl': f'{output_dir}/%(id)s.%(ext)s', + 'quiet': True, + } + with YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=True) + return f"{output_dir}/{info['id']}.wav" +``` + +### 2. 
youtube-transcript-api (Caption Extraction) + +**What it provides:** +- Direct access to YouTube captions without downloading +- Manual and auto-generated caption support +- Translation support (translate captions to any language) +- Structured output with timestamps + +**Python API usage:** + +```python +from youtube_transcript_api import YouTubeTranscriptApi + +def get_youtube_transcript(video_id: str, languages: list[str] = None) -> list[dict]: + """Get transcript with timestamps.""" + languages = languages or ['en'] + + transcript_list = YouTubeTranscriptApi().list(video_id) + + # Prefer manual captions over auto-generated + try: + transcript = transcript_list.find_manually_created_transcript(languages) + except Exception: + transcript = transcript_list.find_generated_transcript(languages) + + # Fetch the transcript (a FetchedTranscript object) and convert it to plain dicts + data = transcript.fetch().to_raw_data() + return data + # Returns: [{'text': 'Hello', 'start': 0.0, 'duration': 1.5}, ...] +``` + +**Output format:** + +```python +[ + { + 'text': "Welcome to this React tutorial", + 'start': 0.0, # Start time in seconds + 'duration': 2.5 # Duration in seconds + }, + { + 'text': "Today we'll learn about hooks", + 'start': 2.5, + 'duration': 3.0 + }, + # ... continues for entire video +] +``` + +**Key features:** +- Segments are typically 2-5 seconds each +- Manual captions have punctuation and proper casing +- Auto-generated captions may lack punctuation and have lower accuracy +- Can detect available languages and caption types + +### 3. 
faster-whisper (Speech-to-Text) + +**What it provides:** +- OpenAI Whisper models with 4x speedup via CTranslate2 +- Word-level timestamps with confidence scores +- Language detection +- VAD (Voice Activity Detection) filtering +- Multiple model sizes from tiny (39M) to large-v3 (1.5B) + +**Python API usage:** + +```python +from faster_whisper import WhisperModel + +def transcribe_with_whisper(audio_path: str, model_size: str = "base") -> dict: + """Transcribe audio file with word-level timestamps.""" + model = WhisperModel( + model_size, + device="auto", # auto-detect GPU/CPU + compute_type="auto", # auto-select precision + ) + + segments, info = model.transcribe( + audio_path, + word_timestamps=True, + vad_filter=True, # Filter silence + vad_parameters={ + "min_silence_duration_ms": 500, + }, + ) + + result = { + 'language': info.language, + 'language_probability': info.language_probability, + 'duration': info.duration, + 'segments': [], + } + + for segment in segments: + seg_data = { + 'start': segment.start, + 'end': segment.end, + 'text': segment.text.strip(), + 'avg_logprob': segment.avg_logprob, + 'no_speech_prob': segment.no_speech_prob, + 'words': [], + } + if segment.words: + for word in segment.words: + seg_data['words'].append({ + 'word': word.word, + 'start': word.start, + 'end': word.end, + 'probability': word.probability, + }) + result['segments'].append(seg_data) + + return result +``` + +**Model size guide:** + +| Model | Parameters | English WER | Multilingual WER | VRAM (FP16) | Speed (30 min, GPU) | +|-------|-----------|-------------|------------------|-------------|---------------------| +| tiny | 39M | 14.8% | 23.2% | ~1GB | ~30s | +| base | 74M | 11.5% | 18.7% | ~1GB | ~45s | +| small | 244M | 9.5% | 14.6% | ~2GB | ~90s | +| medium | 769M | 8.0% | 12.4% | ~5GB | ~180s | +| large-v3 | 1.5B | 5.7% | 10.1% | ~10GB | ~240s | +| large-v3-turbo | 809M | 6.2% | 10.8% | ~6GB | ~120s | + +**Recommendation:** Default to `base` (good balance), offer 
`large-v3-turbo` for best accuracy, `tiny` for speed. + +### 4. PySceneDetect (Scene Boundary Detection) + +**What it provides:** +- Automatic scene/cut detection in video files +- Multiple detection algorithms (content-based, threshold, adaptive) +- Frame-accurate boundaries +- Integration with OpenCV + +**Python API usage:** + +```python +from scenedetect import detect, ContentDetector, AdaptiveDetector + +def detect_scene_changes(video_path: str) -> list[tuple[float, float]]: + """Detect scene boundaries in video. + + Returns list of (start_time, end_time) tuples. + """ + scene_list = detect( + video_path, + ContentDetector( + threshold=27.0, # Sensitivity (lower = more scenes) + min_scene_len=15, # Minimum 15 frames per scene + ), + ) + + boundaries = [] + for scene in scene_list: + start = scene[0].get_seconds() + end = scene[1].get_seconds() + boundaries.append((start, end)) + + return boundaries +``` + +**Detection algorithms:** + +| Algorithm | Best For | Speed | Sensitivity | +|-----------|----------|-------|-------------| +| ContentDetector | General content changes | Fast | Medium | +| AdaptiveDetector | Gradual transitions | Medium | High | +| ThresholdDetector | Hard cuts (black frames) | Very fast | Low | + +### 5. 
easyocr (Text Recognition) + +**What it provides:** +- Text detection and recognition from images +- 80+ language support +- GPU acceleration +- Bounding box coordinates for each text region +- Confidence scores + +**Python API usage:** + +```python +import easyocr + +def extract_text_from_frame(image_path: str, languages: list[str] = None) -> list[dict]: + """Extract text from a video frame image.""" + languages = languages or ['en'] + reader = easyocr.Reader(languages, gpu=True) + + results = reader.readtext(image_path) + # results: [([x1,y1],[x2,y2],[x3,y3],[x4,y4]), text, confidence] + + extracted = [] + for bbox, text, confidence in results: + extracted.append({ + 'text': text, + 'confidence': confidence, + 'bbox': bbox, # Corner coordinates + }) + + return extracted +``` + +**Tips for code/terminal OCR:** +- Pre-process images: increase contrast, convert to grayscale +- Use higher DPI/resolution frames +- Filter by confidence threshold (>0.5 for code) +- Detect monospace regions first, then OCR only those regions + +### 6. 
OpenCV (Frame Extraction) + +**What it provides:** +- Video file reading and frame extraction +- Image processing (resize, crop, color conversion) +- Template matching (detect code editors, terminals) +- Histogram analysis (detect slide vs code vs webcam) + +**Python API usage:** + +```python +import cv2 +import numpy as np + +def extract_frames_at_timestamps( + video_path: str, + timestamps: list[float], + output_dir: str +) -> list[str]: + """Extract frames at specific timestamps.""" + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_paths = [] + + for ts in timestamps: + frame_number = int(ts * fps) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number) + ret, frame = cap.read() + if ret: + path = f"{output_dir}/frame_{ts:.2f}.png" + cv2.imwrite(path, frame) + frame_paths.append(path) + + cap.release() + return frame_paths + + +def classify_frame(image_path: str) -> str: + """Classify frame as code/slide/terminal/webcam/other. + + Uses heuristics: + - Dark background + monospace text regions = code/terminal + - Light background + large text blocks = slide + - Face detection = webcam + - High color variance = diagram + """ + img = cv2.imread(image_path) + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + h, w = gray.shape + + # Check brightness distribution + mean_brightness = np.mean(gray) + brightness_std = np.std(gray) + + # Dark background with structured content = code/terminal + if mean_brightness < 80 and brightness_std > 40: + return 'code' # or 'terminal' + + # Light background with text blocks = slide + if mean_brightness > 180 and brightness_std < 60: + return 'slide' + + # High edge density = diagram + edges = cv2.Canny(gray, 50, 150) + edge_density = np.count_nonzero(edges) / (h * w) + if edge_density > 0.15: + return 'diagram' + + return 'other' +``` + +--- + +## Benchmarks & Performance Data + +### Transcript Extraction Speed + +| Method | 10 min video | 30 min video | 60 min video | Requires Download | 
+|--------|-------------|-------------|-------------|-------------------| +| youtube-transcript-api | ~0.5s | ~0.5s | ~0.5s | No | +| yt-dlp subtitles | ~2s | ~2s | ~2s | Subtitle file only | +| faster-whisper (tiny, GPU) | ~10s | ~30s | ~60s | Audio only | +| faster-whisper (base, GPU) | ~15s | ~45s | ~90s | Audio only | +| faster-whisper (large-v3, GPU) | ~80s | ~240s | ~480s | Audio only | +| faster-whisper (base, CPU) | ~60s | ~180s | ~360s | Audio only | + +### Visual Extraction Speed + +| Operation | Per Frame | Per 10 min video (50 keyframes) | +|-----------|----------|-------------------------------| +| Frame extraction (OpenCV) | ~5ms | ~0.25s | +| Scene detection (PySceneDetect) | N/A | ~15s for full video | +| Frame classification (heuristic) | ~10ms | ~0.5s | +| OCR per frame (easyocr, GPU) | ~200ms | ~10s | +| OCR per frame (easyocr, CPU) | ~1-2s | ~50-100s | + +### Total Pipeline Time (estimated) + +| Mode | 10 min video | 30 min video | 1 hour video | +|------|-------------|-------------|-------------| +| Transcript only (YouTube captions) | ~2s | ~2s | ~2s | +| Transcript only (Whisper base, GPU) | ~20s | ~50s | ~100s | +| Full (transcript + visual, GPU) | ~35s | ~80s | ~170s | +| Full (transcript + visual, CPU) | ~120s | ~350s | ~700s | + +--- + +## Recommendations + +### Primary Stack (Chosen) + +| Component | Library | Why | +|-----------|---------|-----| +| Metadata + download | **yt-dlp** | De-facto standard, 1000+ sites, comprehensive Python API | +| YouTube transcripts | **youtube-transcript-api** | Fastest, no download, structured output | +| Speech-to-text | **faster-whisper** | 4x faster than Whisper, MIT, word timestamps | +| Scene detection | **PySceneDetect** | Best algorithm options, OpenCV-based | +| Frame extraction | **opencv-python-headless** | Standard, headless (no GUI deps) | +| OCR | **easyocr** | Best code/terminal accuracy, 80+ languages, GPU support | + +### Future Considerations + +| Component | Library | When to Add | 
+|-----------|---------|-------------| +| Speaker diarization | **whisperx** or **pyannote** | V2.0 — identify who said what | +| Object detection | **YOLO** | V2.0 — detect UI elements, diagrams | +| Multimodal embeddings | **CLIP** | V2.0 — embed frames for visual search | +| Slide detection | **python-pptx** + heuristics | V1.5 — detect and extract slide content | + +### Sources + +- [youtube-transcript-api (PyPI)](https://pypi.org/project/youtube-transcript-api/) +- [yt-dlp GitHub](https://github.com/yt-dlp/yt-dlp) +- [yt-dlp Information Extraction Pipeline (DeepWiki)](https://deepwiki.com/yt-dlp/yt-dlp/2.2-information-extraction-pipeline) +- [faster-whisper GitHub](https://github.com/SYSTRAN/faster-whisper) +- [faster-whisper (PyPI)](https://pypi.org/project/faster-whisper/) +- [whisper-timestamped GitHub](https://github.com/linto-ai/whisper-timestamped) +- [stable-ts (PyPI)](https://pypi.org/project/stable-ts/) +- [PySceneDetect GitHub](https://github.com/Breakthrough/PySceneDetect) +- [easyocr GitHub (implied from PyPI)](https://pypi.org/project/easyocr/) +- [NVIDIA Multimodal RAG for Video and Audio](https://developer.nvidia.com/blog/an-easy-introduction-to-multimodal-retrieval-augmented-generation-for-video-and-audio/) +- [LlamaIndex MultiModal RAG for Video](https://www.llamaindex.ai/blog/multimodal-rag-for-advanced-video-processing-with-llamaindex-lancedb-33be4804822e) +- [Ragie: How We Built Multimodal RAG](https://www.ragie.ai/blog/how-we-built-multimodal-rag-for-audio-and-video) +- [video-analyzer GitHub](https://github.com/byjlw/video-analyzer) +- [VideoRAG Project](https://video-rag.github.io/) +- [video-keyframe-detector GitHub](https://github.com/joelibaceta/video-keyframe-detector) +- [Filmstrip GitHub](https://github.com/tafsiri/filmstrip) diff --git a/docs/plans/video/02_VIDEO_DATA_MODELS.md b/docs/plans/video/02_VIDEO_DATA_MODELS.md new file mode 100644 index 0000000..d352e17 --- /dev/null +++ b/docs/plans/video/02_VIDEO_DATA_MODELS.md @@ -0,0 
+1,972 @@ +# Video Source — Data Models & Type Definitions + +**Date:** February 27, 2026 +**Document:** 02 of 07 +**Status:** Planning + +--- + +## Table of Contents + +1. [Design Principles](#design-principles) +2. [Core Data Classes](#core-data-classes) +3. [Supporting Data Classes](#supporting-data-classes) +4. [Enumerations](#enumerations) +5. [JSON Schema (Serialization)](#json-schema-serialization) +6. [Relationships Diagram](#relationships-diagram) +7. [Config Schema (Unified Config)](#config-schema-unified-config) + +--- + +## Design Principles + +1. **Immutable after creation** — Use `@dataclass(frozen=True)` for leaf value types (`Chapter`, `TranscriptSegment`, `WordTimestamp`, `OCRRegion`). Once extracted, their data doesn't change. Types that are populated or enriched in later phases (e.g. `VideoInfo`, `VideoSegment`) remain regular mutable dataclasses. +2. **Serializable** — Every data class must serialize to/from JSON for caching, output, and inter-process communication. +3. **Timeline-aligned** — Every piece of extracted content carries timing information in seconds (`start_time`/`end_time` on segments and chapters, `start`/`end` on transcript data, a single `timestamp` on keyframes). This is the alignment axis for merging streams. +4. **Confidence-scored** — Every extracted piece of content carries a confidence score for quality filtering. +5. **Source-aware** — Every piece of data traces back to its origin (which video, which stream, which tool). +6. **Compatible** — Output structures must be compatible with existing Skill Seekers page/reference format for seamless integration. + +--- + +## Core Data Classes + +### VideoInfo — The top-level container for a single video + +```python +@dataclass +class VideoInfo: + """Complete metadata and extracted content for a single video. + + This is the primary output of the video scraper for one video. + It contains raw metadata from the platform, plus all extracted + and aligned content (segments). + + Lifecycle: + 1. Created with metadata during resolve phase + 2. Transcript populated during ASR phase + 3. Visual data populated during OCR phase (if enabled) + 4. Segments populated during alignment phase + """ + + # === Identity === + video_id: str + """Unique identifier.
+ - YouTube: 11-char video ID (e.g., 'dQw4w9WgXcQ') + - Vimeo: numeric ID (e.g., '123456789') + - Local: SHA-256 hash of file path + """ + + source_type: VideoSourceType + """Where this video came from (youtube, vimeo, local_file).""" + + source_url: str | None + """Original URL for online videos. None for local files.""" + + file_path: str | None + """Local file path. Set for local files, or after download for + online videos that needed audio extraction.""" + + # === Basic Metadata === + title: str + """Video title. For local files, derived from filename.""" + + description: str + """Full description text. Empty string for local files without metadata.""" + + duration: float + """Duration in seconds.""" + + upload_date: str | None + """Upload/creation date in ISO 8601 format (YYYY-MM-DD). + None if unknown.""" + + language: str + """Primary language code (e.g., 'en', 'tr', 'ja'). + Detected from captions, Whisper, or metadata.""" + + # === Channel / Author === + channel_name: str | None + """Channel or uploader name.""" + + channel_url: str | None + """URL to the channel/uploader page.""" + + channel_subscriber_count: int | None + """Subscriber/follower count. Quality signal.""" + + # === Engagement Metadata (quality signals) === + view_count: int | None + """Total view count. Higher = more authoritative.""" + + like_count: int | None + """Like count.""" + + comment_count: int | None + """Comment count. Higher = more discussion.""" + + # === Discovery Metadata === + tags: list[str] + """Video tags from platform. Used for categorization.""" + + categories: list[str] + """Platform categories (e.g., ['Education', 'Science & Technology']).""" + + thumbnail_url: str | None + """URL to the best quality thumbnail.""" + + # === Structure === + chapters: list[Chapter] + """YouTube chapter markers. Empty list if no chapters. 
+ This is the PRIMARY segmentation source.""" + + # === Playlist Context === + playlist_title: str | None + """Title of the playlist this video belongs to. None if standalone.""" + + playlist_index: int | None + """0-based index within the playlist. None if standalone.""" + + playlist_total: int | None + """Total number of videos in the playlist. None if standalone.""" + + # === Extracted Content (populated during processing) === + raw_transcript: list[TranscriptSegment] + """Raw transcript segments as received from YouTube API or Whisper. + Before alignment and merging.""" + + segments: list[VideoSegment] + """Final aligned and merged segments. This is the PRIMARY output. + Each segment combines ASR + OCR + metadata into a single unit.""" + + # === Processing Metadata === + transcript_source: TranscriptSource + """How the transcript was obtained.""" + + visual_extraction_enabled: bool + """Whether OCR/frame extraction was performed.""" + + whisper_model: str | None + """Whisper model used, if applicable (e.g., 'base', 'large-v3').""" + + processing_time_seconds: float + """Total processing time for this video.""" + + extracted_at: str + """ISO 8601 timestamp of when extraction was performed.""" + + # === Quality Scores (computed) === + transcript_confidence: float + """Average confidence of transcript (0.0 - 1.0). + Based on caption type or Whisper probability.""" + + content_richness_score: float + """How rich/useful the extracted content is (0.0 - 1.0). + Based on: duration, chapters present, code detected, engagement.""" + + def to_dict(self) -> dict: + """Serialize to JSON-compatible dictionary.""" + ... + + @classmethod + def from_dict(cls, data: dict) -> 'VideoInfo': + """Deserialize from dictionary.""" + ... +``` + +### VideoSegment — The fundamental aligned content unit + +```python +@dataclass +class VideoSegment: + """A time-aligned segment combining all 3 extraction streams. + + This is the CORE data unit of the video pipeline. 
Every piece + of video content is broken into segments that align: + - ASR transcript (what was said) + - OCR content (what was shown on screen) + - Metadata (chapter title, topic) + + Segments are then used to generate reference markdown files + and integrate into SKILL.md. + + Segmentation strategies (in priority order): + 1. Chapter boundaries (YouTube chapters) + 2. Semantic boundaries (topic shifts detected by NLP) + 3. Time windows (configurable interval, default 3-5 minutes) + """ + + # === Time Bounds === + index: int + """0-based segment index within the video.""" + + start_time: float + """Start time in seconds.""" + + end_time: float + """End time in seconds.""" + + duration: float + """Segment duration in seconds (end_time - start_time).""" + + # === Stream 1: ASR (Audio) === + transcript: str + """Full transcript text for this time window. + Concatenated from word-level timestamps.""" + + words: list[WordTimestamp] + """Word-level timestamps within this segment. + Allows precise text-to-time mapping.""" + + transcript_confidence: float + """Average confidence for this segment's transcript (0.0 - 1.0).""" + + # === Stream 2: OCR (Visual) === + keyframes: list[KeyFrame] + """Extracted keyframes within this time window. + Only populated if visual_extraction is enabled.""" + + ocr_text: str + """Combined OCR text from all keyframes in this segment. + Deduplicated and cleaned.""" + + detected_code_blocks: list[CodeBlock] + """Code blocks detected on screen via OCR. + Includes language detection and formatted code.""" + + has_code_on_screen: bool + """Whether code/terminal was detected on screen.""" + + has_slides: bool + """Whether presentation slides were detected.""" + + has_diagram: bool + """Whether diagrams/architecture drawings were detected.""" + + # === Stream 3: Metadata === + chapter_title: str | None + """YouTube chapter title if this segment maps to a chapter. 
+ None if video has no chapters or segment spans chapter boundary.""" + + topic: str | None + """Inferred topic for this segment. + Derived from chapter title, transcript keywords, or AI classification.""" + + category: str | None + """Mapped category (e.g., 'getting_started', 'api', 'tutorial'). + Uses the same categorization system as other sources.""" + + # === Merged Content === + content: str + """Final merged text content for this segment. + + Merging strategy: + 1. Start with transcript text + 2. If code detected on screen but not mentioned in transcript, + append code block with annotation + 3. If slide text detected, integrate as supplementary content + 4. Add chapter title as heading if present + + This is what gets written to reference markdown files. + """ + + summary: str | None + """AI-generated summary of this segment (populated during enhancement). + None until enhancement phase.""" + + # === Quality Metadata === + confidence: float + """Overall confidence for this segment (0.0 - 1.0). + Weighted average of transcript + OCR confidences.""" + + content_type: SegmentContentType + """Primary content type of this segment.""" + + def to_dict(self) -> dict: + """Serialize to JSON-compatible dictionary.""" + ... + + @classmethod + def from_dict(cls, data: dict) -> 'VideoSegment': + """Deserialize from dictionary.""" + ... + + @property + def timestamp_display(self) -> str: + """Human-readable timestamp (e.g., '05:30 - 08:15').""" + start_min, start_sec = divmod(int(self.start_time), 60) + end_min, end_sec = divmod(int(self.end_time), 60) + return f"{start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d}" + + @property + def youtube_timestamp_url(self) -> str | None: + """YouTube URL with timestamp parameter (e.g., '?t=330'). + Returns None if not a YouTube video.""" + ... 
+``` + +--- + +## Supporting Data Classes + +### Chapter — YouTube chapter marker + +```python +@dataclass(frozen=True) +class Chapter: + """A chapter marker from a video (typically YouTube). + + Chapters provide natural content boundaries and are the + preferred segmentation method. + """ + title: str + """Chapter title as shown in YouTube.""" + + start_time: float + """Start time in seconds.""" + + end_time: float + """End time in seconds.""" + + @property + def duration(self) -> float: + return self.end_time - self.start_time + + def to_dict(self) -> dict: + return { + 'title': self.title, + 'start_time': self.start_time, + 'end_time': self.end_time, + } +``` + +### TranscriptSegment — Raw transcript chunk from API/Whisper + +```python +@dataclass(frozen=True) +class TranscriptSegment: + """A raw transcript segment as received from the source. + + This is the unprocessed output from youtube-transcript-api or + faster-whisper, before alignment and merging. + + youtube-transcript-api segments are typically 2-5 seconds each. + faster-whisper segments are typically sentence-level (5-30 seconds). + """ + text: str + """Transcript text for this segment.""" + + start: float + """Start time in seconds.""" + + end: float + """End time in seconds. Computed as start + duration for YouTube API.""" + + confidence: float + """Confidence score (0.0 - 1.0). + - YouTube manual captions: 1.0 (assumed perfect) + - YouTube auto-generated: 0.8 (estimated) + - Whisper: actual model probability + """ + + words: list[WordTimestamp] | None + """Word-level timestamps, if available. + Always available from faster-whisper. + Not available from youtube-transcript-api. 
+ """ + + source: TranscriptSource + """Which tool produced this segment.""" + + def to_dict(self) -> dict: + return { + 'text': self.text, + 'start': self.start, + 'end': self.end, + 'confidence': self.confidence, + 'words': [w.to_dict() for w in self.words] if self.words else None, + 'source': self.source.value, + } +``` + +### WordTimestamp — Individual word with timing + +```python +@dataclass(frozen=True) +class WordTimestamp: + """A single word with precise timing information. + + Enables precise text-to-time mapping within segments. + Essential for aligning ASR with OCR content. + """ + word: str + """The word text.""" + + start: float + """Start time in seconds.""" + + end: float + """End time in seconds.""" + + probability: float + """Model confidence for this word (0.0 - 1.0). + From faster-whisper's word_timestamps output.""" + + def to_dict(self) -> dict: + return { + 'word': self.word, + 'start': self.start, + 'end': self.end, + 'probability': self.probability, + } +``` + +### KeyFrame — Extracted video frame with analysis + +```python +@dataclass +class KeyFrame: + """An extracted video frame with visual analysis results. + + Keyframes are extracted at: + 1. Scene change boundaries (PySceneDetect) + 2. Chapter boundaries + 3. Regular intervals within segments (configurable) + + Each frame is classified and optionally OCR'd. + """ + timestamp: float + """Exact timestamp in seconds where this frame was extracted.""" + + image_path: str + """Path to the saved frame image file (PNG). + Relative to the video_data/frames/ directory.""" + + frame_type: FrameType + """Classification of what this frame shows.""" + + scene_change_score: float + """How different this frame is from the previous one (0.0 - 1.0). + Higher = more significant visual change. + From PySceneDetect's content detection.""" + + # === OCR Results === + ocr_regions: list[OCRRegion] + """All text regions detected in this frame. 
+ Empty list if OCR was not performed or no text detected.""" + + ocr_text: str + """Combined OCR text from all regions. + Cleaned and deduplicated.""" + + ocr_confidence: float + """Average OCR confidence across all regions (0.0 - 1.0).""" + + # === Frame Properties === + width: int + """Frame width in pixels.""" + + height: int + """Frame height in pixels.""" + + mean_brightness: float + """Average brightness (0-255). Used for classification.""" + + def to_dict(self) -> dict: + return { + 'timestamp': self.timestamp, + 'image_path': self.image_path, + 'frame_type': self.frame_type.value, + 'scene_change_score': self.scene_change_score, + 'ocr_regions': [r.to_dict() for r in self.ocr_regions], + 'ocr_text': self.ocr_text, + 'ocr_confidence': self.ocr_confidence, + 'width': self.width, + 'height': self.height, + } +``` + +### OCRRegion — A detected text region in a frame + +```python +@dataclass(frozen=True) +class OCRRegion: + """A single text region detected by OCR within a frame. + + Includes bounding box coordinates for spatial analysis + (e.g., detecting code editors vs. slide titles). + """ + text: str + """Detected text content.""" + + confidence: float + """OCR confidence (0.0 - 1.0).""" + + bbox: tuple[int, int, int, int] + """Bounding box as (x1, y1, x2, y2) in pixels. + Top-left to bottom-right.""" + + is_monospace: bool + """Whether the text appears to be in a monospace font. + Indicates code/terminal content.""" + + def to_dict(self) -> dict: + return { + 'text': self.text, + 'confidence': self.confidence, + 'bbox': list(self.bbox), + 'is_monospace': self.is_monospace, + } +``` + +### CodeBlock — Detected code on screen + +```python +@dataclass +class CodeBlock: + """A code block detected via OCR from video frames. + + Represents code that was visible on screen during a segment. + May come from a code editor, terminal, or presentation slide. + """ + code: str + """The extracted code text. 
Cleaned and formatted.""" + + language: str | None + """Detected programming language (e.g., 'python', 'javascript'). + Uses the same detection heuristics as doc_scraper.detect_language(). + None if language cannot be determined.""" + + source_frame: float + """Timestamp of the frame where this code was extracted.""" + + context: CodeContext + """Where the code appeared (editor, terminal, slide).""" + + confidence: float + """OCR confidence for this code block (0.0 - 1.0).""" + + def to_dict(self) -> dict: + return { + 'code': self.code, + 'language': self.language, + 'source_frame': self.source_frame, + 'context': self.context.value, + 'confidence': self.confidence, + } +``` + +### VideoPlaylist — Container for playlist processing + +```python +@dataclass +class VideoPlaylist: + """A playlist or channel containing multiple videos. + + Used to track multi-video processing state and ordering. + """ + playlist_id: str + """Platform playlist ID.""" + + title: str + """Playlist title.""" + + description: str + """Playlist description.""" + + channel_name: str | None + """Channel that owns the playlist.""" + + video_count: int + """Total number of videos in the playlist.""" + + videos: list[VideoInfo] + """Extracted video information for each video. + Ordered by playlist index.""" + + source_url: str + """Original playlist URL.""" + + def to_dict(self) -> dict: + return { + 'playlist_id': self.playlist_id, + 'title': self.title, + 'description': self.description, + 'channel_name': self.channel_name, + 'video_count': self.video_count, + 'videos': [v.to_dict() for v in self.videos], + 'source_url': self.source_url, + } +``` + +### VideoScraperResult — Top-level scraper output + +```python +@dataclass +class VideoScraperResult: + """Complete result from the video scraper. + + This is the top-level output that gets passed to the + unified scraper and SKILL.md builder. 
+ """ + videos: list[VideoInfo] + """All processed videos.""" + + playlists: list[VideoPlaylist] + """Playlist containers (if input was playlists).""" + + total_duration_seconds: float + """Sum of all video durations.""" + + total_segments: int + """Sum of all segments across all videos.""" + + total_code_blocks: int + """Total code blocks detected across all videos.""" + + categories: dict[str, list[VideoSegment]] + """Segments grouped by detected category. + Same category system as other sources.""" + + config: VideoSourceConfig + """Configuration used for this scrape.""" + + processing_time_seconds: float + """Total pipeline processing time.""" + + warnings: list[str] + """Any warnings generated during processing (e.g., missing captions).""" + + errors: list[VideoError] + """Errors for individual videos that failed processing.""" + + def to_dict(self) -> dict: + ... +``` + +--- + +## Enumerations + +```python +from enum import Enum + +class VideoSourceType(Enum): + """Where a video came from.""" + YOUTUBE = "youtube" + VIMEO = "vimeo" + LOCAL_FILE = "local_file" + LOCAL_DIRECTORY = "local_directory" + +class TranscriptSource(Enum): + """How the transcript was obtained.""" + YOUTUBE_MANUAL = "youtube_manual" # Human-created captions + YOUTUBE_AUTO = "youtube_auto_generated" # YouTube's ASR + WHISPER = "whisper" # faster-whisper local ASR + SUBTITLE_FILE = "subtitle_file" # SRT/VTT file alongside video + NONE = "none" # No transcript available + +class FrameType(Enum): + """Classification of a keyframe's visual content.""" + CODE_EDITOR = "code_editor" # IDE or code editor visible + TERMINAL = "terminal" # Terminal/command line + SLIDE = "slide" # Presentation slide + DIAGRAM = "diagram" # Architecture/flow diagram + BROWSER = "browser" # Web browser (documentation, output) + WEBCAM = "webcam" # Speaker face/webcam only + SCREENCAST = "screencast" # General screen recording + OTHER = "other" # Unclassified + +class CodeContext(Enum): + """Where code was displayed 
in the video.""" + EDITOR = "editor" # Code editor / IDE + TERMINAL = "terminal" # Terminal / command line output + SLIDE = "slide" # Code on a presentation slide + BROWSER = "browser" # Code in a browser (docs, playground) + UNKNOWN = "unknown" + +class SegmentContentType(Enum): + """Primary content type of a video segment.""" + EXPLANATION = "explanation" # Talking/explaining concepts + LIVE_CODING = "live_coding" # Writing code on screen + DEMO = "demo" # Running/showing a demo + SLIDES = "slides" # Presentation slides + Q_AND_A = "q_and_a" # Q&A section + INTRO = "intro" # Introduction/overview + OUTRO = "outro" # Conclusion/wrap-up + MIXED = "mixed" # Combination of types + +class SegmentationStrategy(Enum): + """How segments are determined.""" + CHAPTERS = "chapters" # YouTube chapter boundaries + SEMANTIC = "semantic" # Topic shift detection + TIME_WINDOW = "time_window" # Fixed time intervals + SCENE_CHANGE = "scene_change" # Visual scene changes + HYBRID = "hybrid" # Combination of strategies +``` + +--- + +## JSON Schema (Serialization) + +### VideoSegment JSON + +```json +{ + "index": 0, + "start_time": 45.0, + "end_time": 180.0, + "duration": 135.0, + "transcript": "Let's start by setting up our React project. 
First, we'll use Create React App...", + "words": [ + {"word": "Let's", "start": 45.0, "end": 45.3, "probability": 0.95}, + {"word": "start", "start": 45.3, "end": 45.6, "probability": 0.98} + ], + "transcript_confidence": 0.94, + "keyframes": [ + { + "timestamp": 52.3, + "image_path": "frames/video_abc123/frame_52.30.png", + "frame_type": "terminal", + "scene_change_score": 0.72, + "ocr_text": "npx create-react-app my-app", + "ocr_confidence": 0.89, + "ocr_regions": [ + { + "text": "npx create-react-app my-app", + "confidence": 0.89, + "bbox": [120, 340, 580, 370], + "is_monospace": true + } + ], + "width": 1920, + "height": 1080 + } + ], + "ocr_text": "npx create-react-app my-app\ncd my-app\nnpm start", + "detected_code_blocks": [ + { + "code": "npx create-react-app my-app\ncd my-app\nnpm start", + "language": "bash", + "source_frame": 52.3, + "context": "terminal", + "confidence": 0.89 + } + ], + "has_code_on_screen": true, + "has_slides": false, + "has_diagram": false, + "chapter_title": "Project Setup", + "topic": "react project setup", + "category": "getting_started", + "content": "## Project Setup (00:45 - 03:00)\n\nLet's start by setting up our React project...\n\n```bash\nnpx create-react-app my-app\ncd my-app\nnpm start\n```\n", + "summary": null, + "confidence": 0.92, + "content_type": "live_coding" +} +``` + +### VideoInfo JSON (abbreviated) + +```json +{ + "video_id": "abc123def45", + "source_type": "youtube", + "source_url": "https://www.youtube.com/watch?v=abc123def45", + "file_path": null, + "title": "React Hooks Tutorial for Beginners", + "description": "Learn React Hooks from scratch...", + "duration": 1832.0, + "upload_date": "2026-01-15", + "language": "en", + "channel_name": "React Official", + "channel_url": "https://www.youtube.com/@reactofficial", + "channel_subscriber_count": 250000, + "view_count": 1500000, + "like_count": 45000, + "comment_count": 2300, + "tags": ["react", "hooks", "tutorial", "javascript"], + "categories": ["Education"], 
+ "thumbnail_url": "https://i.ytimg.com/vi/abc123def45/maxresdefault.jpg", + "chapters": [ + {"title": "Intro", "start_time": 0.0, "end_time": 45.0}, + {"title": "Project Setup", "start_time": 45.0, "end_time": 180.0}, + {"title": "useState Hook", "start_time": 180.0, "end_time": 540.0} + ], + "playlist_title": "React Complete Course", + "playlist_index": 3, + "playlist_total": 12, + "segments": ["... (see VideoSegment JSON above)"], + "transcript_source": "youtube_manual", + "visual_extraction_enabled": true, + "whisper_model": null, + "processing_time_seconds": 45.2, + "extracted_at": "2026-02-27T14:30:00Z", + "transcript_confidence": 0.95, + "content_richness_score": 0.88 +} +``` + +--- + +## Relationships Diagram + +``` +VideoScraperResult +├── videos: list[VideoInfo] +│ ├── chapters: list[Chapter] +│ ├── raw_transcript: list[TranscriptSegment] +│ │ └── words: list[WordTimestamp] | None +│ └── segments: list[VideoSegment] ← PRIMARY OUTPUT +│ ├── words: list[WordTimestamp] +│ ├── keyframes: list[KeyFrame] +│ │ └── ocr_regions: list[OCRRegion] +│ └── detected_code_blocks: list[CodeBlock] +├── playlists: list[VideoPlaylist] +│ └── videos: list[VideoInfo] ← same as above +├── categories: dict[str, list[VideoSegment]] +├── config: VideoSourceConfig +└── errors: list[VideoError] +``` + +--- + +## Config Schema (Unified Config) + +### Video source in unified config JSON + +```json +{ + "type": "video", + + "_comment_source": "One of: url, playlist, channel, path, directory", + + "url": "https://www.youtube.com/watch?v=abc123", + "playlist": "https://www.youtube.com/playlist?list=PLxxx", + "channel": "https://www.youtube.com/@channelname", + "path": "./recordings/tutorial.mp4", + "directory": "./recordings/", + + "name": "official_tutorials", + "description": "Official React tutorial videos", + "weight": 0.2, + + "_comment_filtering": "Control which videos to process", + "max_videos": 20, + "min_duration": 60, + "max_duration": 7200, + "languages": ["en"], + 
"title_include_patterns": ["tutorial", "guide"], + "title_exclude_patterns": ["shorts", "live stream"], + "min_views": 1000, + "upload_after": "2024-01-01", + + "_comment_extraction": "Control extraction depth", + "visual_extraction": true, + "whisper_model": "base", + "whisper_device": "auto", + "ocr_languages": ["en"], + "keyframe_interval": 5.0, + "min_scene_change_score": 0.3, + "ocr_confidence_threshold": 0.5, + "transcript_confidence_threshold": 0.3, + + "_comment_segmentation": "Control how content is segmented", + "segmentation_strategy": "hybrid", + "time_window_seconds": 300, + "merge_short_segments": true, + "min_segment_duration": 30, + "max_segment_duration": 600, + + "_comment_categorization": "Map segments to categories", + "categories": { + "getting_started": ["intro", "quickstart", "setup", "install"], + "hooks": ["useState", "useEffect", "useContext", "hooks"], + "components": ["component", "props", "state", "render"], + "advanced": ["performance", "suspense", "concurrent", "ssr"] + }, + + "_comment_local_files": "For local video sources", + "file_patterns": ["*.mp4", "*.mkv", "*.webm"], + "subtitle_patterns": ["*.srt", "*.vtt"], + "recursive": true +} +``` + +### VideoSourceConfig dataclass (parsed from JSON) + +```python +@dataclass +class VideoSourceConfig: + """Configuration for video source processing. + + Parsed from the 'sources' entry in unified config JSON. + Provides defaults for all optional fields. 
+ """ + # Source specification (exactly one must be set) + url: str | None = None + playlist: str | None = None + channel: str | None = None + path: str | None = None + directory: str | None = None + + # Identity + name: str = "video" + description: str = "" + weight: float = 0.2 + + # Filtering + max_videos: int = 50 + min_duration: float = 60.0 # 1 minute + max_duration: float = 7200.0 # 2 hours + languages: list[str] | None = None # None = all languages + title_include_patterns: list[str] | None = None + title_exclude_patterns: list[str] | None = None + min_views: int | None = None + upload_after: str | None = None # ISO date + + # Extraction + visual_extraction: bool = False # Off by default (heavy) + whisper_model: str = "base" + whisper_device: str = "auto" # 'auto', 'cpu', 'cuda' + ocr_languages: list[str] | None = None + keyframe_interval: float = 5.0 # Extract frame every N seconds within segment + min_scene_change_score: float = 0.3 + ocr_confidence_threshold: float = 0.5 + transcript_confidence_threshold: float = 0.3 + + # Segmentation + segmentation_strategy: str = "hybrid" + time_window_seconds: float = 300.0 # 5 minutes + merge_short_segments: bool = True + min_segment_duration: float = 30.0 + max_segment_duration: float = 600.0 + + # Categorization + categories: dict[str, list[str]] | None = None + + # Local file options + file_patterns: list[str] | None = None + subtitle_patterns: list[str] | None = None + recursive: bool = True + + @classmethod + def from_dict(cls, data: dict) -> 'VideoSourceConfig': + """Create config from unified config source entry.""" + ... + + def validate(self) -> list[str]: + """Validate configuration. 
Returns list of errors.""" + errors = [] + sources_set = sum(1 for s in [self.url, self.playlist, self.channel, + self.path, self.directory] if s is not None) + if sources_set == 0: + errors.append("Video source must specify one of: url, playlist, channel, path, directory") + if sources_set > 1: + errors.append("Video source must specify exactly one source type") + if self.min_duration >= self.max_duration: + errors.append("min_duration must be less than max_duration") + if self.min_segment_duration >= self.max_segment_duration: + errors.append("min_segment_duration must be less than max_segment_duration") + return errors +``` diff --git a/docs/plans/video/03_VIDEO_PIPELINE.md b/docs/plans/video/03_VIDEO_PIPELINE.md new file mode 100644 index 0000000..255f179 --- /dev/null +++ b/docs/plans/video/03_VIDEO_PIPELINE.md @@ -0,0 +1,1097 @@ +# Video Source — Processing Pipeline + +**Date:** February 27, 2026 +**Document:** 03 of 07 +**Status:** Planning + +--- + +## Table of Contents + +1. [Pipeline Overview](#pipeline-overview) +2. [Phase 1: Source Resolution](#phase-1-source-resolution) +3. [Phase 2: Metadata Extraction](#phase-2-metadata-extraction) +4. [Phase 3: Transcript Extraction](#phase-3-transcript-extraction) +5. [Phase 4: Visual Extraction](#phase-4-visual-extraction) +6. [Phase 5: Segmentation & Alignment](#phase-5-segmentation--alignment) +7. [Phase 6: Output Generation](#phase-6-output-generation) +8. [Error Handling](#error-handling) +9. [Caching Strategy](#caching-strategy) +10. [Performance Optimization](#performance-optimization) + +--- + +## Pipeline Overview + +The video processing pipeline has **6 sequential phases**, with Phases 3 and 4 running in parallel where possible: + +``` +Phase 1: RESOLVE What videos are we processing? + │ Input: URL/path/playlist → list of video URLs/paths + ▼ +Phase 2: METADATA What do we know about each video? 
+ │ yt-dlp extract_info() → VideoInfo (metadata only) + ▼ + ├──────────────────────────────────┐ + │ │ +Phase 3: TRANSCRIPT Phase 4: VISUAL (optional) + │ What was said? │ What was shown? + │ YouTube API / Whisper │ PySceneDetect + OpenCV + easyocr + │ → list[TranscriptSegment] │ → list[KeyFrame] + │ │ + └──────────────────────────────────┘ + │ + ▼ +Phase 5: SEGMENT & ALIGN Merge streams into structured segments + │ → list[VideoSegment] + ▼ +Phase 6: OUTPUT Generate reference files + SKILL.md section + → video_*.md + video_data/*.json +``` + +--- + +## Phase 1: Source Resolution + +**Purpose:** Take user input and resolve it to a concrete list of videos to process. + +### Input Types + +| Input | Resolution Strategy | +|-------|-------------------| +| YouTube video URL | Direct — single video | +| YouTube short URL (youtu.be) | Expand to full URL — single video | +| YouTube playlist URL | yt-dlp `extract_flat=True` → list of video URLs | +| YouTube channel URL | yt-dlp channel extraction → list of video URLs | +| Vimeo video URL | Direct — single video | +| Local video file | Direct — single file | +| Local directory | Glob for video extensions → list of file paths | + +### Algorithm + +``` +resolve_source(input, config) -> list[VideoTarget]: + + 1. Determine source type: + - YouTube video URL? → [VideoTarget(url=input)] + - YouTube playlist? → extract_playlist(input) → filter → [VideoTarget(url=...), ...] + - YouTube channel? → extract_channel_videos(input) → filter → [VideoTarget(url=...), ...] + - Vimeo URL? → [VideoTarget(url=input)] + - Local file? → [VideoTarget(path=input)] + - Local directory? → glob(directory, config.file_patterns) → [VideoTarget(path=...), ...] + + 2. 
Apply filters from config: + - max_videos: Limit total video count + - title_include_patterns: Only include matching titles + - title_exclude_patterns: Exclude matching titles + - min_views: Filter by minimum view count (online only) + - upload_after: Filter by upload date (online only) + + 3. Sort by relevance: + - Playlists: Keep playlist order + - Channels: Sort by view count (most popular first) + - Directories: Sort by filename + + 4. Return filtered, sorted list of VideoTarget objects +``` + +### Playlist Resolution Detail + +```python +def resolve_playlist(playlist_url: str, config: VideoSourceConfig) -> list[VideoTarget]: + """Resolve a YouTube playlist to individual video targets. + + Uses yt-dlp's extract_flat mode for fast playlist metadata + without downloading each video's full info. + + Title filters are applied before the max_videos cap, so the cap + counts matching videos rather than the first N playlist entries. + """ + has_title_filters = bool( + config.title_include_patterns or config.title_exclude_patterns + ) + opts = { + 'quiet': True, + 'extract_flat': True, # Only get video IDs and titles + # Limit early only when no title filter can discard entries; + # truncating first would hide later videos that do match. + 'playlistend': None if has_title_filters else config.max_videos, + } + with YoutubeDL(opts) as ydl: + playlist_info = ydl.extract_info(playlist_url, download=False) + + # Materialize once: the entries are both iterated and counted below + entries = list(playlist_info.get('entries') or []) + playlist_title = playlist_info.get('title') or '' + + targets = [] + for i, entry in enumerate(entries): + if not entry or not entry.get('id'): + continue # Empty placeholder entry; nothing to resolve + + target = VideoTarget( + url=f"https://www.youtube.com/watch?v={entry['id']}", + video_id=entry['id'], + title=entry.get('title') or '', # Key may be present but None + playlist_title=playlist_title, + playlist_index=i, + playlist_total=len(entries), + ) + + # Apply title filters + title_lower = target.title.lower() + if config.title_include_patterns: + if not any(p.lower() in title_lower + for p in config.title_include_patterns): + continue + if config.title_exclude_patterns: + if any(p.lower() in title_lower + for p in config.title_exclude_patterns): + continue + + targets.append(target) + + return targets[:config.max_videos] +``` + +### Local Directory Resolution + +```python +def resolve_local_directory( + directory: str, + config: VideoSourceConfig +) -> list[VideoTarget]: + """Resolve a local directory to video file targets.
+ + Also discovers associated subtitle files (.srt, .vtt) for each video. + A subtitle is associated when it sits next to the video and shares its + file name (minus extension). + """ + VIDEO_EXTENSIONS = {'.mp4', '.mkv', '.webm', '.avi', '.mov', '.flv', '.ts', '.wmv'} + # Only the formats parse_subtitle_file() accepts; .ass/.ssa sidecars + # would be attached here and then rejected as unsupported. + SUBTITLE_EXTENSIONS = {'.srt', '.vtt'} + + patterns = config.file_patterns or [f'*{ext}' for ext in VIDEO_EXTENSIONS] + subtitle_patterns = config.subtitle_patterns or [f'*{ext}' for ext in SUBTITLE_EXTENSIONS] + + root = Path(directory) + # Videos and subtitles are searched with the same recursion mode + find = root.rglob if config.recursive else root.glob + + # A set, because overlapping patterns can match the same file twice + video_files = set() + for pattern in patterns: + video_files.update(find(pattern)) + + # Build subtitle lookup (video path without suffix -> subtitle_path). + # Keying on the path rather than the bare stem keeps same-named videos + # in different subdirectories from sharing one subtitle file. + subtitle_lookup = {} + for pattern in subtitle_patterns: + for sub_file in find(pattern): + subtitle_lookup[sub_file.with_suffix('')] = str(sub_file) + + targets = [] + for video_file in sorted(video_files): + subtitle_path = subtitle_lookup.get(video_file.with_suffix('')) + target = VideoTarget( + path=str(video_file), + video_id=hashlib.sha256(str(video_file).encode()).hexdigest()[:16], + title=video_file.stem, + subtitle_path=subtitle_path, + ) + targets.append(target) + + return targets[:config.max_videos] +``` + +--- + +## Phase 2: Metadata Extraction + +**Purpose:** Extract full metadata for each video without downloading content. + +### Algorithm + +``` +extract_metadata(target: VideoTarget) -> VideoInfo: + + IF target.url is set (online video): + 1. Call yt-dlp extract_info(url, download=False) + 2. Parse info_dict into VideoInfo fields: + - Basic: title, description, duration, upload_date + - Channel: channel_name, channel_url, subscriber_count + - Engagement: view_count, like_count, comment_count + - Discovery: tags, categories, language, thumbnail_url + - Structure: chapters (list of Chapter objects) + - Playlist: playlist_title, playlist_index (from target) + 3. Apply duration filter (skip if < min_duration or > max_duration) + 4.
Apply view count filter (skip if < min_views) + + ELIF target.path is set (local file): + 1. Use ffprobe (via subprocess) or yt-dlp for local metadata: + - Duration + - Resolution + - Codec info + 2. Check for sidecar metadata files: + - {filename}.json (custom metadata) + - {filename}.nfo (media info) + 3. Check for sidecar subtitle files: + - {filename}.srt + - {filename}.vtt + 4. Generate VideoInfo with available metadata: + - Title from filename (cleaned) + - Duration from ffprobe + - Other fields set to None/empty + + Return VideoInfo (transcript and segments still empty) +``` + +### Metadata Fields from yt-dlp + +```python +def parse_ytdlp_metadata(info: dict, target: VideoTarget) -> VideoInfo: + """Convert yt-dlp info_dict to our VideoInfo model. + + An info_dict can carry a key whose value is None. dict.get(key, default) + only falls back when the key is missing, so every fallback below uses + `or` to cover both the missing and the None case. + """ + duration = info.get('duration') or 0.0 + + # Parse chapters + chapters = [] + raw_chapters = info.get('chapters') or [] + for i, ch in enumerate(raw_chapters): + end_time = ch.get('end_time') + if end_time is None and i + 1 < len(raw_chapters): + # A chapter without an explicit end runs until the next one starts + end_time = raw_chapters[i + 1].get('start_time') + if end_time is None: + end_time = duration + chapters.append(Chapter( + title=ch.get('title') or f'Chapter {i + 1}', + start_time=ch.get('start_time') or 0, + end_time=end_time, + )) + + # Determine source type + extractor = (info.get('extractor') or '').lower() + if 'youtube' in extractor: + source_type = VideoSourceType.YOUTUBE + elif 'vimeo' in extractor: + source_type = VideoSourceType.VIMEO + else: + source_type = VideoSourceType.LOCAL_FILE + + return VideoInfo( + video_id=info.get('id') or target.video_id, + source_type=source_type, + source_url=info.get('webpage_url') or target.url, + file_path=target.path, + title=info.get('title') or target.title or 'Untitled', + description=info.get('description') or '', + duration=duration, + upload_date=_parse_date(info.get('upload_date')), + # 'unknown' is the sentinel Phase 3 checks before adopting Whisper's detected language + language=info.get('language') or 'unknown', + channel_name=info.get('uploader') or info.get('channel'), + channel_url=info.get('uploader_url') or
info.get('channel_url'), + channel_subscriber_count=info.get('channel_follower_count'), + view_count=info.get('view_count'), + like_count=info.get('like_count'), + comment_count=info.get('comment_count'), + tags=info.get('tags') or [], + categories=info.get('categories') or [], + thumbnail_url=info.get('thumbnail'), + chapters=chapters, + playlist_title=target.playlist_title, + playlist_index=target.playlist_index, + playlist_total=target.playlist_total, + raw_transcript=[], # Populated in Phase 3 + segments=[], # Populated in Phase 5 + transcript_source=TranscriptSource.NONE, # Updated in Phase 3 + visual_extraction_enabled=False, # Updated in Phase 4 + whisper_model=None, + processing_time_seconds=0.0, + extracted_at='', + transcript_confidence=0.0, + content_richness_score=0.0, + ) +``` + +--- + +## Phase 3: Transcript Extraction + +**Purpose:** Extract the spoken content of the video as timestamped text. + +### Decision Tree + +``` +get_transcript(video_info, config) -> list[TranscriptSegment]: + + IF video is YouTube: + TRY youtube-transcript-api: + 1. List available transcripts + 2. Prefer manual captions in user's language + 3. Fall back to auto-generated captions + 4. Fall back to translated captions + IF success: + SET transcript_source = YOUTUBE_MANUAL or YOUTUBE_AUTO + RETURN parsed transcript segments + + IF youtube-transcript-api fails: + TRY yt-dlp subtitle download: + 1. Download subtitle in best available format (VTT preferred) + 2. Parse VTT/SRT into segments + IF success: + SET transcript_source = SUBTITLE_FILE + RETURN parsed transcript segments + + IF video is local AND has sidecar subtitle file: + 1. Parse SRT/VTT file into segments + SET transcript_source = SUBTITLE_FILE + RETURN parsed transcript segments + + IF no transcript found AND Whisper is available: + 1. Extract audio from video (yt-dlp for online, ffmpeg for local) + 2. Run faster-whisper with word_timestamps=True + 3.
Parse Whisper output into TranscriptSegment objects + SET transcript_source = WHISPER + RETURN parsed transcript segments + + IF no transcript and no Whisper: + LOG warning: "No transcript available for {video_id}" + SET transcript_source = NONE + RETURN empty list +``` + +### YouTube Transcript Extraction (Detail) + +```python +def extract_youtube_transcript( + video_id: str, + preferred_languages: list[str] | None = None, + confidence_threshold: float = 0.3, +) -> tuple[list[TranscriptSegment], TranscriptSource]: + """Extract transcript from YouTube captions. + + Priority (same order as the strategies below and the decision tree): + 1. Manual captions in preferred language + 2. Auto-generated captions in preferred language + 3. Manual captions in any language (with translation) + 4. Auto-generated captions in any language (with translation) + + Note: confidence_threshold is accepted but not used here. Segments get + a fixed confidence (1.0 manual, 0.8 auto-generated) and none are filtered. + + Returns: + (segments, source) tuple + + Raises: + TranscriptNotAvailable: If transcripts cannot be listed, no strategy + yields a transcript, or fetching the selected transcript fails. + """ + preferred_languages = preferred_languages or ['en'] + + try: + transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) + except Exception as e: + raise TranscriptNotAvailable(f"No transcripts for {video_id}: {e}") from e + + # Strategy 1: Manual captions in preferred language + transcript = None + source = TranscriptSource.YOUTUBE_MANUAL + try: + transcript = transcript_list.find_manually_created_transcript(preferred_languages) + except Exception: + pass # Best effort: fall through to the next strategy + + # Strategy 2: Auto-generated in preferred language + if transcript is None: + source = TranscriptSource.YOUTUBE_AUTO + try: + transcript = transcript_list.find_generated_transcript(preferred_languages) + except Exception: + pass # Best effort: fall through to the next strategy + + # Strategy 3: Any manual caption, translated + if transcript is None: + source = TranscriptSource.YOUTUBE_MANUAL + for t in transcript_list: + if not t.is_generated: + try: + transcript = t.translate(preferred_languages[0]) + break + except Exception: + continue + + # Strategy 4: Any auto-generated, translated + if transcript is None: + source = TranscriptSource.YOUTUBE_AUTO + for t in transcript_list: + if t.is_generated: + try: + transcript = t.translate(preferred_languages[0]) + break
+ except Exception: + continue + + if transcript is None: + raise TranscriptNotAvailable(f"No usable transcript for {video_id}") + + # Fetch and parse. A failed fetch is reported like any other missing + # transcript so callers can fall back (yt-dlp subtitles, Whisper). + try: + raw_data = transcript.fetch() + except Exception as e: + raise TranscriptNotAvailable(f"Could not fetch transcript for {video_id}: {e}") from e + + # Confidence depends only on the caption source, so compute it once + confidence = 1.0 if source == TranscriptSource.YOUTUBE_MANUAL else 0.8 + segments = [] + for item in raw_data: + segments.append(TranscriptSegment( + text=item['text'], + start=item['start'], + end=item['start'] + (item.get('duration') or 0), + confidence=confidence, + words=None, # YouTube API doesn't provide word-level + source=source, + )) + + return segments, source +``` + +### Whisper Transcription (Detail) + +```python +def transcribe_with_whisper( + video_info: VideoInfo, + config: VideoSourceConfig, + output_dir: str, +) -> tuple[list[TranscriptSegment], str]: + """Transcribe video audio using faster-whisper. + + Steps: + 1. Extract audio from video (download if online) + 2. Load Whisper model + 3. Transcribe with word-level timestamps + 4. Convert to TranscriptSegment objects + + Returns: + (segments, model_name) tuple + """ + # Step 1: Get audio file + if video_info.source_url and not video_info.file_path: + # Download audio only (no video) + audio_path = download_audio_only( + video_info.source_url, + output_dir=output_dir, + ) + elif video_info.file_path: + # Extract audio from local file + audio_path = extract_audio_ffmpeg( + video_info.file_path, + output_dir=output_dir, + ) + else: + raise ValueError("No source URL or file path available") + + # Step 2: Load model + model = WhisperModel( + config.whisper_model, + device=config.whisper_device, + compute_type="auto", + ) + + # Step 3: Transcribe + whisper_segments, info = model.transcribe( + audio_path, + word_timestamps=True, + vad_filter=True, + vad_parameters={"min_silence_duration_ms": 500}, + language=video_info.language if video_info.language != 'unknown' else None, + ) + + # Update video language if detected + if video_info.language == 'unknown': + video_info.language = info.language + + # Step 4: Convert to our format
+ segments = [] + for seg in whisper_segments: + words = [] + if seg.words: + for w in seg.words: + words.append(WordTimestamp( + word=w.word.strip(), + start=w.start, + end=w.end, + probability=w.probability, + )) + + segments.append(TranscriptSegment( + text=seg.text.strip(), + start=seg.start, + end=seg.end, + confidence=_compute_segment_confidence(seg), + words=words if words else None, + source=TranscriptSource.WHISPER, + )) + + # Cleanup audio file + if os.path.exists(audio_path): + os.remove(audio_path) + + return segments, config.whisper_model + + +def download_audio_only(url: str, output_dir: str) -> str: + """Download only the audio stream using yt-dlp. + + Converts to WAV at 16kHz mono (Whisper's native format). + This is 10-50x smaller than downloading full video. + """ + opts = { + 'format': 'bestaudio/best', + 'postprocessors': [{ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'wav', + }], + 'postprocessor_args': { + 'ffmpeg': ['-ar', '16000', '-ac', '1'], # 16kHz mono + }, + 'outtmpl': f'{output_dir}/audio_%(id)s.%(ext)s', + 'quiet': True, + 'no_warnings': True, + } + with YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=True) + return f"{output_dir}/audio_{info['id']}.wav" + + +def extract_audio_ffmpeg(video_path: str, output_dir: str) -> str: + """Extract audio from local video file using FFmpeg. + + Converts to WAV at 16kHz mono for Whisper. + + Returns: + Path of the written WAV file inside output_dir. + + Raises: + subprocess.CalledProcessError: If ffmpeg exits with a non-zero status. + """ + # ffmpeg does not create missing output directories + Path(output_dir).mkdir(parents=True, exist_ok=True) + + stem = Path(video_path).stem + output_path = f"{output_dir}/audio_{stem}.wav" + subprocess.run([ + 'ffmpeg', + # Global options go first; the remaining options apply to the next file named + '-y', # Overwrite + '-loglevel', 'error', # Keep errors visible so a failure can be diagnosed + '-i', video_path, + '-vn', # No video + '-ar', '16000', # 16kHz sample rate + '-ac', '1', # Mono + '-f', 'wav', # WAV format + output_path, + ], check=True) + return output_path +``` + +### Subtitle File Parsing + +```python +def parse_subtitle_file(subtitle_path: str) -> list[TranscriptSegment]: + """Parse SRT or VTT subtitle file into transcript segments.
+ + Supports: + - SRT (.srt): SubRip format + - VTT (.vtt): WebVTT format + """ + ext = Path(subtitle_path).suffix.lower() + + if ext == '.srt': + return _parse_srt(subtitle_path) + elif ext == '.vtt': + return _parse_vtt(subtitle_path) + else: + raise ValueError(f"Unsupported subtitle format: {ext}") + + +def _parse_srt(path: str) -> list[TranscriptSegment]: + """Parse SRT subtitle file. + + SRT format: + 1 + 00:00:01,500 --> 00:00:04,000 + Welcome to the tutorial + + 2 + 00:00:04,500 --> 00:00:07,000 + Today we'll learn React + + Malformed cues (missing or unparsable timestamp line, no text) are + skipped so that one bad cue does not discard the whole file. + """ + segments = [] + # utf-8-sig also accepts plain UTF-8 and drops a leading BOM if present + with open(path, 'r', encoding='utf-8-sig') as f: + content = f.read() + + # Cues are separated by blank lines; tolerate separator lines that + # contain stray spaces or tabs. + blocks = re.split(r'\n\s*\n', content.strip()) + for block in blocks: + lines = block.strip().split('\n') + if len(lines) < 3: + continue + + # Parse timestamp line + start_str, arrow, end_str = lines[1].partition('-->') + if not arrow or not start_str.strip() or not end_str.strip(): + continue # Not a timestamp line + try: + start = _srt_time_to_seconds(start_str.strip()) + # Use only the first token after the arrow; anything trailing it is not part of the end time + end = _srt_time_to_seconds(end_str.split()[0]) + except ValueError: + continue # Unparsable timestamp + + # Join text lines + text = ' '.join(lines[2:]).strip() + # Remove HTML tags + text = re.sub(r'<[^>]+>', '', text).strip() + if not text: + continue # Cue contained markup only + + segments.append(TranscriptSegment( + text=text, + start=start, + end=end, + confidence=1.0, # Subtitle files assumed accurate + words=None, + source=TranscriptSource.SUBTITLE_FILE, + )) + + return segments +``` + +--- + +## Phase 4: Visual Extraction + +**Purpose:** Extract and analyze visual content (code, slides, diagrams) from video frames. + +**This phase is OPTIONAL** — only runs when `visual_extraction=True` in config or `--visual` CLI flag. + +### Algorithm + +``` +extract_visual_content(video_info, config) -> list[KeyFrame]: + + 1. GET VIDEO FILE: + - If local file: use directly + - If online: download video (lowest sufficient resolution) + + 2. DETECT SCENE BOUNDARIES: + - Run PySceneDetect ContentDetector on video + - Get list of (start_time, end_time) for each scene + - Filter by min_scene_change_score + + 3.
SELECT KEYFRAME TIMESTAMPS: + For each segment (from chapters or scene boundaries): + - Add frame at segment start + - Add frames at scene change points within segment + - Add frames at regular intervals (keyframe_interval seconds) + Deduplicate timestamps within 1-second window + + 4. EXTRACT FRAMES: + For each selected timestamp: + - Use OpenCV to extract frame at exact timestamp + - Save as PNG to video_data/frames/{video_id}/ + + 5. CLASSIFY FRAMES: + For each extracted frame: + - Run frame classifier (heuristic-based): + - Brightness analysis → dark bg = code/terminal + - Edge density → high = diagram + - Color distribution → uniform = slide + - Face detection → webcam + - Set frame_type + + 6. OCR ON RELEVANT FRAMES: + For each frame where frame_type in (code_editor, terminal, slide, diagram): + - Run easyocr with appropriate languages + - Parse OCR results into OCRRegion objects + - Detect monospace text (code indicator) + - Filter by confidence threshold + - Combine regions into KeyFrame.ocr_text + + 7. DETECT CODE BLOCKS: + For frames classified as code_editor or terminal: + - Group contiguous monospace OCR regions + - Detect programming language (reuse detect_language from doc_scraper) + - Create CodeBlock objects + + 8. CLEANUP: + - Remove downloaded video file (if downloaded) + - Keep extracted frame images (for reference) + + RETURN list of KeyFrame objects with all analysis populated +``` + +### Scene Detection Detail + +```python +def detect_keyframe_timestamps( + video_path: str, + chapters: list[Chapter], + config: VideoSourceConfig, +) -> list[float]: + """Determine which timestamps to extract frames at. + + Combines: + 1. Chapter boundaries + 2. Scene change detection + 3. Regular intervals (skipped when config.keyframe_interval is unset or <= 0) + + Returns sorted, deduplicated list of timestamps in seconds.
+ """ + timestamps = set() + + # Source 1: Chapter boundaries + for chapter in chapters: + timestamps.add(chapter.start_time) + # Also add midpoint for long chapters + if chapter.duration > 120: # > 2 minutes + timestamps.add(chapter.start_time + chapter.duration / 2) + + # Source 2: Scene change detection + scene_list = detect( + video_path, + ContentDetector(threshold=27.0, min_scene_len=30), + ) + for scene_start, _scene_end in scene_list: + timestamps.add(scene_start.get_seconds()) + + # Source 3: Regular intervals (fill gaps) + duration = get_video_duration(video_path) + interval = config.keyframe_interval + # A missing or non-positive interval would never advance the sampling + # loop, so interval sampling is skipped in that case. + if interval and interval > 0: + step = 0 + # Multiply instead of accumulating to avoid floating-point drift + while step * interval < duration: + timestamps.add(float(step * interval)) + step += 1 + + # Sort and deduplicate (merge timestamps within 1 second) + sorted_ts = sorted(timestamps) + deduped = [sorted_ts[0]] if sorted_ts else [] + for ts in sorted_ts[1:]: + if ts - deduped[-1] >= 1.0: + deduped.append(ts) + + return deduped +``` + +### Frame Classification Detail + +```python +def classify_frame(image_path: str) -> FrameType: + """Classify a video frame based on visual characteristics. + + Uses heuristic analysis: + - Background brightness (dark = code/terminal) + - Text density and layout + - Color distribution + - Edge patterns + + This is a fast, deterministic classifier. More accurate + classification could use a trained CNN, but heuristics + are sufficient for our use case and run in <10ms per frame.
+ """ + img = cv2.imread(image_path) + if img is None: + return FrameType.OTHER + + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + h, w = gray.shape + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + + # === Metrics === + mean_brightness = float(np.mean(gray)) + brightness_std = float(np.std(gray)) + saturation_mean = float(np.mean(hsv[:, :, 1])) + + # Edge analysis + edges = cv2.Canny(gray, 50, 150) + edge_density = float(np.count_nonzero(edges)) / (h * w) + + # Top and bottom bar detection (common in slides) + top_strip = gray[:int(h * 0.1), :] + bottom_strip = gray[int(h * 0.9):, :] + top_uniform = float(np.std(top_strip)) < 20 + bottom_uniform = float(np.std(bottom_strip)) < 20 + + # === Classification Rules === + + # Dark background with structured content → code or terminal + if mean_brightness < 80: + if edge_density > 0.05: + # Has text/content on dark background + if brightness_std > 50: + return FrameType.CODE_EDITOR + else: + return FrameType.TERMINAL + else: + return FrameType.OTHER # Just a dark frame + + # Light background, uniform, with text → slide + if mean_brightness > 170 and brightness_std < 60 and saturation_mean < 50: + if edge_density > 0.03: + return FrameType.SLIDE + else: + return FrameType.OTHER # Blank/near-blank frame + + # High edge density with moderate brightness → diagram + if edge_density > 0.15 and 80 < mean_brightness < 200: + return FrameType.DIAGRAM + + # Browser detection (address bar pattern) + # Look for horizontal line near top of frame + top_section = gray[:int(h * 0.15), :] + horizontal_lines = cv2.HoughLinesP( + cv2.Canny(top_section, 50, 150), + 1, np.pi / 180, threshold=100, + minLineLength=int(w * 0.3), maxLineGap=10 + ) + if horizontal_lines is not None and len(horizontal_lines) > 0: + return FrameType.BROWSER + + # Moderate brightness, some edges → general screencast + if 80 < mean_brightness < 200 and edge_density > 0.02: + return FrameType.SCREENCAST + + return FrameType.OTHER +``` + +--- + +## Phase 5: Segmentation & 
Alignment + +**Purpose:** Combine the 3 streams (ASR + OCR + metadata) into structured `VideoSegment` objects aligned on the timeline. + +### Segmentation Strategy + +``` +determine_segments(video_info, config) -> list[TimeWindow]: + + STRATEGY 1 - CHAPTERS (preferred): + IF video has YouTube chapters: + Use chapter boundaries directly + Each chapter → one segment + May split long chapters (> max_segment_duration) + + STRATEGY 2 - HYBRID (default): + IF chapters available but sparse: + Use chapters as primary boundaries + Add scene change boundaries between chapters + Merge very short scenes (< min_segment_duration) + + STRATEGY 3 - TIME WINDOW (fallback): + IF no chapters and no good scene boundaries: + Split into fixed-duration windows (config.time_window_seconds) + Try to split at sentence boundaries in transcript + Avoid splitting mid-sentence +``` + +### Alignment Algorithm + +``` +align_streams( + time_windows: list[TimeWindow], + transcript: list[TranscriptSegment], + keyframes: list[KeyFrame], # May be empty if visual extraction disabled + chapters: list[Chapter], +) -> list[VideoSegment]: + + For each time_window: + 1. COLLECT TRANSCRIPT for this window: + - Find all TranscriptSegments that overlap with [start, end] + - For partial overlaps, include full segment if >50% overlaps + - Concatenate text, collect words + - Compute average confidence + + 2. COLLECT KEYFRAMES for this window: + - Find all KeyFrames where timestamp in [start, end] + - Already classified and OCR'd in Phase 4 + + 3. COLLECT OCR TEXT: + - Gather ocr_text from all keyframes in window + - Deduplicate (same text in consecutive frames) + - Identify code blocks + + 4. MAP CHAPTER: + - Find chapter that best overlaps this window + - Set chapter_title + + 5. DETERMINE CONTENT TYPE: + - If has_code_on_screen and transcript mentions coding → LIVE_CODING + - If has_slides → SLIDES + - If mostly talking with no visual → EXPLANATION + - etc. + + 6. 
GENERATE MERGED CONTENT: + - Start with transcript text + - If code on screen not mentioned in transcript: + Append: "\n\n**Code shown on screen:**\n```{language}\n{code}\n```" + - If slide text adds info beyond transcript: + Append: "\n\n**Slide content:**\n{slide_text}" + - Prepend chapter title as heading if present + + 7. DETECT CATEGORY: + - Use smart_categorize logic from doc_scraper + - Match chapter_title and transcript against category keywords + - Set segment.category + + 8. CREATE VideoSegment with all populated fields +``` + +### Content Merging Detail + +```python +def merge_segment_content( + transcript: str, + keyframes: list[KeyFrame], + code_blocks: list[CodeBlock], + chapter_title: str | None, + start_time: float, + end_time: float, +) -> str: + """Generate the final merged content for a segment. + + Merging rules: + 1. Chapter title becomes a heading with timestamp + 2. Transcript is the primary content + 3. Code blocks are inserted where contextually relevant + 4. Slide/diagram text supplements the transcript + 5.
Duplicate information is not repeated: text already present in the + transcript is skipped, and identical code or slide text found on + several keyframes is emitted once + """ + parts = [] + # Lower-case once; every containment check below is case-insensitive + transcript_lower = transcript.lower() + + # Heading + timestamp_str = _format_timestamp(start_time, end_time) + if chapter_title: + parts.append(f"### {chapter_title} ({timestamp_str})\n") + else: + parts.append(f"### Segment ({timestamp_str})\n") + + # Transcript (cleaned) + cleaned_transcript = _clean_transcript(transcript) + if cleaned_transcript: + parts.append(cleaned_transcript) + + # Code blocks (if not already mentioned in transcript) + seen_code = set() + for cb in code_blocks: + # Check if code content appears in transcript already + code_snippet = cb.code[:50] # First 50 chars + if code_snippet.lower() in transcript_lower: + continue + # Rule 5: emit each distinct code block once + if cb.code in seen_code: + continue + seen_code.add(cb.code) + + lang = cb.language or '' + context_label = { + CodeContext.EDITOR: "Code shown in editor", + CodeContext.TERMINAL: "Terminal command", + CodeContext.SLIDE: "Code from slide", + CodeContext.BROWSER: "Code from browser", + }.get(cb.context, "Code shown on screen") + + parts.append(f"\n**{context_label}:**") + parts.append(f"```{lang}\n{cb.code}\n```") + + # Slide text (supplementary) + seen_slides = set() + for kf in keyframes: + if kf.frame_type != FrameType.SLIDE or not kf.ocr_text: + continue + slide_key = kf.ocr_text.strip().lower() + # Skip blank OCR output, text the transcript already contains, + # and slides repeated on later keyframes (rule 5) + if slide_key in transcript_lower or slide_key in seen_slides: + continue + seen_slides.add(slide_key) + parts.append(f"\n**Slide:**\n{kf.ocr_text}") + + return '\n\n'.join(parts) +``` + +--- + +## Phase 6: Output Generation + +**Purpose:** Convert processed VideoInfo and VideoSegments into reference files and SKILL.md integration. + +See **[05_VIDEO_OUTPUT.md](./05_VIDEO_OUTPUT.md)** for full output format specification. + +### Summary of Outputs + +``` +output/{skill_name}/ +├── references/ +│ ├── video_{sanitized_title}.md # One per video, contains all segments +│ └── ... +├── video_data/ +│ ├── metadata.json # All video metadata (VideoScraperResult) +│ ├── transcripts/ +│ │ ├── {video_id}.json # Raw transcript per video +│ │ └── ... +│ ├── segments/ +│ │ ├── {video_id}_segments.json # Aligned segments per video +│ │ └── ...
+│ └── frames/ # Only if visual extraction enabled +│ ├── {video_id}/ +│ │ ├── frame_000.00.png +│ │ └── ... +│ └── ... +└── pages/ + └── video_{video_id}.json # Page format for compatibility +``` + +--- + +## Error Handling + +### Error Categories + +| Error | Severity | Strategy | +|-------|----------|----------| +| Video not found (404) | Per-video | Skip, log warning, continue with others | +| Private/restricted video | Per-video | Skip, log warning | +| No transcript available | Per-video | Try Whisper fallback, then skip | +| Whisper model download fails | Fatal for Whisper | Fall back to no-transcript mode | +| FFmpeg not installed | Fatal for Whisper/visual | Clear error message with install instructions | +| Rate limited (YouTube) | Temporary | Exponential backoff, retry 3 times | +| Network timeout | Temporary | Retry 3 times with increasing timeout | +| Corrupt video file | Per-video | Skip, log error | +| OCR fails on frame | Per-frame | Skip frame, continue with others | +| Out of disk space | Fatal | Check space before download, clear error | +| GPU out of memory | Per-video | Fall back to CPU, log warning | + +### Error Reporting + +```python +@dataclass +class VideoError: + """Error encountered during video processing.""" + video_id: str + video_title: str + phase: str # 'resolve', 'metadata', 'transcript', 'visual', 'segment' + error_type: str # 'not_found', 'private', 'no_transcript', 'network', etc.
+ message: str + recoverable: bool + timestamp: str # ISO 8601 + + def to_dict(self) -> dict: + """Return every field as a plain dict, including the timestamp.""" + return { + 'video_id': self.video_id, + 'video_title': self.video_title, + 'phase': self.phase, + 'error_type': self.error_type, + 'message': self.message, + 'recoverable': self.recoverable, + 'timestamp': self.timestamp, + } +``` + +--- + +## Caching Strategy + +### What Gets Cached + +| Data | Cache Key | Location | TTL | +|------|-----------|----------|-----| +| yt-dlp metadata | `{video_id}_meta.json` | `video_data/cache/` | 7 days | +| YouTube transcript | `{video_id}_transcript.json` | `video_data/cache/` | 7 days | +| Whisper transcript | `{video_id}_whisper_{model}.json` | `video_data/cache/` | Permanent | +| Keyframes | `{video_id}/frame_*.png` | `video_data/frames/` | Permanent | +| OCR results | `{video_id}_ocr.json` | `video_data/cache/` | Permanent | +| Aligned segments | `{video_id}_segments.json` | `video_data/segments/` | Permanent | + +### Cache Invalidation + +- Metadata cache: Invalidated after 7 days (engagement numbers change) +- Transcript cache: Invalidated if video is re-uploaded or captions updated +- Whisper cache: Only invalidated if model changes +- Visual cache: Only invalidated if config changes (different threshold, interval) + +### Resume Support + +Video processing integrates with the existing `resume_command.py`: +- Progress saved after each video completes +- On resume: skip already-processed videos +- Resume point: per-video granularity + +--- + +## Performance Optimization + +### Parallel Processing + +``` +For a playlist of N videos: + +Sequential bottleneck: Whisper transcription (GPU-bound) +Parallelizable: YouTube API calls, metadata extraction, OCR + +Approach: +1. Phase 1-2 (resolve + metadata): Parallel HTTP requests (ThreadPool, max 5) +2. Phase 3 (transcript): + - YouTube API calls: Parallel (ThreadPool, max 10) + - Whisper: Sequential (GPU is the bottleneck) +3. Phase 4 (visual): Sequential per video (GPU-bound for OCR) +4.
Phase 5-6 (segment + output): Parallel per video (CPU-bound, fast) +``` + +### Memory Management + +- **Whisper model:** Load once, reuse across videos. Unload after all videos processed. +- **easyocr Reader:** Load once, reuse across frames. Unload after visual extraction. +- **OpenCV VideoCapture:** Open per video, close immediately after frame extraction. +- **Frames:** Save to disk immediately, don't hold in memory. + +### Disk Space Management + +| Content | Size per 30 min video | Notes | +|---------|----------------------|-------| +| Audio WAV (16kHz mono) | ~55 MB | Temporary, deleted after Whisper | +| Keyframes (50 frames) | ~15 MB | Permanent, compressed PNG | +| Transcript JSON | ~50 KB | Small | +| Segments JSON | ~100 KB | Small | +| Downloaded video (if needed) | ~200-500 MB | Temporary, deleted after visual extraction | + +**Total permanent storage per video:** ~15-20 MB (with visual extraction), ~200 KB (transcript only). diff --git a/docs/plans/video/04_VIDEO_INTEGRATION.md b/docs/plans/video/04_VIDEO_INTEGRATION.md new file mode 100644 index 0000000..8c403e0 --- /dev/null +++ b/docs/plans/video/04_VIDEO_INTEGRATION.md @@ -0,0 +1,808 @@ +# Video Source — System Integration + +**Date:** February 27, 2026 +**Document:** 04 of 07 +**Status:** Planning + +--- + +## Table of Contents + +1. [CLI Integration](#cli-integration) +2. [Source Detection](#source-detection) +3. [Unified Config Integration](#unified-config-integration) +4. [Unified Scraper Integration](#unified-scraper-integration) +5. [Create Command Integration](#create-command-integration) +6. [Parser & Arguments](#parser--arguments) +7. [MCP Tool Integration](#mcp-tool-integration) +8. [Enhancement Integration](#enhancement-integration) +9. 
[File Map (New & Modified)](#file-map-new--modified-files) + +--- + +## CLI Integration + +### New Subcommand: `video` + +```bash +# Dedicated video scraping command +skill-seekers video --url https://youtube.com/watch?v=abc123 +skill-seekers video --playlist https://youtube.com/playlist?list=PLxxx +skill-seekers video --channel https://youtube.com/@channelname +skill-seekers video --path ./recording.mp4 +skill-seekers video --directory ./recordings/ + +# With options +skill-seekers video --url \ + --output output/react-videos/ \ + --visual \ + --whisper-model large-v3 \ + --max-videos 20 \ + --languages en \ + --categories '{"hooks": ["useState", "useEffect"]}' \ + --enhance-level 2 +``` + +### Auto-Detection via `create` Command + +```bash +# These all auto-detect as video sources +skill-seekers create https://youtube.com/watch?v=abc123 +skill-seekers create https://youtu.be/abc123 +skill-seekers create https://youtube.com/playlist?list=PLxxx +skill-seekers create https://youtube.com/@channelname +skill-seekers create https://vimeo.com/123456789 +skill-seekers create ./tutorial.mp4 +skill-seekers create ./recordings/ # Directory of videos + +# With universal flags +skill-seekers create https://youtube.com/watch?v=abc123 --visual -p comprehensive +skill-seekers create ./tutorial.mp4 --enhance-level 2 --dry-run +``` + +### Registration in main.py + +```python +# In src/skill_seekers/cli/main.py - COMMAND_MODULES dict + +COMMAND_MODULES = { + # ... existing commands ... + 'video': 'skill_seekers.cli.video_scraper', + # ... rest of commands ... +} +``` + +--- + +## Source Detection + +### Changes to `source_detector.py` + +```python +# New patterns to add: + +class SourceDetector: + # Existing patterns... + + # NEW: Video URL patterns + YOUTUBE_VIDEO_PATTERN = re.compile( + r'(?:https?://)?(?:www\.)?' + r'(?:youtube\.com/watch\?v=|youtu\.be/)' + r'([a-zA-Z0-9_-]{11})' + ) + YOUTUBE_PLAYLIST_PATTERN = re.compile( + r'(?:https?://)?(?:www\.)?' 
+ r'youtube\.com/playlist\?list=([a-zA-Z0-9_-]+)' + ) + YOUTUBE_CHANNEL_PATTERN = re.compile( + r'(?:https?://)?(?:www\.)?' + r'youtube\.com/(?:@|c/|channel/|user/)([a-zA-Z0-9_.-]+)' + ) + VIMEO_PATTERN = re.compile( + r'(?:https?://)?(?:www\.)?vimeo\.com/(\d+)' + ) + + # Video file extensions + VIDEO_EXTENSIONS = { + '.mp4', '.mkv', '.webm', '.avi', '.mov', + '.flv', '.ts', '.wmv', '.m4v', '.ogv', + } + + @classmethod + def detect(cls, source: str) -> SourceInfo: + """Updated detection order: + 1. .json (config) + 2. .pdf + 3. .docx + 4. Video file extensions (.mp4, .mkv, .webm, etc.) ← NEW + 5. Directory (may contain videos) + 6. YouTube/Vimeo URL patterns ← NEW + 7. GitHub patterns + 8. Web URL + 9. Domain inference + """ + # 1. Config file + if source.endswith('.json'): + return cls._detect_config(source) + + # 2. PDF file + if source.endswith('.pdf'): + return cls._detect_pdf(source) + + # 3. Word document + if source.endswith('.docx'): + return cls._detect_word(source) + + # 4. NEW: Video file + ext = os.path.splitext(source)[1].lower() + if ext in cls.VIDEO_EXTENSIONS: + return cls._detect_video_file(source) + + # 5. Directory + if os.path.isdir(source): + # Check if directory contains mostly video files + if cls._is_video_directory(source): + return cls._detect_video_directory(source) + return cls._detect_local(source) + + # 6. NEW: Video URL patterns (before general web URL) + video_info = cls._detect_video_url(source) + if video_info: + return video_info + + # 7. GitHub patterns + github_info = cls._detect_github(source) + if github_info: + return github_info + + # 8. Web URL + if source.startswith('http://') or source.startswith('https://'): + return cls._detect_web(source) + + # 9. Domain inference + if '.' 
in source and not source.startswith('/'): + return cls._detect_web(f'https://{source}') + + raise ValueError( + f"Cannot determine source type for: {source}\n\n" + "Examples:\n" + " Web: skill-seekers create https://docs.react.dev/\n" + " GitHub: skill-seekers create facebook/react\n" + " Local: skill-seekers create ./my-project\n" + " PDF: skill-seekers create tutorial.pdf\n" + " DOCX: skill-seekers create document.docx\n" + " Video: skill-seekers create https://youtube.com/watch?v=xxx\n" # NEW + " Playlist: skill-seekers create https://youtube.com/playlist?list=xxx\n" # NEW + " Config: skill-seekers create configs/react.json" + ) + + @classmethod + def _detect_video_url(cls, source: str) -> SourceInfo | None: + """Detect YouTube or Vimeo video URL.""" + + # YouTube video + match = cls.YOUTUBE_VIDEO_PATTERN.search(source) + if match: + video_id = match.group(1) + return SourceInfo( + type='video', + parsed={ + 'video_source': 'youtube_video', + 'video_id': video_id, + 'url': f'https://www.youtube.com/watch?v={video_id}', + }, + suggested_name=f'video-{video_id}', + raw_input=source, + ) + + # YouTube playlist + match = cls.YOUTUBE_PLAYLIST_PATTERN.search(source) + if match: + playlist_id = match.group(1) + return SourceInfo( + type='video', + parsed={ + 'video_source': 'youtube_playlist', + 'playlist_id': playlist_id, + 'url': f'https://www.youtube.com/playlist?list={playlist_id}', + }, + suggested_name=f'playlist-{playlist_id[:12]}', + raw_input=source, + ) + + # YouTube channel + match = cls.YOUTUBE_CHANNEL_PATTERN.search(source) + if match: + channel_name = match.group(1) + return SourceInfo( + type='video', + parsed={ + 'video_source': 'youtube_channel', + 'channel': channel_name, + 'url': source if source.startswith('http') else f'https://www.youtube.com/@{channel_name}', + }, + suggested_name=channel_name.lstrip('@'), + raw_input=source, + ) + + # Vimeo + match = cls.VIMEO_PATTERN.search(source) + if match: + video_id = match.group(1) + return SourceInfo( + 
type='video', + parsed={ + 'video_source': 'vimeo', + 'video_id': video_id, + 'url': f'https://vimeo.com/{video_id}', + }, + suggested_name=f'vimeo-{video_id}', + raw_input=source, + ) + + return None + + @classmethod + def _detect_video_file(cls, source: str) -> SourceInfo: + """Detect local video file.""" + name = os.path.splitext(os.path.basename(source))[0] + return SourceInfo( + type='video', + parsed={ + 'video_source': 'local_file', + 'file_path': os.path.abspath(source), + }, + suggested_name=name, + raw_input=source, + ) + + @classmethod + def _detect_video_directory(cls, source: str) -> SourceInfo: + """Detect directory containing video files.""" + directory = os.path.abspath(source) + name = os.path.basename(directory) + return SourceInfo( + type='video', + parsed={ + 'video_source': 'local_directory', + 'directory': directory, + }, + suggested_name=name, + raw_input=source, + ) + + @classmethod + def _is_video_directory(cls, path: str) -> bool: + """Check if a directory contains mostly video files. + + Returns True if >50% of files are video files. + Used to distinguish video directories from code directories. + """ + total = 0 + video = 0 + for f in os.listdir(path): + if os.path.isfile(os.path.join(path, f)): + total += 1 + ext = os.path.splitext(f)[1].lower() + if ext in cls.VIDEO_EXTENSIONS: + video += 1 + return total > 0 and (video / total) > 0.5 + + @classmethod + def validate_source(cls, source_info: SourceInfo) -> None: + """Updated to include video validation.""" + # ... existing validation ... 
+ + if source_info.type == 'video': + video_source = source_info.parsed.get('video_source') + if video_source == 'local_file': + file_path = source_info.parsed['file_path'] + if not os.path.exists(file_path): + raise ValueError(f"Video file does not exist: {file_path}") + elif video_source == 'local_directory': + directory = source_info.parsed['directory'] + if not os.path.exists(directory): + raise ValueError(f"Video directory does not exist: {directory}") + # For online sources, validation happens during scraping +``` + +--- + +## Unified Config Integration + +### Updated `scraped_data` dict in `unified_scraper.py` + +```python +# In UnifiedScraper.__init__(): +self.scraped_data = { + "documentation": [], + "github": [], + "pdf": [], + "word": [], + "local": [], + "video": [], # ← NEW +} +``` + +### Video Source Processing in Unified Scraper + +```python +def _scrape_video_source(self, source: dict, source_index: int) -> dict: + """Process a video source from unified config. + + Args: + source: Video source config dict from unified JSON + source_index: Index for unique naming + + Returns: + Dict with scraping results and metadata + """ + from skill_seekers.cli.video_scraper import VideoScraper + from skill_seekers.cli.video_models import VideoSourceConfig + + config = VideoSourceConfig.from_dict(source) + scraper = VideoScraper(config=config, output_dir=self.output_dir) + + result = scraper.scrape() + + return { + 'source_type': 'video', + 'source_name': source.get('name', f'video_{source_index}'), + 'weight': source.get('weight', 0.2), + 'result': result, + 'video_count': len(result.videos), + 'segment_count': result.total_segments, + 'categories': result.categories, + } +``` + +### Example Unified Config with Video + +```json +{ + "name": "react-complete", + "description": "React 19 - Documentation + Code + Video Tutorials", + "output_dir": "output/react-complete/", + + "sources": [ + { + "type": "documentation", + "url": "https://react.dev/", + "name": 
"official_docs", + "weight": 0.4, + "selectors": { + "main_content": "article", + "code_blocks": "pre code" + }, + "categories": { + "getting_started": ["learn", "quick-start"], + "hooks": ["hooks", "use-state", "use-effect"], + "api": ["reference", "api"] + } + }, + { + "type": "github", + "repo": "facebook/react", + "name": "source_code", + "weight": 0.3, + "analysis_depth": "deep" + }, + { + "type": "video", + "playlist": "https://www.youtube.com/playlist?list=PLreactplaylist", + "name": "official_tutorials", + "weight": 0.2, + "max_videos": 15, + "visual_extraction": true, + "languages": ["en"], + "categories": { + "getting_started": ["intro", "quickstart", "setup"], + "hooks": ["useState", "useEffect", "hooks"], + "advanced": ["suspense", "concurrent", "server"] + } + }, + { + "type": "video", + "url": "https://www.youtube.com/watch?v=abc123def45", + "name": "react_conf_keynote", + "weight": 0.1, + "visual_extraction": false + } + ], + + "merge_strategy": "unified", + "conflict_resolution": "docs_first", + + "enhancement": { + "enabled": true, + "level": 2 + } +} +``` + +--- + +## Create Command Integration + +### Changes to Create Command Routing + +```python +# In src/skill_seekers/cli/create_command.py (or equivalent in main.py) + +def route_source(source_info: SourceInfo, args: argparse.Namespace): + """Route detected source to appropriate scraper.""" + + if source_info.type == 'web': + return _route_web(source_info, args) + elif source_info.type == 'github': + return _route_github(source_info, args) + elif source_info.type == 'local': + return _route_local(source_info, args) + elif source_info.type == 'pdf': + return _route_pdf(source_info, args) + elif source_info.type == 'word': + return _route_word(source_info, args) + elif source_info.type == 'video': # ← NEW + return _route_video(source_info, args) + elif source_info.type == 'config': + return _route_config(source_info, args) + + +def _route_video(source_info: SourceInfo, args: argparse.Namespace): + 
"""Route video source to video scraper.""" + from skill_seekers.cli.video_scraper import VideoScraper + from skill_seekers.cli.video_models import VideoSourceConfig + + parsed = source_info.parsed + + # Build config from CLI args + parsed source info + config_dict = { + 'name': getattr(args, 'name', None) or source_info.suggested_name, + 'visual_extraction': getattr(args, 'visual', False), + 'whisper_model': getattr(args, 'whisper_model', 'base'), + 'max_videos': getattr(args, 'max_videos', 50), + 'languages': getattr(args, 'languages', None), + } + + # Set the appropriate source field + video_source = parsed['video_source'] + if video_source in ('youtube_video', 'vimeo'): + config_dict['url'] = parsed['url'] + elif video_source == 'youtube_playlist': + config_dict['playlist'] = parsed['url'] + elif video_source == 'youtube_channel': + config_dict['channel'] = parsed['url'] + elif video_source == 'local_file': + config_dict['path'] = parsed['file_path'] + elif video_source == 'local_directory': + config_dict['directory'] = parsed['directory'] + + config = VideoSourceConfig.from_dict(config_dict) + output_dir = getattr(args, 'output', None) or f'output/{config_dict["name"]}/' + + scraper = VideoScraper(config=config, output_dir=output_dir) + + if getattr(args, 'dry_run', False): + scraper.dry_run() + return + + result = scraper.scrape() + scraper.generate_output(result) +``` + +--- + +## Parser & Arguments + +### New Parser: `video_parser.py` + +```python +# src/skill_seekers/cli/parsers/video_parser.py + +from skill_seekers.cli.parsers.base import SubcommandParser + + +class VideoParser(SubcommandParser): + """Parser for the video scraping command.""" + + name = 'video' + help = 'Extract knowledge from YouTube videos, playlists, channels, or local video files' + description = ( + 'Process video content into structured skill documentation.\n\n' + 'Supports YouTube (single video, playlist, channel), Vimeo, and local video files.\n' + 'Extracts transcripts, metadata, 
chapters, and optionally visual content (code, slides).' + ) + + def add_arguments(self, parser): + # Source (mutually exclusive group) + source = parser.add_mutually_exclusive_group(required=True) + source.add_argument('--url', help='YouTube or Vimeo video URL') + source.add_argument('--playlist', help='YouTube playlist URL') + source.add_argument('--channel', help='YouTube channel URL') + source.add_argument('--path', help='Local video file path') + source.add_argument('--directory', help='Directory containing video files') + + # Add shared arguments (output, dry-run, verbose, etc.) + from skill_seekers.cli.arguments.common import add_all_standard_arguments + add_all_standard_arguments(parser) + + # Add video-specific arguments + from skill_seekers.cli.arguments.video import add_video_arguments + add_video_arguments(parser) +``` + +### New Arguments: `video.py` + +```python +# src/skill_seekers/cli/arguments/video.py + +VIDEO_ARGUMENTS = { + # === Filtering === + "max_videos": { + "flags": ("--max-videos",), + "kwargs": { + "type": int, + "default": 50, + "help": "Maximum number of videos to process (default: 50)", + }, + }, + "min_duration": { + "flags": ("--min-duration",), + "kwargs": { + "type": float, + "default": 60.0, + "help": "Minimum video duration in seconds (default: 60)", + }, + }, + "max_duration": { + "flags": ("--max-duration",), + "kwargs": { + "type": float, + "default": 7200.0, + "help": "Maximum video duration in seconds (default: 7200 = 2 hours)", + }, + }, + "languages": { + "flags": ("--languages",), + "kwargs": { + "nargs": "+", + "default": None, + "help": "Preferred transcript languages (default: all). 
Example: --languages en es", + }, + }, + "min_views": { + "flags": ("--min-views",), + "kwargs": { + "type": int, + "default": None, + "help": "Minimum view count filter (online videos only)", + }, + }, + + # === Extraction === + "visual": { + "flags": ("--visual",), + "kwargs": { + "action": "store_true", + "help": "Enable visual extraction (OCR on keyframes). Requires video-full dependencies.", + }, + }, + "whisper_model": { + "flags": ("--whisper-model",), + "kwargs": { + "default": "base", + "choices": ["tiny", "base", "small", "medium", "large-v3", "large-v3-turbo"], + "help": "Whisper model size for speech-to-text (default: base)", + }, + }, + "whisper_device": { + "flags": ("--whisper-device",), + "kwargs": { + "default": "auto", + "choices": ["auto", "cpu", "cuda"], + "help": "Device for Whisper inference (default: auto)", + }, + }, + "ocr_languages": { + "flags": ("--ocr-languages",), + "kwargs": { + "nargs": "+", + "default": None, + "help": "OCR languages for visual extraction (default: same as --languages)", + }, + }, + + # === Segmentation === + "segment_strategy": { + "flags": ("--segment-strategy",), + "kwargs": { + "default": "hybrid", + "choices": ["chapters", "semantic", "time_window", "scene_change", "hybrid"], + "help": "How to segment video content (default: hybrid)", + }, + }, + "segment_duration": { + "flags": ("--segment-duration",), + "kwargs": { + "type": float, + "default": 300.0, + "help": "Target segment duration in seconds for time_window strategy (default: 300)", + }, + }, + + # === Local file options === + "file_patterns": { + "flags": ("--file-patterns",), + "kwargs": { + "nargs": "+", + "default": None, + "help": "File patterns for directory scanning (default: *.mp4 *.mkv *.webm)", + }, + }, + "recursive": { + "flags": ("--recursive",), + "kwargs": { + "action": "store_true", + "default": True, + "help": "Recursively scan directories (default: True)", + }, + }, + "no_recursive": { + "flags": ("--no-recursive",), + "kwargs": { + 
"action": "store_true", + "help": "Disable recursive directory scanning", + }, + }, +} + + +def add_video_arguments(parser): + """Add all video-specific arguments to a parser.""" + for arg_name, arg_def in VIDEO_ARGUMENTS.items(): + parser.add_argument(*arg_def["flags"], **arg_def["kwargs"]) +``` + +### Progressive Help for Create Command + +```python +# In arguments/create.py - add video to help modes + +# New help flag +"help_video": { + "flags": ("--help-video",), + "kwargs": { + "action": "store_true", + "help": "Show video-specific options", + }, +} + +# VIDEO_ARGUMENTS added to create command's video help mode +# skill-seekers create --help-video +``` + +--- + +## MCP Tool Integration + +### New MCP Tool: `scrape_video` + +```python +# In src/skill_seekers/mcp/tools/scraping_tools.py + +@mcp.tool() +def scrape_video( + url: str | None = None, + playlist: str | None = None, + path: str | None = None, + output_dir: str = "output/", + visual: bool = False, + max_videos: int = 20, + whisper_model: str = "base", +) -> str: + """Scrape and extract knowledge from video content. + + Supports YouTube videos, playlists, channels, and local video files. + Extracts transcripts, metadata, chapters, and optionally visual content. + + Args: + url: YouTube or Vimeo video URL + playlist: YouTube playlist URL + path: Local video file or directory path + output_dir: Output directory for results + visual: Enable visual extraction (OCR on keyframes) + max_videos: Maximum videos to process (for playlists) + whisper_model: Whisper model size for transcription + + Returns: + JSON string with scraping results summary + """ + ... 
+``` + +### Updated Tool Count + +Total MCP tools: **27** (was 26, add `scrape_video`) + +--- + +## Enhancement Integration + +### Video Content Enhancement + +Video segments can be enhanced using the same AI enhancement pipeline: + +```python +# In enhance_skill_local.py or enhance_command.py + +def enhance_video_content(segments: list[VideoSegment], level: int) -> list[VideoSegment]: + """AI-enhance video segments. + + Enhancement levels: + 0 - No enhancement + 1 - Summary generation per segment + 2 - + Topic extraction, category refinement, code annotation + 3 - + Cross-segment connections, tutorial flow analysis, key takeaways + + Uses the same enhancement infrastructure as other sources. + """ + if level == 0: + return segments + + for segment in segments: + if level >= 1: + segment.summary = ai_summarize(segment.content) + + if level >= 2: + segment.topic = ai_extract_topic(segment.content) + segment.category = ai_refine_category( + segment.content, segment.category + ) + # Annotate code blocks with explanations + for cb in segment.detected_code_blocks: + cb.explanation = ai_explain_code(cb.code, segment.transcript) + + if level >= 3: + # Cross-segment analysis (needs all segments) + pass # Handled at video level, not segment level + + return segments +``` + +--- + +## File Map (New & Modified Files) + +### New Files + +| File | Purpose | Estimated Size | +|------|---------|---------------| +| `src/skill_seekers/cli/video_scraper.py` | Main video scraper orchestrator | ~800-1000 lines | +| `src/skill_seekers/cli/video_models.py` | All data classes and enums | ~500-600 lines | +| `src/skill_seekers/cli/video_transcript.py` | Transcript extraction (YouTube API + Whisper) | ~400-500 lines | +| `src/skill_seekers/cli/video_visual.py` | Visual extraction (scene detection + OCR) | ~500-600 lines | +| `src/skill_seekers/cli/video_segmenter.py` | Segmentation and stream alignment | ~400-500 lines | +| `src/skill_seekers/cli/parsers/video_parser.py` | CLI argument 
parser | ~80-100 lines | +| `src/skill_seekers/cli/arguments/video.py` | Video-specific argument definitions | ~120-150 lines | +| `tests/test_video_scraper.py` | Video scraper tests | ~600-800 lines | +| `tests/test_video_transcript.py` | Transcript extraction tests | ~400-500 lines | +| `tests/test_video_visual.py` | Visual extraction tests | ~400-500 lines | +| `tests/test_video_segmenter.py` | Segmentation tests | ~300-400 lines | +| `tests/test_video_models.py` | Data model tests | ~200-300 lines | +| `tests/test_video_integration.py` | Integration tests | ~300-400 lines | +| `tests/fixtures/video/` | Test fixtures (mock transcripts, metadata) | Various | + +### Modified Files + +| File | Changes | +|------|---------| +| `src/skill_seekers/cli/source_detector.py` | Add video URL patterns, video file detection, video directory detection | +| `src/skill_seekers/cli/main.py` | Register `video` subcommand in COMMAND_MODULES | +| `src/skill_seekers/cli/unified_scraper.py` | Add `"video": []` to scraped_data, add `_scrape_video_source()` | +| `src/skill_seekers/cli/arguments/create.py` | Add video args to create command, add `--help-video` | +| `src/skill_seekers/cli/parsers/__init__.py` | Register VideoParser | +| `src/skill_seekers/cli/config_validator.py` | Validate video source entries in unified config | +| `src/skill_seekers/mcp/tools/scraping_tools.py` | Add `scrape_video` tool | +| `pyproject.toml` | Add `[video]` and `[video-full]` optional dependencies, add `skill-seekers-video` entry point | +| `tests/test_source_detector.py` | Add video detection tests | +| `tests/test_unified.py` | Add video source integration tests | diff --git a/docs/plans/video/05_VIDEO_OUTPUT.md b/docs/plans/video/05_VIDEO_OUTPUT.md new file mode 100644 index 0000000..de926ce --- /dev/null +++ b/docs/plans/video/05_VIDEO_OUTPUT.md @@ -0,0 +1,619 @@ +# Video Source — Output Structure & SKILL.md Integration + +**Date:** February 27, 2026 +**Document:** 05 of 07 +**Status:** Planning + 
+--- + +## Table of Contents + +1. [Output Directory Structure](#output-directory-structure) +2. [Reference File Format](#reference-file-format) +3. [SKILL.md Section Format](#skillmd-section-format) +4. [Metadata JSON Format](#metadata-json-format) +5. [Page JSON Format (Compatibility)](#page-json-format-compatibility) +6. [RAG Chunking for Video](#rag-chunking-for-video) +7. [Examples](#examples) + +--- + +## Output Directory Structure + +``` +output/{skill_name}/ +├── SKILL.md # Main skill file (video section added) +├── references/ +│ ├── getting_started.md # From docs (existing) +│ ├── api.md # From docs (existing) +│ ├── video_react-hooks-tutorial.md # ← Video reference file +│ ├── video_project-setup-guide.md # ← Video reference file +│ └── video_advanced-patterns.md # ← Video reference file +├── video_data/ # ← NEW: Video-specific data +│ ├── metadata.json # VideoScraperResult (full metadata) +│ ├── transcripts/ +│ │ ├── abc123def45.json # Raw transcript per video +│ │ ├── xyz789ghi01.json +│ │ └── ... +│ ├── segments/ +│ │ ├── abc123def45_segments.json # Aligned segments per video +│ │ ├── xyz789ghi01_segments.json +│ │ └── ... +│ └── frames/ # Only if --visual enabled +│ ├── abc123def45/ +│ │ ├── frame_045.00_terminal.png +│ │ ├── frame_052.30_code.png +│ │ ├── frame_128.00_slide.png +│ │ └── ... +│ └── xyz789ghi01/ +│ └── ... +├── pages/ # Existing page format +│ ├── page_001.json # From docs (existing) +│ ├── video_abc123def45.json # ← Video in page format +│ └── ... +└── {skill_name}_data/ # Raw scrape data (existing) +``` + +--- + +## Reference File Format + +Each video produces one reference markdown file in `references/`. The filename is derived from the video title, sanitized and prefixed with `video_`. 
+ +### Naming Convention + +``` +video_{sanitized_title}.md +``` + +Sanitization rules: +- Lowercase +- Replace spaces and special chars with hyphens +- Remove consecutive hyphens +- Truncate to 60 characters +- Example: "React Hooks Tutorial for Beginners" → `video_react-hooks-tutorial-for-beginners.md` + +### File Structure + +```markdown +# {Video Title} + +> **Source:** [{channel_name}]({channel_url}) | **Duration:** {HH:MM:SS} | **Published:** {date} +> **URL:** [{url}]({url}) +> **Views:** {view_count} | **Likes:** {like_count} +> **Tags:** {tag1}, {tag2}, {tag3} + +{description_summary (first 200 chars)} + +--- + +## Table of Contents + +{auto-generated from chapter titles / segment headings} + +--- + +{segments rendered as sections} + +### {Chapter Title or "Segment N"} ({MM:SS} - {MM:SS}) + +{merged content: transcript + code blocks + slide text} + +```{language} +{code shown on screen} +``` + +--- + +### {Next Chapter} ({MM:SS} - {MM:SS}) + +{content continues...} + +--- + +## Key Takeaways + +{AI-generated summary of main points — populated during enhancement} + +## Code Examples + +{Consolidated list of all code blocks from the video} +``` + +### Full Example + +```markdown +# React Hooks Tutorial for Beginners + +> **Source:** [React Official](https://youtube.com/@reactofficial) | **Duration:** 30:32 | **Published:** 2026-01-15 +> **URL:** [https://youtube.com/watch?v=abc123def45](https://youtube.com/watch?v=abc123def45) +> **Views:** 1,500,000 | **Likes:** 45,000 +> **Tags:** react, hooks, tutorial, javascript, web development + +Learn React Hooks from scratch in this comprehensive tutorial. We'll cover useState, useEffect, useContext, and custom hooks with practical examples. 
+ +--- + +## Table of Contents + +- [Intro](#intro-0000---0045) +- [Project Setup](#project-setup-0045---0300) +- [useState Hook](#usestate-hook-0300---0900) +- [useEffect Hook](#useeffect-hook-0900---1500) +- [Custom Hooks](#custom-hooks-1500---2200) +- [Best Practices](#best-practices-2200---2800) +- [Wrap Up](#wrap-up-2800---3032) + +--- + +### Intro (00:00 - 00:45) + +Welcome to this React Hooks tutorial. Today we'll learn about the most important hooks in React and how to use them effectively in your applications. By the end of this video, you'll understand useState, useEffect, useContext, and how to create your own custom hooks. + +--- + +### Project Setup (00:45 - 03:00) + +Let's start by setting up our React project. We'll use Create React App which gives us a great starting point with all the tooling configured. + +**Terminal command:** +```bash +npx create-react-app hooks-demo +cd hooks-demo +npm start +``` + +Open the project in your code editor. You'll see the standard React project structure with src/App.js as our main component file. Let's clear out the boilerplate and start fresh. + +**Code shown in editor:** +```jsx +import React from 'react'; + +function App() { + return ( +
<div> + <h1>Hooks Demo</h1> + </div>
+ ); +} + +export default App; +``` + +--- + +### useState Hook (03:00 - 09:00) + +The useState hook is the most fundamental hook in React. It lets you add state to functional components. Before hooks, you needed class components for state management. + +Let's create a simple counter to demonstrate useState. The hook returns an array with two elements: the current state value and a function to update it. We use array destructuring to name them. + +**Code shown in editor:** +```jsx +import React, { useState } from 'react'; + +function Counter() { + const [count, setCount] = useState(0); + + return ( +
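<div> + <p>Count: {count}</p> + <button onClick={() => setCount(count + 1)}>Increment</button> + <button onClick={() => setCount(count - 1)}>Decrement</button> + </div>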
+ ); +} +``` + +Important things to remember about useState: the initial value is only used on the first render. If you need to compute the initial value, pass a function instead of a value to avoid recomputing on every render. + +--- + +## Key Takeaways + +1. **useState** is for managing simple state values in functional components +2. **useEffect** handles side effects (data fetching, subscriptions, DOM updates) +3. Always include a dependency array in useEffect to control when it runs +4. Custom hooks let you extract reusable stateful logic +5. Follow the Rules of Hooks: only call hooks at the top level, only in React functions + +## Code Examples + +### Counter with useState +```jsx +const [count, setCount] = useState(0); +``` + +### Data Fetching with useEffect +```jsx +useEffect(() => { + fetch('/api/data') + .then(res => res.json()) + .then(setData); +}, []); +``` + +### Custom Hook: useLocalStorage +```jsx +function useLocalStorage(key, initialValue) { + const [value, setValue] = useState(() => { + const saved = localStorage.getItem(key); + return saved ? JSON.parse(saved) : initialValue; + }); + + useEffect(() => { + localStorage.setItem(key, JSON.stringify(value)); + }, [key, value]); + + return [value, setValue]; +} +``` +``` + +--- + +## SKILL.md Section Format + +Video content is integrated into SKILL.md as a dedicated section, following the existing section patterns. + +### Section Placement + +```markdown +# {Skill Name} + +## Overview +{existing overview section} + +## Quick Reference +{existing quick reference} + +## Getting Started +{from docs/github} + +## Core Concepts +{from docs/github} + +## API Reference +{from docs/github} + +## Video Tutorials ← NEW SECTION +{from video sources} + +## Code Examples +{consolidated from all sources} + +## References +{file listing} +``` + +### Section Content + +```markdown +## Video Tutorials + +This skill includes knowledge extracted from {N} video tutorial(s) totaling {HH:MM:SS} of content. 
+ +### {Video Title 1} +**Source:** [{channel}]({url}) | {duration} | {view_count} views + +{summary or first segment content, abbreviated} + +**Topics covered:** {chapter titles or detected topics} + +→ Full transcript: [references/video_{sanitized_title}.md](references/video_{sanitized_title}.md) + +--- + +### {Video Title 2} +... + +### Key Patterns from Videos + +{AI-generated section highlighting patterns that appear across multiple videos} + +### Code Examples from Videos + +{Consolidated code blocks from all videos, organized by topic} + +```{language} +// From: {video_title} at {timestamp} +{code} +``` +``` + +### Playlist Grouping + +When a video source is a playlist, the SKILL.md section groups videos under the playlist title: + +```markdown +## Video Tutorials + +### React Complete Course (12 videos, 6:30:00 total) + +1. **Introduction to React** (15:00) — Components, JSX, virtual DOM +2. **React Hooks Deep Dive** (30:32) — useState, useEffect, custom hooks +3. **State Management** (28:15) — Context API, Redux patterns +... 
+ +→ Full transcripts in [references/](references/) (video_*.md files) +``` + +--- + +## Metadata JSON Format + +### `video_data/metadata.json` — Full scraper result + +```json +{ + "scraper_version": "3.2.0", + "extracted_at": "2026-02-27T14:30:00Z", + "processing_time_seconds": 125.4, + "config": { + "visual_extraction": true, + "whisper_model": "base", + "segmentation_strategy": "hybrid", + "max_videos": 20 + }, + "summary": { + "total_videos": 5, + "total_duration_seconds": 5420.0, + "total_segments": 42, + "total_code_blocks": 18, + "total_keyframes": 156, + "languages": ["en"], + "categories_found": ["getting_started", "hooks", "advanced"] + }, + "videos": [ + { + "video_id": "abc123def45", + "title": "React Hooks Tutorial for Beginners", + "duration": 1832.0, + "segments_count": 7, + "code_blocks_count": 5, + "transcript_source": "youtube_manual", + "transcript_confidence": 0.95, + "content_richness_score": 0.88, + "reference_file": "references/video_react-hooks-tutorial-for-beginners.md" + } + ], + "warnings": [ + "Video xyz789: Auto-generated captions used (manual not available)" + ], + "errors": [] +} +``` + +### `video_data/transcripts/{video_id}.json` — Raw transcript + +```json +{ + "video_id": "abc123def45", + "transcript_source": "youtube_manual", + "language": "en", + "segments": [ + { + "text": "Welcome to this React Hooks tutorial.", + "start": 0.0, + "end": 2.5, + "confidence": 1.0, + "words": null + }, + { + "text": "Today we'll learn about the most important hooks.", + "start": 2.5, + "end": 5.8, + "confidence": 1.0, + "words": null + } + ] +} +``` + +### `video_data/segments/{video_id}_segments.json` — Aligned segments + +```json +{ + "video_id": "abc123def45", + "segmentation_strategy": "chapters", + "segments": [ + { + "index": 0, + "start_time": 0.0, + "end_time": 45.0, + "duration": 45.0, + "chapter_title": "Intro", + "category": "getting_started", + "content_type": "explanation", + "transcript": "Welcome to this React Hooks tutorial...", 
+ "transcript_confidence": 0.95, + "has_code_on_screen": false, + "has_slides": false, + "keyframes_count": 2, + "code_blocks_count": 0, + "confidence": 0.95 + } + ] +} +``` + +--- + +## Page JSON Format (Compatibility) + +For compatibility with the existing page-based pipeline (`pages/*.json`), each video also produces a page JSON file. This ensures video content flows through the same build pipeline as other sources. + +### `pages/video_{video_id}.json` + +```json +{ + "url": "https://www.youtube.com/watch?v=abc123def45", + "title": "React Hooks Tutorial for Beginners", + "content": "{full merged content from all segments}", + "category": "tutorials", + "source_type": "video", + "metadata": { + "video_id": "abc123def45", + "duration": 1832.0, + "channel": "React Official", + "view_count": 1500000, + "chapters": 7, + "transcript_source": "youtube_manual", + "has_visual_extraction": true + }, + "code_blocks": [ + { + "language": "jsx", + "code": "const [count, setCount] = useState(0);", + "source": "video_ocr", + "timestamp": 195.0 + } + ], + "extracted_at": "2026-02-27T14:30:00Z" +} +``` + +This format is compatible with the existing `build_skill()` function in `doc_scraper.py`, which reads `pages/*.json` files to build the skill. + +--- + +## RAG Chunking for Video + +When `--chunk-for-rag` is enabled, video segments are chunked differently from text documents because they already have natural boundaries (chapters/segments). 
+ +### Chunking Strategy + +``` +For each VideoSegment: + IF segment.duration <= chunk_duration_threshold (default: 300s / 5 min): + → Output as single chunk + + ELIF segment has sub-sections (code blocks interleaved with explanation): + → Split at code block boundaries + → Each chunk = explanation + associated code block + + ELSE (long segment without clear sub-sections): + → Split at sentence boundaries + → Target chunk size: config.chunk_size tokens + → Overlap: config.chunk_overlap tokens +``` + +### RAG Metadata per Chunk + +```json +{ + "text": "chunk content...", + "metadata": { + "source": "video", + "source_type": "youtube", + "video_id": "abc123def45", + "video_title": "React Hooks Tutorial", + "channel": "React Official", + "timestamp_start": 180.0, + "timestamp_end": 300.0, + "timestamp_url": "https://youtube.com/watch?v=abc123def45&t=180", + "chapter": "useState Hook", + "category": "hooks", + "content_type": "live_coding", + "has_code": true, + "language": "en", + "confidence": 0.94, + "view_count": 1500000, + "upload_date": "2026-01-15" + } +} +``` + +The `timestamp_url` field is especially valuable — it lets RAG systems link directly to the relevant moment in the video. 
+ +--- + +## Examples + +### Minimal Output (transcript only, single video) + +``` +output/react-hooks-video/ +├── SKILL.md # Skill with video section +├── references/ +│ └── video_react-hooks-tutorial.md # Full transcript organized by chapters +├── video_data/ +│ ├── metadata.json # Scraper metadata +│ ├── transcripts/ +│ │ └── abc123def45.json # Raw transcript +│ └── segments/ +│ └── abc123def45_segments.json # Aligned segments +└── pages/ + └── video_abc123def45.json # Page-compatible format +``` + +### Full Output (visual extraction, playlist of 5 videos) + +``` +output/react-complete/ +├── SKILL.md +├── references/ +│ ├── video_intro-to-react.md +│ ├── video_react-hooks-deep-dive.md +│ ├── video_state-management.md +│ ├── video_react-router.md +│ └── video_testing-react-apps.md +├── video_data/ +│ ├── metadata.json +│ ├── transcripts/ +│ │ ├── abc123def45.json +│ │ ├── def456ghi78.json +│ │ ├── ghi789jkl01.json +│ │ ├── jkl012mno34.json +│ │ └── mno345pqr67.json +│ ├── segments/ +│ │ ├── abc123def45_segments.json +│ │ ├── def456ghi78_segments.json +│ │ ├── ghi789jkl01_segments.json +│ │ ├── jkl012mno34_segments.json +│ │ └── mno345pqr67_segments.json +│ └── frames/ +│ ├── abc123def45/ +│ │ ├── frame_045.00_terminal.png +│ │ ├── frame_052.30_code.png +│ │ ├── frame_128.00_slide.png +│ │ └── ... (50+ frames) +│ ├── def456ghi78/ +│ │ └── ... +│ └── ... 
+└── pages/ + ├── video_abc123def45.json + ├── video_def456ghi78.json + ├── video_ghi789jkl01.json + ├── video_jkl012mno34.json + └── video_mno345pqr67.json +``` + +### Mixed Source Output (docs + github + video) + +``` +output/react-unified/ +├── SKILL.md # Unified skill with ALL sources +├── references/ +│ ├── getting_started.md # From docs +│ ├── hooks.md # From docs +│ ├── api_reference.md # From docs +│ ├── architecture.md # From GitHub analysis +│ ├── patterns.md # From GitHub analysis +│ ├── video_react-hooks-tutorial.md # From video +│ ├── video_react-conf-keynote.md # From video +│ └── video_advanced-patterns.md # From video +├── video_data/ +│ └── ... (video-specific data) +├── pages/ +│ ├── page_001.json # From docs +│ ├── page_002.json +│ ├── video_abc123def45.json # From video +│ └── video_def456ghi78.json +└── react_data/ + └── pages/ # Raw scrape data +``` diff --git a/docs/plans/video/06_VIDEO_TESTING.md b/docs/plans/video/06_VIDEO_TESTING.md new file mode 100644 index 0000000..ea7b360 --- /dev/null +++ b/docs/plans/video/06_VIDEO_TESTING.md @@ -0,0 +1,748 @@ +# Video Source — Testing Strategy + +**Date:** February 27, 2026 +**Document:** 06 of 07 +**Status:** Planning + +--- + +## Table of Contents + +1. [Testing Principles](#testing-principles) +2. [Test File Structure](#test-file-structure) +3. [Fixtures & Mock Data](#fixtures--mock-data) +4. [Unit Tests](#unit-tests) +5. [Integration Tests](#integration-tests) +6. [E2E Tests](#what-doesnt-run-in-ci-marked) +7. [CI Considerations](#ci-considerations) +8. [Performance Tests](#performance-tests) + +--- + +## Testing Principles + +1. **No network calls in unit tests** — All YouTube API, yt-dlp, and download operations must be mocked. +2. **No GPU required in CI** — All Whisper and easyocr tests must work on CPU, or be marked `@pytest.mark.slow`. +3. **No video files in repo** — Test fixtures use JSON transcripts and small synthetic images, not actual video files. +4. 
**100% pipeline coverage** — Every phase of the 6-phase pipeline must be tested. +5. **Edge case focus** — Test missing chapters, empty transcripts, corrupt frames, rate limits. +6. **Compatible with existing test infra** — Use existing conftest.py, markers, and patterns. + +--- + +## Test File Structure + +``` +tests/ +├── test_video_models.py # Data model tests (serialization, validation) +├── test_video_scraper.py # Main scraper orchestration tests +├── test_video_transcript.py # Transcript extraction tests +├── test_video_visual.py # Visual extraction tests +├── test_video_segmenter.py # Segmentation and alignment tests +├── test_video_integration.py # Integration with unified scraper, create command +├── test_video_output.py # Output generation tests +├── test_video_source_detector.py # Source detection tests (or add to existing) +├── fixtures/ +│ └── video/ +│ ├── sample_metadata.json # yt-dlp info_dict mock +│ ├── sample_transcript.json # YouTube transcript mock +│ ├── sample_whisper_output.json # Whisper transcription mock +│ ├── sample_chapters.json # Chapter data mock +│ ├── sample_playlist.json # Playlist metadata mock +│ ├── sample_segments.json # Pre-aligned segments +│ ├── sample_frame_code.png # 100x100 synthetic dark frame +│ ├── sample_frame_slide.png # 100x100 synthetic light frame +│ ├── sample_frame_diagram.png # 100x100 synthetic edge-heavy frame +│ ├── sample_srt.srt # SRT subtitle file +│ ├── sample_vtt.vtt # WebVTT subtitle file +│ └── sample_config.json # Video source config +``` + +--- + +## Fixtures & Mock Data + +### yt-dlp Metadata Fixture + +```python +# tests/fixtures/video/sample_metadata.json +SAMPLE_YTDLP_METADATA = { + "id": "abc123def45", + "title": "React Hooks Tutorial for Beginners", + "description": "Learn React Hooks from scratch. 
Covers useState, useEffect, and custom hooks.", + "duration": 1832, + "upload_date": "20260115", + "uploader": "React Official", + "uploader_url": "https://www.youtube.com/@reactofficial", + "channel_follower_count": 250000, + "view_count": 1500000, + "like_count": 45000, + "comment_count": 2300, + "tags": ["react", "hooks", "tutorial", "javascript"], + "categories": ["Education"], + "language": "en", + "thumbnail": "https://i.ytimg.com/vi/abc123def45/maxresdefault.jpg", + "webpage_url": "https://www.youtube.com/watch?v=abc123def45", + "chapters": [ + {"title": "Intro", "start_time": 0, "end_time": 45}, + {"title": "Project Setup", "start_time": 45, "end_time": 180}, + {"title": "useState Hook", "start_time": 180, "end_time": 540}, + {"title": "useEffect Hook", "start_time": 540, "end_time": 900}, + {"title": "Custom Hooks", "start_time": 900, "end_time": 1320}, + {"title": "Best Practices", "start_time": 1320, "end_time": 1680}, + {"title": "Wrap Up", "start_time": 1680, "end_time": 1832}, + ], + "subtitles": { + "en": [{"ext": "vtt", "url": "https://..."}], + }, + "automatic_captions": { + "en": [{"ext": "vtt", "url": "https://..."}], + }, + "extractor": "youtube", +} +``` + +### YouTube Transcript Fixture + +```python +SAMPLE_YOUTUBE_TRANSCRIPT = [ + {"text": "Welcome to this React Hooks tutorial.", "start": 0.0, "duration": 2.5}, + {"text": "Today we'll learn about the most important hooks.", "start": 2.5, "duration": 3.0}, + {"text": "Let's start by setting up our project.", "start": 45.0, "duration": 2.8}, + {"text": "We'll use Create React App.", "start": 47.8, "duration": 2.0}, + {"text": "Run npx create-react-app hooks-demo.", "start": 49.8, "duration": 3.5}, + # ... 
more segments covering all chapters +] +``` + +### Whisper Output Fixture + +```python +SAMPLE_WHISPER_OUTPUT = { + "language": "en", + "language_probability": 0.98, + "duration": 1832.0, + "segments": [ + { + "start": 0.0, + "end": 2.5, + "text": "Welcome to this React Hooks tutorial.", + "avg_logprob": -0.15, + "no_speech_prob": 0.01, + "words": [ + {"word": "Welcome", "start": 0.0, "end": 0.4, "probability": 0.97}, + {"word": "to", "start": 0.4, "end": 0.5, "probability": 0.99}, + {"word": "this", "start": 0.5, "end": 0.7, "probability": 0.98}, + {"word": "React", "start": 0.7, "end": 1.1, "probability": 0.95}, + {"word": "Hooks", "start": 1.1, "end": 1.5, "probability": 0.93}, + {"word": "tutorial.", "start": 1.5, "end": 2.3, "probability": 0.96}, + ], + }, + ], +} +``` + +### Synthetic Frame Fixtures + +```python +# Generate in conftest.py or fixture setup +import numpy as np +import cv2 + +def create_dark_frame(path: str): + """Create a synthetic dark frame (simulates code editor).""" + img = np.zeros((1080, 1920, 3), dtype=np.uint8) + img[200:250, 100:800] = [200, 200, 200] # Simulated text line + img[270:320, 100:600] = [180, 180, 180] # Another text line + cv2.imwrite(path, img) + +def create_light_frame(path: str): + """Create a synthetic light frame (simulates slide).""" + img = np.ones((1080, 1920, 3), dtype=np.uint8) * 240 + img[100:150, 200:1000] = [40, 40, 40] # Title text + img[300:330, 200:1200] = [60, 60, 60] # Body text + cv2.imwrite(path, img) +``` + +### conftest.py Additions + +```python +# tests/conftest.py — add video fixtures + +import pytest +import json +from pathlib import Path + +FIXTURES_DIR = Path(__file__).parent / "fixtures" / "video" + + +@pytest.fixture +def sample_ytdlp_metadata(): + """Load sample yt-dlp metadata.""" + with open(FIXTURES_DIR / "sample_metadata.json") as f: + return json.load(f) + + +@pytest.fixture +def sample_transcript(): + """Load sample YouTube transcript.""" + with open(FIXTURES_DIR / 
"sample_transcript.json") as f: + return json.load(f) + + +@pytest.fixture +def sample_whisper_output(): + """Load sample Whisper transcription output.""" + with open(FIXTURES_DIR / "sample_whisper_output.json") as f: + return json.load(f) + + +@pytest.fixture +def sample_chapters(): + """Load sample chapter data.""" + with open(FIXTURES_DIR / "sample_chapters.json") as f: + return json.load(f) + + +@pytest.fixture +def sample_video_config(): + """Create a sample VideoSourceConfig.""" + from skill_seekers.cli.video_models import VideoSourceConfig + return VideoSourceConfig( + url="https://www.youtube.com/watch?v=abc123def45", + name="test_video", + visual_extraction=False, + max_videos=5, + ) + + +@pytest.fixture +def video_output_dir(tmp_path): + """Create a temporary output directory for video tests.""" + output = tmp_path / "output" / "test_video" + output.mkdir(parents=True) + (output / "video_data").mkdir() + (output / "video_data" / "transcripts").mkdir() + (output / "video_data" / "segments").mkdir() + (output / "video_data" / "frames").mkdir() + (output / "references").mkdir() + (output / "pages").mkdir() + return output +``` + +--- + +## Unit Tests + +### test_video_models.py + +```python +"""Tests for video data models and serialization.""" + +class TestVideoInfo: + def test_create_from_ytdlp_metadata(self, sample_ytdlp_metadata): + """VideoInfo correctly parses yt-dlp info_dict.""" + ... + + def test_serialization_round_trip(self): + """VideoInfo serializes to dict and deserializes back identically.""" + ... + + def test_content_richness_score(self): + """Content richness score computed correctly based on signals.""" + ... + + def test_empty_chapters(self): + """VideoInfo handles video with no chapters.""" + ... + + +class TestVideoSegment: + def test_timestamp_display(self): + """Timestamp display formats correctly (MM:SS - MM:SS).""" + ... + + def test_youtube_timestamp_url(self): + """YouTube timestamp URL generated correctly.""" + ... 
+ + def test_segment_with_code_blocks(self): + """Segment correctly tracks detected code blocks.""" + ... + + def test_segment_without_visual(self): + """Segment works when visual extraction is disabled.""" + ... + + +class TestChapter: + def test_chapter_duration(self): + """Chapter duration computed correctly.""" + ... + + def test_chapter_serialization(self): + """Chapter serializes to/from dict.""" + ... + + +class TestTranscriptSegment: + def test_from_youtube_api(self): + """TranscriptSegment created from YouTube API format.""" + ... + + def test_from_whisper_output(self): + """TranscriptSegment created from Whisper output.""" + ... + + def test_with_word_timestamps(self): + """TranscriptSegment preserves word-level timestamps.""" + ... + + +class TestVideoSourceConfig: + def test_validate_single_source(self): + """Config requires exactly one source field.""" + ... + + def test_validate_duration_range(self): + """Config validates min < max duration.""" + ... + + def test_defaults(self): + """Config has sensible defaults.""" + ... + + def test_from_unified_config(self, sample_video_config): + """Config created from unified config JSON entry.""" + ... + + +class TestEnums: + def test_all_video_source_types(self): + """All VideoSourceType values are valid.""" + ... + + def test_all_frame_types(self): + """All FrameType values are valid.""" + ... + + def test_all_transcript_sources(self): + """All TranscriptSource values are valid.""" + ... +``` + +### test_video_transcript.py + +```python +"""Tests for transcript extraction (YouTube API + Whisper + subtitle parsing).""" + +class TestYouTubeTranscript: + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + def test_extract_manual_captions(self, mock_api, sample_transcript): + """Prefers manual captions over auto-generated.""" + ... 
+ + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + def test_fallback_to_auto_generated(self, mock_api): + """Falls back to auto-generated when manual not available.""" + ... + + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + def test_fallback_to_translation(self, mock_api): + """Falls back to translated captions when preferred language unavailable.""" + ... + + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + def test_no_transcript_available(self, mock_api): + """Raises TranscriptNotAvailable when no captions exist.""" + ... + + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + def test_confidence_scoring(self, mock_api, sample_transcript): + """Manual captions get 1.0 confidence, auto-generated get 0.8.""" + ... + + +class TestWhisperTranscription: + @pytest.mark.slow + @patch('skill_seekers.cli.video_transcript.WhisperModel') + def test_transcribe_with_word_timestamps(self, mock_model): + """Whisper returns word-level timestamps.""" + ... + + @patch('skill_seekers.cli.video_transcript.WhisperModel') + def test_language_detection(self, mock_model): + """Whisper detects video language.""" + ... + + @patch('skill_seekers.cli.video_transcript.WhisperModel') + def test_vad_filtering(self, mock_model): + """VAD filter removes silence segments.""" + ... + + def test_download_audio_only(self): + """Audio extraction downloads audio stream only (not video).""" + # Mock yt-dlp download + ... + + +class TestSubtitleParsing: + def test_parse_srt(self, tmp_path): + """Parse SRT subtitle file into segments.""" + srt_content = "1\n00:00:01,500 --> 00:00:04,000\nHello world\n\n2\n00:00:05,000 --> 00:00:08,000\nSecond line\n" + srt_file = tmp_path / "test.srt" + srt_file.write_text(srt_content) + ... 
+ + def test_parse_vtt(self, tmp_path): + """Parse WebVTT subtitle file into segments.""" + vtt_content = "WEBVTT\n\n00:00:01.500 --> 00:00:04.000\nHello world\n\n00:00:05.000 --> 00:00:08.000\nSecond line\n" + vtt_file = tmp_path / "test.vtt" + vtt_file.write_text(vtt_content) + ... + + def test_srt_html_tag_removal(self, tmp_path): + """SRT parser removes inline HTML tags.""" + ... + + def test_empty_subtitle_file(self, tmp_path): + """Handle empty subtitle file gracefully.""" + ... + + +class TestTranscriptFallbackChain: + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + @patch('skill_seekers.cli.video_transcript.WhisperModel') + def test_youtube_then_whisper_fallback(self, mock_whisper, mock_yt_api): + """Falls back to Whisper when YouTube captions fail.""" + ... + + def test_subtitle_file_discovery(self, tmp_path): + """Discovers sidecar subtitle files for local videos.""" + ... +``` + +### test_video_visual.py + +```python +"""Tests for visual extraction (scene detection, frame extraction, OCR).""" + +class TestFrameClassification: + def test_classify_dark_frame_as_code(self, tmp_path): + """Dark frame with text patterns classified as code_editor.""" + ... + + def test_classify_light_frame_as_slide(self, tmp_path): + """Light uniform frame classified as slide.""" + ... + + def test_classify_high_edge_as_diagram(self, tmp_path): + """High edge density frame classified as diagram.""" + ... + + def test_classify_blank_frame_as_other(self, tmp_path): + """Nearly blank frame classified as other.""" + ... + + +class TestKeyframeTimestamps: + def test_chapter_boundaries_included(self, sample_chapters): + """Keyframe timestamps include chapter start times.""" + ... + + def test_long_chapter_midpoint(self, sample_chapters): + """Long chapters (>2 min) get midpoint keyframe.""" + ... + + def test_deduplication_within_1_second(self): + """Timestamps within 1 second are deduplicated.""" + ... 
+ + def test_regular_intervals_fill_gaps(self): + """Regular interval timestamps fill gaps between scenes.""" + ... + + +class TestOCRExtraction: + @pytest.mark.slow + @patch('skill_seekers.cli.video_visual.easyocr.Reader') + def test_extract_text_from_code_frame(self, mock_reader, tmp_path): + """OCR extracts text from code editor frame.""" + ... + + @patch('skill_seekers.cli.video_visual.easyocr.Reader') + def test_confidence_filtering(self, mock_reader): + """Low-confidence OCR results are filtered out.""" + ... + + @patch('skill_seekers.cli.video_visual.easyocr.Reader') + def test_monospace_detection(self, mock_reader): + """Monospace text regions correctly detected.""" + ... + + +class TestCodeBlockDetection: + def test_detect_python_code(self): + """Detect Python code from OCR text.""" + ... + + def test_detect_terminal_commands(self): + """Detect terminal commands from OCR text.""" + ... + + def test_language_detection_from_ocr(self): + """Language detection works on OCR-extracted code.""" + ... +``` + +### test_video_segmenter.py + +```python +"""Tests for segmentation and stream alignment.""" + +class TestChapterSegmentation: + def test_chapters_create_segments(self, sample_chapters): + """Chapters map directly to segments.""" + ... + + def test_long_chapter_splitting(self): + """Chapters exceeding max_segment_duration are split.""" + ... + + def test_empty_chapters(self): + """Falls back to time window when no chapters.""" + ... + + +class TestTimeWindowSegmentation: + def test_fixed_windows(self): + """Creates segments at fixed intervals.""" + ... + + def test_sentence_boundary_alignment(self): + """Segments split at sentence boundaries, not mid-word.""" + ... + + def test_configurable_window_size(self): + """Window size respects config.time_window_seconds.""" + ... + + +class TestStreamAlignment: + def test_align_transcript_to_segments(self, sample_transcript, sample_chapters): + """Transcript segments mapped to correct time windows.""" + ... 
+ + def test_align_keyframes_to_segments(self): + """Keyframes mapped to correct segments by timestamp.""" + ... + + def test_partial_overlap_handling(self): + """Transcript segments partially overlapping window boundaries.""" + ... + + def test_empty_segment_handling(self): + """Handle segments with no transcript (silence, music).""" + ... + + +class TestContentMerging: + def test_transcript_only_content(self): + """Content is just transcript when no visual data.""" + ... + + def test_code_block_appended(self): + """Code on screen is appended to transcript content.""" + ... + + def test_duplicate_code_not_repeated(self): + """Code mentioned in transcript is not duplicated from OCR.""" + ... + + def test_chapter_title_as_heading(self): + """Chapter title becomes markdown heading in content.""" + ... + + def test_slide_text_supplementary(self): + """Slide text adds to content when not in transcript.""" + ... + + +class TestCategorization: + def test_category_from_chapter_title(self): + """Category inferred from chapter title keywords.""" + ... + + def test_category_from_transcript(self): + """Category inferred from transcript content.""" + ... + + def test_custom_categories_from_config(self): + """Custom category keywords from config used.""" + ... 
+``` + +--- + +## Integration Tests + +### test_video_integration.py + +```python +"""Integration tests for video pipeline end-to-end.""" + +class TestSourceDetectorVideo: + def test_detect_youtube_video(self): + info = SourceDetector.detect("https://youtube.com/watch?v=abc123def45") + assert info.type == "video" + assert info.parsed["video_source"] == "youtube_video" + + def test_detect_youtube_short_url(self): + info = SourceDetector.detect("https://youtu.be/abc123def45") + assert info.type == "video" + + def test_detect_youtube_playlist(self): + info = SourceDetector.detect("https://youtube.com/playlist?list=PLxxx") + assert info.type == "video" + assert info.parsed["video_source"] == "youtube_playlist" + + def test_detect_youtube_channel(self): + info = SourceDetector.detect("https://youtube.com/@reactofficial") + assert info.type == "video" + assert info.parsed["video_source"] == "youtube_channel" + + def test_detect_vimeo(self): + info = SourceDetector.detect("https://vimeo.com/123456789") + assert info.type == "video" + assert info.parsed["video_source"] == "vimeo" + + def test_detect_mp4_file(self, tmp_path): + f = tmp_path / "tutorial.mp4" + f.touch() + info = SourceDetector.detect(str(f)) + assert info.type == "video" + assert info.parsed["video_source"] == "local_file" + + def test_detect_video_directory(self, tmp_path): + d = tmp_path / "videos" + d.mkdir() + (d / "vid1.mp4").touch() + (d / "vid2.mkv").touch() + info = SourceDetector.detect(str(d)) + assert info.type == "video" + + def test_youtube_not_confused_with_web(self): + """YouTube URLs detected as video, not web.""" + info = SourceDetector.detect("https://www.youtube.com/watch?v=dQw4w9WgXcQ") + assert info.type == "video" + assert info.type != "web" + + +class TestUnifiedConfigVideo: + def test_video_source_in_config(self, tmp_path): + """Video source parsed correctly from unified config.""" + ... 
+ + def test_multiple_video_sources(self, tmp_path): + """Multiple video sources in same config.""" + ... + + def test_video_alongside_docs(self, tmp_path): + """Video source alongside documentation source.""" + ... + + +class TestFullPipeline: + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + @patch('skill_seekers.cli.video_scraper.YoutubeDL') + def test_single_video_transcript_only( + self, mock_ytdl, mock_transcript, sample_ytdlp_metadata, + sample_transcript, video_output_dir + ): + """Full pipeline: single YouTube video, transcript only.""" + mock_ytdl.return_value.__enter__.return_value.extract_info.return_value = sample_ytdlp_metadata + mock_transcript.list_transcripts.return_value = ... + + # Run pipeline + # Assert output files exist and content is correct + ... + + @pytest.mark.slow + @patch('skill_seekers.cli.video_visual.easyocr.Reader') + @patch('skill_seekers.cli.video_transcript.YouTubeTranscriptApi') + @patch('skill_seekers.cli.video_scraper.YoutubeDL') + def test_single_video_with_visual( + self, mock_ytdl, mock_transcript, mock_ocr, + sample_ytdlp_metadata, video_output_dir + ): + """Full pipeline: single video with visual extraction.""" + ... 
+``` + +--- + +## CI Considerations + +### What Runs in CI (Default) + +- All unit tests (mocked, no network, no GPU) +- Integration tests with mocked external services +- Source detection tests (pure logic) +- Data model tests (pure logic) + +### What Doesn't Run in CI (Marked) + +```python +@pytest.mark.slow # Whisper model loading, actual OCR +@pytest.mark.integration # Real YouTube API calls +@pytest.mark.e2e # Full pipeline with real video download +``` + +### CI Test Matrix Compatibility + +| Test | Ubuntu | macOS | Python 3.10 | Python 3.12 | GPU | +|------|--------|-------|-------------|-------------|-----| +| Unit tests | Yes | Yes | Yes | Yes | No | +| Integration (mocked) | Yes | Yes | Yes | Yes | No | +| Whisper tests (mocked) | Yes | Yes | Yes | Yes | No | +| OCR tests (mocked) | Yes | Yes | Yes | Yes | No | +| E2E (real download) | Skip | Skip | Skip | Skip | No | + +### Dependency Handling in Tests + +```python +# At top of visual test files: +pytest.importorskip("cv2", reason="opencv-python-headless required for visual tests") +pytest.importorskip("easyocr", reason="easyocr required for OCR tests") + +# At top of whisper test files: +pytest.importorskip("faster_whisper", reason="faster-whisper required for transcription tests") +``` + +--- + +## Performance Tests + +```python +@pytest.mark.benchmark +class TestVideoPerformance: + def test_transcript_parsing_speed(self, sample_transcript): + """Transcript parsing completes in < 10ms for 1000 segments.""" + ... + + def test_segment_alignment_speed(self): + """Segment alignment completes in < 50ms for 100 segments.""" + ... + + def test_frame_classification_speed(self, tmp_path): + """Frame classification completes in < 20ms per frame.""" + ... + + def test_content_merging_speed(self): + """Content merging completes in < 5ms per segment.""" + ... + + def test_output_generation_speed(self, video_output_dir): + """Output generation (5 videos, 50 segments) in < 1 second.""" + ... 
+``` diff --git a/docs/plans/video/07_VIDEO_DEPENDENCIES.md b/docs/plans/video/07_VIDEO_DEPENDENCIES.md new file mode 100644 index 0000000..141eb37 --- /dev/null +++ b/docs/plans/video/07_VIDEO_DEPENDENCIES.md @@ -0,0 +1,506 @@ +# Video Source — Dependencies & System Requirements + +**Date:** February 27, 2026 +**Document:** 07 of 07 +**Status:** Planning + +--- + +## Table of Contents + +1. [Dependency Tiers](#dependency-tiers) +2. [pyproject.toml Changes](#pyprojecttoml-changes) +3. [System Requirements](#system-requirements) +4. [Import Guards](#import-guards) +5. [Dependency Check Command](#dependency-check-command) +6. [Model Management](#model-management) +7. [Docker Considerations](#docker-considerations) + +--- + +## Dependency Tiers + +Video processing has two tiers to keep the base install lightweight: + +### Tier 1: `[video]` — Lightweight (YouTube transcripts + metadata) + +**Use case:** YouTube videos with existing captions. No download, no GPU needed. + +| Package | Version | Size | Purpose | +|---------|---------|------|---------| +| `yt-dlp` | `>=2024.12.0` | ~15MB | Metadata extraction, audio download | +| `youtube-transcript-api` | `>=1.2.0` | ~50KB | YouTube caption extraction | + +**Capabilities:** +- YouTube metadata (title, chapters, tags, description, engagement) +- YouTube captions (manual and auto-generated) +- Vimeo metadata +- Playlist and channel resolution +- Subtitle file parsing (SRT, VTT) +- Segmentation and alignment +- Full output generation + +**NOT included:** +- Speech-to-text (Whisper) +- Visual extraction (frame + OCR) +- Local video file transcription (without subtitles) + +### Tier 2: `[video-full]` — Full (adds Whisper + visual extraction) + +**Use case:** Local videos without subtitles, or when you want code/slide extraction from screen. 
+ +| Package | Version | Size | Purpose | +|---------|---------|------|---------| +| `yt-dlp` | `>=2024.12.0` | ~15MB | Metadata + audio download | +| `youtube-transcript-api` | `>=1.2.0` | ~50KB | YouTube captions | +| `faster-whisper` | `>=1.0.0` | ~5MB (+ models: 75MB-3GB) | Speech-to-text | +| `scenedetect[opencv]` | `>=0.6.4` | ~50MB (includes OpenCV) | Scene boundary detection | +| `easyocr` | `>=1.7.0` | ~150MB (+ models: ~200MB) | Text recognition from frames | +| `opencv-python-headless` | `>=4.9.0` | ~50MB | Frame extraction, image processing | + +**Additional capabilities over Tier 1:** +- Whisper speech-to-text (99 languages, word-level timestamps) +- Scene detection (find visual transitions) +- Keyframe extraction (save important frames) +- Frame classification (code/slide/terminal/diagram) +- OCR on frames (extract code and text from screen) +- Code block detection from video + +**Total install size:** +- Tier 1: ~15MB +- Tier 2: ~270MB + models (~300MB-3.2GB depending on Whisper model) + +--- + +## pyproject.toml Changes + +```toml +[project.optional-dependencies] +# Existing dependencies... +gemini = ["google-generativeai>=0.8.0"] +openai = ["openai>=1.0.0"] +all-llms = ["google-generativeai>=0.8.0", "openai>=1.0.0"] + +# NEW: Video processing +video = [ + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", +] +video-full = [ + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", + "faster-whisper>=1.0.0", + "scenedetect[opencv]>=0.6.4", + "easyocr>=1.7.0", + "opencv-python-headless>=4.9.0", +] + +# Update 'all' to include video +all = [ + # ... existing all dependencies ... + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", + "faster-whisper>=1.0.0", + "scenedetect[opencv]>=0.6.4", + "easyocr>=1.7.0", + "opencv-python-headless>=4.9.0", +] + +[project.scripts] +# ... existing entry points ... 
+skill-seekers-video = "skill_seekers.cli.video_scraper:main" # NEW +``` + +### Installation Commands + +```bash +# Lightweight video (YouTube transcripts + metadata) +pip install "skill-seekers[video]" + +# Full video (+ Whisper + visual extraction) +pip install "skill-seekers[video-full]" + +# Everything +pip install "skill-seekers[all]" + +# Development (editable) +pip install -e ".[video]" +pip install -e ".[video-full]" +``` + +--- + +## System Requirements + +### Tier 1 (Lightweight) + +| Requirement | Needed For | How to Check | +|-------------|-----------|-------------| +| Python 3.10+ | All | `python --version` | +| Internet connection | YouTube API calls | N/A | + +No additional system dependencies. Pure Python. + +### Tier 2 (Full) + +| Requirement | Needed For | How to Check | Install | +|-------------|-----------|-------------|---------| +| Python 3.10+ | All | `python --version` | — | +| FFmpeg | Audio extraction, video processing | `ffmpeg -version` | See below | +| GPU (optional) | Whisper + easyocr acceleration | `nvidia-smi` (NVIDIA) | CUDA toolkit | + +### FFmpeg Installation + +FFmpeg is required for: +- Extracting audio from video files (Whisper input) +- Downloading audio-only streams (yt-dlp post-processing) +- Converting between audio formats + +```bash +# macOS +brew install ffmpeg + +# Ubuntu/Debian +sudo apt install ffmpeg + +# Windows (winget) +winget install ffmpeg + +# Windows (choco) +choco install ffmpeg + +# Verify +ffmpeg -version +``` + +### GPU Support (Optional) + +GPU accelerates Whisper (~4x) and easyocr (~5x) but is not required. 
+ +**NVIDIA GPU (CUDA):** +```bash +# Check CUDA availability +python -c "import torch; print(torch.cuda.is_available())" + +# faster-whisper uses CTranslate2 which auto-detects CUDA +# easyocr uses PyTorch which auto-detects CUDA +# No additional setup needed if PyTorch CUDA is working +``` + +**Apple Silicon (MPS):** +```bash +# faster-whisper does not support MPS directly +# Falls back to CPU on Apple Silicon +# easyocr has partial MPS support +``` + +**CPU-only (no GPU):** +```bash +# Everything works on CPU, just slower +# Whisper base model: ~4x slower on CPU vs GPU +# easyocr: ~5x slower on CPU vs GPU +# For short videos (<10 min), CPU is fine +``` + +--- + +## Import Guards + +All video dependencies use try/except import guards to provide clear error messages: + +### video_scraper.py + +```python +"""Video scraper - main orchestrator.""" + +# Core dependencies (always available) +import json +import logging +import os +from pathlib import Path + +# Tier 1: Video basics +try: + from yt_dlp import YoutubeDL + HAS_YTDLP = True +except ImportError: + HAS_YTDLP = False + +try: + from youtube_transcript_api import YouTubeTranscriptApi + HAS_YT_TRANSCRIPT = True +except ImportError: + HAS_YT_TRANSCRIPT = False + +# Feature availability check +def check_video_dependencies(require_full: bool = False) -> None: + """Check that video dependencies are installed. 
+ + Args: + require_full: If True, check for full dependencies (Whisper, OCR) + + Raises: + ImportError: With installation instructions + """ + missing = [] + + if not HAS_YTDLP: + missing.append("yt-dlp") + if not HAS_YT_TRANSCRIPT: + missing.append("youtube-transcript-api") + + if missing: + raise ImportError( + f"Video processing requires: {', '.join(missing)}\n" + f"Install with: pip install skill-seekers[video]" + ) + + if require_full: + full_missing = [] + try: + import faster_whisper + except ImportError: + full_missing.append("faster-whisper") + try: + import cv2 + except ImportError: + full_missing.append("opencv-python-headless") + try: + import scenedetect + except ImportError: + full_missing.append("scenedetect[opencv]") + try: + import easyocr + except ImportError: + full_missing.append("easyocr") + + if full_missing: + raise ImportError( + f"Visual extraction requires: {', '.join(full_missing)}\n" + f"Install with: pip install skill-seekers[video-full]" + ) +``` + +### video_transcript.py + +```python +"""Transcript extraction module.""" + +# YouTube transcript (Tier 1) +try: + from youtube_transcript_api import YouTubeTranscriptApi + HAS_YT_TRANSCRIPT = True +except ImportError: + HAS_YT_TRANSCRIPT = False + +# Whisper (Tier 2) +try: + from faster_whisper import WhisperModel + HAS_WHISPER = True +except ImportError: + HAS_WHISPER = False + + +def get_transcript(video_info, config): + """Get transcript using best available method.""" + + # Try YouTube captions first (Tier 1) + if HAS_YT_TRANSCRIPT and video_info.source_type == VideoSourceType.YOUTUBE: + try: + return extract_youtube_transcript(video_info.video_id, config.languages) + except TranscriptNotAvailable: + pass + + # Try Whisper fallback (Tier 2) + if HAS_WHISPER: + return transcribe_with_whisper(video_info, config) + + # No transcript possible + if not HAS_WHISPER: + logger.warning( + f"No transcript for {video_info.video_id}. 
" + "Install faster-whisper for speech-to-text: " + "pip install skill-seekers[video-full]" + ) + return [], TranscriptSource.NONE +``` + +### video_visual.py + +```python +"""Visual extraction module.""" + +try: + import cv2 + HAS_OPENCV = True +except ImportError: + HAS_OPENCV = False + +try: + from scenedetect import detect, ContentDetector + HAS_SCENEDETECT = True +except ImportError: + HAS_SCENEDETECT = False + +try: + import easyocr + HAS_EASYOCR = True +except ImportError: + HAS_EASYOCR = False + + +def check_visual_dependencies() -> None: + """Check visual extraction dependencies.""" + missing = [] + if not HAS_OPENCV: + missing.append("opencv-python-headless") + if not HAS_SCENEDETECT: + missing.append("scenedetect[opencv]") + if not HAS_EASYOCR: + missing.append("easyocr") + + if missing: + raise ImportError( + f"Visual extraction requires: {', '.join(missing)}\n" + f"Install with: pip install skill-seekers[video-full]" + ) + + +def check_ffmpeg() -> bool: + """Check if FFmpeg is available.""" + import shutil + return shutil.which('ffmpeg') is not None +``` + +--- + +## Dependency Check Command + +Add a dependency check to the `config` command: + +```bash +# Check all video dependencies +skill-seekers config --check-video + +# Output: +# Video Dependencies: +# yt-dlp ✅ 2025.01.15 +# youtube-transcript-api ✅ 1.2.3 +# faster-whisper ❌ Not installed (pip install skill-seekers[video-full]) +# opencv-python-headless ❌ Not installed +# scenedetect ❌ Not installed +# easyocr ❌ Not installed +# +# System Dependencies: +# FFmpeg ✅ 6.1.1 +# GPU (CUDA) ❌ Not available (CPU mode will be used) +# +# Available modes: +# Transcript only ✅ YouTube captions available +# Whisper fallback ❌ Install faster-whisper +# Visual extraction ❌ Install video-full dependencies +``` + +--- + +## Model Management + +### Whisper Models + +Whisper models are downloaded on first use and cached in the user's home directory. 
+ +| Model | Download Size | Disk Size | First-Use Download Time | +|-------|-------------|-----------|------------------------| +| tiny | 75 MB | 75 MB | ~15s | +| base | 142 MB | 142 MB | ~25s | +| small | 466 MB | 466 MB | ~60s | +| medium | 1.5 GB | 1.5 GB | ~3 min | +| large-v3 | 3.1 GB | 3.1 GB | ~5 min | +| large-v3-turbo | 1.6 GB | 1.6 GB | ~3 min | + +**Cache location:** `~/.cache/huggingface/hub/` (CTranslate2 models) + +**Pre-download command:** +```bash +# Pre-download a model before using it +python -c "from faster_whisper import WhisperModel; WhisperModel('base')" +``` + +### easyocr Models + +easyocr models are also downloaded on first use. + +| Language Pack | Download Size | Disk Size | +|-------------|-------------|-----------| +| English | ~100 MB | ~100 MB | +| + Additional language | ~50-100 MB each | ~50-100 MB each | + +**Cache location:** `~/.EasyOCR/model/` + +**Pre-download command:** +```bash +# Pre-download English OCR model +python -c "import easyocr; easyocr.Reader(['en'])" +``` + +--- + +## Docker Considerations + +### Dockerfile additions for video support + +```dockerfile +# Tier 1 (lightweight) +RUN pip install skill-seekers[video] + +# Tier 2 (full) +RUN apt-get update && apt-get install -y ffmpeg +RUN pip install skill-seekers[video-full] + +# Pre-download Whisper model (avoids first-run download) +RUN python -c "from faster_whisper import WhisperModel; WhisperModel('base')" + +# Pre-download easyocr model +RUN python -c "import easyocr; easyocr.Reader(['en'])" +``` + +### Docker image sizes + +| Tier | Base Image Size | Additional Size | Total | +|------|----------------|----------------|-------| +| Tier 1 (video) | ~300 MB | ~20 MB | ~320 MB | +| Tier 2 (video-full, CPU) | ~300 MB | ~800 MB | ~1.1 GB | +| Tier 2 (video-full, GPU) | ~5 GB (CUDA base) | ~800 MB | ~5.8 GB | + +### Kubernetes resource recommendations + +```yaml +# Tier 1 (transcript only) +resources: + requests: + memory: "256Mi" + cpu: "500m" + limits: + memory: 
"512Mi" + cpu: "1000m" + +# Tier 2 (full, CPU) +resources: + requests: + memory: "2Gi" + cpu: "2000m" + limits: + memory: "4Gi" + cpu: "4000m" + +# Tier 2 (full, GPU) +resources: + requests: + memory: "4Gi" + cpu: "2000m" + nvidia.com/gpu: 1 + limits: + memory: "8Gi" + cpu: "4000m" + nvidia.com/gpu: 1 +``` diff --git a/pyproject.toml b/pyproject.toml index 0c2a3ab..726480c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,6 +115,22 @@ docx = [ "python-docx>=1.1.0", ] +# Video processing (lightweight: YouTube transcripts + metadata) +video = [ + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", +] + +# Video processing (full: + Whisper + visual extraction) +video-full = [ + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", + "faster-whisper>=1.0.0", + "scenedetect[opencv]>=0.6.4", + "easyocr>=1.7.0", + "opencv-python-headless>=4.9.0", +] + # RAG vector database upload support chroma = [ "chromadb>=0.4.0", @@ -154,6 +170,8 @@ embedding = [ all = [ "mammoth>=1.6.0", "python-docx>=1.1.0", + "yt-dlp>=2024.12.0", + "youtube-transcript-api>=1.2.0", "mcp>=1.25,<2", "httpx>=0.28.1", "httpx-sse>=0.4.3", @@ -195,6 +213,7 @@ skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main" skill-seekers-github = "skill_seekers.cli.github_scraper:main" skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main" skill-seekers-word = "skill_seekers.cli.word_scraper:main" +skill-seekers-video = "skill_seekers.cli.video_scraper:main" skill-seekers-unified = "skill_seekers.cli.unified_scraper:main" skill-seekers-enhance = "skill_seekers.cli.enhance_command:main" skill-seekers-enhance-status = "skill_seekers.cli.enhance_status:main" diff --git a/src/skill_seekers/cli/arguments/create.py b/src/skill_seekers/cli/arguments/create.py index 03b30c7..1db0de1 100644 --- a/src/skill_seekers/cli/arguments/create.py +++ b/src/skill_seekers/cli/arguments/create.py @@ -401,6 +401,86 @@ WORD_ARGUMENTS: dict[str, dict[str, Any]] = { }, } +# Video specific (from video.py) 
+VIDEO_ARGUMENTS: dict[str, dict[str, Any]] = { + "video_url": { + "flags": ("--video-url",), + "kwargs": { + "type": str, + "help": "Video URL (YouTube, Vimeo)", + "metavar": "URL", + }, + }, + "video_file": { + "flags": ("--video-file",), + "kwargs": { + "type": str, + "help": "Local video file path", + "metavar": "PATH", + }, + }, + "video_playlist": { + "flags": ("--video-playlist",), + "kwargs": { + "type": str, + "help": "Playlist URL", + "metavar": "URL", + }, + }, + "video_languages": { + "flags": ("--video-languages",), + "kwargs": { + "type": str, + "default": "en", + "help": "Transcript language preference (comma-separated)", + "metavar": "LANGS", + }, + }, + "visual": { + "flags": ("--visual",), + "kwargs": { + "action": "store_true", + "help": "Enable visual extraction (requires video-full deps)", + }, + }, + "whisper_model": { + "flags": ("--whisper-model",), + "kwargs": { + "type": str, + "default": "base", + "help": "Whisper model size (default: base)", + "metavar": "MODEL", + }, + }, + "visual_interval": { + "flags": ("--visual-interval",), + "kwargs": { + "type": float, + "default": 0.7, + "help": "Visual scan interval in seconds (default: 0.7)", + "metavar": "SECS", + }, + }, + "visual_min_gap": { + "flags": ("--visual-min-gap",), + "kwargs": { + "type": float, + "default": 0.5, + "help": "Min gap between extracted frames in seconds (default: 0.5)", + "metavar": "SECS", + }, + }, + "visual_similarity": { + "flags": ("--visual-similarity",), + "kwargs": { + "type": float, + "default": 3.0, + "help": "Pixel-diff threshold for duplicate detection; lower = more frames (default: 3.0)", + "metavar": "THRESH", + }, + }, +} + # Multi-source config specific (from unified_scraper.py) CONFIG_ARGUMENTS: dict[str, dict[str, Any]] = { "merge_mode": { @@ -484,6 +564,7 @@ def get_source_specific_arguments(source_type: str) -> dict[str, dict[str, Any]] "local": LOCAL_ARGUMENTS, "pdf": PDF_ARGUMENTS, "word": WORD_ARGUMENTS, + "video": VIDEO_ARGUMENTS, "config": 
CONFIG_ARGUMENTS, } return source_args.get(source_type, {}) @@ -521,6 +602,7 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default") - 'local': Universal + local-specific - 'pdf': Universal + pdf-specific - 'word': Universal + word-specific + - 'video': Universal + video-specific - 'advanced': Advanced/rare arguments - 'all': All 120+ arguments @@ -561,6 +643,10 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default") for arg_name, arg_def in WORD_ARGUMENTS.items(): parser.add_argument(*arg_def["flags"], **arg_def["kwargs"]) + if mode in ["video", "all"]: + for arg_name, arg_def in VIDEO_ARGUMENTS.items(): + parser.add_argument(*arg_def["flags"], **arg_def["kwargs"]) + if mode in ["config", "all"]: for arg_name, arg_def in CONFIG_ARGUMENTS.items(): parser.add_argument(*arg_def["flags"], **arg_def["kwargs"]) diff --git a/src/skill_seekers/cli/arguments/video.py b/src/skill_seekers/cli/arguments/video.py new file mode 100644 index 0000000..884385a --- /dev/null +++ b/src/skill_seekers/cli/arguments/video.py @@ -0,0 +1,141 @@ +"""Video command argument definitions. + +This module defines ALL arguments for the video command in ONE place. +Both video_scraper.py (standalone) and parsers/video_parser.py (unified CLI) +import and use these definitions. + +Shared arguments (name, description, output, enhance-level, api-key, +dry-run, verbose, quiet, workflow args) come from common.py / workflow.py +via ``add_all_standard_arguments()``. +""" + +import argparse +from typing import Any + +from .common import add_all_standard_arguments + +# Video-specific argument definitions as data structure +# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run, +# verbose, quiet, workflow args) are registered by add_all_standard_arguments(). 
+VIDEO_ARGUMENTS: dict[str, dict[str, Any]] = { + "url": { + "flags": ("--url",), + "kwargs": { + "type": str, + "help": "Video URL (YouTube, Vimeo)", + "metavar": "URL", + }, + }, + "video_file": { + "flags": ("--video-file",), + "kwargs": { + "type": str, + "help": "Local video file path", + "metavar": "PATH", + }, + }, + "playlist": { + "flags": ("--playlist",), + "kwargs": { + "type": str, + "help": "Playlist URL", + "metavar": "URL", + }, + }, + "languages": { + "flags": ("--languages",), + "kwargs": { + "type": str, + "default": "en", + "help": "Transcript language preference (comma-separated, default: en)", + "metavar": "LANGS", + }, + }, + "visual": { + "flags": ("--visual",), + "kwargs": { + "action": "store_true", + "help": "Enable visual extraction (requires video-full deps)", + }, + }, + "whisper_model": { + "flags": ("--whisper-model",), + "kwargs": { + "type": str, + "default": "base", + "help": "Whisper model size (default: base)", + "metavar": "MODEL", + }, + }, + "from_json": { + "flags": ("--from-json",), + "kwargs": { + "type": str, + "help": "Build skill from extracted JSON", + "metavar": "FILE", + }, + }, + "visual_interval": { + "flags": ("--visual-interval",), + "kwargs": { + "type": float, + "default": 0.7, + "help": "Visual scan interval in seconds (default: 0.7)", + "metavar": "SECS", + }, + }, + "visual_min_gap": { + "flags": ("--visual-min-gap",), + "kwargs": { + "type": float, + "default": 0.5, + "help": "Minimum gap between extracted frames in seconds (default: 0.5)", + "metavar": "SECS", + }, + }, + "visual_similarity": { + "flags": ("--visual-similarity",), + "kwargs": { + "type": float, + "default": 3.0, + "help": "Pixel-diff threshold for duplicate frame detection; lower = more frames kept (default: 3.0)", + "metavar": "THRESH", + }, + }, + "vision_ocr": { + "flags": ("--vision-ocr",), + "kwargs": { + "action": "store_true", + "help": "Use Claude Vision API as fallback for low-confidence code frames (requires ANTHROPIC_API_KEY, 
~$0.004/frame)", + }, + }, +} + + +def add_video_arguments(parser: argparse.ArgumentParser) -> None: + """Add all video command arguments to a parser. + + Registers shared args (name, description, output, enhance-level, api-key, + dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(), + then adds video-specific args on top. + + The default for --enhance-level is overridden to 0 (disabled) for video. + """ + # Shared universal args first + add_all_standard_arguments(parser) + + # Override enhance-level default to 0 for video + for action in parser._actions: + if hasattr(action, "dest") and action.dest == "enhance_level": + action.default = 0 + action.help = ( + "AI enhancement level (auto-detects API vs LOCAL mode): " + "0=disabled (default for video), 1=SKILL.md only, 2=+architecture/config, 3=full enhancement. " + "Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)" + ) + + # Video-specific args + for arg_name, arg_def in VIDEO_ARGUMENTS.items(): + flags = arg_def["flags"] + kwargs = arg_def["kwargs"] + parser.add_argument(*flags, **kwargs) diff --git a/src/skill_seekers/cli/config_validator.py b/src/skill_seekers/cli/config_validator.py index c3329a3..c55e73d 100644 --- a/src/skill_seekers/cli/config_validator.py +++ b/src/skill_seekers/cli/config_validator.py @@ -27,7 +27,7 @@ class ConfigValidator: """ # Valid source types - VALID_SOURCE_TYPES = {"documentation", "github", "pdf", "local"} + VALID_SOURCE_TYPES = {"documentation", "github", "pdf", "local", "word", "video"} # Valid merge modes VALID_MERGE_MODES = {"rule-based", "claude-enhanced"} diff --git a/src/skill_seekers/cli/create_command.py b/src/skill_seekers/cli/create_command.py index 92f6b1b..be10054 100644 --- a/src/skill_seekers/cli/create_command.py +++ b/src/skill_seekers/cli/create_command.py @@ -133,6 +133,8 @@ class CreateCommand: return self._route_pdf() elif self.source_info.type == "word": return self._route_word() + elif 
self.source_info.type == "video": + return self._route_video() elif self.source_info.type == "config": return self._route_config() else: @@ -345,6 +347,55 @@ class CreateCommand: finally: sys.argv = original_argv + def _route_video(self) -> int: + """Route to video scraper (video_scraper.py).""" + from skill_seekers.cli import video_scraper + + # Reconstruct argv for video_scraper + argv = ["video_scraper"] + + # Add video source (URL or file) + parsed = self.source_info.parsed + if parsed.get("source_kind") == "file": + argv.extend(["--video-file", parsed["file_path"]]) + elif parsed.get("url"): + url = parsed["url"] + # Detect playlist vs single video + if "playlist" in url.lower(): + argv.extend(["--playlist", url]) + else: + argv.extend(["--url", url]) + + # Add universal arguments + self._add_common_args(argv) + + # Add video-specific arguments + video_langs = getattr(self.args, "video_languages", None) or getattr(self.args, "languages", None) + if video_langs: + argv.extend(["--languages", video_langs]) + if getattr(self.args, "visual", False): + argv.append("--visual") + if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base": + argv.extend(["--whisper-model", self.args.whisper_model]) + vi = getattr(self.args, "visual_interval", None) + if vi is not None and vi != 0.7: + argv.extend(["--visual-interval", str(vi)]) + vmg = getattr(self.args, "visual_min_gap", None) + if vmg is not None and vmg != 0.5: + argv.extend(["--visual-min-gap", str(vmg)]) + vs = getattr(self.args, "visual_similarity", None) + if vs is not None and vs != 3.0: + argv.extend(["--visual-similarity", str(vs)]) + + # Call video_scraper with modified argv + logger.debug(f"Calling video_scraper with argv: {argv}") + original_argv = sys.argv + try: + sys.argv = argv + return video_scraper.main() + finally: + sys.argv = original_argv + def _route_config(self) -> int: """Route to unified scraper for config files (unified_scraper.py).""" from skill_seekers.cli import 
unified_scraper @@ -468,6 +519,8 @@ Examples: Local: skill-seekers create ./my-project -p comprehensive PDF: skill-seekers create tutorial.pdf --ocr DOCX: skill-seekers create document.docx + Video: skill-seekers create https://youtube.com/watch?v=... + Video: skill-seekers create recording.mp4 Config: skill-seekers create configs/react.json Source Auto-Detection: @@ -476,6 +529,8 @@ Source Auto-Detection: • ./path → local codebase • file.pdf → PDF extraction • file.docx → Word document extraction + • youtube.com/... → Video transcript extraction + • file.mp4 → Video file extraction • file.json → multi-source config Progressive Help (13 → 120+ flags): @@ -483,6 +538,7 @@ Progressive Help (13 → 120+ flags): --help-github GitHub repository options --help-local Local codebase analysis --help-pdf PDF extraction options + --help-video Video extraction options --help-advanced Rare/advanced options --help-all All options + compatibility @@ -513,6 +569,9 @@ Common Workflows: parser.add_argument( "--help-word", action="store_true", help=argparse.SUPPRESS, dest="_help_word" ) + parser.add_argument( + "--help-video", action="store_true", help=argparse.SUPPRESS, dest="_help_video" + ) parser.add_argument( "--help-config", action="store_true", help=argparse.SUPPRESS, dest="_help_config" ) @@ -571,6 +630,15 @@ Common Workflows: add_create_arguments(parser_word, mode="word") parser_word.print_help() return 0 + elif args._help_video: + parser_video = argparse.ArgumentParser( + prog="skill-seekers create", + description="Create skill from video (YouTube, Vimeo, local files)", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + add_create_arguments(parser_video, mode="video") + parser_video.print_help() + return 0 elif args._help_config: parser_config = argparse.ArgumentParser( prog="skill-seekers create", diff --git a/src/skill_seekers/cli/enhance_skill.py b/src/skill_seekers/cli/enhance_skill.py index 9960eab..0523eab 100644 --- a/src/skill_seekers/cli/enhance_skill.py 
+++ b/src/skill_seekers/cli/enhance_skill.py @@ -97,9 +97,17 @@ class SkillEnhancer: print(f"❌ Error calling Claude API: {e}") return None + def _is_video_source(self, references): + """Check if the references come from video tutorial extraction.""" + return any(meta["source"] == "video_tutorial" for meta in references.values()) + def _build_enhancement_prompt(self, references, current_skill_md): """Build the prompt for Claude with multi-source awareness""" + # Dispatch to video-specific prompt if video source detected + if self._is_video_source(references): + return self._build_video_enhancement_prompt(references, current_skill_md) + # Extract skill name and description skill_name = self.skill_dir.name @@ -276,6 +284,148 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---). return prompt + def _build_video_enhancement_prompt(self, references, current_skill_md): + """Build a video-specific enhancement prompt. + + Video tutorial references contain transcript text, OCR'd code panels, + code timelines with edits, and audio-visual alignment pairs. This prompt + is tailored to reconstruct clean code from noisy OCR, detect programming + languages from context, and synthesize a coherent tutorial skill. + """ + skill_name = self.skill_dir.name + + prompt = f"""You are enhancing a Claude skill built from VIDEO TUTORIAL extraction. This skill is about: {skill_name} + +The raw data was extracted from video tutorials using: +1. **Transcript** (speech-to-text) — HIGH quality, this is the primary signal +2. **OCR on code panels** — NOISY, may contain line numbers, UI chrome, garbled text +3. **Code Timeline** — Tracks code evolution across frames with diffs +4. 
**Audio-Visual Alignment** — Pairs of on-screen code + narrator explanation + +CURRENT SKILL.MD: +{"```markdown" if current_skill_md else "(none - create from scratch)"} +{current_skill_md or "No existing SKILL.md"} +{"```" if current_skill_md else ""} + +REFERENCE FILES: +""" + + # Add all reference content + for filename, metadata in references.items(): + content = metadata["content"] + if len(content) > 30000: + content = content[:30000] + "\n\n[Content truncated for size...]" + prompt += f"\n#### {filename}\n" + prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n" + prompt += f"```markdown\n{content}\n```\n" + + prompt += """ + +VIDEO-SPECIFIC ENHANCEMENT INSTRUCTIONS: + +You are working with data extracted from programming tutorial videos. The data has +specific characteristics you MUST handle: + +## 1. OCR Code Reconstruction (CRITICAL) + +The OCR'd code blocks are NOISY. Common issues you MUST fix: +- **Line numbers in code**: OCR captures line numbers (1, 2, 3...) as part of the code — STRIP THEM +- **UI chrome contamination**: Tab bars, file names, button text appear in code blocks — REMOVE +- **Garbled characters**: OCR errors like `l` → `1`, `O` → `0`, `rn` → `m` — FIX using context +- **Duplicate fragments**: Same code appears across multiple frames with minor OCR variations — DEDUPLICATE +- **Incomplete lines**: Lines cut off at panel edges — RECONSTRUCT from transcript context +- **Animation/timeline numbers**: Frame counters or timeline numbers in code — REMOVE + +When reconstructing code: +- The TRANSCRIPT is the ground truth for WHAT the code does +- The OCR is the ground truth for HOW the code looks (syntax, structure) +- Combine both: use transcript to understand intent, OCR for actual code structure +- If OCR is too garbled, reconstruct the code based on what the narrator describes + +## 2. Language Detection + +The OCR-based language detection is often WRONG. 
Fix it by: +- Reading the transcript for language mentions ("in GDScript", "this Python function", "our C# class") +- Using code patterns: `extends`, `func`, `var`, `signal` = GDScript; `def`, `class`, `import` = Python; + `function`, `const`, `let` = JavaScript/TypeScript; `using`, `namespace` = C# +- Looking at file extensions mentioned in the transcript or visible in tab bars +- Using proper language tags in all code fences (```gdscript, ```python, etc.) + +## 3. Code Timeline Processing + +The "Code Timeline" section shows how code EVOLVES during the tutorial. Use it to: +- Show the FINAL version of each code block (not intermediate states) +- Optionally show key intermediate steps if the tutorial is about building up code progressively +- The edit diffs show exactly what changed between frames — use these to understand the tutorial flow + +## 4. Audio-Visual Alignment + +These are the MOST VALUABLE pairs: each links on-screen code with the narrator's explanation. +- Use these to create annotated code examples with inline comments +- The narrator text explains WHY each piece of code exists +- Cross-reference these pairs to build the "how-to" sections + +## 5. Tutorial Structure + +Transform the raw chronological data into a LOGICAL tutorial structure: +- Group by TOPIC, not by timestamp (e.g., "Setting Up the State Machine" not "Segment 3") +- Create clear section headers that describe what is being TAUGHT +- Build a progressive learning path: concepts build on each other +- Include prerequisite knowledge mentioned by the narrator + +YOUR TASK — Create an enhanced SKILL.md: + +1. **Clean Overview Section** + - What does this tutorial teach? (from transcript, NOT generic) + - Prerequisites mentioned by the narrator + - Key technologies/frameworks used (from actual code, not guesses) + +2. 
**"When to Use This Skill" Section** + - Specific trigger conditions based on what the tutorial covers + - Use cases directly from the tutorial content + - Reference the framework/library/tool being taught + +3. **Quick Reference Section** (MOST IMPORTANT) + - Extract 5-10 CLEAN, reconstructed code examples + - Each example must be: + a. Denoised (no line numbers, no UI chrome, no garbled text) + b. Complete (not cut off mid-line) + c. Properly language-tagged + d. Annotated with a description from the transcript + - Prefer code from Audio-Visual Alignment pairs (they have narrator context) + - Show the FINAL working version of each code block + +4. **Step-by-Step Tutorial Section** + - Follow the tutorial's teaching flow + - Each step includes: clean code + explanation from transcript + - Use narrator's explanations as the descriptions (paraphrase, don't copy verbatim) + - Show code evolution where the tutorial builds up code incrementally + +5. **Key Concepts Section** + - Extract terminology and concepts the narrator explains + - Define them using the narrator's own explanations + - Link concepts to specific code examples + +6. **Reference Files Description** + - Explain what each reference file contains + - Note that OCR data is raw and may contain errors + - Point to the most useful sections (Audio-Visual Alignment, Code Timeline) + +7. 
**Keep the frontmatter** (---\\nname: ...\\n---) intact if present + +CRITICAL RULES: +- NEVER include raw OCR text with line numbers or UI chrome — always clean it first +- ALWAYS use correct language tags (detect from context, not from OCR metadata) +- The transcript is your BEST source for understanding content — trust it over garbled OCR +- Extract REAL code from the references, reconstruct where needed, but never invent code +- Keep code examples SHORT and focused (5-30 lines max per example) +- Make the skill actionable: someone reading it should be able to implement what the tutorial teaches + +OUTPUT: +Return ONLY the complete SKILL.md content, starting with the frontmatter (---). +""" + return prompt + def save_enhanced_skill_md(self, content): """Save the enhanced SKILL.md""" # Backup original diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py index fb0a478..5f1cb8a 100644 --- a/src/skill_seekers/cli/main.py +++ b/src/skill_seekers/cli/main.py @@ -48,6 +48,7 @@ COMMAND_MODULES = { "github": "skill_seekers.cli.github_scraper", "pdf": "skill_seekers.cli.pdf_scraper", "word": "skill_seekers.cli.word_scraper", + "video": "skill_seekers.cli.video_scraper", "unified": "skill_seekers.cli.unified_scraper", "enhance": "skill_seekers.cli.enhance_command", "enhance-status": "skill_seekers.cli.enhance_status", @@ -142,7 +143,6 @@ def _reconstruct_argv(command: str, args: argparse.Namespace) -> list[str]: # Handle positional arguments (no -- prefix) if key in [ "source", # create command - "url", "directory", "file", "job_id", diff --git a/src/skill_seekers/cli/parsers/__init__.py b/src/skill_seekers/cli/parsers/__init__.py index d12c2a6..7cca3c4 100644 --- a/src/skill_seekers/cli/parsers/__init__.py +++ b/src/skill_seekers/cli/parsers/__init__.py @@ -13,6 +13,7 @@ from .scrape_parser import ScrapeParser from .github_parser import GitHubParser from .pdf_parser import PDFParser from .word_parser import WordParser +from .video_parser import 
VideoParser from .unified_parser import UnifiedParser from .enhance_parser import EnhanceParser from .enhance_status_parser import EnhanceStatusParser @@ -43,6 +44,7 @@ PARSERS = [ EnhanceStatusParser(), PDFParser(), WordParser(), + VideoParser(), UnifiedParser(), EstimateParser(), InstallParser(), diff --git a/src/skill_seekers/cli/parsers/video_parser.py b/src/skill_seekers/cli/parsers/video_parser.py new file mode 100644 index 0000000..8ad3fb6 --- /dev/null +++ b/src/skill_seekers/cli/parsers/video_parser.py @@ -0,0 +1,32 @@ +"""Video subcommand parser. + +Uses shared argument definitions from arguments.video to ensure +consistency with the standalone video_scraper module. +""" + +from .base import SubcommandParser +from skill_seekers.cli.arguments.video import add_video_arguments + + +class VideoParser(SubcommandParser): + """Parser for video subcommand.""" + + @property + def name(self) -> str: + return "video" + + @property + def help(self) -> str: + return "Extract from video (YouTube, local files)" + + @property + def description(self) -> str: + return "Extract transcripts and metadata from videos and generate skill" + + def add_arguments(self, parser): + """Add video-specific arguments. + + Uses shared argument definitions to ensure consistency + with video_scraper.py (standalone scraper). + """ + add_video_arguments(parser) diff --git a/src/skill_seekers/cli/source_detector.py b/src/skill_seekers/cli/source_detector.py index 7f2397c..9ce464a 100644 --- a/src/skill_seekers/cli/source_detector.py +++ b/src/skill_seekers/cli/source_detector.py @@ -63,24 +63,34 @@ class SourceDetector: if source.endswith(".docx"): return cls._detect_word(source) - # 2. Directory detection + # Video file extensions + VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv") + if source.lower().endswith(VIDEO_EXTENSIONS): + return cls._detect_video_file(source) + + # 2. 
Video URL detection (before directory check) + video_url_info = cls._detect_video_url(source) + if video_url_info: + return video_url_info + + # 3. Directory detection if os.path.isdir(source): return cls._detect_local(source) - # 3. GitHub patterns + # 4. GitHub patterns github_info = cls._detect_github(source) if github_info: return github_info - # 4. URL detection + # 5. URL detection if source.startswith("http://") or source.startswith("https://"): return cls._detect_web(source) - # 5. Domain inference (add https://) + # 6. Domain inference (add https://) if "." in source and not source.startswith("/"): return cls._detect_web(f"https://{source}") - # 6. Error - cannot determine + # 7. Error - cannot determine raise ValueError( f"Cannot determine source type for: {source}\n\n" "Examples:\n" @@ -89,6 +99,8 @@ class SourceDetector: " Local: skill-seekers create ./my-project\n" " PDF: skill-seekers create tutorial.pdf\n" " DOCX: skill-seekers create document.docx\n" + " Video: skill-seekers create https://youtube.com/watch?v=...\n" + " Video: skill-seekers create recording.mp4\n" " Config: skill-seekers create configs/react.json" ) @@ -116,6 +128,55 @@ class SourceDetector: type="word", parsed={"file_path": source}, suggested_name=name, raw_input=source ) + @classmethod + def _detect_video_file(cls, source: str) -> SourceInfo: + """Detect local video file source.""" + name = os.path.splitext(os.path.basename(source))[0] + return SourceInfo( + type="video", + parsed={"file_path": source, "source_kind": "file"}, + suggested_name=name, + raw_input=source, + ) + + @classmethod + def _detect_video_url(cls, source: str) -> SourceInfo | None: + """Detect video platform URL (YouTube, Vimeo). + + Returns SourceInfo if the source is a video URL, None otherwise. 
+ """ + lower = source.lower() + + # YouTube patterns + youtube_keywords = ["youtube.com/watch", "youtu.be/", "youtube.com/playlist", + "youtube.com/@", "youtube.com/channel/", "youtube.com/c/", + "youtube.com/shorts/", "youtube.com/embed/"] + if any(kw in lower for kw in youtube_keywords): + # Determine suggested name + if "playlist" in lower: + name = "youtube_playlist" + elif "/@" in lower or "/channel/" in lower or "/c/" in lower: + name = "youtube_channel" + else: + name = "youtube_video" + return SourceInfo( + type="video", + parsed={"url": source, "source_kind": "url"}, + suggested_name=name, + raw_input=source, + ) + + # Vimeo patterns + if "vimeo.com/" in lower: + return SourceInfo( + type="video", + parsed={"url": source, "source_kind": "url"}, + suggested_name="vimeo_video", + raw_input=source, + ) + + return None + @classmethod def _detect_local(cls, source: str) -> SourceInfo: """Detect local directory source.""" @@ -209,6 +270,15 @@ class SourceDetector: if not os.path.isfile(file_path): raise ValueError(f"Path is not a file: {file_path}") + elif source_info.type == "video": + if source_info.parsed.get("source_kind") == "file": + file_path = source_info.parsed["file_path"] + if not os.path.exists(file_path): + raise ValueError(f"Video file does not exist: {file_path}") + if not os.path.isfile(file_path): + raise ValueError(f"Path is not a file: {file_path}") + # URL-based video sources are validated during processing + elif source_info.type == "config": config_path = source_info.parsed["config_path"] if not os.path.exists(config_path): diff --git a/src/skill_seekers/cli/unified_scraper.py b/src/skill_seekers/cli/unified_scraper.py index 092c218..6cfc9cf 100644 --- a/src/skill_seekers/cli/unified_scraper.py +++ b/src/skill_seekers/cli/unified_scraper.py @@ -74,11 +74,12 @@ class UnifiedScraper: "github": [], # List of github sources "pdf": [], # List of pdf sources "word": [], # List of word sources + "video": [], # List of video sources "local": [], # 
List of local sources (docs or code) } # Track source index for unique naming (multi-source support) - self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "word": 0, "local": 0} + self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "word": 0, "video": 0, "local": 0} # Output paths - cleaner organization self.name = self.config["name"] @@ -154,6 +155,8 @@ class UnifiedScraper: self._scrape_pdf(source) elif source_type == "word": self._scrape_word(source) + elif source_type == "video": + self._scrape_video(source) elif source_type == "local": self._scrape_local(source) else: @@ -576,6 +579,63 @@ class UnifiedScraper: logger.info(f"✅ Word: {len(word_data.get('pages', []))} sections extracted") + def _scrape_video(self, source: dict[str, Any]): + """Scrape video source (YouTube, local file, etc.).""" + try: + from skill_seekers.cli.video_scraper import VideoToSkillConverter + except ImportError: + logger.error("video_scraper.py not found") + return + + # Multi-source support: Get unique index for this video source + idx = self._source_counters["video"] + self._source_counters["video"] += 1 + + # Determine video identifier + video_url = source.get("url", "") + video_id = video_url or source.get("path", f"video_{idx}") + + # Create config for video scraper + video_config = { + "name": f"{self.name}_video_{idx}", + "url": source.get("url"), + "video_file": source.get("path"), + "playlist": source.get("playlist"), + "description": source.get("description", ""), + "languages": ",".join(source.get("languages", ["en"])), + "visual": source.get("visual_extraction", False), + "whisper_model": source.get("whisper_model", "base"), + } + + # Process video + logger.info(f"Scraping video: {video_id}") + converter = VideoToSkillConverter(video_config) + + try: + result = converter.process() + converter.save_extracted_data() + + # Append to list + self.scraped_data["video"].append( + { + "video_id": video_id, + "idx": idx, + "data": result.to_dict(), + 
"data_file": converter.data_file, + } + ) + + # Build standalone SKILL.md for synthesis + converter.build_skill() + logger.info("✅ Video: Standalone SKILL.md created") + + logger.info( + f"✅ Video: {len(result.videos)} videos, " + f"{result.total_segments} segments extracted" + ) + except Exception as e: + logger.error(f"Failed to process video source: {e}") + def _scrape_local(self, source: dict[str, Any]): """ Scrape local directory (documentation files or source code). diff --git a/src/skill_seekers/cli/utils.py b/src/skill_seekers/cli/utils.py index c3d56e5..a11833c 100755 --- a/src/skill_seekers/cli/utils.py +++ b/src/skill_seekers/cli/utils.py @@ -289,6 +289,10 @@ def read_reference_files( else: return "codebase_analysis", "medium", repo_id + # Video tutorial sources (video_*.md from video scraper) + elif relative_path.name.startswith("video_"): + return "video_tutorial", "high", None + # Conflicts report (discrepancy detection) elif "conflicts" in path_str: return "conflicts", "medium", None diff --git a/src/skill_seekers/cli/video_metadata.py b/src/skill_seekers/cli/video_metadata.py new file mode 100644 index 0000000..06dcacf --- /dev/null +++ b/src/skill_seekers/cli/video_metadata.py @@ -0,0 +1,270 @@ +"""Video metadata extraction module. + +Uses yt-dlp for metadata extraction without downloading video content. +Supports YouTube, Vimeo, and local video files. 
+""" + +import hashlib +import logging +import os +import re + +from skill_seekers.cli.video_models import ( + Chapter, + VideoInfo, + VideoSourceType, +) + +logger = logging.getLogger(__name__) + +# Optional dependency: yt-dlp +try: + import yt_dlp + + HAS_YTDLP = True +except ImportError: + HAS_YTDLP = False + + +# ============================================================================= +# Video ID Extraction +# ============================================================================= + + +# YouTube URL patterns +YOUTUBE_PATTERNS = [ + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})"), + re.compile(r"(?:https?://)?youtu\.be/([a-zA-Z0-9_-]{11})"), + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/embed/([a-zA-Z0-9_-]{11})"), + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/v/([a-zA-Z0-9_-]{11})"), + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/shorts/([a-zA-Z0-9_-]{11})"), +] + +YOUTUBE_PLAYLIST_PATTERN = re.compile( + r"(?:https?://)?(?:www\.)?youtube\.com/playlist\?list=([a-zA-Z0-9_-]+)" +) + +YOUTUBE_CHANNEL_PATTERNS = [ + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/@([a-zA-Z0-9_-]+)"), + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/channel/([a-zA-Z0-9_-]+)"), + re.compile(r"(?:https?://)?(?:www\.)?youtube\.com/c/([a-zA-Z0-9_-]+)"), +] + +VIMEO_PATTERN = re.compile(r"(?:https?://)?(?:www\.)?vimeo\.com/(\d+)") + + +def extract_video_id(url: str) -> str | None: + """Extract YouTube video ID from various URL formats. + + Args: + url: YouTube URL in any supported format. + + Returns: + 11-character video ID, or None if not a YouTube URL. + """ + for pattern in YOUTUBE_PATTERNS: + match = pattern.search(url) + if match: + return match.group(1) + return None + + +def detect_video_source_type(url_or_path: str) -> VideoSourceType: + """Detect the source type of a video URL or file path. + + Args: + url_or_path: URL or local file path. + + Returns: + VideoSourceType enum value. 
def extract_youtube_metadata(url: str) -> VideoInfo:
    """Extract metadata from a YouTube video URL without downloading.

    Values that yt-dlp reports as ``None`` (key present but empty) are
    treated the same as missing keys, so the text, duration and chapter
    fields of the returned ``VideoInfo`` always hold usable defaults.

    Args:
        url: YouTube video URL.

    Returns:
        VideoInfo with metadata populated.

    Raises:
        RuntimeError: If yt-dlp is not installed.
    """
    _check_ytdlp()

    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": False,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    # `or` instead of a .get() default: a key that exists with a None value
    # must fall back as well, otherwise e.g. float(None) raises below.
    video_id = info.get("id") or extract_video_id(url) or "unknown"

    # Parse chapters
    chapters = []
    raw_chapters = info.get("chapters") or []
    for i, ch in enumerate(raw_chapters):
        end_time = ch.get("end_time") or 0
        if i + 1 < len(raw_chapters):
            # A chapter ends where the next one starts, when that is known.
            next_start = raw_chapters[i + 1].get("start_time")
            if next_start is not None:
                end_time = next_start
        chapters.append(
            Chapter(
                title=ch.get("title") or f"Chapter {i + 1}",
                start_time=ch.get("start_time") or 0,
                end_time=end_time,
            )
        )

    return VideoInfo(
        video_id=video_id,
        source_type=VideoSourceType.YOUTUBE,
        source_url=url,
        title=info.get("title") or "",
        description=info.get("description") or "",
        duration=float(info.get("duration") or 0),
        upload_date=info.get("upload_date"),
        language=info.get("language") or "en",
        channel_name=info.get("channel") or info.get("uploader"),
        channel_url=info.get("channel_url") or info.get("uploader_url"),
        view_count=info.get("view_count"),
        like_count=info.get("like_count"),
        comment_count=info.get("comment_count"),
        tags=info.get("tags") or [],
        categories=info.get("categories") or [],
        thumbnail_url=info.get("thumbnail"),
        chapters=chapters,
    )
def _resolve_flat_urls(url: str, **extra_opts) -> list[str]:
    """Run a flat (no per-video extraction) yt-dlp lookup and return entry URLs."""
    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,
        "skip_download": True,
        **extra_opts,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    entries = (info or {}).get("entries") or []
    video_urls = []
    for entry in entries:
        # Defensive: skip empty placeholders instead of failing on .get()
        if not entry:
            continue
        vid_url = entry.get("url") or entry.get("webpage_url")
        if vid_url:
            video_urls.append(vid_url)
        elif entry.get("id"):
            # Only an ID is known: rebuild the canonical watch URL
            video_urls.append(f"https://www.youtube.com/watch?v={entry['id']}")

    return video_urls


def resolve_playlist(url: str) -> list[str]:
    """Resolve a YouTube playlist URL to a list of video URLs.

    Args:
        url: YouTube playlist URL.

    Returns:
        List of video URLs in the order yt-dlp reports the entries.
        Entries without a URL or ID are skipped.

    Raises:
        RuntimeError: If yt-dlp is not installed.
    """
    _check_ytdlp()
    return _resolve_flat_urls(url)


def resolve_channel(url: str, max_videos: int = 50) -> list[str]:
    """Resolve a YouTube channel URL to a list of video URLs.

    Args:
        url: YouTube channel URL.
        max_videos: Maximum number of videos to resolve. Passed to yt-dlp as
            ``playlistend`` and also applied to the returned list.

    Returns:
        At most ``max_videos`` video URLs, in the order yt-dlp reports them.

    Raises:
        RuntimeError: If yt-dlp is not installed.
    """
    _check_ytdlp()
    return _resolve_flat_urls(url, playlistend=max_videos)[:max_videos]
@dataclass(frozen=True)
class Chapter:
    """Immutable chapter marker of a video: a title plus start and end times."""

    title: str
    start_time: float
    end_time: float

    @classmethod
    def from_dict(cls, data: dict) -> Chapter:
        """Build a Chapter from a mapping; all three keys are required."""
        title, begin, finish = (data[key] for key in ("title", "start_time", "end_time"))
        return cls(title, begin, finish)

    def to_dict(self) -> dict:
        """Return the chapter as a plain dict."""
        payload = {}
        payload["title"] = self.title
        payload["start_time"] = self.start_time
        payload["end_time"] = self.end_time
        return payload

    @property
    def duration(self) -> float:
        """Length of the chapter: end time minus start time."""
        span = self.end_time - self.start_time
        return span
@dataclass(frozen=True)
class OCRRegion:
    """One piece of text that OCR located inside a video frame."""

    text: str
    confidence: float
    bbox: tuple[int, int, int, int]
    is_monospace: bool = False

    @classmethod
    def from_dict(cls, data: dict) -> OCRRegion:
        """Build a region from a mapping; ``is_monospace`` is optional."""
        text = data["text"]
        confidence = data["confidence"]
        # Serialized form stores the box as a list; restore the tuple
        box = tuple(data["bbox"])
        monospace = data.get("is_monospace", False)
        return cls(text, confidence, box, monospace)

    def to_dict(self) -> dict:
        """Return a plain-dict form, with the bounding box as a list."""
        return dict(
            text=self.text,
            confidence=self.confidence,
            bbox=list(self.bbox),
            is_monospace=self.is_monospace,
        )
+ """ + + bbox: tuple[int, int, int, int] # (x1, y1, x2, y2) + frame_type: FrameType = FrameType.OTHER + ocr_text: str = "" + ocr_regions: list[OCRRegion] = field(default_factory=list) + ocr_confidence: float = 0.0 + panel_id: str = "" # e.g. "panel_0_0" (row_col) + + def to_dict(self) -> dict: + return { + "bbox": list(self.bbox), + "frame_type": self.frame_type.value, + "ocr_text": self.ocr_text, + "ocr_regions": [r.to_dict() for r in self.ocr_regions], + "ocr_confidence": self.ocr_confidence, + "panel_id": self.panel_id, + } + + @classmethod + def from_dict(cls, data: dict) -> FrameSubSection: + return cls( + bbox=tuple(data["bbox"]), + frame_type=FrameType(data.get("frame_type", "other")), + ocr_text=data.get("ocr_text", ""), + ocr_regions=[OCRRegion.from_dict(r) for r in data.get("ocr_regions", [])], + ocr_confidence=data.get("ocr_confidence", 0.0), + panel_id=data.get("panel_id", ""), + ) + + +@dataclass +class KeyFrame: + """An extracted video frame with visual analysis results.""" + + timestamp: float + image_path: str + frame_type: FrameType = FrameType.OTHER + scene_change_score: float = 0.0 + ocr_regions: list[OCRRegion] = field(default_factory=list) + ocr_text: str = "" + ocr_confidence: float = 0.0 + width: int = 0 + height: int = 0 + sub_sections: list[FrameSubSection] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "timestamp": self.timestamp, + "image_path": self.image_path, + "frame_type": self.frame_type.value, + "scene_change_score": self.scene_change_score, + "ocr_regions": [r.to_dict() for r in self.ocr_regions], + "ocr_text": self.ocr_text, + "ocr_confidence": self.ocr_confidence, + "width": self.width, + "height": self.height, + "sub_sections": [ss.to_dict() for ss in self.sub_sections], + } + + @classmethod + def from_dict(cls, data: dict) -> KeyFrame: + return cls( + timestamp=data["timestamp"], + image_path=data["image_path"], + frame_type=FrameType(data.get("frame_type", "other")), + 
@dataclass
class TextGroupEdit:
    """A change between two appearances of a text group: lines added, removed and modified."""

    timestamp: float
    added_lines: list[str] = field(default_factory=list)
    removed_lines: list[str] = field(default_factory=list)
    modified_lines: list[dict] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict) -> TextGroupEdit:
        """Build an edit from a mapping; only ``timestamp`` is required."""
        when = data["timestamp"]
        line_lists = {
            key: data.get(key, [])
            for key in ("added_lines", "removed_lines", "modified_lines")
        }
        return cls(timestamp=when, **line_lists)

    def to_dict(self) -> dict:
        """Return the edit as a plain dict."""
        keys = ("timestamp", "added_lines", "removed_lines", "modified_lines")
        return {key: getattr(self, key) for key in keys}
related text blocks tracked across the video. + + Represents a single code file/snippet as it appears and evolves + across multiple video frames. + """ + + group_id: str + appearances: list[tuple[float, float]] = field(default_factory=list) + consensus_lines: list[dict] = field(default_factory=list) + edits: list[TextGroupEdit] = field(default_factory=list) + detected_language: str | None = None + frame_type: FrameType = FrameType.CODE_EDITOR + panel_id: str = "" # Tracks which panel this group originated from + + @property + def full_text(self) -> str: + return "\n".join(line["text"] for line in self.consensus_lines if line.get("text")) + + def to_dict(self) -> dict: + return { + "group_id": self.group_id, + "appearances": [[s, e] for s, e in self.appearances], + "consensus_lines": self.consensus_lines, + "edits": [e.to_dict() for e in self.edits], + "detected_language": self.detected_language, + "frame_type": self.frame_type.value, + "panel_id": self.panel_id, + "full_text": self.full_text, + } + + @classmethod + def from_dict(cls, data: dict) -> TextGroup: + return cls( + group_id=data["group_id"], + appearances=[tuple(a) for a in data.get("appearances", [])], + consensus_lines=data.get("consensus_lines", []), + edits=[TextGroupEdit.from_dict(e) for e in data.get("edits", [])], + detected_language=data.get("detected_language"), + frame_type=FrameType(data.get("frame_type", "code_editor")), + panel_id=data.get("panel_id", ""), + ) + + +@dataclass +class TextGroupTimeline: + """Timeline of all text groups and their lifecycle in the video.""" + + text_groups: list[TextGroup] = field(default_factory=list) + total_code_time: float = 0.0 + total_groups: int = 0 + total_edits: int = 0 + + def get_groups_at_time(self, timestamp: float) -> list[TextGroup]: + """Return all text groups visible at a given timestamp.""" + return [ + tg + for tg in self.text_groups + if any(start <= timestamp <= end for start, end in tg.appearances) + ] + + def to_dict(self) -> dict: + return 
@dataclass
class AudioVisualAlignment:
    """Pairs a text group's on-screen code with the transcript text for the same time span."""

    text_group_id: str
    start_time: float
    end_time: float
    on_screen_code: str
    transcript_during: str
    language: str | None = None

    @classmethod
    def from_dict(cls, data: dict) -> AudioVisualAlignment:
        """Build an alignment from a mapping.

        ``transcript_during`` defaults to an empty string and ``language`` to
        None; the remaining keys are required.
        """
        required = [
            data[key]
            for key in ("text_group_id", "start_time", "end_time", "on_screen_code")
        ]
        narration = data.get("transcript_during", "")
        lang = data.get("language")
        return cls(*required, narration, lang)

    def to_dict(self) -> dict:
        """Return the alignment as a plain dict."""
        payload = {}
        for key in (
            "text_group_id",
            "start_time",
            "end_time",
            "on_screen_code",
            "transcript_during",
            "language",
        ):
            payload[key] = getattr(self, key)
        return payload
field(default_factory=list) + has_code_on_screen: bool = False + has_slides: bool = False + has_diagram: bool = False + + # Stream 3: Metadata + chapter_title: str | None = None + topic: str | None = None + category: str | None = None + + # Merged content + content: str = "" + summary: str | None = None + + # Quality metadata + confidence: float = 0.0 + content_type: SegmentContentType = SegmentContentType.MIXED + + def to_dict(self) -> dict: + return { + "index": self.index, + "start_time": self.start_time, + "end_time": self.end_time, + "duration": self.duration, + "transcript": self.transcript, + "words": [w.to_dict() for w in self.words], + "transcript_confidence": self.transcript_confidence, + "keyframes": [k.to_dict() for k in self.keyframes], + "ocr_text": self.ocr_text, + "detected_code_blocks": [c.to_dict() for c in self.detected_code_blocks], + "has_code_on_screen": self.has_code_on_screen, + "has_slides": self.has_slides, + "has_diagram": self.has_diagram, + "chapter_title": self.chapter_title, + "topic": self.topic, + "category": self.category, + "content": self.content, + "summary": self.summary, + "confidence": self.confidence, + "content_type": self.content_type.value, + } + + @classmethod + def from_dict(cls, data: dict) -> VideoSegment: + return cls( + index=data["index"], + start_time=data["start_time"], + end_time=data["end_time"], + duration=data["duration"], + transcript=data.get("transcript", ""), + words=[WordTimestamp.from_dict(w) for w in data.get("words", [])], + transcript_confidence=data.get("transcript_confidence", 0.0), + keyframes=[KeyFrame.from_dict(k) for k in data.get("keyframes", [])], + ocr_text=data.get("ocr_text", ""), + detected_code_blocks=[ + CodeBlock.from_dict(c) for c in data.get("detected_code_blocks", []) + ], + has_code_on_screen=data.get("has_code_on_screen", False), + has_slides=data.get("has_slides", False), + has_diagram=data.get("has_diagram", False), + chapter_title=data.get("chapter_title"), + 
topic=data.get("topic"), + category=data.get("category"), + content=data.get("content", ""), + summary=data.get("summary"), + confidence=data.get("confidence", 0.0), + content_type=SegmentContentType(data.get("content_type", "mixed")), + ) + + @property + def timestamp_display(self) -> str: + """Human-readable timestamp (e.g., '05:30 - 08:15').""" + start_min, start_sec = divmod(int(self.start_time), 60) + end_min, end_sec = divmod(int(self.end_time), 60) + if self.start_time >= 3600 or self.end_time >= 3600: + start_hr, start_min = divmod(start_min, 60) + end_hr, end_min = divmod(end_min, 60) + return f"{start_hr:d}:{start_min:02d}:{start_sec:02d} - {end_hr:d}:{end_min:02d}:{end_sec:02d}" + return f"{start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d}" + + +@dataclass +class VideoInfo: + """Complete metadata and extracted content for a single video.""" + + # Identity + video_id: str + source_type: VideoSourceType + source_url: str | None = None + file_path: str | None = None + + # Basic metadata + title: str = "" + description: str = "" + duration: float = 0.0 + upload_date: str | None = None + language: str = "en" + + # Channel / Author + channel_name: str | None = None + channel_url: str | None = None + + # Engagement metadata + view_count: int | None = None + like_count: int | None = None + comment_count: int | None = None + + # Discovery metadata + tags: list[str] = field(default_factory=list) + categories: list[str] = field(default_factory=list) + thumbnail_url: str | None = None + + # Structure + chapters: list[Chapter] = field(default_factory=list) + + # Playlist context + playlist_title: str | None = None + playlist_index: int | None = None + playlist_total: int | None = None + + # Extracted content + raw_transcript: list[TranscriptSegment] = field(default_factory=list) + segments: list[VideoSegment] = field(default_factory=list) + + # Processing metadata + transcript_source: TranscriptSource = TranscriptSource.NONE + visual_extraction_enabled: 
bool = False + whisper_model: str | None = None + processing_time_seconds: float = 0.0 + extracted_at: str = "" + + # Quality scores + transcript_confidence: float = 0.0 + content_richness_score: float = 0.0 + + # Consensus-based text tracking (Phase A-D) + text_group_timeline: TextGroupTimeline | None = None + audio_visual_alignments: list[AudioVisualAlignment] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "video_id": self.video_id, + "source_type": self.source_type.value, + "source_url": self.source_url, + "file_path": self.file_path, + "title": self.title, + "description": self.description, + "duration": self.duration, + "upload_date": self.upload_date, + "language": self.language, + "channel_name": self.channel_name, + "channel_url": self.channel_url, + "view_count": self.view_count, + "like_count": self.like_count, + "comment_count": self.comment_count, + "tags": self.tags, + "categories": self.categories, + "thumbnail_url": self.thumbnail_url, + "chapters": [c.to_dict() for c in self.chapters], + "playlist_title": self.playlist_title, + "playlist_index": self.playlist_index, + "playlist_total": self.playlist_total, + "raw_transcript": [t.to_dict() for t in self.raw_transcript], + "segments": [s.to_dict() for s in self.segments], + "transcript_source": self.transcript_source.value, + "visual_extraction_enabled": self.visual_extraction_enabled, + "whisper_model": self.whisper_model, + "processing_time_seconds": self.processing_time_seconds, + "extracted_at": self.extracted_at, + "transcript_confidence": self.transcript_confidence, + "content_richness_score": self.content_richness_score, + "text_group_timeline": self.text_group_timeline.to_dict() + if self.text_group_timeline + else None, + "audio_visual_alignments": [a.to_dict() for a in self.audio_visual_alignments], + } + + @classmethod + def from_dict(cls, data: dict) -> VideoInfo: + timeline_data = data.get("text_group_timeline") + timeline = 
@dataclass
class VideoSourceConfig:
    """Configuration for video source processing."""

    # Source specification (exactly one should be set)
    url: str | None = None
    playlist: str | None = None
    channel: str | None = None
    path: str | None = None
    directory: str | None = None

    # Identity
    name: str = "video"
    description: str = ""

    # Filtering
    max_videos: int = 50
    languages: list[str] | None = None

    # Extraction
    visual_extraction: bool = False
    whisper_model: str = "base"

    # Segmentation
    time_window_seconds: float = 120.0
    min_segment_duration: float = 10.0
    max_segment_duration: float = 600.0

    # Categorization
    categories: dict[str, list[str]] | None = None

    # Subtitle files
    subtitle_patterns: list[str] | None = None

    @classmethod
    def from_dict(cls, data: dict) -> VideoSourceConfig:
        """Build a config from a mapping; every key is optional and falls back to the field default."""
        return cls(
            url=data.get("url"),
            playlist=data.get("playlist"),
            channel=data.get("channel"),
            path=data.get("path"),
            directory=data.get("directory"),
            name=data.get("name", "video"),
            description=data.get("description", ""),
            max_videos=data.get("max_videos", 50),
            languages=data.get("languages"),
            visual_extraction=data.get("visual_extraction", False),
            whisper_model=data.get("whisper_model", "base"),
            time_window_seconds=data.get("time_window_seconds", 120.0),
            min_segment_duration=data.get("min_segment_duration", 10.0),
            max_segment_duration=data.get("max_segment_duration", 600.0),
            categories=data.get("categories"),
            subtitle_patterns=data.get("subtitle_patterns"),
        )

    def validate(self) -> list[str]:
        """Validate configuration.

        Checks that exactly one of ``url``, ``playlist``, ``channel``,
        ``path`` and ``directory`` is specified. Empty or whitespace-only
        values count as unspecified.

        Returns:
            List of error messages; empty when the configuration is valid.
        """
        candidates = (self.url, self.playlist, self.channel, self.path, self.directory)
        # A blank string names no source, so it must not count as "set"
        sources_set = sum(
            1 for value in candidates if value is not None and str(value).strip()
        )

        errors = []
        if sources_set == 0:
            errors.append(
                "Video source must specify one of: url, playlist, channel, path, directory"
            )
        if sources_set > 1:
            errors.append("Video source must specify exactly one source type")
        return errors
converts them into Claude AI skills. + +Supports YouTube videos/playlists, Vimeo, and local video files. + +Usage: + python3 video_scraper.py --url https://www.youtube.com/watch?v=... + python3 video_scraper.py --video-file recording.mp4 + python3 video_scraper.py --playlist https://www.youtube.com/playlist?list=... + python3 video_scraper.py --from-json video_extracted.json +""" + +import argparse +import json +import logging +import os +import re +import sys +import time + +from skill_seekers.cli.video_models import ( + AudioVisualAlignment, + TextGroupTimeline, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSourceConfig, + VideoSourceType, +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Dependency Guard +# ============================================================================= + +# Core video deps are optional +try: + import yt_dlp # noqa: F401 + + HAS_YTDLP = True +except ImportError: + HAS_YTDLP = False + +try: + from youtube_transcript_api import YouTubeTranscriptApi # noqa: F401 + + HAS_YOUTUBE_TRANSCRIPT = True +except ImportError: + HAS_YOUTUBE_TRANSCRIPT = False + + +def check_video_dependencies(require_full: bool = False) -> None: + """Check that required video dependencies are available. + + Args: + require_full: If True, also check Tier 2 deps (Whisper, OpenCV, etc.) + + Raises: + RuntimeError: If required dependencies are missing. 
+ """ + missing = [] + if not HAS_YTDLP: + missing.append("yt-dlp") + if not HAS_YOUTUBE_TRANSCRIPT: + missing.append("youtube-transcript-api") + + if require_full: + try: + import cv2 # noqa: F401 + except ImportError: + missing.append("opencv-python-headless") + try: + import faster_whisper # noqa: F401 + except ImportError: + missing.append("faster-whisper") + + if missing: + deps = ", ".join(missing) + extra = "[video-full]" if require_full else "[video]" + raise RuntimeError( + f"Missing video dependencies: {deps}\n" + f'Install with: pip install "skill-seekers{extra}"\n' + f"Or: pip install {' '.join(missing)}" + ) + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def _sanitize_filename(title: str, max_length: int = 60) -> str: + """Sanitize a video title for use as a filename.""" + name = title.lower() + name = re.sub(r"[^a-z0-9\s-]", "", name) + name = re.sub(r"[\s]+", "-", name) + name = re.sub(r"-+", "-", name) + name = name.strip("-") + return name[:max_length] + + +def _format_duration(seconds: float) -> str: + """Format seconds as HH:MM:SS or MM:SS.""" + total = int(seconds) + hours, remainder = divmod(total, 3600) + minutes, secs = divmod(remainder, 60) + if hours > 0: + return f"{hours}:{minutes:02d}:{secs:02d}" + return f"{minutes:02d}:{secs:02d}" + + +def _format_count(count: int | None) -> str: + """Format a count with commas.""" + if count is None: + return "N/A" + return f"{count:,}" + + +def infer_description_from_video(video_info: VideoInfo, name: str = "") -> str: + """Infer skill description from video metadata.""" + if video_info.description: + desc = video_info.description[:150].strip() + if len(video_info.description) > 150: + desc += "..." 
+ return f"Use when {desc.lower()}" + if video_info.title: + return f"Use when working with {video_info.title.lower()}" + return ( + f"Use when referencing {name} video content" + if name + else "Use when referencing this video content" + ) + + +# ============================================================================= +# Audio-Visual Alignment +# ============================================================================= + + +def _build_audio_visual_alignments( + timeline: TextGroupTimeline, + transcript_segments: list, +) -> list[AudioVisualAlignment]: + """Build audio-visual alignments pairing on-screen code with transcript. + + For each text group appearance, finds overlapping transcript segments + and pairs them into AudioVisualAlignment objects. + + Args: + timeline: TextGroupTimeline with text groups and appearances. + transcript_segments: List of TranscriptSegment objects. + + Returns: + List of AudioVisualAlignment objects. + """ + alignments: list[AudioVisualAlignment] = [] + + for group in timeline.text_groups: + for start, end in group.appearances: + # Find overlapping transcript segments + overlapping_text = [] + for seg in transcript_segments: + seg_start = seg.start + seg_end = seg.end + # Check overlap + if seg_end > start and seg_start < end: + overlapping_text.append(seg.text) + + transcript_during = " ".join(overlapping_text).strip() + if not transcript_during: + continue + + alignments.append( + AudioVisualAlignment( + text_group_id=group.group_id, + start_time=start, + end_time=end, + on_screen_code=group.full_text, + transcript_during=transcript_during, + language=group.detected_language, + ) + ) + + return alignments + + +# ============================================================================= +# Main Converter Class +# ============================================================================= + + +class VideoToSkillConverter: + """Convert video content to Claude skill.""" + + def __init__(self, config: dict): + """Initialize 
converter. + + Args: + config: Configuration dict with keys: + - name: Skill name + - url/video_file/playlist: Video source + - description: Optional description + - languages: Optional language preferences + - visual: Whether to enable visual extraction + - whisper_model: Whisper model size + """ + self.config = config + self.name = config["name"] + self.description = config.get("description", "") + self.languages = (config.get("languages") or "en").split(",") + self.visual = config.get("visual", False) + self.whisper_model = config.get("whisper_model", "base") + self.visual_interval = config.get("visual_interval", 0.7) + self.visual_min_gap = config.get("visual_min_gap", 0.5) + self.visual_similarity = config.get("visual_similarity", 3.0) + self.vision_ocr = config.get("vision_ocr", False) + + # Paths + self.skill_dir = config.get("output") or f"output/{self.name}" + self.data_file = f"output/{self.name}_video_extracted.json" + + # Results + self.result: VideoScraperResult | None = None + + def process(self) -> VideoScraperResult: + """Run the full video processing pipeline. + + Returns: + VideoScraperResult with all extracted data. 
+ """ + from skill_seekers.cli.video_metadata import ( + detect_video_source_type, + extract_local_metadata, + extract_youtube_metadata, + resolve_playlist, + ) + from skill_seekers.cli.video_segmenter import segment_video + from skill_seekers.cli.video_transcript import get_transcript + + start_time = time.time() + + # Validate visual deps upfront so we fail fast + if self.visual: + check_video_dependencies(require_full=True) + from skill_seekers.cli.video_visual import check_visual_dependencies + + deps = check_visual_dependencies() + missing = [name for name, available in deps.items() if not available] + if missing: + raise RuntimeError( + f"Visual extraction requires: {', '.join(missing)}\n" + 'Install with: pip install "skill-seekers[video-full]"\n' + "Or: pip install opencv-python-headless scenedetect easyocr" + ) + + source_config = VideoSourceConfig( + name=self.name, + description=self.description, + languages=self.languages, + visual_extraction=self.visual, + whisper_model=self.whisper_model, + ) + + videos: list[VideoInfo] = [] + warnings: list[str] = [] + errors: list[dict] = [] + + # Determine source URLs + urls_or_paths = [] + if self.config.get("playlist"): + logger.info("Resolving playlist...") + try: + check_video_dependencies() + urls_or_paths = resolve_playlist(self.config["playlist"]) + logger.info(f"Found {len(urls_or_paths)} videos in playlist") + except Exception as e: + errors.append({"source": self.config["playlist"], "error": str(e)}) + logger.error(f"Failed to resolve playlist: {e}") + elif self.config.get("url"): + urls_or_paths = [self.config["url"]] + elif self.config.get("video_file"): + urls_or_paths = [self.config["video_file"]] + + # Process each video + for i, source in enumerate(urls_or_paths): + logger.info(f"[{i + 1}/{len(urls_or_paths)}] Processing: {source}") + try: + source_type = detect_video_source_type(source) + + # Extract metadata + if source_type == VideoSourceType.YOUTUBE: + check_video_dependencies() + video_info = 
extract_youtube_metadata(source) + else: + video_info = extract_local_metadata(source) + + # Extract transcript + transcript_segments, transcript_source = get_transcript(video_info, source_config) + video_info.raw_transcript = transcript_segments + video_info.transcript_source = transcript_source + + if not transcript_segments: + warnings.append(f"No transcript available for '{video_info.title}'") + + # Compute transcript confidence + if transcript_segments: + video_info.transcript_confidence = sum( + s.confidence for s in transcript_segments + ) / len(transcript_segments) + + if transcript_source == TranscriptSource.YOUTUBE_AUTO: + video_info.transcript_confidence *= 0.8 + + # Segment video + segments = segment_video(video_info, transcript_segments, source_config) + video_info.segments = segments + + # Visual extraction (Tier 2) + if self.visual: + from skill_seekers.cli.video_visual import ( + download_video, + extract_visual_data, + ) + + video_path = video_info.file_path + temp_video_dir = None + + # Download if remote (YouTube/Vimeo) + if not video_path or not os.path.exists(video_path): + import tempfile as _tmpmod + + temp_video_dir = _tmpmod.mkdtemp(prefix="ss_video_") + video_path = download_video(source, temp_video_dir) + + if video_path and os.path.exists(video_path): + keyframes, code_blocks, timeline = extract_visual_data( + video_path, + segments, + self.skill_dir, + sample_interval=self.visual_interval, + min_gap=self.visual_min_gap, + similarity_threshold=self.visual_similarity, + use_vision_api=self.vision_ocr, + ) + # Attach keyframes to segments + for kf in keyframes: + for seg in segments: + if seg.start_time <= kf.timestamp < seg.end_time: + seg.keyframes.append(kf) + break + # Assign code blocks to segments by timestamp + for cb in code_blocks: + for seg in segments: + if seg.start_time <= cb.source_frame < seg.end_time: + seg.detected_code_blocks.append(cb) + seg.has_code_on_screen = True + break + # Set timeline and build audio-visual 
alignments + video_info.text_group_timeline = timeline + if timeline: + video_info.audio_visual_alignments = _build_audio_visual_alignments( + timeline, video_info.raw_transcript + ) + logger.info( + f" Visual: {len(keyframes)} keyframes extracted, " + f"{sum(1 for kf in keyframes if kf.ocr_text)} with OCR text, " + f"{len(code_blocks)} code blocks detected" + ) + else: + warnings.append(f"Could not download video for visual extraction: {source}") + + # Clean up temp download + if temp_video_dir: + import shutil + + shutil.rmtree(temp_video_dir, ignore_errors=True) + + # Set processing metadata + video_info.extracted_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + video_info.visual_extraction_enabled = self.visual + video_info.processing_time_seconds = time.time() - start_time + + videos.append(video_info) + visual_msg = "" + if self.visual: + total_kf = sum(len(s.keyframes) for s in segments) + total_ocr = sum(1 for s in segments for kf in s.keyframes if kf.ocr_text) + visual_msg = f", {total_kf} keyframes, {total_ocr} with OCR" + logger.info( + f" => {len(segments)} segments, " + f"{len(transcript_segments)} transcript chunks, " + f"source: {transcript_source.value}{visual_msg}" + ) + + except Exception as e: + errors.append({"source": source, "error": str(e)}) + logger.error(f"Failed to process {source}: {e}") + logger.debug("Traceback:", exc_info=True) + + # Build result + total_duration = sum(v.duration for v in videos) + total_segments = sum(len(v.segments) for v in videos) + total_code_blocks = sum( + sum(len(s.detected_code_blocks) for s in v.segments) for v in videos + ) + + self.result = VideoScraperResult( + videos=videos, + total_duration_seconds=total_duration, + total_segments=total_segments, + total_code_blocks=total_code_blocks, + config=source_config, + processing_time_seconds=time.time() - start_time, + warnings=warnings, + errors=errors, + ) + + return self.result + + def save_extracted_data(self) -> str: + """Save extracted data to 
def build_skill(self) -> str:
    """Build skill directory with SKILL.md and reference files.

    Writes one ``references/video_<title>.md`` per video, a
    ``video_data/metadata.json`` dump of the whole result, and ``SKILL.md``
    under ``self.skill_dir``, creating directories as needed.

    Returns:
        Path to skill directory.

    Raises:
        RuntimeError: If no result has been produced or loaded yet.
    """
    if self.result is None:
        raise RuntimeError(
            "No data to build from. Run process() or load_extracted_data() first."
        )

    # Create directories
    refs_dir = os.path.join(self.skill_dir, "references")
    video_data_dir = os.path.join(self.skill_dir, "video_data")
    os.makedirs(refs_dir, exist_ok=True)
    os.makedirs(video_data_dir, exist_ok=True)

    # Generate reference files for each video
    written: dict[str, str] = {}  # reference filename -> title that produced it
    for video in self.result.videos:
        ref_filename = f"video_{_sanitize_filename(video.title)}.md"
        if ref_filename in written:
            # Distinct titles can sanitize to the same stem (for example when
            # they differ only in characters that get stripped). The later
            # file replaces the earlier one, so make that visible.
            logger.warning(
                f"Reference file {ref_filename} for '{video.title}' overwrites "
                f"the one written for '{written[ref_filename]}'"
            )
        written[ref_filename] = video.title
        ref_path = os.path.join(refs_dir, ref_filename)
        ref_content = self._generate_reference_md(video)
        with open(ref_path, "w", encoding="utf-8") as f:
            f.write(ref_content)

    # Save metadata JSON
    metadata_path = os.path.join(video_data_dir, "metadata.json")
    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(self.result.to_dict(), f, indent=2, ensure_ascii=False)

    # Generate SKILL.md
    skill_md = self._generate_skill_md()
    skill_path = os.path.join(self.skill_dir, "SKILL.md")
    with open(skill_path, "w", encoding="utf-8") as f:
        f.write(skill_md)

    logger.info(f"Built skill at {self.skill_dir}")
    logger.info(f" {len(self.result.videos)} videos, {self.result.total_segments} segments")
    return self.skill_dir
{_format_count(video.view_count)}") + if video.like_count is not None: + engagement_parts.append(f"**Likes:** {_format_count(video.like_count)}") + if engagement_parts: + lines.append("> " + " | ".join(engagement_parts)) + + if video.tags: + lines.append(f"> **Tags:** {', '.join(video.tags[:10])}") + + lines.append("") + + # Description summary + if video.description: + desc = video.description[:300] + if len(video.description) > 300: + desc += "..." + lines.append(desc) + lines.append("") + + lines.append("---\n") + + # Table of contents (from chapters or segments) + if video.segments: + lines.append("## Table of Contents\n") + for seg in video.segments: + label = seg.chapter_title or f"Segment {seg.index + 1}" + lines.append( + f"- [{label}](#{_sanitize_filename(label)}-{seg.timestamp_display.replace(' ', '')})" + ) + lines.append("\n---\n") + + # Segments as sections + for seg in video.segments: + lines.append(seg.content) + + # Visual data (keyframes + OCR) + if seg.keyframes: + for kf in seg.keyframes: + if kf.image_path and os.path.exists(kf.image_path): + rel_path = os.path.relpath( + kf.image_path, + os.path.dirname(os.path.join(self.skill_dir, "references", "x.md")), + ) + lines.append( + f"\n> **Frame** ({kf.frame_type.value} at {_format_duration(kf.timestamp)}):" + ) + lines.append(f"> ![keyframe]({rel_path})") + if kf.sub_sections: + from skill_seekers.cli.video_models import FrameType + + lang_hint = "" + if seg.detected_code_blocks: + for cb in seg.detected_code_blocks: + if cb.language: + lang_hint = cb.language + break + for ss in kf.sub_sections: + if ( + ss.frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL) + and ss.ocr_text + ): + lines.append(f"\n```{lang_hint}") + lines.append(ss.ocr_text) + lines.append("```") + elif kf.ocr_text: + from skill_seekers.cli.video_models import FrameType + + if kf.frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL): + lang_hint = "" + if seg.detected_code_blocks: + for cb in seg.detected_code_blocks: 
+ if cb.language: + lang_hint = cb.language + break + lines.append(f"\n```{lang_hint}") + lines.append(kf.ocr_text) + lines.append("```") + elif kf.frame_type == FrameType.SLIDE: + for text_line in kf.ocr_text.split("\n"): + if text_line.strip(): + lines.append(f"> {text_line}") + else: + lines.append(f"> **On-screen text:** {kf.ocr_text}") + + # Detected code blocks subsection + if seg.detected_code_blocks: + lines.append("\n#### Detected Code\n") + for cb in seg.detected_code_blocks: + lang_label = cb.language or "unknown" + context_label = cb.context.value if cb.context else "unknown" + lines.append( + f"**{lang_label}** ({context_label} at " + f"{_format_duration(cb.source_frame)}):\n" + ) + lines.append(f"```{cb.language or ''}") + lines.append(cb.code) + lines.append("```\n") + + lines.append("\n---\n") + + # Code Timeline section (from text groups) + if video.text_group_timeline and video.text_group_timeline.text_groups: + tl = video.text_group_timeline + lines.append("\n## Code Timeline\n") + lines.append( + f"> {tl.total_groups} code groups tracked, " + f"{tl.total_edits} edits detected, " + f"{tl.total_code_time:.0f}s of on-screen code\n" + ) + + for group in tl.text_groups: + lang_hint = group.detected_language or "" + lines.append(f"### {group.group_id}") + appearance_strs = [] + for start, end in group.appearances: + appearance_strs.append(f"{_format_duration(start)} - {_format_duration(end)}") + lines.append(f"**Appearances:** {', '.join(appearance_strs)}\n") + + lines.append(f"```{lang_hint}") + lines.append(group.full_text) + lines.append("```\n") + + if group.edits: + lines.append("**Edits:**\n") + for edit in group.edits: + lines.append(f"- At {_format_duration(edit.timestamp)}:") + for line in edit.added_lines: + lines.append(f" + `{line}`") + for line in edit.removed_lines: + lines.append(f" - `{line}`") + for mod in edit.modified_lines: + lines.append( + f" ~ L{mod.get('line_num', '?')}: " + f"`{mod.get('old', '')}` → `{mod.get('new', '')}`" + 
def _generate_skill_md(self) -> str:
    """Render the top-level SKILL.md document as one markdown string.

    Sections, in order: title and description, an overview with totals, one
    entry per video linking to its reference file, optional notes built from
    the result's warnings, and a final list of references.
    """
    result = self.result
    all_videos = result.videos

    # An explicit description wins; otherwise infer one from the first video
    # (or from a placeholder VideoInfo when nothing was processed).
    if self.description:
        summary = self.description
    else:
        if all_videos:
            sample = all_videos[0]
        else:
            sample = VideoInfo(video_id="none", source_type=VideoSourceType.YOUTUBE)
        summary = infer_description_from_video(sample, self.name)

    out = [f"# {self.name}\n", f"{summary}\n"]

    # Overview
    total_dur = _format_duration(result.total_duration_seconds)
    out.append("## Overview\n")
    overview = (
        f"This skill includes knowledge extracted from "
        f"{len(all_videos)} video(s) totaling {total_dur} of content."
    )

    # Visual extraction summary, only mentioned when keyframes exist
    every_segment = [seg for vid in all_videos for seg in vid.segments]
    keyframe_count = sum(len(seg.keyframes) for seg in every_segment)
    ocr_count = sum(1 for seg in every_segment for frame in seg.keyframes if frame.ocr_text)
    code_count = sum(len(seg.detected_code_blocks) for seg in every_segment)
    if keyframe_count > 0:
        overview += (
            f"\nVisual extraction: {keyframe_count} keyframes, {ocr_count} with on-screen text"
        )
        if code_count > 0:
            overview += f", {code_count} code blocks detected"
        overview += "."
    out.append(f"{overview}\n")

    # Video tutorials section
    out.append("## Video Tutorials\n")
    for vid in all_videos:
        out.append(f"### {vid.title}")

        source_bits = []
        if vid.channel_name:
            if vid.source_url:
                source_bits.append(f"[{vid.channel_name}]({vid.source_url})")
            else:
                source_bits.append(vid.channel_name)
        if vid.duration > 0:
            source_bits.append(_format_duration(vid.duration))
        if vid.view_count is not None:
            source_bits.append(f"{_format_count(vid.view_count)} views")
        if source_bits:
            out.append(f"**Source:** {' | '.join(source_bits)}\n")

        # Topics covered
        chapter_names = [seg.chapter_title for seg in vid.segments if seg.chapter_title]
        if chapter_names:
            out.append(f"**Topics covered:** {', '.join(chapter_names)}\n")

        # First segment preview, cut at 200 characters
        if vid.segments and vid.segments[0].transcript:
            opening = vid.segments[0].transcript
            teaser = opening[:200]
            if len(opening) > 200:
                teaser += "..."
            out.append(f"{teaser}\n")

        ref_filename = f"video_{_sanitize_filename(vid.title)}.md"
        out.append(
            f"> Full transcript: [references/{ref_filename}](references/{ref_filename})\n"
        )
        out.append("---\n")

    # Warnings
    if result.warnings:
        out.append("## Notes\n")
        out.extend(f"- {warning}" for warning in result.warnings)
        out.append("")

    # References
    out.append("## References\n")
    for vid in all_videos:
        ref_filename = f"video_{_sanitize_filename(vid.title)}.md"
        out.append(f"- [{vid.title}](references/{ref_filename})")

    return "\n".join(out)
--languages en,es +""", + ) + + add_video_arguments(parser) + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else (logging.WARNING if args.quiet else logging.INFO) + logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s") + + # Validate inputs + has_source = any( + [ + getattr(args, "url", None), + getattr(args, "video_file", None), + getattr(args, "playlist", None), + ] + ) + has_json = getattr(args, "from_json", None) + + if not has_source and not has_json: + parser.error("Must specify --url, --video-file, --playlist, or --from-json") + + # Build config + config = { + "name": args.name or "video_skill", + "description": getattr(args, "description", None) or "", + "output": getattr(args, "output", None), + "url": getattr(args, "url", None), + "video_file": getattr(args, "video_file", None), + "playlist": getattr(args, "playlist", None), + "languages": getattr(args, "languages", "en"), + "visual": getattr(args, "visual", False), + "whisper_model": getattr(args, "whisper_model", "base"), + "visual_interval": getattr(args, "visual_interval", 0.7), + "visual_min_gap": getattr(args, "visual_min_gap", 0.5), + "visual_similarity": getattr(args, "visual_similarity", 3.0), + "vision_ocr": getattr(args, "vision_ocr", False), + } + + converter = VideoToSkillConverter(config) + + # Dry run + if args.dry_run: + logger.info("DRY RUN — would process:") + for key in ["url", "video_file", "playlist"]: + if config.get(key): + logger.info(f" {key}: {config[key]}") + logger.info(f" name: {config['name']}") + logger.info(f" languages: {config['languages']}") + logger.info(f" visual: {config['visual']}") + return 0 + + # Workflow 1: Build from JSON + if has_json: + logger.info(f"Loading extracted data from {args.from_json}") + converter.load_extracted_data(args.from_json) + converter.build_skill() + logger.info(f"Skill built at {converter.skill_dir}") + return 0 + + # Workflow 2: Full extraction + build + try: + result = 
def _run_video_enhancement(skill_dir: str, enhance_level: int, args) -> None:
    """Invoke the ``skill-seekers-enhance`` command on a freshly built skill.

    When no API credential is found (neither ``ANTHROPIC_API_KEY`` nor
    ``ANTHROPIC_AUTH_TOKEN`` in the environment, nor ``args.api_key``), only a
    hint on how to run the enhancement manually is logged. Failures of the
    subprocess are logged as warnings and never raised.

    Args:
        skill_dir: Directory of the built skill, passed to the command.
        enhance_level: Value forwarded as ``--enhance-level``.
        args: Parsed CLI namespace; only its optional ``api_key`` is read.
    """
    import os
    import subprocess

    cli_key = getattr(args, "api_key", None)
    credential = (
        os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN") or cli_key
    )

    if not credential:
        logger.info("\n💡 Enhance your video skill with AI:")
        logger.info(" export ANTHROPIC_API_KEY=sk-ant-...")
        logger.info(f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}")
        return

    logger.info(f"\n🤖 Running video-aware SKILL.md enhancement (level {enhance_level})...")

    command = ["skill-seekers-enhance", skill_dir, "--enhance-level", str(enhance_level)]
    if cli_key:
        command += ["--api-key", cli_key]

    try:
        # check=True raises on a non-zero exit, so reaching the next line
        # already means the command succeeded.
        subprocess.run(command, check=True)
        logger.info("✅ Video skill enhancement complete!")
    except subprocess.CalledProcessError:
        logger.warning("⚠ Enhancement failed, but skill was still built")
    except FileNotFoundError:
        logger.warning("⚠ skill-seekers-enhance not found. Run manually:")
        logger.info(f" skill-seekers enhance {skill_dir} --enhance-level {enhance_level}")
Time-window segmentation (fallback — fixed-duration windows) +""" + +import logging + +from skill_seekers.cli.video_models import ( + SegmentContentType, + TranscriptSegment, + VideoInfo, + VideoSegment, + VideoSourceConfig, +) + +logger = logging.getLogger(__name__) + + +def _classify_content_type(transcript: str) -> SegmentContentType: + """Classify segment content type based on transcript text.""" + lower = transcript.lower() + + code_indicators = ["import ", "def ", "class ", "function ", "const ", "npm ", "pip ", "git "] + intro_indicators = ["welcome", "hello", "today we", "in this video", "let's get started"] + outro_indicators = ["thanks for watching", "subscribe", "see you next", "that's it for"] + + if any(kw in lower for kw in outro_indicators): + return SegmentContentType.OUTRO + if any(kw in lower for kw in intro_indicators): + return SegmentContentType.INTRO + if sum(1 for kw in code_indicators if kw in lower) >= 2: + return SegmentContentType.LIVE_CODING + + return SegmentContentType.EXPLANATION + + +def _build_segment_content( + transcript: str, + chapter_title: str | None, + start_time: float, + end_time: float, +) -> str: + """Build merged content string for a segment.""" + parts = [] + + # Add chapter heading + start_min, start_sec = divmod(int(start_time), 60) + end_min, end_sec = divmod(int(end_time), 60) + ts = f"{start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d}" + + if chapter_title: + parts.append(f"### {chapter_title} ({ts})\n") + else: + parts.append(f"### Segment ({ts})\n") + + if transcript: + parts.append(transcript) + + return "\n".join(parts) + + +def _get_transcript_in_range( + transcript_segments: list[TranscriptSegment], + start_time: float, + end_time: float, +) -> tuple[str, float]: + """Get concatenated transcript text and average confidence for a time range. + + Returns: + Tuple of (text, avg_confidence). 
def segment_by_time_window(
    video_info: VideoInfo,
    transcript_segments: list[TranscriptSegment],
    window_seconds: float = 120.0,
) -> list[VideoSegment]:
    """Segment video using fixed time windows.

    The timeline from 0 to the video duration is cut into consecutive windows
    of ``window_seconds`` (the last one may be shorter). Windows whose
    transcript is empty or whitespace-only are skipped, so the returned
    segments are indexed consecutively but need not be contiguous in time.

    Args:
        video_info: Video metadata. When its ``duration`` is not positive,
            the end of the latest transcript segment is used instead.
        transcript_segments: Raw transcript segments.
        window_seconds: Duration of each window in seconds; must be positive.

    Returns:
        List of VideoSegment objects (empty when no duration can be
        determined).

    Raises:
        ValueError: If ``window_seconds`` is not a positive number.
    """
    # A zero or negative window would never advance current_time below and
    # the loop would not terminate, so reject it up front.
    if not window_seconds > 0:
        raise ValueError(f"window_seconds must be positive, got {window_seconds!r}")

    segments = []
    duration = video_info.duration

    if duration <= 0 and transcript_segments:
        duration = max(seg.end for seg in transcript_segments)

    if duration <= 0:
        return segments

    current_time = 0.0
    index = 0

    while current_time < duration:
        end_time = min(current_time + window_seconds, duration)

        transcript, confidence = _get_transcript_in_range(
            transcript_segments, current_time, end_time
        )

        if transcript.strip():
            content_type = _classify_content_type(transcript)
            content = _build_segment_content(transcript, None, current_time, end_time)

            segments.append(
                VideoSegment(
                    index=index,
                    start_time=current_time,
                    end_time=end_time,
                    duration=end_time - current_time,
                    transcript=transcript,
                    transcript_confidence=confidence,
                    content=content,
                    confidence=confidence,
                    content_type=content_type,
                )
            )
            index += 1

        current_time = end_time

    return segments
+ """ + # Use chapters if available + if video_info.chapters: + logger.info(f"Using chapter-based segmentation ({len(video_info.chapters)} chapters)") + segments = segment_by_chapters(video_info, transcript_segments) + if segments: + return segments + + # Fallback to time-window + window = config.time_window_seconds + logger.info(f"Using time-window segmentation ({window}s windows)") + return segment_by_time_window(video_info, transcript_segments, window) diff --git a/src/skill_seekers/cli/video_transcript.py b/src/skill_seekers/cli/video_transcript.py new file mode 100644 index 0000000..c527fac --- /dev/null +++ b/src/skill_seekers/cli/video_transcript.py @@ -0,0 +1,370 @@ +"""Video transcript extraction module. + +Handles all transcript acquisition: +- YouTube captions via youtube-transcript-api (Tier 1) +- Subtitle file parsing: SRT and VTT (Tier 1) +- Whisper ASR stub (Tier 2 — raises ImportError with install instructions) +""" + +import logging +import re +from pathlib import Path + +from skill_seekers.cli.video_models import ( + TranscriptSegment, + TranscriptSource, + VideoInfo, + VideoSourceConfig, + VideoSourceType, +) + +logger = logging.getLogger(__name__) + +# Optional dependency: youtube-transcript-api +try: + from youtube_transcript_api import YouTubeTranscriptApi + + HAS_YOUTUBE_TRANSCRIPT = True +except ImportError: + HAS_YOUTUBE_TRANSCRIPT = False + +# Optional dependency: faster-whisper (Tier 2) +try: + from faster_whisper import WhisperModel # noqa: F401 + + HAS_WHISPER = True +except ImportError: + HAS_WHISPER = False + + +# ============================================================================= +# YouTube Transcript Extraction (Tier 1) +# ============================================================================= + + +def extract_youtube_transcript( + video_id: str, + languages: list[str] | None = None, +) -> tuple[list[TranscriptSegment], TranscriptSource]: + """Fetch YouTube captions via youtube-transcript-api. 
+ + Args: + video_id: YouTube video ID (11 chars). + languages: Language preference list (e.g., ['en', 'tr']). + + Returns: + Tuple of (transcript segments, source type). + + Raises: + RuntimeError: If youtube-transcript-api is not installed. + """ + if not HAS_YOUTUBE_TRANSCRIPT: + raise RuntimeError( + "youtube-transcript-api is required for YouTube transcript extraction.\n" + 'Install with: pip install "skill-seekers[video]"\n' + "Or: pip install youtube-transcript-api" + ) + + if languages is None: + languages = ["en"] + + try: + ytt_api = YouTubeTranscriptApi() + transcript = ytt_api.fetch(video_id, languages=languages) + + segments = [] + source = TranscriptSource.YOUTUBE_MANUAL + for snippet in transcript.snippets: + text = snippet.text.strip() + if not text: + continue + start = snippet.start + duration = snippet.duration + segments.append( + TranscriptSegment( + text=text, + start=start, + end=start + duration, + confidence=1.0, + source=source, + ) + ) + + if not segments: + return [], TranscriptSource.NONE + + return segments, source + + except Exception as e: + logger.warning(f"Failed to fetch YouTube transcript for {video_id}: {e}") + return [], TranscriptSource.NONE + + +# ============================================================================= +# Subtitle File Parsing (Tier 1) +# ============================================================================= + + +def _parse_timestamp_srt(ts: str) -> float: + """Parse SRT timestamp (HH:MM:SS,mmm) to seconds.""" + ts = ts.strip().replace(",", ".") + parts = ts.split(":") + if len(parts) == 3: + h, m, s = parts + return int(h) * 3600 + int(m) * 60 + float(s) + return 0.0 + + +def _parse_timestamp_vtt(ts: str) -> float: + """Parse VTT timestamp (HH:MM:SS.mmm or MM:SS.mmm) to seconds.""" + ts = ts.strip() + parts = ts.split(":") + if len(parts) == 3: + h, m, s = parts + return int(h) * 3600 + int(m) * 60 + float(s) + elif len(parts) == 2: + m, s = parts + return int(m) * 60 + float(s) + return 0.0 + + 
def parse_vtt(path: str) -> list[TranscriptSegment]:
    """Parse a WebVTT subtitle file into TranscriptSegments.

    Everything before the first line that starts with a timestamp is treated
    as header/metadata and skipped. Each timing line (``start --> end``)
    opens a cue; the cue ends at the next blank line, the next timing line,
    or end of file. Markup tags (``<...>``) are stripped from cue text.

    Args:
        path: Path to .vtt file.

    Returns:
        List of TranscriptSegment objects.
    """
    content = Path(path).read_text(encoding="utf-8", errors="replace")
    lines = content.strip().split("\n")
    segments: list[TranscriptSegment] = []

    def _flush_cue(text_lines: list[str], start: float, end: float) -> None:
        """Append one segment for the buffered cue text, if any text remains."""
        text = re.sub(r"<[^>]+>", "", " ".join(text_lines)).strip()
        if text:
            segments.append(
                TranscriptSegment(
                    text=text,
                    start=start,
                    end=end,
                    confidence=1.0,
                    source=TranscriptSource.SUBTITLE_FILE,
                )
            )

    # Skip WEBVTT header and any metadata
    first = 0
    while first < len(lines) and not re.match(r"\d{2}:\d{2}", lines[first]):
        first += 1

    cue_lines: list[str] = []
    cue_start = 0.0
    cue_end = 0.0
    in_cue = False

    for pos in range(first, len(lines)):
        line = lines[pos].strip()

        if "-->" in line:
            # A new timing line closes any cue that is still open
            if in_cue:
                _flush_cue(cue_lines, cue_start, cue_end)

            start_part, _, end_part = line.partition("-->")
            # Cue settings (e.g. "align:start") may follow the end timestamp
            end_tokens = end_part.split()
            cue_start = _parse_timestamp_vtt(start_part)
            if end_tokens:
                cue_end = _parse_timestamp_vtt(end_tokens[0])
            else:
                # Malformed timing line: keep the text as a zero-length cue
                logger.warning(f"VTT cue without end time in {path}: {line!r}")
                cue_end = cue_start
            cue_lines = []
            in_cue = True

        elif line == "":
            if in_cue:
                _flush_cue(cue_lines, cue_start, cue_end)
            cue_lines = []
            in_cue = False

        elif in_cue:
            # A numeric line is a cue identifier only when it directly
            # precedes a timing line; otherwise it is real caption text.
            next_is_timing = pos + 1 < len(lines) and "-->" in lines[pos + 1]
            if line.isdigit() and next_is_timing:
                continue
            cue_lines.append(line)

    # Handle last cue (file may not end with a blank line)
    if in_cue:
        _flush_cue(cue_lines, cue_start, cue_end)

    return segments
def get_transcript(
    video_info: VideoInfo,
    config: VideoSourceConfig,
) -> tuple[list[TranscriptSegment], TranscriptSource]:
    """Get transcript for a video, trying available methods in priority order.

    Priority:
    1. YouTube API (for YouTube videos)
    2. Subtitle files (SRT/VTT alongside local files)
    3. Whisper fallback (Tier 2)
    4. NONE (no transcript available)

    A method that fails or yields no segments is logged and the next one is
    tried; this function itself does not raise for those failures.

    Args:
        video_info: Video metadata.
        config: Video source configuration.

    Returns:
        Tuple of (transcript segments, source type).
    """
    languages = config.languages or ["en"]

    # 1. Try YouTube API for YouTube videos
    if video_info.source_type == VideoSourceType.YOUTUBE:
        if HAS_YOUTUBE_TRANSCRIPT:
            try:
                segments, source = extract_youtube_transcript(video_info.video_id, languages)
                if segments:
                    logger.info(
                        f"Got {len(segments)} transcript segments via YouTube API "
                        f"({source.value}) for '{video_info.title}'"
                    )
                    return segments, source
            except Exception as e:
                logger.warning(f"YouTube transcript failed: {e}")
        else:
            # Make the skipped step visible instead of silently falling through
            logger.debug("youtube-transcript-api not installed; skipping YouTube captions")

    # 2. Try subtitle files for local videos
    if video_info.file_path:
        base = Path(video_info.file_path).stem
        parent = Path(video_info.file_path).parent

        for ext in [".srt", ".vtt"]:
            sub_path = parent / f"{base}{ext}"
            if sub_path.exists():
                logger.info(f"Found subtitle file: {sub_path}")
                segments = parse_srt(str(sub_path)) if ext == ".srt" else parse_vtt(str(sub_path))
                if segments:
                    return segments, TranscriptSource.SUBTITLE_FILE

    # 3. Whisper fallback (Tier 2 — only if installed)
    if HAS_WHISPER and video_info.file_path:
        try:
            segments = transcribe_with_whisper(
                video_info.file_path,
                model=config.whisper_model,
                language=languages[0] if languages else None,
            )
            if segments:
                return segments, TranscriptSource.WHISPER
        except (RuntimeError, NotImplementedError) as e:
            # Still best-effort, but record why the fallback produced nothing
            logger.debug(f"Whisper transcription unavailable: {e}")

    # 4. No transcript available
    logger.warning(f"No transcript available for '{video_info.title}'")
    return [], TranscriptSource.NONE
def _get_ocr_reader():
    """Get or create the EasyOCR reader (lazy singleton).

    The reader is created on first use with English only and GPU disabled,
    then cached in the module-level ``_ocr_reader``.

    Returns:
        The shared ``easyocr.Reader`` instance.

    Raises:
        RuntimeError: If easyocr is not installed.
    """
    global _ocr_reader
    if _ocr_reader is None:
        # ``easyocr`` is None when the optional import failed; fail with the
        # install hint instead of an AttributeError on None.
        if not HAS_EASYOCR:
            raise RuntimeError(_INSTALL_MSG)
        logger.info("Initializing OCR engine (first run may download models)...")
        _ocr_reader = easyocr.Reader(["en"], gpu=False)
    return _ocr_reader
+ """ + import numpy as np + + median = float(np.median(gray_img)) + return "dark" if median < 128 else "light" + + +def _preprocess_frame_for_ocr(frame_path: str, frame_type: FrameType) -> str: + """Apply frame-type-aware preprocessing before OCR. + + CODE_EDITOR/TERMINAL: COLOR inversion (preserves syntax highlighting) → + grayscale → aggressive upscale → CLAHE contrast enhancement. Produces + a high-res, high-contrast grayscale suitable for EasyOCR. + + SLIDE: mild sharpening. + Others: no preprocessing. + + Args: + frame_path: Path to the original frame image. + frame_type: Classification of the frame. + + Returns: + Path to the preprocessed image (may be a temp file or the original). + """ + if not HAS_OPENCV: + return frame_path + + import numpy as np + + if frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL): + img = cv2.imread(frame_path) + if img is None: + return frame_path + + # 1. Theme detection on original grayscale + gray_check = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + theme = _detect_theme(gray_check) + + # 2. COLOR inversion on BGR — preserves syntax highlighting distinctions. + # Grayscale-then-invert loses the difference between blue/green/red text. + if theme == "dark": + img = cv2.bitwise_not(img) + + # 3. Convert inverted color to grayscale + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # 4. Aggressive upscale BEFORE any processing — OCR needs ~12px+ char height. + # Must be done on grayscale (not binary) for clean INTER_CUBIC interpolation. + h, w = gray.shape + if w < 1920: + scale = max(2, (1920 // w) + 1) + gray = cv2.resize(gray, (w * scale, h * scale), interpolation=cv2.INTER_CUBIC) + + # 5. 
def _binarize_for_tesseract(grayscale_path: str) -> str:
    """Produce a clean binary image from a preprocessed grayscale, for Tesseract.

    Pipeline: Gaussian blur → Otsu's threshold → morphological close.

    Args:
        grayscale_path: Path to a preprocessed grayscale image.

    Returns:
        Path to the binary image (a new temp file), or ``grayscale_path``
        unchanged when OpenCV is unavailable, the image cannot be read, or
        the binary image cannot be written.
    """
    # pytesseract can be installed without OpenCV; in that case ``cv2`` is
    # None, so hand the input straight to Tesseract.
    if not HAS_OPENCV:
        return grayscale_path

    import numpy as np

    gray = cv2.imread(grayscale_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return grayscale_path

    # Gaussian blur to smooth noise before thresholding
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)

    # Otsu's binarization picks the threshold automatically
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological close to fill small gaps in character strokes
    kernel = np.ones((2, 2), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)

    with tempfile.NamedTemporaryFile(suffix=".png", prefix="ocr_bin_", delete=False) as tmp:
        tmp_path = tmp.name

    if not cv2.imwrite(tmp_path, binary):
        # Do not hand Tesseract an empty file; clean up and fall back
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
        return grayscale_path
    return tmp_path
+ + CODE_EDITOR/TERMINAL: lower thresholds, beam search, higher mag. + SLIDE/OTHER: defaults with greedy decoder. + """ + if frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL): + return { + "text_threshold": 0.4, + "low_text": 0.3, + "contrast_ths": 0.3, + "mag_ratio": 1.0, # Frame already upscaled in preprocessing + "decoder": "beamsearch", + "beamWidth": 10, + } + if frame_type == FrameType.SLIDE: + return { + "text_threshold": 0.6, + "low_text": 0.4, + "mag_ratio": 1.0, + "decoder": "greedy", + "beamWidth": 5, + } + return { + "text_threshold": 0.6, + "low_text": 0.4, + "mag_ratio": 1.0, + "decoder": "greedy", + "beamWidth": 5, + } + + +_CODE_TOKENS = frozenset( + { + "func", + "var", + "def", + "class", + "return", + "if", + "for", + "while", + "import", + "from", + "const", + "let", + "function", + "extends", + "self", + "true", + "false", + "null", + "none", + "elif", + "else", + "try", + "except", + "async", + "await", + "yield", + "print", + "int", + "str", + "float", + "bool", + "=", + "(", + ")", + "{", + "}", + "[", + "]", + ":", + "->", + "=>", + "==", + "!=", + } +) + + +def _has_code_tokens(text: str) -> bool: + """Check if text contains recognizable code tokens.""" + lower = text.lower() + return any(token in lower for token in _CODE_TOKENS) + + +def _run_tesseract_ocr(preprocessed_path: str, frame_type: FrameType) -> list[tuple]: # noqa: ARG001 + """Run pytesseract on a preprocessed frame. + + Creates a binarized version of the preprocessed grayscale (Tesseract + performs best on clean binary images), then runs Tesseract with + ``--psm 4`` (single column of variable-size text) and LSTM engine. + + Returns results in the same format as EasyOCR: list of (bbox, text, confidence). + Groups words into lines by y-coordinate. + + Args: + preprocessed_path: Path to the preprocessed grayscale image. + frame_type: Frame classification (reserved for future per-type tuning). 
+ """ + if not HAS_PYTESSERACT: + return [] + + # Produce clean binary for Tesseract + binary_path = _binarize_for_tesseract(preprocessed_path) + try: + data = pytesseract.image_to_data( + binary_path, + config="--psm 4 --oem 1", + output_type=pytesseract.Output.DICT, + ) + except Exception: # noqa: BLE001 + logger.debug("pytesseract failed, returning empty results") + return [] + finally: + if binary_path != preprocessed_path and os.path.exists(binary_path): + os.unlink(binary_path) + + # Collect words with valid confidence + words = [] + for i in range(len(data["text"])): + text = data["text"][i].strip() + conf = float(data["conf"][i]) + if not text or conf < 30: + continue + x = data["left"][i] + y = data["top"][i] + w = data["width"][i] + h = data["height"][i] + bbox = [[x, y], [x + w, y], [x + w, y + h], [x, y + h]] + words.append( + { + "bbox": bbox, + "text": text, + "conf": conf / 100.0, + "y_center": y + h / 2, + "line_num": data["line_num"][i], + "block_num": data["block_num"][i], + } + ) + + if not words: + return [] + + # Group by (block_num, line_num) to form lines + line_groups: dict[tuple[int, int], list[dict]] = {} + for w in words: + key = (w["block_num"], w["line_num"]) + line_groups.setdefault(key, []).append(w) + + results = [] + for _key, line_words in sorted(line_groups.items()): + line_words.sort(key=lambda w: w["bbox"][0][0]) + line_text = " ".join(w["text"] for w in line_words) + avg_conf = sum(w["conf"] for w in line_words) / len(line_words) + + # Build bounding box for the whole line + x_min = min(w["bbox"][0][0] for w in line_words) + y_min = min(w["bbox"][0][1] for w in line_words) + x_max = max(w["bbox"][1][0] for w in line_words) + y_max = max(w["bbox"][2][1] for w in line_words) + bbox = [[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]] + + results.append((bbox, line_text, avg_conf)) + + return results + + +def _run_multi_engine_ocr( + frame_path: str, + frame_type: FrameType, +) -> tuple[list[tuple], str]: + """Run 
multiple OCR engines and ensemble the results. + + Strategy: + 1. Preprocess the frame (inversion + binarization for code frames). + 2. Run EasyOCR on the preprocessed image. + 3. Run pytesseract on the preprocessed image. + 4. For each y-bucket line, pick the engine result with higher confidence. + 5. Prefer results that contain recognizable code tokens. + + Returns: + Tuple of (raw_results, flat_text). + """ + preprocessed_path = _preprocess_frame_for_ocr(frame_path, frame_type) + try: + return _ensemble_ocr_results(preprocessed_path, frame_type) + finally: + if preprocessed_path != frame_path and os.path.exists(preprocessed_path): + os.unlink(preprocessed_path) + + +def _ensemble_ocr_results( + preprocessed_path: str, + frame_type: FrameType, +) -> tuple[list[tuple], str]: + """Run EasyOCR + pytesseract and merge results by y-bucket.""" + # Run EasyOCR + easy_results: list[tuple] = [] + if HAS_EASYOCR: + try: + reader = _get_ocr_reader() + ocr_params = _get_ocr_params(frame_type) + raw = reader.readtext(preprocessed_path, detail=1, paragraph=False, **ocr_params) + easy_results = [ + (bbox, text.strip(), conf) + for bbox, text, conf in raw + if conf >= 0.3 and text.strip() + ] + except Exception: # noqa: BLE001 + logger.debug("EasyOCR failed in multi-engine pipeline") + + # Run pytesseract + tess_results = _run_tesseract_ocr(preprocessed_path, frame_type) + + if not easy_results and not tess_results: + return [], "" + if not easy_results: + flat = " ".join(text for _, text, _ in tess_results) + return tess_results, flat + if not tess_results: + flat = " ".join(text for _, text, _ in easy_results) + return easy_results, flat + + # Merge by y-bucket: for each line, pick the better engine result + merged = _merge_by_y_bucket(easy_results, tess_results) + flat = " ".join(text for _, text, _ in merged) + return merged, flat + + +def _merge_by_y_bucket( + easy_results: list[tuple], + tess_results: list[tuple], + y_tolerance: float = 20.0, +) -> list[tuple]: + """Merge 
two sets of OCR results by matching y-coordinate lines. + + For each y-bucket, picks the result with higher confidence, + with a preference for results containing code tokens. + """ + + def _y_center(bbox) -> float: + return (min(pt[1] for pt in bbox) + max(pt[1] for pt in bbox)) / 2 + + # Build y-indexed lines for each engine + easy_lines = [(r, _y_center(r[0])) for r in easy_results] + tess_lines = [(r, _y_center(r[0])) for r in tess_results] + + # Sort by y + easy_lines.sort(key=lambda x: x[1]) + tess_lines.sort(key=lambda x: x[1]) + + merged: list[tuple] = [] + used_tess = set() + + for easy_r, easy_y in easy_lines: + # Find matching tess line + best_tess_idx = None + best_dist = float("inf") + for i, (tess_r, tess_y) in enumerate(tess_lines): + if i in used_tess: + continue + dist = abs(easy_y - tess_y) + if dist <= y_tolerance and dist < best_dist: + best_dist = dist + best_tess_idx = i + + if best_tess_idx is not None: + used_tess.add(best_tess_idx) + tess_r = tess_lines[best_tess_idx][0] + # Pick better result + winner = _pick_better_ocr_result(easy_r, tess_r) + merged.append(winner) + else: + merged.append(easy_r) + + # Add unmatched tess lines + for i, (tess_r, _) in enumerate(tess_lines): + if i not in used_tess: + merged.append(tess_r) + + # Sort final results by y position + merged.sort(key=lambda r: _y_center(r[0])) + return merged + + +def _pick_better_ocr_result(result_a: tuple, result_b: tuple) -> tuple: + """Pick the better of two OCR results for the same line. + + Prefers code-token-containing results; ties broken by confidence. 
+ """ + _, text_a, conf_a = result_a + _, text_b, conf_b = result_b + + has_code_a = _has_code_tokens(text_a) + has_code_b = _has_code_tokens(text_b) + + # If one has code tokens and the other doesn't, prefer code tokens + if has_code_a and not has_code_b: + return result_a + if has_code_b and not has_code_a: + return result_b + + # Both have or both lack code tokens — pick higher confidence + return result_a if conf_a >= conf_b else result_b + + +def _ocr_with_claude_vision(frame_path: str, frame_type: FrameType) -> tuple[str, float]: + """Use Claude Vision API to extract code from a frame. + + Sends the frame image to Claude Haiku and asks it to extract all + visible code/text exactly as shown. + + Returns: + (extracted_text, confidence). Confidence is 0.95 when successful. + Returns ("", 0.0) if API key is not set or the call fails. + """ + import base64 + + api_key = os.environ.get("ANTHROPIC_API_KEY", "") + if not api_key: + return "", 0.0 + + try: + import anthropic + + # Read image as base64 + with open(frame_path, "rb") as f: + image_data = base64.standard_b64encode(f.read()).decode("utf-8") + + # Determine media type + ext = os.path.splitext(frame_path)[1].lower() + media_type_map = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + } + media_type = media_type_map.get(ext, "image/png") + + context = "IDE screenshot" if frame_type == FrameType.CODE_EDITOR else "terminal screenshot" + prompt = ( + f"Extract all visible code/text from this {context} exactly as shown. " + "Preserve indentation, line breaks, and all characters. " + "Return only the raw code text, no explanations." 
def extract_keyframes(video_path: str, timestamps: list[float]) -> list[KeyFrame]:
    """Extract keyframes at specified timestamps using OpenCV.

    Each frame is written to its own temporary ``.jpg`` file (not deleted
    here) and classified with ``classify_frame``. Timestamps whose frame
    cannot be read are logged and skipped.

    Args:
        video_path: Path to video file.
        timestamps: List of timestamps (in seconds) to extract frames at.

    Returns:
        List of KeyFrame objects with saved frame paths, in ascending
        timestamp order. Empty if the video cannot be opened.

    Raises:
        RuntimeError: If required dependencies are not installed.
    """
    if not HAS_OPENCV:
        raise RuntimeError(_INSTALL_MSG)

    keyframes: list[KeyFrame] = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            logger.error(f"Cannot open video: {video_path}")
            return []

        # Some containers report 0 fps; fall back to 30
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        for ts in sorted(timestamps):
            frame_num = int(ts * fps)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()
            if not ret:
                logger.warning(f"Could not read frame at {ts:.1f}s")
                continue

            # Save frame to temp file
            with tempfile.NamedTemporaryFile(
                suffix=".jpg", prefix=f"frame_{ts:.0f}s_", delete=False
            ) as tmp:
                tmp_path = tmp.name
            cv2.imwrite(tmp_path, frame)

            frame_type = classify_frame(tmp_path)
            keyframes.append(
                KeyFrame(
                    timestamp=ts,
                    image_path=tmp_path,
                    frame_type=frame_type,
                )
            )
    finally:
        # Release the capture handle even if extraction or classification raises
        cap.release()

    logger.info(f"Extracted {len(keyframes)} keyframes")
    return keyframes
+_MIN_PANEL_WIDTH = 200 +_MIN_PANEL_HEIGHT = 150 +_MIN_PANEL_AREA_PCT = 5.0 # percent of total frame area + + +def _classify_region(gray, edges, hsv) -> FrameType: + """Classify a single rectangular region from pre-computed arrays.""" + import numpy as np + + h, w = gray.shape + mean_brightness = float(gray.mean()) + edge_density = float(edges.mean()) / 255.0 + saturation_mean = float(hsv[:, :, 1].mean()) + + # Horizontal line detection for code editors + horizontal_lines = 0 + if mean_brightness < 80 and edge_density > 0.008: + lines = cv2.HoughLinesP( + edges, 1, np.pi / 180, threshold=80, minLineLength=w // 8, maxLineGap=10 + ) + if lines is not None: + for line in lines: + x1, y1, x2, y2 = line[0] + angle = abs(np.degrees(np.arctan2(y2 - y1, x2 - x1))) + if angle < 5 or angle > 175: + horizontal_lines += 1 + + if mean_brightness < 80 and ( + edge_density > 0.05 or (edge_density > 0.01 and horizontal_lines >= 3) + ): + if saturation_mean < 30: + return FrameType.TERMINAL + return FrameType.CODE_EDITOR + elif mean_brightness > 180 and edge_density > 0.03: + return FrameType.SLIDE + elif mean_brightness > 160 and edge_density < 0.02: + return FrameType.DIAGRAM + elif saturation_mean > 60 and mean_brightness > 80: + return FrameType.WEBCAM + + return FrameType.OTHER + + +def _detect_panel_dividers(gray) -> tuple[list[int], list[int]]: + """Detect IDE panel divider positions using brightness gradients. + + Panel dividers are thin lines where many rows (or columns) have a + sharp brightness change. Returns lists of x and y positions. 
+ """ + import numpy as np + + h, w = gray.shape + + # Vertical dividers: column-wise horizontal gradient + dx = np.abs(np.diff(gray.astype(np.float32), axis=1)) + v_sig = (dx > 25).sum(axis=0) + v_cols = np.where(v_sig > h * 0.3)[0] + + v_dividers: list[int] = [] + if len(v_cols) > 0: + group = [v_cols[0]] + for x in v_cols[1:]: + if x - group[-1] <= 15: + group.append(x) + else: + v_dividers.append(int(np.mean(group))) + group = [x] + v_dividers.append(int(np.mean(group))) + v_dividers = [d for d in v_dividers if w * 0.03 < d < w * 0.97] + + # Horizontal dividers: row-wise vertical gradient + dy = np.abs(np.diff(gray.astype(np.float32), axis=0)) + h_sig = (dy > 25).sum(axis=1) + h_rows = np.where(h_sig > w * 0.3)[0] + + h_dividers: list[int] = [] + if len(h_rows) > 0: + group = [h_rows[0]] + for y in h_rows[1:]: + if y - group[-1] <= 15: + group.append(y) + else: + h_dividers.append(int(np.mean(group))) + group = [y] + h_dividers.append(int(np.mean(group))) + h_dividers = [d for d in h_dividers if h * 0.03 < d < h * 0.97] + + return v_dividers, h_dividers + + +def classify_frame_regions( + frame_path: str, +) -> list[tuple[int, int, int, int, FrameType]]: + """Classify a frame by detecting IDE panels as rectangles. + + Finds panel divider lines (vertical and horizontal brightness edges), + builds a grid of rectangular panels, filters by minimum size, and + classifies each panel independently. + + This handles split-screen IDE layouts where half the screen shows code + and the other half shows a game viewport or inspector. + + Args: + frame_path: Path to frame image file. + + Returns: + List of ``(x1, y1, x2, y2, FrameType)`` for each detected panel + that meets the minimum size threshold. 
def _find_code_bbox(
    regions: list[tuple[int, int, int, int, FrameType]],
) -> tuple[int, int, int, int] | None:
    """Compute the bounding box that encloses every code/terminal panel.

    Args:
        regions: ``(x1, y1, x2, y2, FrameType)`` tuples, one per panel.

    Returns:
        ``(x1, y1, x2, y2)`` spanning all CODE_EDITOR and TERMINAL panels,
        or None when ``regions`` contains no such panel.
    """
    code_kinds = (FrameType.CODE_EDITOR, FrameType.TERMINAL)
    boxes = [
        (left, top, right, bottom)
        for left, top, right, bottom, kind in regions
        if kind in code_kinds
    ]
    if not boxes:
        return None

    # Transpose the boxes so each coordinate can be reduced on its own.
    lefts, tops, rights, bottoms = zip(*boxes)
    return (min(lefts), min(tops), max(rights), max(bottoms))
def _crop_code_region(frame_path: str, bbox: tuple[int, int, int, int], suffix: str = "") -> str:
    """Crop the code region from a frame and save it next to the frame.

    The crop is written to ``<frame stem>_code_crop<suffix><frame ext>``.

    Args:
        frame_path: Path to the source frame image.
        bbox: ``(x1, y1, x2, y2)`` crop rectangle in frame pixels.
        suffix: Optional suffix to disambiguate when cropping multiple
            panels from the same frame (e.g. ``"_p0"``, ``"_p1"``).

    Returns:
        Path of the cropped image file.

    Raises:
        RuntimeError: If the frame cannot be read or the crop cannot be
            written.
        ValueError: If ``bbox`` does not cover any pixel of the frame.
    """
    img = cv2.imread(frame_path)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise RuntimeError(f"Cannot read frame for cropping: {frame_path}")

    frame_h, frame_w = img.shape[:2]
    x1, y1, x2, y2 = bbox
    # Clamp to the frame: negative indices would otherwise wrap around in
    # the slice below and silently select the wrong pixels.
    x1, x2 = max(0, x1), min(frame_w, x2)
    y1, y2 = max(0, y1), min(frame_h, y2)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"Empty crop rectangle {bbox} for frame {frame_path}")

    cropped = img[y1:y2, x1:x2]
    base, ext = os.path.splitext(frame_path)
    cropped_path = f"{base}_code_crop{suffix}{ext}"
    if not cv2.imwrite(cropped_path, cropped):
        raise RuntimeError(f"Cannot write cropped frame: {cropped_path}")
    return cropped_path
def extract_text_from_frame(
    frame_path: str,
    frame_type: FrameType = FrameType.OTHER,
) -> tuple[list[tuple], str]:
    """Run EasyOCR on a frame and keep the confident, non-blank detections.

    The frame is preprocessed according to ``frame_type`` before OCR; a
    preprocessed copy that differs from ``frame_path`` is deleted again
    once OCR has finished, whether or not it succeeded.

    Args:
        frame_path: Path to frame image file.
        frame_type: Classification of the frame content.

    Returns:
        Tuple of (kept_results, flat_text_string).  Each kept result is
        ``(bbox, stripped_text, confidence)`` with confidence >= 0.3, and
        the flat string is the kept texts joined by single spaces.

    Raises:
        RuntimeError: If required dependencies are not installed.
    """
    if not HAS_EASYOCR:
        raise RuntimeError(_INSTALL_MSG)

    ocr_input = _preprocess_frame_for_ocr(frame_path, frame_type)
    try:
        reader = _get_ocr_reader()
        ocr_params = _get_ocr_params(frame_type)
        detections = reader.readtext(ocr_input, detail=1, paragraph=False, **ocr_params)
    finally:
        # Only remove the file when preprocessing produced a separate copy.
        made_copy = ocr_input != frame_path
        if made_copy and os.path.exists(ocr_input):
            os.unlink(ocr_input)

    # Drop low-confidence and whitespace-only detections.
    kept = [
        (box, raw.strip(), score)
        for box, raw, score in detections
        if score >= 0.3 and raw.strip()
    ]
    flat_text = " ".join(entry[1] for entry in kept)
    return kept, flat_text
+ + Groups text fragments that share similar y-coordinates into + lines, sorts within each line by x-coordinate, and builds + one OCRRegion per line. + + Args: + raw_results: List of (bbox, text, confidence) from EasyOCR. + frame_type: Frame classification for monospace detection. + + Returns: + List of OCRRegion objects, one per detected text line. + """ + if not raw_results: + return [] + + # Compute y_center for each result and estimate line height + items = [] + for bbox, text, conf in raw_results: + y_top = min(pt[1] for pt in bbox) + y_bottom = max(pt[1] for pt in bbox) + x_left = min(pt[0] for pt in bbox) + x_right = max(pt[0] for pt in bbox) + y_center = (y_top + y_bottom) / 2 + line_height = y_bottom - y_top + items.append( + { + "text": text, + "conf": conf, + "y_center": y_center, + "y_top": y_top, + "y_bottom": y_bottom, + "x_left": x_left, + "x_right": x_right, + "line_height": max(line_height, 1), + } + ) + + # Sort by y_center + items.sort(key=lambda it: it["y_center"]) + + # Cluster into lines + lines: list[list[dict]] = [[items[0]]] + for item in items[1:]: + current_line = lines[-1] + avg_height = sum(it["line_height"] for it in current_line) / len(current_line) + if abs(item["y_center"] - current_line[-1]["y_center"]) <= avg_height * 0.5: + current_line.append(item) + else: + lines.append([item]) + + # Estimate average character width for tab detection + total_chars = sum(len(it["text"]) for it in items) + total_width = sum(it["x_right"] - it["x_left"] for it in items) + avg_char_width = total_width / max(total_chars, 1) + + is_mono = frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL) + + regions = [] + for line in lines: + # Sort fragments within line by x-coordinate + line.sort(key=lambda it: it["x_left"]) + + # Join fragments with appropriate spacing + parts = [] + for i, frag in enumerate(line): + if i > 0: + gap = frag["x_left"] - line[i - 1]["x_right"] + if gap > avg_char_width * 2: + parts.append("\t") + else: + parts.append(" ") + 
parts.append(frag["text"]) + + text = "".join(parts) + avg_conf = sum(f["conf"] for f in line) / len(line) + bbox = ( + int(min(f["x_left"] for f in line)), + int(min(f["y_top"] for f in line)), + int(max(f["x_right"] for f in line)), + int(max(f["y_bottom"] for f in line)), + ) + + regions.append( + OCRRegion( + text=text, + confidence=avg_conf, + bbox=bbox, + is_monospace=is_mono, + ) + ) + + return regions + + +def _assemble_structured_text(regions: list[OCRRegion], frame_type: FrameType) -> str: + """Join OCR line regions into structured text. + + CODE_EDITOR/TERMINAL: newline-separated with indentation from x-offset. + SLIDE: double-newline paragraph spacing. + Others: space-separated flat text. + + Args: + regions: List of OCRRegion objects (one per line). + frame_type: Frame classification. + + Returns: + Formatted text string. + """ + if not regions: + return "" + + if frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL): + if not regions: + return "" + # Estimate indentation from x-offset relative to leftmost region + min_x = min(r.bbox[0] for r in regions) + lines = [] + for r in regions: + indent_px = r.bbox[0] - min_x + # Estimate character width from the region + region_width = r.bbox[2] - r.bbox[0] + char_count = len(r.text.replace("\t", " ")) + char_width = region_width / max(char_count, 1) + indent_chars = int(indent_px / max(char_width, 1)) + # Round to nearest 4-space indent + indent_level = round(indent_chars / 4) + lines.append(" " * indent_level + r.text) + return "\n".join(lines) + + if frame_type == FrameType.SLIDE: + return "\n\n".join(r.text for r in regions) + + return " ".join(r.text for r in regions) + + +def _compute_frame_timestamps( + video_path: str, + duration: float, + sample_interval: float = 0.7, + min_gap: float = 0.5, +) -> list[float]: + """Build a deduplicated list of timestamps to extract frames at. 
+ + Combines scene-change detection (catches visual transitions) with + regular interval sampling (catches gradual changes). Nearby + timestamps closer than *min_gap* seconds are merged. + + Args: + video_path: Path to the video file. + duration: Total video duration in seconds. + sample_interval: Seconds between interval samples. + min_gap: Minimum gap between kept timestamps. + + Returns: + Sorted, deduplicated list of timestamps (seconds). + """ + timestamps: set[float] = set() + + # 1. Scene detection — catches cuts, slide transitions, editor switches + if HAS_SCENEDETECT: + try: + scenes = detect_scenes(video_path) + for start, _end in scenes: + # Take frame 0.5s after the scene starts (avoids transition blur) + timestamps.add(round(start + 0.5, 1)) + except Exception as exc: # noqa: BLE001 + logger.warning(f"Scene detection failed, falling back to interval: {exc}") + + # 2. Regular interval sampling — fills gaps between scene cuts + t = 0.5 # start slightly after 0 to avoid black intro frames + while t < duration: + timestamps.add(round(t, 1)) + t += sample_interval + + # Always include near the end + if duration > 2.0: + timestamps.add(round(duration - 1.0, 1)) + + # 3. Sort and deduplicate (merge timestamps closer than min_gap) + sorted_ts = sorted(timestamps) + if not sorted_ts: + return [] + + deduped = [sorted_ts[0]] + for ts in sorted_ts[1:]: + if ts - deduped[-1] >= min_gap: + deduped.append(ts) + return deduped + + +def _frames_are_similar(frame_a, frame_b, threshold: float = 3.0) -> bool: + """Check if two OpenCV frames are visually similar. + + Uses mean absolute pixel difference on downscaled grayscale. + This catches text changes on dark backgrounds that histogram + correlation would miss. + + Args: + frame_a: First BGR frame (numpy array). + frame_b: Second BGR frame (numpy array). + threshold: Mean pixel difference below this = "duplicate". + Typical values: 1-2 for identical, 3-5 for minor text + changes, 10+ for scene changes. 
def _text_similarity(text_a: str, text_b: str) -> float:
    """Compute text similarity ratio using SequenceMatcher.

    Args:
        text_a: First text string.
        text_b: Second text string.

    Returns:
        Similarity ratio between 0.0 and 1.0; 0.0 when either string is
        empty.
    """
    if not text_a or not text_b:
        return 0.0
    return difflib.SequenceMatcher(None, text_a, text_b).ratio()


@dataclass
class YBucketLine:
    """A line tracked by y-coordinate across multiple frames."""

    # Vertical centre of the first observation that opened this bucket.
    y_center: float
    # Maximum distance from ``y_center`` at which an observation still
    # belongs to this bucket.
    y_tolerance: float = 15.0
    # One dict per OCR observation (text, confidence, frame_index,
    # timestamp, x_left, x_right).
    observations: list[dict] = field(default_factory=list)
    # Filled in by YBucketConsensusEngine.build_consensus().
    consensus_text: str = ""
    consensus_confidence: float = 0.0


class YBucketConsensusEngine:
    """Build consensus text from OCR observations across multiple frames.

    Groups OCR regions by y-coordinate into buckets, then for each bucket
    selects the best text by clustering similar observations and picking
    the highest-confidence member of the strongest cluster.
    """

    def __init__(self, y_tolerance: float = 15.0):
        self._y_tolerance = y_tolerance
        self._buckets: list[YBucketLine] = []
        self._frame_count = 0

    def add_frame(
        self,
        frame_index: int,
        timestamp: float,
        ocr_regions: "list[OCRRegion]",
    ) -> None:
        """Feed one frame's OCR regions into the engine.

        Each region is filed under the bucket whose centre is closest to
        the region's vertical centre, provided it lies within that
        bucket's tolerance; otherwise a new bucket is opened.

        Args:
            frame_index: Index of the frame the regions came from.
            timestamp: Timestamp of that frame.
            ocr_regions: Regions exposing ``text``, ``confidence`` and a
                ``bbox`` of ``(x1, y1, x2, y2)``.
        """
        self._frame_count += 1
        for region in ocr_regions:
            y_center = (region.bbox[1] + region.bbox[3]) / 2.0
            obs = {
                "text": region.text,
                "confidence": region.confidence,
                "frame_index": frame_index,
                "timestamp": timestamp,
                "x_left": region.bbox[0],
                "x_right": region.bbox[2],
            }

            # A y-centre can sit within tolerance of two neighbouring
            # buckets; choose the nearest one instead of the first hit so
            # the observation is not attributed to the wrong line.
            in_range = [
                bucket
                for bucket in self._buckets
                if abs(bucket.y_center - y_center) <= bucket.y_tolerance
            ]
            if in_range:
                nearest = min(in_range, key=lambda b: abs(b.y_center - y_center))
                nearest.observations.append(obs)
            else:
                self._buckets.append(
                    YBucketLine(
                        y_center=y_center,
                        y_tolerance=self._y_tolerance,
                        observations=[obs],
                    )
                )

    def build_consensus(self) -> list[YBucketLine]:
        """Build consensus text for each y-bucket.

        Algorithm:
        1. Sort observations by confidence (descending).
        2. Cluster observations by text similarity (ratio >= 0.6) against
           each cluster's first, most confident member.
        3. Score clusters by sum of confidence weights.
        4. Winning cluster's highest-confidence observation = consensus_text;
           the cluster's mean confidence = consensus_confidence.
        5. Single observations with confidence < 0.4 → empty (unreliable).

        Returns:
            The engine's buckets, sorted top to bottom by ``y_center``.
        """
        for bucket in self._buckets:
            if not bucket.observations:
                continue

            ranked = sorted(bucket.observations, key=lambda o: o["confidence"], reverse=True)

            # Single observation with low confidence → skip
            if len(ranked) == 1 and ranked[0]["confidence"] < 0.4:
                bucket.consensus_text = ""
                bucket.consensus_confidence = 0.0
                continue

            # Cluster by text similarity to each cluster's representative
            clusters: list[list[dict]] = []
            for obs in ranked:
                for cluster in clusters:
                    if _text_similarity(cluster[0]["text"], obs["text"]) >= 0.6:
                        cluster.append(obs)
                        break
                else:
                    clusters.append([obs])

            # Score clusters by sum of confidence
            best_cluster = max(clusters, key=lambda c: sum(o["confidence"] for o in c))

            # Members were appended in descending confidence order, so the
            # first one is the most confident reading.
            winner = best_cluster[0]
            bucket.consensus_text = winner["text"]
            bucket.consensus_confidence = sum(o["confidence"] for o in best_cluster) / len(
                best_cluster
            )

        # Sort buckets by y_center (top to bottom)
        self._buckets.sort(key=lambda b: b.y_center)
        return self._buckets

    def get_consensus_text(self) -> str:
        """Return assembled consensus text (newline-joined non-empty lines)."""
        return "\n".join(b.consensus_text for b in self._buckets if b.consensus_text)

    def get_consensus_confidence(self) -> float:
        """Return mean consensus confidence across non-empty buckets."""
        non_empty = [b for b in self._buckets if b.consensus_text]
        if not non_empty:
            return 0.0
        return sum(b.consensus_confidence for b in non_empty) / len(non_empty)

    def get_bucket_y_centers(self) -> set[float]:
        """Return the set of y-center values for all buckets."""
        return {b.y_center for b in self._buckets}

    def reset(self) -> None:
        """Clear all state."""
        self._buckets.clear()
        self._frame_count = 0
class TextBlockTracker:
    """Track text blocks across video frames for continuity detection.

    Each code/terminal observation is matched to an active block by panel
    position, then by y-bucket overlap, then (only when no panel is given)
    by text similarity.  Unmatched observations open a new block.  A
    non-code frame closes every active block.
    """

    def __init__(self, similarity_threshold: float = 0.6, y_tolerance: float = 15.0):
        self._active_blocks: list[TrackedTextBlock] = []
        self._completed_blocks: list[TrackedTextBlock] = []
        self._similarity_threshold = similarity_threshold
        self._y_tolerance = y_tolerance
        # Y-bucket consensus engines keyed by active block index
        self._engines: dict[int, YBucketConsensusEngine] = {}
        # Text group tracking
        self._text_groups: list[TextGroup] = []
        self._next_group_id = 1

    @staticmethod
    def _x_overlap_ratio(
        bbox_a: tuple[int, int, int, int],
        bbox_b: tuple[int, int, int, int],
    ) -> float:
        """Return the shared x-extent as a fraction of the narrower box.

        Returns 0.0 when either box has a non-positive width.
        """
        ax1, _ay1, ax2, _ay2 = bbox_a
        bx1, _by1, bx2, _by2 = bbox_b
        a_width = ax2 - ax1
        b_width = bx2 - bx1
        if a_width <= 0 or b_width <= 0:
            return 0.0
        shared = max(0, min(ax2, bx2) - max(ax1, bx1))
        return shared / min(a_width, b_width)

    def update(
        self,
        frame_index: int,
        timestamp: float,
        ocr_text: str,
        confidence: float,
        frame_type: FrameType,
        ocr_regions: list[OCRRegion] | None = None,
        panel_bbox: tuple[int, int, int, int] | None = None,
    ) -> None:
        """Process one OCR observation from a frame.

        For code/terminal frames: match against active blocks using panel
        position (when ``panel_bbox`` is provided), y-bucket overlap (when
        ``ocr_regions`` are provided), or text similarity as final fallback.
        For other frames: complete all active blocks.  Observations whose
        stripped text is shorter than 10 characters are ignored.

        Args:
            frame_index: Index of the frame being processed.
            timestamp: Timestamp of the frame.
            ocr_text: Assembled OCR text for the frame or panel.
            confidence: Confidence of ``ocr_text``.
            frame_type: Classification of the frame or panel.
            ocr_regions: Per-line OCR regions, when available.
            panel_bbox: ``(x1, y1, x2, y2)`` of the panel the text came
                from, when OCR was run per panel.
        """
        is_code_frame = frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL)

        if not is_code_frame:
            self._complete_all_active()
            return

        if not ocr_text or len(ocr_text.strip()) < 10:
            return

        best_match: TrackedTextBlock | None = None
        best_match_idx = -1

        # 1. Try panel position matching first (for per-panel OCR)
        if panel_bbox is not None:
            best_match, best_match_idx = self._match_by_panel_position(panel_bbox, ocr_text)

        # 2. Try y-bucket matching when regions are available.  The panel
        #    is passed along so a block that belongs to a different panel
        #    is not picked just because its text rows line up vertically.
        if best_match is None and ocr_regions:
            best_match, best_match_idx = self._match_by_y_buckets(ocr_regions, panel_bbox)

        # 3. Fallback to text similarity (skip when panel_bbox is provided —
        #    spatial position is the authoritative signal for panel identity)
        if best_match is None and panel_bbox is None:
            best_sim = 0.0
            for i, block in enumerate(self._active_blocks):
                sim = _text_similarity(block.best_text, ocr_text)
                if sim >= self._similarity_threshold and sim > best_sim:
                    best_match = block
                    best_match_idx = i
                    best_sim = sim

        if best_match is not None:
            best_match.last_seen = timestamp
            best_match.frame_indices.append(frame_index)
            best_match.text_snapshots.append(ocr_text)
            if ocr_regions:
                best_match.ocr_regions_per_frame.append(list(ocr_regions))
            if confidence > best_match.best_confidence:
                best_match.best_text = ocr_text
                best_match.best_confidence = confidence
            # Update panel_bbox if not set yet
            if panel_bbox is not None and best_match.panel_bbox is None:
                best_match.panel_bbox = panel_bbox
            # Feed into consensus engine
            if ocr_regions and best_match_idx in self._engines:
                self._engines[best_match_idx].add_frame(frame_index, timestamp, ocr_regions)
        else:
            new_idx = len(self._active_blocks)
            new_block = TrackedTextBlock(
                first_seen=timestamp,
                last_seen=timestamp,
                frame_indices=[frame_index],
                text_snapshots=[ocr_text],
                frame_type=frame_type,
                best_text=ocr_text,
                best_confidence=confidence,
                ocr_regions_per_frame=[list(ocr_regions)] if ocr_regions else [],
                panel_bbox=panel_bbox,
            )
            self._active_blocks.append(new_block)
            # Create consensus engine for new block
            engine = YBucketConsensusEngine(y_tolerance=self._y_tolerance)
            if ocr_regions:
                engine.add_frame(frame_index, timestamp, ocr_regions)
            self._engines[new_idx] = engine

    def _match_by_y_buckets(
        self,
        new_regions: list[OCRRegion],
        panel_bbox: tuple[int, int, int, int] | None = None,
    ) -> tuple[TrackedTextBlock | None, int]:
        """Match new frame regions against active blocks by y-bucket overlap.

        A match requires >= 40% of the new regions' y-centers to fall
        within tolerance of an existing bucket y-center; the block with
        the highest such fraction wins.

        Args:
            new_regions: OCR regions of the new observation.
            panel_bbox: Panel the observation came from, if known.  When
                given, active blocks that already carry a panel box whose
                x-range overlaps this one by less than 50% are skipped.

        Returns:
            (matched_block, block_index) or (None, -1) if no match.
        """
        if not self._active_blocks:
            return None, -1

        new_y_centers = [(r.bbox[1] + r.bbox[3]) / 2.0 for r in new_regions]
        if not new_y_centers:
            return None, -1

        best_block = None
        best_idx = -1
        best_overlap = 0.0

        for i, block in enumerate(self._active_blocks):
            engine = self._engines.get(i)
            if engine is None:
                continue

            # Same row positions in a different panel is not continuity.
            if (
                panel_bbox is not None
                and block.panel_bbox is not None
                and self._x_overlap_ratio(panel_bbox, block.panel_bbox) < 0.5
            ):
                continue

            existing_y_centers = engine.get_bucket_y_centers()
            if not existing_y_centers:
                continue

            # Count how many new y-centers match existing buckets
            matched = sum(
                1
                for ny in new_y_centers
                if any(abs(ny - ey) <= self._y_tolerance for ey in existing_y_centers)
            )

            overlap = matched / len(new_y_centers)
            if overlap >= 0.4 and overlap > best_overlap:
                best_overlap = overlap
                best_block = block
                best_idx = i

        return best_block, best_idx

    def _match_by_panel_position(
        self,
        panel_bbox: tuple[int, int, int, int],
        ocr_text: str,
    ) -> tuple[TrackedTextBlock | None, int]:
        """Match by panel x-range overlap (horizontal position).

        Two panels match if their x-ranges overlap by >= 50% of the
        narrower panel's width.  Also requires text similarity >= 0.3 to
        avoid matching completely different content that happens to be in
        the same position.

        Returns:
            (matched_block, block_index) or (None, -1) if no match.
        """
        if not self._active_blocks:
            return None, -1

        best_block: TrackedTextBlock | None = None
        best_idx = -1
        best_overlap = 0.0

        for i, block in enumerate(self._active_blocks):
            if block.panel_bbox is None:
                continue

            x_overlap = self._x_overlap_ratio(panel_bbox, block.panel_bbox)
            if x_overlap >= 0.5 and x_overlap > best_overlap:
                # Require minimal text similarity to avoid cross-matching
                sim = _text_similarity(block.best_text, ocr_text)
                if sim >= 0.3:
                    best_overlap = x_overlap
                    best_block = block
                    best_idx = i

        return best_block, best_idx

    def _complete_all_active(self) -> None:
        """Move all active blocks to completed, building consensus first.

        When a block's consensus text is non-empty and more confident than
        its current best text, the consensus replaces the best text.
        """
        for i, block in enumerate(self._active_blocks):
            engine = self._engines.get(i)
            if engine is not None:
                buckets = engine.build_consensus()
                block.consensus_lines = [
                    {
                        "y_center": b.y_center,
                        "text": b.consensus_text,
                        "confidence": b.consensus_confidence,
                    }
                    for b in buckets
                    if b.consensus_text
                ]
                consensus_text = engine.get_consensus_text()
                consensus_conf = engine.get_consensus_confidence()
                if consensus_text and consensus_conf > block.best_confidence:
                    block.best_text = consensus_text
                    block.best_confidence = consensus_conf

            self._completed_blocks.append(block)

        self._active_blocks.clear()
        self._engines.clear()

    @staticmethod
    def _consensus_lines_for(block: TrackedTextBlock, block_lines: list[str]) -> list[dict]:
        """Return the block's consensus lines, or synthesize them from text.

        Synthesized entries use ``y_center`` 0.0 and the block's best
        confidence, since no per-line position is known for them.
        """
        if block.consensus_lines:
            return list(block.consensus_lines)
        return [
            {"y_center": 0.0, "text": line, "confidence": block.best_confidence}
            for line in block_lines
        ]

    def _assign_text_group(self, block: TrackedTextBlock) -> None:
        """Assign a text group ID to a completed block.

        Compares the block's lines against existing TextGroups:
        - Overlap >= 60% → same group (possibly edited)
        - Overlap < 60% → new group

        Overlap is the number of block lines that can be paired with a
        distinct, similar (ratio >= 0.6) group line, divided by the length
        of the shorter of the two line lists.  Blocks without any text
        lines are left unassigned.
        """
        block_lines = [cl["text"] for cl in block.consensus_lines if cl.get("text")]
        if not block_lines:
            # Fallback: use best_text lines
            block_lines = [line for line in block.best_text.split("\n") if line.strip()]
        if not block_lines:
            return

        best_group = None
        best_overlap = 0.0

        for group in self._text_groups:
            group_lines = [cl["text"] for cl in group.consensus_lines if cl.get("text")]
            if not group_lines:
                continue

            shorter_len = min(len(block_lines), len(group_lines))

            # Pair lines one-to-one: each group line may satisfy only one
            # block line, otherwise a single common line (e.g. "}") could
            # be counted repeatedly and push the ratio above 1.0.
            unpaired = list(group_lines)
            matched = 0
            for bl in block_lines:
                for gi, gl in enumerate(unpaired):
                    if _text_similarity(bl, gl) >= 0.6:
                        matched += 1
                        del unpaired[gi]
                        break

            overlap = matched / shorter_len
            if overlap >= 0.6 and overlap > best_overlap:
                best_overlap = overlap
                best_group = group

        if best_group is not None:
            # Same group — compute edit
            old_lines = [cl["text"] for cl in best_group.consensus_lines if cl.get("text")]
            edit = self._compute_edit(old_lines, block_lines, block.first_seen)
            if edit is not None:
                best_group.edits.append(edit)

            # Update group's consensus lines to new version
            best_group.consensus_lines = self._consensus_lines_for(block, block_lines)
            best_group.appearances.append((block.first_seen, block.last_seen))
            block.text_group_id = best_group.group_id
            # Propagate panel_id if not already set
            if block.panel_id and not best_group.panel_id:
                best_group.panel_id = block.panel_id
        else:
            # New group
            group_id = f"TG-{self._next_group_id:03d}"
            self._next_group_id += 1
            new_group = TextGroup(
                group_id=group_id,
                appearances=[(block.first_seen, block.last_seen)],
                consensus_lines=self._consensus_lines_for(block, block_lines),
                edits=[],
                frame_type=block.frame_type,
                panel_id=block.panel_id,
            )
            self._text_groups.append(new_group)
            block.text_group_id = group_id

    def _compute_edit(
        self, old_lines: list[str], new_lines: list[str], timestamp: float
    ) -> TextGroupEdit | None:
        """Compute a TextGroupEdit between old and new line lists.

        Replaced runs are paired line by line as modifications; surplus
        old lines count as removed and surplus new lines as added.

        Returns:
            The edit, or None when the two lists do not differ.
        """
        if old_lines == new_lines:
            return None

        matcher = difflib.SequenceMatcher(None, old_lines, new_lines)
        added: list[str] = []
        removed: list[str] = []
        modified: list[dict] = []

        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                continue
            elif tag == "insert":
                added.extend(new_lines[j1:j2])
            elif tag == "delete":
                removed.extend(old_lines[i1:i2])
            elif tag == "replace":
                for k, old_line in enumerate(old_lines[i1:i2]):
                    if k < (j2 - j1):
                        modified.append(
                            {
                                "line_num": i1 + k,
                                "old": old_line,
                                "new": new_lines[j1 + k],
                            }
                        )
                    else:
                        removed.append(old_line)
                if (j2 - j1) > (i2 - i1):
                    added.extend(new_lines[j1 + (i2 - i1) : j2])

        if not added and not removed and not modified:
            return None

        return TextGroupEdit(
            timestamp=timestamp,
            added_lines=added,
            removed_lines=removed,
            modified_lines=modified,
        )

    def finalize(self) -> list[TrackedTextBlock]:
        """Complete tracking, assign text groups, and return all blocks.

        Blocks that already carry a text group ID are not re-assigned, so
        calling this more than once does not record duplicate appearances
        or edits.
        """
        self._complete_all_active()
        for block in self._completed_blocks:
            if not block.text_group_id:
                self._assign_text_group(block)
        return list(self._completed_blocks)

    def get_text_groups(self) -> list[TextGroup]:
        """Return all text groups after finalize()."""
        return list(self._text_groups)
def _ocr_single_panel(
    frame_path: str,
    panel_bbox: tuple[int, int, int, int],
    panel_idx: int,
    frame_type: FrameType,
    full_area: int,
    regions: list[tuple[int, int, int, int, FrameType]],
    use_vision_api: bool,
) -> FrameSubSection | None:
    """Run OCR on one panel of a frame and package the result.

    Panels covering less than 90% of the frame are cropped to a temporary
    file (named with ``panel_idx``) that is removed again afterwards;
    larger panels are read from the full frame.  When ``use_vision_api``
    is set and the OCR confidence is below 0.5, the vision fallback is
    tried and its text replaces the OCR text if it is more confident.

    Args:
        frame_path: Path to the saved frame image.
        panel_bbox: ``(x1, y1, x2, y2)`` of the panel within the frame.
        panel_idx: Index of the panel, used in the crop file suffix.
        frame_type: Frame classification passed to the OCR helpers.
        full_area: Pixel area of the whole frame.
        regions: All classified panels of the frame, used to derive the
            ``panel_id``.
        use_vision_api: Whether the vision fallback may be used.

    Returns:
        A FrameSubSection for the panel, or None when no text was found.
    """
    left, top, right, bottom = panel_bbox
    covers_subset = (right - left) * (bottom - top) < full_area * 0.9

    # Only crop when the panel is a proper part of the frame.
    crop_file: str | None = None
    target = frame_path
    if covers_subset:
        crop_file = _crop_code_region(frame_path, panel_bbox, suffix=f"_p{panel_idx}")
        target = crop_file

    try:
        raw_results, _ = _run_multi_engine_ocr(target, frame_type)

        line_regions = []
        if raw_results:
            line_regions = _cluster_ocr_into_lines(raw_results, frame_type)

        text = ""
        conf = 0.0
        if line_regions:
            text = _assemble_structured_text(line_regions, frame_type)
            conf = sum(r.confidence for r in line_regions) / len(line_regions)

        # Vision API fallback for low-confidence panels
        vision_used = False
        if use_vision_api and conf < 0.5:
            vision_text, vision_conf = _ocr_with_claude_vision(target, frame_type)
            if vision_text and vision_conf > conf:
                text = vision_text
                conf = vision_conf
                line_regions = []
                vision_used = True
    finally:
        if crop_file and os.path.exists(crop_file):
            os.unlink(crop_file)

    if not text.strip():
        return None

    # Position label: number of panels starting above this one, and number
    # of panels to its left whose top edge is within 50 px of this one's.
    row = len([r for r in regions if r[1] < top])
    col = len([r for r in regions if r[0] < left and abs(r[1] - top) < 50])

    sub_section = FrameSubSection(
        bbox=panel_bbox,
        frame_type=frame_type,
        ocr_text=text,
        ocr_regions=line_regions,
        ocr_confidence=conf,
        panel_id=f"panel_{row}_{col}",
    )
    # Stash vision_used flag for the caller to count
    sub_section._vision_used = vision_used  # type: ignore[attr-defined]
    return sub_section
similarity_threshold: float = 3.0, + use_vision_api: bool = False, +) -> tuple[list[KeyFrame], list[CodeBlock], TextGroupTimeline | None]: + """Run continuous visual extraction on a video. + + Instead of extracting one frame per segment, this scans the entire + video using scene-change detection + interval sampling, deduplicates + near-identical frames, classifies each frame, runs OCR with + frame-type-aware preprocessing, preserves spatial layout, tracks + text across frames with y-bucket consensus, and builds a text group + timeline for code lifecycle tracking. + + For code/terminal frames, uses multi-engine OCR (EasyOCR + pytesseract) + with ensemble voting. When ``use_vision_api`` is True and multi-engine + confidence is below 0.5, falls back to Claude Vision API. + + Args: + video_path: Path to downloaded video file. + segments: List of VideoSegment objects (used for duration hint). + output_dir: Directory to save extracted frames. + sample_interval: Seconds between interval samples (default 0.7s). + min_gap: Minimum gap between kept timestamps (default 0.5s). + similarity_threshold: Pixel-diff threshold for duplicate detection (default 3.0). + use_vision_api: If True, use Claude Vision API as fallback for low-confidence + code frames (requires ANTHROPIC_API_KEY). + + Returns: + Tuple of (keyframes, code_blocks, text_group_timeline). + text_group_timeline is None when no code frames are found. 
+ """ + if not HAS_OPENCV: + raise RuntimeError(_INSTALL_MSG) + + frames_dir = os.path.join(output_dir, "frames") + # Clean stale frames from previous runs + if os.path.exists(frames_dir): + for old in os.listdir(frames_dir): + if old.endswith(".jpg"): + os.remove(os.path.join(frames_dir, old)) + os.makedirs(frames_dir, exist_ok=True) + + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + logger.error(f"Cannot open video: {video_path}") + # The signature promises a 3-tuple; keep that shape on the failure path so callers can unpack it + return [], [], None + + fps = cap.get(cv2.CAP_PROP_FPS) or 30.0 + total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT) + duration = total_frames / fps if fps > 0 else 0.0 + + # If segments give a better duration hint, use it + if segments: + seg_end = max(s.end_time for s in segments) + if seg_end > duration: + duration = seg_end + + logger.info( + f"Continuous visual scan: {duration:.0f}s video, " + f"interval={sample_interval}s, scene detection={'ON' if HAS_SCENEDETECT else 'OFF'}" + ) + + # Build candidate timestamps + timestamps = _compute_frame_timestamps( + video_path, duration, sample_interval=sample_interval, min_gap=min_gap + ) + logger.info(f" {len(timestamps)} candidate timestamps after dedup") + + keyframes = [] + prev_frame = None + skipped_similar = 0 + vision_api_frames = 0 + tracker = TextBlockTracker() + + for ts in timestamps: + frame_num = int(ts * fps) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) + ret, frame = cap.read() + if not ret: + continue + + # Skip near-duplicate frames + if prev_frame is not None and _frames_are_similar( + prev_frame, frame, threshold=similarity_threshold + ): + skipped_similar += 1 + continue + prev_frame = frame.copy() + + # Save frame + idx = len(keyframes) + frame_filename = f"frame_{idx:03d}_{ts:.0f}s.jpg" + frame_path = os.path.join(frames_dir, frame_filename) + cv2.imwrite(frame_path, frame) + + # Classify using region-based panel detection + regions = classify_frame_regions(frame_path) + code_panels = _get_code_panels(regions) + frame_type = classify_frame(frame_path) # 
dominant type for metadata + is_code_frame = frame_type in (FrameType.CODE_EDITOR, FrameType.TERMINAL) + + # Per-panel OCR: each code/terminal panel is OCR'd independently + # so side-by-side editors produce separate code blocks. + sub_sections: list[FrameSubSection] = [] + ocr_text = "" + ocr_regions: list[OCRRegion] = [] + ocr_confidence = 0.0 + + if is_code_frame and code_panels and (HAS_EASYOCR or HAS_PYTESSERACT): + full_area = frame.shape[0] * frame.shape[1] + + if len(code_panels) > 1: + # Parallel OCR — each panel is independent + with concurrent.futures.ThreadPoolExecutor(max_workers=len(code_panels)) as pool: + futures = { + pool.submit( + _ocr_single_panel, + frame_path, + pb, + pi, + frame_type, + full_area, + regions, + use_vision_api, + ): pi + for pi, pb in enumerate(code_panels) + } + # Collect in submission order (dicts keep insertion order) so the + # order of sub_sections, and of the tracker updates below, does not + # depend on thread timing; result() blocks until that panel is done. + for fut in futures: + ss = fut.result() + if ss is not None: + if getattr(ss, "_vision_used", False): + vision_api_frames += 1 + sub_sections.append(ss) + else: + # Single panel — avoid thread overhead + ss = _ocr_single_panel( + frame_path, + code_panels[0], + 0, + frame_type, + full_area, + regions, + use_vision_api, + ) + if ss is not None: + if getattr(ss, "_vision_used", False): + vision_api_frames += 1 + sub_sections.append(ss) + + # Track each sub-section independently + for ss in sub_sections: + tracker.update( + idx, + ts, + ss.ocr_text, + ss.ocr_confidence, + ss.frame_type, + ocr_regions=ss.ocr_regions, + panel_bbox=ss.bbox, + ) + + # Set frame-level OCR to best sub-section for backward compat + if sub_sections: + best_ss = max(sub_sections, key=lambda s: s.ocr_confidence) + ocr_text = best_ss.ocr_text + ocr_regions = best_ss.ocr_regions + ocr_confidence = best_ss.ocr_confidence + + elif is_code_frame and (HAS_EASYOCR or HAS_PYTESSERACT): + # No code panels detected but frame is code — OCR whole frame + raw_ocr_results, _flat_text = _run_multi_engine_ocr(frame_path, frame_type) + if raw_ocr_results: + ocr_regions = 
_cluster_ocr_into_lines(raw_ocr_results, frame_type) + ocr_text = _assemble_structured_text(ocr_regions, frame_type) + ocr_confidence = ( + sum(r.confidence for r in ocr_regions) / len(ocr_regions) + if ocr_regions + else 0.0 + ) + + if use_vision_api and ocr_confidence < 0.5: + vision_text, vision_conf = _ocr_with_claude_vision(frame_path, frame_type) + if vision_text and vision_conf > ocr_confidence: + ocr_text = vision_text + ocr_confidence = vision_conf + ocr_regions = [] + vision_api_frames += 1 + + tracker.update(idx, ts, ocr_text, ocr_confidence, frame_type, ocr_regions=ocr_regions) + + elif HAS_EASYOCR: + # Standard EasyOCR for non-code frames + raw_ocr_results, _flat_text = extract_text_from_frame(frame_path, frame_type) + if raw_ocr_results: + ocr_regions = _cluster_ocr_into_lines(raw_ocr_results, frame_type) + ocr_text = _assemble_structured_text(ocr_regions, frame_type) + ocr_confidence = ( + sum(r.confidence for r in ocr_regions) / len(ocr_regions) + if ocr_regions + else 0.0 + ) + + tracker.update(idx, ts, ocr_text, ocr_confidence, frame_type, ocr_regions=ocr_regions) + + kf = KeyFrame( + timestamp=ts, + image_path=frame_path, + frame_type=frame_type, + ocr_text=ocr_text, + ocr_regions=ocr_regions, + ocr_confidence=ocr_confidence, + width=frame.shape[1], + height=frame.shape[0], + sub_sections=sub_sections, + ) + keyframes.append(kf) + + logger.debug( + f" Frame {idx}: {frame_type.value} at {ts:.1f}s" + + ( + f" | OCR: {ocr_text[:60]}..." 
+ if len(ocr_text) > 60 + else f" | OCR: {ocr_text}" + if ocr_text + else "" + ) + ) + + cap.release() + + # Finalize text tracking and extract code blocks + tracked_blocks = tracker.finalize() + text_groups = tracker.get_text_groups() + code_blocks = _extract_code_blocks(tracked_blocks, text_groups=text_groups) + + # Build timeline + timeline: TextGroupTimeline | None = None + if text_groups: + total_code_time = sum(end - start for tg in text_groups for start, end in tg.appearances) + total_edits = sum(len(tg.edits) for tg in text_groups) + timeline = TextGroupTimeline( + text_groups=text_groups, + total_code_time=total_code_time, + total_groups=len(text_groups), + total_edits=total_edits, + ) + + vision_msg = f", {vision_api_frames} via Vision API" if vision_api_frames > 0 else "" + logger.info( + f"Extracted {len(keyframes)} unique keyframes " + f"({skipped_similar} duplicates skipped), " + f"{sum(1 for kf in keyframes if kf.ocr_text)} with OCR text, " + f"{len(code_blocks)} code blocks detected, " + f"{len(text_groups)} text groups{vision_msg}" + ) + return keyframes, code_blocks, timeline + + +def download_video(url: str, output_dir: str) -> str | None: + """Download a video using yt-dlp for visual processing. + + Downloads the best quality up to 1080p. Uses separate video+audio streams + and merges them (via ffmpeg) since YouTube only offers combined streams at + 360p/720p — higher resolutions require downloading video-only + audio-only + and muxing. + + Args: + url: Video URL. + output_dir: Directory to save the downloaded file. + + Returns: + Path to downloaded video file, or None on failure. 
+ """ + try: + import yt_dlp + except ImportError: + logger.error("yt-dlp is required for video download") + return None + + os.makedirs(output_dir, exist_ok=True) + output_template = os.path.join(output_dir, "video.%(ext)s") + + opts = { + "format": "bestvideo[height<=1080]+bestaudio/best[height<=1080]", + "merge_output_format": "mp4", + "outtmpl": output_template, + "quiet": True, + "no_warnings": True, + } + + logger.info(f"Downloading video for visual extraction...") + try: + with yt_dlp.YoutubeDL(opts) as ydl: + info = ydl.extract_info(url, download=True) + filename = ydl.prepare_filename(info) + if os.path.exists(filename): + logger.info(f"Downloaded: {filename}") + return filename + # Try common extensions + for ext in ["mp4", "webm", "mkv"]: + candidate = os.path.join(output_dir, f"video.{ext}") + if os.path.exists(candidate): + return candidate + except Exception as e: + logger.error(f"Failed to download video: {e}") + + return None diff --git a/src/skill_seekers/mcp/server_fastmcp.py b/src/skill_seekers/mcp/server_fastmcp.py index 1c2e5d4..5e8a581 100644 --- a/src/skill_seekers/mcp/server_fastmcp.py +++ b/src/skill_seekers/mcp/server_fastmcp.py @@ -98,6 +98,7 @@ try: scrape_docs_impl, scrape_github_impl, scrape_pdf_impl, + scrape_video_impl, # Splitting tools split_config_impl, submit_config_impl, @@ -420,6 +421,55 @@ async def scrape_pdf( return str(result) +@safe_tool_decorator( + description="Extract transcripts and metadata from videos (YouTube, Vimeo, local files) and build Claude skill." +) +async def scrape_video( + url: str | None = None, + video_file: str | None = None, + playlist: str | None = None, + name: str | None = None, + description: str | None = None, + languages: str | None = None, + from_json: str | None = None, +) -> str: + """ + Scrape video content and build Claude skill. 
+ + Args: + url: Video URL (YouTube, Vimeo) + video_file: Local video file path + playlist: Playlist URL + name: Skill name + description: Skill description + languages: Transcript language preferences (comma-separated) + from_json: Build from extracted JSON file + + Returns: + Video scraping results with file paths. + """ + args = {} + if url: + args["url"] = url + if video_file: + args["video_file"] = video_file + if playlist: + args["playlist"] = playlist + if name: + args["name"] = name + if description: + args["description"] = description + if languages: + args["languages"] = languages + if from_json: + args["from_json"] = from_json + + result = await scrape_video_impl(args) + if isinstance(result, list) and result: + return result[0].text if hasattr(result[0], "text") else str(result[0]) + return str(result) + + @safe_tool_decorator( description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation and dependency graphs." 
) diff --git a/src/skill_seekers/mcp/tools/__init__.py b/src/skill_seekers/mcp/tools/__init__.py index 188af2b..66284c4 100644 --- a/src/skill_seekers/mcp/tools/__init__.py +++ b/src/skill_seekers/mcp/tools/__init__.py @@ -63,6 +63,9 @@ from .scraping_tools import ( from .scraping_tools import ( scrape_pdf_tool as scrape_pdf_impl, ) +from .scraping_tools import ( + scrape_video_tool as scrape_video_impl, +) from .source_tools import ( add_config_source_tool as add_config_source_impl, ) @@ -123,6 +126,7 @@ __all__ = [ "scrape_docs_impl", "scrape_github_impl", "scrape_pdf_impl", + "scrape_video_impl", "scrape_codebase_impl", "detect_patterns_impl", "extract_test_examples_impl", diff --git a/src/skill_seekers/mcp/tools/scraping_tools.py b/src/skill_seekers/mcp/tools/scraping_tools.py index f4b986a..a9c083c 100644 --- a/src/skill_seekers/mcp/tools/scraping_tools.py +++ b/src/skill_seekers/mcp/tools/scraping_tools.py @@ -356,6 +356,81 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]: return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")] +async def scrape_video_tool(args: dict) -> list[TextContent]: + """ + Scrape video content (YouTube, local files) and build Claude skill. + + Extracts transcripts, metadata, and optionally visual content from videos + to create skills. 
+ + Args: + args: Dictionary containing: + - url (str, optional): Video URL (YouTube, Vimeo) + - video_file (str, optional): Local video file path + - playlist (str, optional): Playlist URL + - name (str, optional): Skill name + - description (str, optional): Skill description + - languages (str, optional): Language preferences (comma-separated) + - from_json (str, optional): Build from extracted JSON file + + Returns: + List[TextContent]: Tool execution results + """ + url = args.get("url") + video_file = args.get("video_file") + playlist = args.get("playlist") + name = args.get("name") + description = args.get("description") + languages = args.get("languages") + from_json = args.get("from_json") + + # Build command + cmd = [sys.executable, str(CLI_DIR / "video_scraper.py")] + + if from_json: + cmd.extend(["--from-json", from_json]) + elif url or video_file or playlist: + # Exactly one source flag is emitted; precedence is url, then video_file, then playlist + if url: + cmd.extend(["--url", url]) + elif video_file: + cmd.extend(["--video-file", video_file]) + else: + cmd.extend(["--playlist", playlist]) + # Forward the optional flags for every source kind so that a caller-supplied + # description or language preference is never silently dropped + if name: + cmd.extend(["--name", name]) + if description: + cmd.extend(["--description", description]) + if languages: + cmd.extend(["--languages", languages]) + else: + return [ + TextContent( + type="text", + text="❌ Error: Must specify --url, --video-file, --playlist, or --from-json", + ) + ] + + # Run video_scraper.py with streaming + timeout = 600 # 10 minutes for video extraction + + progress_msg = "🎬 Scraping video content...\n" + progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n" + + stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout) + + output = progress_msg + stdout + + if returncode == 0: + return [TextContent(type="text", text=output)] + else: + return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")] + + + async def scrape_github_tool(args: dict) -> 
list[TextContent]: """ Scrape GitHub repository and build Claude skill. diff --git a/src/skill_seekers/workflows/video-tutorial.yaml b/src/skill_seekers/workflows/video-tutorial.yaml new file mode 100644 index 0000000..747971f --- /dev/null +++ b/src/skill_seekers/workflows/video-tutorial.yaml @@ -0,0 +1,111 @@ +name: video-tutorial +description: > + Video tutorial enhancement workflow. Cleans OCR noise, reconstructs code from + transcript + visual data, detects programming languages, and synthesizes a + coherent tutorial skill from raw video extraction output. +version: "1.0" +applies_to: + - video_scraping +variables: {} +stages: + - name: ocr_code_cleanup + type: custom + target: skill_md + enabled: true + uses_history: false + prompt: > + You are reviewing code blocks extracted from video tutorial OCR. + The OCR output is noisy — it contains line numbers, UI chrome text, + garbled characters, and incomplete lines. + + Clean each code block by: + 1. Remove line numbers that OCR captured (leading digits like "1 ", "2 ", "23 ") + 2. Remove UI elements (tab bar text, file names, button labels) + 3. Fix common OCR errors (l/1, O/0, rn/m confusions) + 4. Remove animation timeline numbers or frame counters + 5. Strip trailing whitespace and normalize indentation + + Output JSON with: + - "cleaned_blocks": array of cleaned code strings + - "languages_detected": map of block index to detected language + - "confidence": overall confidence in the cleanup (0-1) + + - name: language_detection + type: custom + target: skill_md + enabled: true + uses_history: true + prompt: > + Based on the previous OCR cleanup results and the transcript content, + determine the programming language for each code block. + + Detection strategy (in priority order): + 1. Narrator mentions: "in GDScript", "this Python function", "our C# class" + 2. Code patterns: extends/func/signal=GDScript, def/import=Python, + function/const/let=JavaScript, using/namespace=C# + 3. 
File extensions visible in OCR (.gd, .py, .js, .cs) + 4. Framework context from transcript (Godot=GDScript, Unity=C#, Django=Python) + + Output JSON with: + - "language_map": map of block index to language identifier + - "primary_language": the main language used in the tutorial + - "framework": detected framework/engine if any + + - name: tutorial_synthesis + type: custom + target: skill_md + enabled: true + uses_history: true + prompt: > + Synthesize the cleaned code blocks, detected languages, and transcript + into a coherent tutorial structure. + + Group content by TOPIC rather than timestamp: + 1. Identify the main concepts taught in the tutorial + 2. Group related code blocks under concept headings + 3. Use narrator explanations as descriptions for each code block + 4. Build a progressive learning path where concepts build on each other + 5. Show final working code for each concept, not intermediate OCR states + + Use the Audio-Visual Alignment pairs (code + narrator text) as the + primary source for creating annotated examples. + + Output JSON with: + - "sections": array of tutorial sections with title, description, code examples + - "prerequisites": what the viewer should know beforehand + - "key_concepts": important terms and their definitions from the tutorial + - "learning_path": ordered list of concept names + + - name: skill_polish + type: custom + target: skill_md + enabled: true + uses_history: true + prompt: > + Using all previous stage results, polish the SKILL.md for this video tutorial. + + Create: + 1. Clear "When to Use This Skill" with specific trigger conditions + 2. Quick Reference with 5-10 clean, annotated code examples + 3. Step-by-step guide following the tutorial flow + 4. Key concepts with definitions from the narrator + 5. 
Proper language tags on all code fences + + Rules: + - Never include raw OCR artifacts (line numbers, UI chrome) + - Always use correct language tags + - Keep code examples short and focused (5-30 lines) + - Make it actionable for someone implementing what the tutorial teaches + + Output JSON with: + - "improved_overview": enhanced overview section + - "quick_start": concise getting-started snippet + - "key_concepts": essential concepts with definitions + - "code_examples": array of clean, annotated code examples + +post_process: + reorder_sections: [] + add_metadata: + enhanced: true + workflow: video-tutorial + source_type: video diff --git a/tests/test_video_scraper.py b/tests/test_video_scraper.py new file mode 100644 index 0000000..57485a2 --- /dev/null +++ b/tests/test_video_scraper.py @@ -0,0 +1,3119 @@ +#!/usr/bin/env python3 +""" +Tests for Video Scraper (cli/video_scraper.py) + +Tests cover: +- Data models (enums, dataclasses, serialization) +- Metadata extraction (YouTube URL parsing, video ID extraction) +- Transcript extraction (SRT/VTT parsing, fallback chain) +- Segmentation (chapter-based, time-window) +- Full pipeline (VideoToSkillConverter) +- Source detection (SourceDetector video patterns) +- CLI argument parsing +- Create command routing +""" + +import os +import shutil +import tempfile +import unittest + +# Video-specific deps are optional +try: + import yt_dlp # noqa: F401 + + HAS_YTDLP = True +except ImportError: + HAS_YTDLP = False + +try: + from youtube_transcript_api import YouTubeTranscriptApi # noqa: F401 + + HAS_YOUTUBE_TRANSCRIPT = True +except ImportError: + HAS_YOUTUBE_TRANSCRIPT = False + + +# ============================================================================= +# Helper: Build mock data +# ============================================================================= + + +def _make_sample_video_info(): + """Build a minimal VideoInfo dict for testing.""" + from skill_seekers.cli.video_models import ( + TranscriptSource, + 
VideoInfo, + VideoSourceType, + Chapter, + ) + + return VideoInfo( + video_id="abc123def45", + source_type=VideoSourceType.YOUTUBE, + source_url="https://www.youtube.com/watch?v=abc123def45", + title="Test Video Tutorial", + description="A test video for unit testing.", + duration=600.0, + upload_date="2026-01-15", + language="en", + channel_name="Test Channel", + channel_url="https://youtube.com/@testchannel", + view_count=100000, + like_count=5000, + tags=["test", "tutorial", "python"], + categories=["Education"], + chapters=[ + Chapter(title="Intro", start_time=0.0, end_time=60.0), + Chapter(title="Setup", start_time=60.0, end_time=180.0), + Chapter(title="Main Content", start_time=180.0, end_time=500.0), + Chapter(title="Wrap Up", start_time=500.0, end_time=600.0), + ], + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + ) + + +def _make_sample_transcript_segments(): + """Build a list of TranscriptSegment objects for testing.""" + from skill_seekers.cli.video_models import TranscriptSegment, TranscriptSource + + return [ + TranscriptSegment( + text="Welcome to this tutorial.", + start=0.0, + end=3.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="Today we'll learn about Python.", + start=3.0, + end=6.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="Let's set up our environment.", + start=60.0, + end=65.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="First install Python from python.org.", + start=65.0, + end=70.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="Now let's write some code.", + start=180.0, + end=185.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="def hello(): return 'world'", + start=185.0, + end=190.0, + confidence=0.95, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="Thanks for 
watching, subscribe for more.", + start=500.0, + end=510.0, + confidence=1.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + ] + + +def _make_sample_srt_content(): + """Build sample SRT subtitle content.""" + return """1 +00:00:00,000 --> 00:00:03,000 +Welcome to this tutorial. + +2 +00:00:03,000 --> 00:00:06,000 +Today we'll learn about Python. + +3 +00:01:00,000 --> 00:01:05,000 +Let's set up our environment. +""" + + +def _make_sample_vtt_content(): + """Build sample WebVTT subtitle content.""" + return """WEBVTT + +00:00:00.000 --> 00:00:03.000 +Welcome to this tutorial. + +00:00:03.000 --> 00:00:06.000 +Today we'll learn about Python. + +00:01:00.000 --> 00:01:05.000 +Let's set up our environment. +""" + + +# ============================================================================= +# Test: Data Models +# ============================================================================= + + +class TestVideoModels(unittest.TestCase): + """Test video data models (enums + dataclasses).""" + + def test_video_source_type_enum(self): + from skill_seekers.cli.video_models import VideoSourceType + + self.assertEqual(VideoSourceType.YOUTUBE.value, "youtube") + self.assertEqual(VideoSourceType.LOCAL_FILE.value, "local_file") + self.assertEqual(VideoSourceType.VIMEO.value, "vimeo") + + def test_transcript_source_enum(self): + from skill_seekers.cli.video_models import TranscriptSource + + self.assertEqual(TranscriptSource.YOUTUBE_MANUAL.value, "youtube_manual") + self.assertEqual(TranscriptSource.WHISPER.value, "whisper") + self.assertEqual(TranscriptSource.NONE.value, "none") + + def test_segment_content_type_enum(self): + from skill_seekers.cli.video_models import SegmentContentType + + self.assertEqual(SegmentContentType.LIVE_CODING.value, "live_coding") + self.assertEqual(SegmentContentType.EXPLANATION.value, "explanation") + + def test_chapter_serialization(self): + from skill_seekers.cli.video_models import Chapter + + ch = Chapter(title="Intro", start_time=0.0, 
end_time=60.0) + d = ch.to_dict() + self.assertEqual(d["title"], "Intro") + self.assertEqual(d["start_time"], 0.0) + self.assertEqual(d["end_time"], 60.0) + + ch2 = Chapter.from_dict(d) + self.assertEqual(ch2.title, "Intro") + self.assertAlmostEqual(ch2.duration, 60.0) + + def test_transcript_segment_serialization(self): + from skill_seekers.cli.video_models import TranscriptSegment, TranscriptSource + + seg = TranscriptSegment( + text="Hello world", + start=0.0, + end=2.5, + confidence=0.95, + source=TranscriptSource.YOUTUBE_MANUAL, + ) + d = seg.to_dict() + self.assertEqual(d["text"], "Hello world") + self.assertEqual(d["source"], "youtube_manual") + + seg2 = TranscriptSegment.from_dict(d) + self.assertEqual(seg2.text, "Hello world") + self.assertEqual(seg2.source, TranscriptSource.YOUTUBE_MANUAL) + + def test_video_segment_serialization(self): + from skill_seekers.cli.video_models import SegmentContentType, VideoSegment + + seg = VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + transcript="Hello world", + chapter_title="Intro", + content_type=SegmentContentType.INTRO, + confidence=0.9, + ) + d = seg.to_dict() + self.assertEqual(d["chapter_title"], "Intro") + self.assertEqual(d["content_type"], "intro") + + seg2 = VideoSegment.from_dict(d) + self.assertEqual(seg2.chapter_title, "Intro") + self.assertEqual(seg2.content_type, SegmentContentType.INTRO) + + def test_video_segment_timestamp_display(self): + from skill_seekers.cli.video_models import VideoSegment + + seg = VideoSegment(index=0, start_time=330.0, end_time=495.0, duration=165.0) + self.assertEqual(seg.timestamp_display, "05:30 - 08:15") + + def test_video_segment_timestamp_display_hours(self): + from skill_seekers.cli.video_models import VideoSegment + + seg = VideoSegment(index=0, start_time=3661.0, end_time=7200.0, duration=3539.0) + self.assertIn("1:", seg.timestamp_display) + + def test_video_info_serialization(self): + info = _make_sample_video_info() + d = 
info.to_dict() + self.assertEqual(d["video_id"], "abc123def45") + self.assertEqual(d["source_type"], "youtube") + self.assertEqual(len(d["chapters"]), 4) + + from skill_seekers.cli.video_models import VideoInfo + + info2 = VideoInfo.from_dict(d) + self.assertEqual(info2.video_id, "abc123def45") + self.assertEqual(len(info2.chapters), 4) + + def test_video_source_config_validation(self): + from skill_seekers.cli.video_models import VideoSourceConfig + + # No source specified + config = VideoSourceConfig() + errors = config.validate() + self.assertTrue(len(errors) > 0) + + # Valid config + config = VideoSourceConfig(url="https://youtube.com/watch?v=test") + errors = config.validate() + self.assertEqual(len(errors), 0) + + # Multiple sources + config = VideoSourceConfig(url="test", path="test.mp4") + errors = config.validate() + self.assertTrue(len(errors) > 0) + + def test_video_scraper_result_serialization(self): + from skill_seekers.cli.video_models import VideoScraperResult + + result = VideoScraperResult( + total_duration_seconds=600.0, + total_segments=4, + warnings=["Test warning"], + ) + d = result.to_dict() + self.assertEqual(d["total_segments"], 4) + self.assertEqual(d["warnings"], ["Test warning"]) + + result2 = VideoScraperResult.from_dict(d) + self.assertEqual(result2.total_segments, 4) + + def test_word_timestamp_serialization(self): + from skill_seekers.cli.video_models import WordTimestamp + + wt = WordTimestamp(word="hello", start=0.0, end=0.5, probability=0.95) + d = wt.to_dict() + self.assertEqual(d["word"], "hello") + + wt2 = WordTimestamp.from_dict(d) + self.assertEqual(wt2.word, "hello") + + def test_code_block_serialization(self): + from skill_seekers.cli.video_models import CodeBlock, CodeContext + + cb = CodeBlock( + code="print('hi')", language="python", context=CodeContext.EDITOR, confidence=0.9 + ) + d = cb.to_dict() + self.assertEqual(d["context"], "editor") + + cb2 = CodeBlock.from_dict(d) + self.assertEqual(cb2.context, 
CodeContext.EDITOR) + + +# ============================================================================= +# Test: Metadata +# ============================================================================= + + +class TestVideoMetadata(unittest.TestCase): + """Test video metadata extraction functions.""" + + def test_extract_video_id_standard_url(self): + from skill_seekers.cli.video_metadata import extract_video_id + + self.assertEqual( + extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ"), + "dQw4w9WgXcQ", + ) + + def test_extract_video_id_short_url(self): + from skill_seekers.cli.video_metadata import extract_video_id + + self.assertEqual( + extract_video_id("https://youtu.be/dQw4w9WgXcQ"), + "dQw4w9WgXcQ", + ) + + def test_extract_video_id_embed_url(self): + from skill_seekers.cli.video_metadata import extract_video_id + + self.assertEqual( + extract_video_id("https://www.youtube.com/embed/dQw4w9WgXcQ"), + "dQw4w9WgXcQ", + ) + + def test_extract_video_id_shorts_url(self): + from skill_seekers.cli.video_metadata import extract_video_id + + self.assertEqual( + extract_video_id("https://www.youtube.com/shorts/dQw4w9WgXcQ"), + "dQw4w9WgXcQ", + ) + + def test_extract_video_id_not_youtube(self): + from skill_seekers.cli.video_metadata import extract_video_id + + self.assertIsNone(extract_video_id("https://vimeo.com/123456")) + self.assertIsNone(extract_video_id("https://example.com")) + + def test_detect_video_source_type_youtube(self): + from skill_seekers.cli.video_metadata import detect_video_source_type + from skill_seekers.cli.video_models import VideoSourceType + + self.assertEqual( + detect_video_source_type("https://www.youtube.com/watch?v=test"), + VideoSourceType.YOUTUBE, + ) + self.assertEqual( + detect_video_source_type("https://youtu.be/test"), + VideoSourceType.YOUTUBE, + ) + + def test_detect_video_source_type_vimeo(self): + from skill_seekers.cli.video_metadata import detect_video_source_type + from skill_seekers.cli.video_models import 
VideoSourceType + + self.assertEqual( + detect_video_source_type("https://vimeo.com/123456"), + VideoSourceType.VIMEO, + ) + + def test_extract_local_metadata(self): + from skill_seekers.cli.video_metadata import extract_local_metadata + + # Create a temp file + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + tmp_name = tmp.name + try: + info = extract_local_metadata(tmp_name) + self.assertEqual(info.source_type.value, "local_file") + self.assertIsNotNone(info.video_id) + self.assertIsNotNone(info.file_path) + finally: + os.unlink(tmp_name) + + +# ============================================================================= +# Test: Transcript +# ============================================================================= + + +class TestVideoTranscript(unittest.TestCase): + """Test transcript extraction functions.""" + + def test_parse_srt(self): + from skill_seekers.cli.video_transcript import parse_srt + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".srt", delete=False, encoding="utf-8" + ) as tmp: + tmp.write(_make_sample_srt_content()) + tmp_name = tmp.name + try: + segments = parse_srt(tmp_name) + self.assertEqual(len(segments), 3) + self.assertEqual(segments[0].text, "Welcome to this tutorial.") + self.assertAlmostEqual(segments[0].start, 0.0) + self.assertAlmostEqual(segments[0].end, 3.0) + self.assertEqual(segments[0].source.value, "subtitle_file") + finally: + os.unlink(tmp_name) + + def test_parse_vtt(self): + from skill_seekers.cli.video_transcript import parse_vtt + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".vtt", delete=False, encoding="utf-8" + ) as tmp: + tmp.write(_make_sample_vtt_content()) + tmp_name = tmp.name + try: + segments = parse_vtt(tmp_name) + self.assertEqual(len(segments), 3) + self.assertEqual(segments[0].text, "Welcome to this tutorial.") + self.assertAlmostEqual(segments[2].start, 60.0) + finally: + os.unlink(tmp_name) + + def test_parse_srt_with_html_tags(self): + from 
skill_seekers.cli.video_transcript import parse_srt + + # The cue text carries SRT formatting tags; the assertion below expects parse_srt to strip them + content = """1 +00:00:00,000 --> 00:00:03,000 +<b>Bold</b> text and <i>italic</i> +""" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".srt", delete=False, encoding="utf-8" + ) as tmp: + tmp.write(content) + tmp_name = tmp.name + try: + segments = parse_srt(tmp_name) + self.assertEqual(len(segments), 1) + self.assertEqual(segments[0].text, "Bold text and italic") + finally: + os.unlink(tmp_name) + + def test_whisper_stub_raises(self): + from skill_seekers.cli.video_transcript import transcribe_with_whisper, HAS_WHISPER + + if not HAS_WHISPER: + with self.assertRaises(RuntimeError) as ctx: + transcribe_with_whisper("test.wav") + self.assertIn("faster-whisper", str(ctx.exception)) + + def test_get_transcript_fallback_to_subtitle(self): + """Test that get_transcript falls back to subtitle files.""" + from skill_seekers.cli.video_transcript import get_transcript + from skill_seekers.cli.video_models import ( + TranscriptSource, + VideoInfo, + VideoSourceConfig, + VideoSourceType, + ) + + tmp_dir = tempfile.mkdtemp() + try: + # Create a fake video file and matching SRT + video_path = os.path.join(tmp_dir, "test.mp4") + srt_path = os.path.join(tmp_dir, "test.srt") + with open(video_path, "w") as f: + f.write("fake") + with open(srt_path, "w", encoding="utf-8") as f: + f.write(_make_sample_srt_content()) + + video_info = VideoInfo( + video_id="local123", + source_type=VideoSourceType.LOCAL_FILE, + file_path=video_path, + ) + config = VideoSourceConfig() + + segments, source = get_transcript(video_info, config) + self.assertEqual(source, TranscriptSource.SUBTITLE_FILE) + self.assertEqual(len(segments), 3) + finally: + shutil.rmtree(tmp_dir) + + +# ============================================================================= +# Test: Segmenter +# ============================================================================= + + +class TestVideoSegmenter(unittest.TestCase): + """Test video segmentation.""" + + def 
test_segment_by_chapters(self): + from skill_seekers.cli.video_segmenter import segment_by_chapters + + video_info = _make_sample_video_info() + transcript = _make_sample_transcript_segments() + segments = segment_by_chapters(video_info, transcript) + + self.assertEqual(len(segments), 4) + self.assertEqual(segments[0].chapter_title, "Intro") + self.assertEqual(segments[1].chapter_title, "Setup") + self.assertIn("Welcome", segments[0].transcript) + + def test_segment_by_time_window(self): + from skill_seekers.cli.video_segmenter import segment_by_time_window + + video_info = _make_sample_video_info() + transcript = _make_sample_transcript_segments() + segments = segment_by_time_window(video_info, transcript, window_seconds=300.0) + + # With 600s duration and 300s windows, expect 2 segments + self.assertTrue(len(segments) >= 1) + self.assertIsNone(segments[0].chapter_title) + + def test_segment_video_uses_chapters(self): + from skill_seekers.cli.video_segmenter import segment_video + from skill_seekers.cli.video_models import VideoSourceConfig + + video_info = _make_sample_video_info() + transcript = _make_sample_transcript_segments() + config = VideoSourceConfig() + + segments = segment_video(video_info, transcript, config) + # Should use chapters since they're available + self.assertEqual(len(segments), 4) + self.assertEqual(segments[0].chapter_title, "Intro") + + def test_segment_video_fallback_to_time_window(self): + from skill_seekers.cli.video_segmenter import segment_video + from skill_seekers.cli.video_models import VideoInfo, VideoSourceConfig, VideoSourceType + + video_info = VideoInfo( + video_id="no_chapters", + source_type=VideoSourceType.YOUTUBE, + duration=300.0, + ) + transcript = _make_sample_transcript_segments() + config = VideoSourceConfig(time_window_seconds=120.0) + + segments = segment_video(video_info, transcript, config) + self.assertTrue(len(segments) >= 1) + # No chapters, so chapter_title should be None + for seg in segments: + 
self.assertIsNone(seg.chapter_title) + + def test_segment_content_type_classification(self): + from skill_seekers.cli.video_segmenter import _classify_content_type + from skill_seekers.cli.video_models import SegmentContentType + + self.assertEqual( + _classify_content_type("Welcome to this tutorial, today we"), + SegmentContentType.INTRO, + ) + self.assertEqual( + _classify_content_type("import os\ndef process_data(): return result"), + SegmentContentType.LIVE_CODING, + ) + self.assertEqual( + _classify_content_type("thanks for watching subscribe for more"), + SegmentContentType.OUTRO, + ) + + +# ============================================================================= +# Test: Source Detection +# ============================================================================= + + +class TestVideoSourceDetection(unittest.TestCase): + """Test SourceDetector recognizes video URLs and file extensions.""" + + def test_detect_youtube_url(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("https://www.youtube.com/watch?v=dQw4w9WgXcQ") + self.assertEqual(info.type, "video") + self.assertEqual(info.parsed["source_kind"], "url") + + def test_detect_youtube_short_url(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("https://youtu.be/dQw4w9WgXcQ") + self.assertEqual(info.type, "video") + + def test_detect_youtube_playlist(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("https://www.youtube.com/playlist?list=PLtest123") + self.assertEqual(info.type, "video") + self.assertEqual(info.suggested_name, "youtube_playlist") + + def test_detect_youtube_channel(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("https://www.youtube.com/@testchannel") + self.assertEqual(info.type, "video") + self.assertEqual(info.suggested_name, "youtube_channel") + + def 
test_detect_vimeo_url(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("https://vimeo.com/123456789") + self.assertEqual(info.type, "video") + self.assertEqual(info.suggested_name, "vimeo_video") + + def test_detect_mp4_file(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("recording.mp4") + self.assertEqual(info.type, "video") + self.assertEqual(info.suggested_name, "recording") + self.assertEqual(info.parsed["source_kind"], "file") + + def test_detect_mkv_file(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("tutorial.mkv") + self.assertEqual(info.type, "video") + + def test_detect_webm_file(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("screencast.webm") + self.assertEqual(info.type, "video") + + def test_detect_avi_file(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("old-recording.avi") + self.assertEqual(info.type, "video") + + def test_detect_mov_file(self): + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("screen.mov") + self.assertEqual(info.type, "video") + + def test_validate_video_file_exists(self): + from skill_seekers.cli.source_detector import SourceDetector, SourceInfo + + info = SourceInfo( + type="video", + parsed={"file_path": "/nonexistent/file.mp4", "source_kind": "file"}, + suggested_name="file", + raw_input="file.mp4", + ) + with self.assertRaises(ValueError): + SourceDetector.validate_source(info) + + def test_validate_video_url_no_error(self): + """URL-based video sources should not raise during validation.""" + from skill_seekers.cli.source_detector import SourceDetector, SourceInfo + + info = SourceInfo( + type="video", + parsed={"url": "https://youtube.com/watch?v=test", "source_kind": "url"}, + suggested_name="test", + 
raw_input="https://youtube.com/watch?v=test", + ) + # Should not raise + SourceDetector.validate_source(info) + + +# ============================================================================= +# Test: CLI Arguments +# ============================================================================= + + +class TestVideoArguments(unittest.TestCase): + """Test video CLI argument definitions.""" + + def test_video_arguments_dict(self): + from skill_seekers.cli.arguments.video import VIDEO_ARGUMENTS + + self.assertIn("url", VIDEO_ARGUMENTS) + self.assertIn("video_file", VIDEO_ARGUMENTS) + self.assertIn("playlist", VIDEO_ARGUMENTS) + self.assertIn("languages", VIDEO_ARGUMENTS) + self.assertIn("visual", VIDEO_ARGUMENTS) + self.assertIn("whisper_model", VIDEO_ARGUMENTS) + self.assertIn("from_json", VIDEO_ARGUMENTS) + + def test_add_video_arguments(self): + import argparse + from skill_seekers.cli.arguments.video import add_video_arguments + + parser = argparse.ArgumentParser() + add_video_arguments(parser) + + # Should parse without error + args = parser.parse_args(["--url", "https://youtube.com/watch?v=test"]) + self.assertEqual(args.url, "https://youtube.com/watch?v=test") + + def test_enhance_level_defaults_to_zero(self): + import argparse + from skill_seekers.cli.arguments.video import add_video_arguments + + parser = argparse.ArgumentParser() + add_video_arguments(parser) + + args = parser.parse_args([]) + self.assertEqual(args.enhance_level, 0) + + def test_unified_parser_has_video(self): + """Test video subcommand is registered in main parser.""" + from skill_seekers.cli.main import create_parser + + parser = create_parser() + args = parser.parse_args(["video", "--url", "https://youtube.com/watch?v=test"]) + self.assertEqual(args.url, "https://youtube.com/watch?v=test") + + +# ============================================================================= +# Test: VideoToSkillConverter +# ============================================================================= + + 
+class TestVideoToSkillConverter(unittest.TestCase): + """Test the main VideoToSkillConverter class.""" + + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + # Clean up output dirs that may have been created + for d in ["output/test_video", "output/test_video_video_extracted.json"]: + if os.path.exists(d): + if os.path.isdir(d): + shutil.rmtree(d, ignore_errors=True) + else: + os.unlink(d) + + def test_init_with_url(self): + from skill_seekers.cli.video_scraper import VideoToSkillConverter + + config = {"name": "test_video", "url": "https://youtube.com/watch?v=test"} + converter = VideoToSkillConverter(config) + self.assertEqual(converter.name, "test_video") + + def test_init_with_video_file(self): + from skill_seekers.cli.video_scraper import VideoToSkillConverter + + config = {"name": "test_video", "video_file": "test.mp4"} + converter = VideoToSkillConverter(config) + self.assertEqual(converter.config["video_file"], "test.mp4") + + def test_build_skill_from_loaded_data(self): + """Test build_skill works with pre-loaded result data.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + VideoScraperResult, + VideoInfo, + VideoSourceType, + TranscriptSource, + VideoSegment, + SegmentContentType, + ) + + config = { + "name": "test_video", + "output": os.path.join(self.temp_dir, "test_video"), + } + converter = VideoToSkillConverter(config) + + # Manually set result + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test123", + source_type=VideoSourceType.YOUTUBE, + source_url="https://youtube.com/watch?v=test123", + title="Test Video", + description="A test video.", + duration=120.0, + channel_name="Test", + view_count=1000, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + transcript="Hello world 
test content.", + chapter_title="Intro", + content="### Intro (00:00 - 01:00)\n\nHello world test content.", + content_type=SegmentContentType.INTRO, + confidence=0.9, + ), + VideoSegment( + index=1, + start_time=60.0, + end_time=120.0, + duration=60.0, + transcript="Main content here.", + chapter_title="Main", + content="### Main (01:00 - 02:00)\n\nMain content here.", + content_type=SegmentContentType.EXPLANATION, + confidence=0.9, + ), + ], + ), + ], + total_duration_seconds=120.0, + total_segments=2, + ) + + skill_dir = converter.build_skill() + self.assertTrue(os.path.isdir(skill_dir)) + self.assertTrue(os.path.isfile(os.path.join(skill_dir, "SKILL.md"))) + self.assertTrue(os.path.isdir(os.path.join(skill_dir, "references"))) + self.assertTrue(os.path.isdir(os.path.join(skill_dir, "video_data"))) + + # Check SKILL.md content + with open(os.path.join(skill_dir, "SKILL.md"), encoding="utf-8") as f: + skill_content = f.read() + self.assertIn("Test Video", skill_content) + self.assertIn("Video Tutorials", skill_content) + + def test_save_and_load_extracted_data(self): + """Test JSON save/load roundtrip.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import VideoScraperResult, VideoInfo, VideoSourceType + + config = {"name": "test_video"} + converter = VideoToSkillConverter(config) + converter.result = VideoScraperResult( + videos=[VideoInfo(video_id="test", source_type=VideoSourceType.YOUTUBE, title="Test")], + total_duration_seconds=60.0, + ) + + # Save + data_file = converter.save_extracted_data() + self.assertTrue(os.path.isfile(data_file)) + + # Load into new converter + converter2 = VideoToSkillConverter(config) + converter2.load_extracted_data(data_file) + self.assertEqual(len(converter2.result.videos), 1) + self.assertEqual(converter2.result.videos[0].title, "Test") + + # Clean up + os.unlink(data_file) + + +# ============================================================================= +# Test: 
Visual Extraction Stubs +# ============================================================================= + + +class TestVideoVisualStubs(unittest.TestCase): + """Test Tier 2 visual extraction stubs raise proper errors.""" + + def test_check_visual_dependencies(self): + from skill_seekers.cli.video_visual import check_visual_dependencies + + deps = check_visual_dependencies() + self.assertIn("opencv", deps) + self.assertIn("scenedetect", deps) + self.assertIn("easyocr", deps) + + def test_detect_scenes_raises_without_deps(self): + from skill_seekers.cli.video_visual import detect_scenes, HAS_OPENCV + + if not HAS_OPENCV: + with self.assertRaises(RuntimeError): + detect_scenes("test.mp4") + + def test_extract_keyframes_raises_without_deps(self): + from skill_seekers.cli.video_visual import extract_keyframes, HAS_OPENCV + + if not HAS_OPENCV: + with self.assertRaises(RuntimeError): + extract_keyframes("test.mp4", [0.0, 1.0]) + + def test_classify_frame_raises_without_deps(self): + from skill_seekers.cli.video_visual import classify_frame, HAS_OPENCV + + if not HAS_OPENCV: + with self.assertRaises(RuntimeError): + classify_frame("frame.png") + + def test_extract_text_raises_without_deps(self): + from skill_seekers.cli.video_visual import extract_text_from_frame, HAS_EASYOCR + + if not HAS_EASYOCR: + with self.assertRaises(RuntimeError): + extract_text_from_frame("frame.png") + + +# ============================================================================= +# Test: Create Command Integration +# ============================================================================= + + +class TestVideoCreateCommandIntegration(unittest.TestCase): + """Test create command routes video sources correctly.""" + + def test_create_command_routing_youtube_url(self): + """Test that CreateCommand routes YouTube URLs to video scraper.""" + from skill_seekers.cli.source_detector import SourceDetector + + # Detect source + info = 
SourceDetector.detect("https://www.youtube.com/watch?v=dQw4w9WgXcQ") + self.assertEqual(info.type, "video") + + def test_create_command_routing_video_file(self): + """Test that CreateCommand routes video files to video scraper.""" + from skill_seekers.cli.source_detector import SourceDetector + + info = SourceDetector.detect("tutorial.mp4") + self.assertEqual(info.type, "video") + + def test_create_arguments_include_video(self): + """Test that create arguments include video mode.""" + from skill_seekers.cli.arguments.create import get_source_specific_arguments + + video_args = get_source_specific_arguments("video") + self.assertIn("video_url", video_args) + self.assertIn("visual", video_args) + self.assertIn("whisper_model", video_args) + + +# ============================================================================= +# Test: Config Validator +# ============================================================================= + + +class TestVideoConfigValidator(unittest.TestCase): + """Test that video is a valid source type in config validator.""" + + def test_video_in_valid_source_types(self): + from skill_seekers.cli.config_validator import ConfigValidator + + self.assertIn("video", ConfigValidator.VALID_SOURCE_TYPES) + + +# ============================================================================= +# Test: Helper Functions +# ============================================================================= + + +class TestVideoHelperFunctions(unittest.TestCase): + """Test module-level helper functions.""" + + def test_sanitize_filename(self): + from skill_seekers.cli.video_scraper import _sanitize_filename + + self.assertEqual( + _sanitize_filename("React Hooks Tutorial for Beginners"), + "react-hooks-tutorial-for-beginners", + ) + self.assertEqual( + _sanitize_filename("Test!!! 
Video---Title"), + "test-video-title", + ) + + def test_sanitize_filename_max_length(self): + from skill_seekers.cli.video_scraper import _sanitize_filename + + result = _sanitize_filename("a" * 100, max_length=20) + self.assertLessEqual(len(result), 20) + + def test_format_duration(self): + from skill_seekers.cli.video_scraper import _format_duration + + self.assertEqual(_format_duration(65), "01:05") + self.assertEqual(_format_duration(3661), "1:01:01") + self.assertEqual(_format_duration(0), "00:00") + + def test_format_count(self): + from skill_seekers.cli.video_scraper import _format_count + + self.assertEqual(_format_count(1500000), "1,500,000") + self.assertEqual(_format_count(None), "N/A") + + def test_infer_description_from_video(self): + from skill_seekers.cli.video_scraper import infer_description_from_video + + info = _make_sample_video_info() + desc = infer_description_from_video(info) + self.assertTrue(desc.startswith("Use when")) + + +# ============================================================================= +# Test: OCR Preprocessing (Phase 1) +# ============================================================================= + + +class TestOCRPreprocessing(unittest.TestCase): + """Test frame-type-aware OCR preprocessing functions.""" + + def test_get_ocr_params_code_editor(self): + from skill_seekers.cli.video_visual import _get_ocr_params + from skill_seekers.cli.video_models import FrameType + + params = _get_ocr_params(FrameType.CODE_EDITOR) + self.assertEqual(params["decoder"], "beamsearch") + self.assertEqual(params["text_threshold"], 0.4) + self.assertEqual(params["contrast_ths"], 0.3) + self.assertEqual(params["mag_ratio"], 1.0) + + def test_get_ocr_params_terminal(self): + from skill_seekers.cli.video_visual import _get_ocr_params + from skill_seekers.cli.video_models import FrameType + + params = _get_ocr_params(FrameType.TERMINAL) + self.assertEqual(params["decoder"], "beamsearch") + self.assertEqual(params["low_text"], 0.3) + + def 
test_get_ocr_params_slide(self): + from skill_seekers.cli.video_visual import _get_ocr_params + from skill_seekers.cli.video_models import FrameType + + params = _get_ocr_params(FrameType.SLIDE) + self.assertEqual(params["decoder"], "greedy") + self.assertEqual(params["text_threshold"], 0.6) + + def test_get_ocr_params_other(self): + from skill_seekers.cli.video_visual import _get_ocr_params + from skill_seekers.cli.video_models import FrameType + + params = _get_ocr_params(FrameType.OTHER) + self.assertEqual(params["decoder"], "greedy") + + def test_preprocess_returns_original_for_other(self): + from skill_seekers.cli.video_visual import _preprocess_frame_for_ocr + from skill_seekers.cli.video_models import FrameType + + result = _preprocess_frame_for_ocr("/nonexistent/path.jpg", FrameType.OTHER) + self.assertEqual(result, "/nonexistent/path.jpg") + + def test_preprocess_returns_original_for_webcam(self): + from skill_seekers.cli.video_visual import _preprocess_frame_for_ocr + from skill_seekers.cli.video_models import FrameType + + result = _preprocess_frame_for_ocr("/nonexistent/path.jpg", FrameType.WEBCAM) + self.assertEqual(result, "/nonexistent/path.jpg") + + +# ============================================================================= +# Test: Spatial Layout (Phase 2) +# ============================================================================= + + +class TestSpatialLayout(unittest.TestCase): + """Test OCR spatial layout preservation functions.""" + + def test_cluster_empty_results(self): + from skill_seekers.cli.video_visual import _cluster_ocr_into_lines + from skill_seekers.cli.video_models import FrameType + + regions = _cluster_ocr_into_lines([], FrameType.OTHER) + self.assertEqual(regions, []) + + def test_cluster_single_result(self): + from skill_seekers.cli.video_visual import _cluster_ocr_into_lines + from skill_seekers.cli.video_models import FrameType + + raw = [([[0, 10], [100, 10], [100, 30], [0, 30]], "hello world", 0.9)] + regions = 
_cluster_ocr_into_lines(raw, FrameType.OTHER) + self.assertEqual(len(regions), 1) + self.assertEqual(regions[0].text, "hello world") + self.assertAlmostEqual(regions[0].confidence, 0.9) + + def test_cluster_two_lines(self): + from skill_seekers.cli.video_visual import _cluster_ocr_into_lines + from skill_seekers.cli.video_models import FrameType + + raw = [ + ([[0, 10], [100, 10], [100, 30], [0, 30]], "line one", 0.9), + ([[0, 50], [100, 50], [100, 70], [0, 70]], "line two", 0.8), + ] + regions = _cluster_ocr_into_lines(raw, FrameType.CODE_EDITOR) + self.assertEqual(len(regions), 2) + self.assertEqual(regions[0].text, "line one") + self.assertEqual(regions[1].text, "line two") + self.assertTrue(regions[0].is_monospace) + + def test_cluster_same_line_fragments(self): + from skill_seekers.cli.video_visual import _cluster_ocr_into_lines + from skill_seekers.cli.video_models import FrameType + + raw = [ + ([[0, 10], [50, 10], [50, 30], [0, 30]], "hello", 0.9), + ([[55, 10], [120, 10], [120, 30], [55, 30]], "world", 0.85), + ] + regions = _cluster_ocr_into_lines(raw, FrameType.OTHER) + self.assertEqual(len(regions), 1) + self.assertIn("hello", regions[0].text) + self.assertIn("world", regions[0].text) + + def test_cluster_monospace_flag(self): + from skill_seekers.cli.video_visual import _cluster_ocr_into_lines + from skill_seekers.cli.video_models import FrameType + + raw = [([[0, 0], [100, 0], [100, 20], [0, 20]], "test", 0.9)] + + code_regions = _cluster_ocr_into_lines(raw, FrameType.CODE_EDITOR) + self.assertTrue(code_regions[0].is_monospace) + + terminal_regions = _cluster_ocr_into_lines(raw, FrameType.TERMINAL) + self.assertTrue(terminal_regions[0].is_monospace) + + slide_regions = _cluster_ocr_into_lines(raw, FrameType.SLIDE) + self.assertFalse(slide_regions[0].is_monospace) + + def test_assemble_code_editor_newlines(self): + from skill_seekers.cli.video_visual import _assemble_structured_text + from skill_seekers.cli.video_models import FrameType, OCRRegion + + 
regions = [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(100, 10, 300, 30)), + OCRRegion(text="return 'world'", confidence=0.9, bbox=(100, 40, 350, 60)), + ] + text = _assemble_structured_text(regions, FrameType.CODE_EDITOR) + self.assertIn("\n", text) + self.assertIn("def hello():", text) + self.assertIn("return 'world'", text) + + def test_assemble_slide_double_newlines(self): + from skill_seekers.cli.video_visual import _assemble_structured_text + from skill_seekers.cli.video_models import FrameType, OCRRegion + + regions = [ + OCRRegion(text="Title", confidence=0.9, bbox=(100, 10, 300, 30)), + OCRRegion(text="Subtitle", confidence=0.9, bbox=(100, 80, 350, 100)), + ] + text = _assemble_structured_text(regions, FrameType.SLIDE) + self.assertIn("\n\n", text) + + def test_assemble_other_flat(self): + from skill_seekers.cli.video_visual import _assemble_structured_text + from skill_seekers.cli.video_models import FrameType, OCRRegion + + regions = [ + OCRRegion(text="hello", confidence=0.9, bbox=(0, 0, 50, 20)), + OCRRegion(text="world", confidence=0.9, bbox=(0, 30, 50, 50)), + ] + text = _assemble_structured_text(regions, FrameType.OTHER) + self.assertEqual(text, "hello world") + self.assertNotIn("\n", text) + + def test_assemble_empty_regions(self): + from skill_seekers.cli.video_visual import _assemble_structured_text + from skill_seekers.cli.video_models import FrameType + + text = _assemble_structured_text([], FrameType.CODE_EDITOR) + self.assertEqual(text, "") + + +# ============================================================================= +# Test: Cross-Frame Text Continuity (Phase 3) +# ============================================================================= + + +class TestTextContinuity(unittest.TestCase): + """Test cross-frame text tracking and code block detection.""" + + def test_text_similarity_identical(self): + from skill_seekers.cli.video_visual import _text_similarity + + self.assertAlmostEqual(_text_similarity("hello world", 
"hello world"), 1.0) + + def test_text_similarity_empty(self): + from skill_seekers.cli.video_visual import _text_similarity + + self.assertEqual(_text_similarity("", "hello"), 0.0) + self.assertEqual(_text_similarity("hello", ""), 0.0) + self.assertEqual(_text_similarity("", ""), 0.0) + + def test_text_similarity_different(self): + from skill_seekers.cli.video_visual import _text_similarity + + sim = _text_similarity("hello world", "goodbye universe") + self.assertLess(sim, 0.5) + + def test_text_similarity_similar(self): + from skill_seekers.cli.video_visual import _text_similarity + + sim = _text_similarity( + "def hello():\n return 'world'", + "def hello():\n return 'world!'", + ) + self.assertGreater(sim, 0.8) + + def test_tracker_creates_new_block(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType + + tracker = TextBlockTracker() + tracker.update(0, 1.0, "def hello():\n return 'world'", 0.9, FrameType.CODE_EDITOR) + blocks = tracker.finalize() + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].first_seen, 1.0) + self.assertEqual(blocks[0].frame_type, FrameType.CODE_EDITOR) + + def test_tracker_merges_similar_frames(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType + + tracker = TextBlockTracker() + text1 = "def hello():\n return 'world'" + text2 = "def hello():\n return 'world!'" + tracker.update(0, 1.0, text1, 0.8, FrameType.CODE_EDITOR) + tracker.update(1, 2.0, text2, 0.9, FrameType.CODE_EDITOR) + blocks = tracker.finalize() + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].best_text, text2) + self.assertEqual(blocks[0].best_confidence, 0.9) + self.assertEqual(len(blocks[0].frame_indices), 2) + + def test_tracker_creates_separate_blocks_for_different_text(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType + + 
tracker = TextBlockTracker() + tracker.update(0, 1.0, "completely different text about cats", 0.8, FrameType.CODE_EDITOR) + tracker.update(1, 2.0, "unrelated content about dogs and stuff", 0.9, FrameType.CODE_EDITOR) + blocks = tracker.finalize() + self.assertEqual(len(blocks), 2) + + def test_tracker_completes_on_non_code_frame(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType + + tracker = TextBlockTracker() + tracker.update(0, 1.0, "def hello():\n return 'world'", 0.9, FrameType.CODE_EDITOR) + tracker.update(1, 2.0, "slide text", 0.9, FrameType.SLIDE) + # After slide frame, the code block should be completed + tracker.update(2, 3.0, "def hello():\n return 'world'", 0.9, FrameType.CODE_EDITOR) + blocks = tracker.finalize() + # Should have 2 blocks (before and after the slide) + self.assertEqual(len(blocks), 2) + + def test_tracker_ignores_short_text(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType + + tracker = TextBlockTracker() + tracker.update(0, 1.0, "short", 0.9, FrameType.CODE_EDITOR) + blocks = tracker.finalize() + self.assertEqual(len(blocks), 0) + + def test_extract_code_blocks_filters_short(self): + from skill_seekers.cli.video_visual import _extract_code_blocks, TrackedTextBlock + from skill_seekers.cli.video_models import FrameType + + blocks_in = [ + TrackedTextBlock( + first_seen=1.0, + last_seen=2.0, + frame_indices=[0], + text_snapshots=["short"], + frame_type=FrameType.CODE_EDITOR, + best_text="short", + best_confidence=0.9, + ), + ] + code_blocks = _extract_code_blocks(blocks_in) + self.assertEqual(len(code_blocks), 0) + + def test_extract_code_blocks_maps_context(self): + from skill_seekers.cli.video_visual import _extract_code_blocks, TrackedTextBlock + from skill_seekers.cli.video_models import CodeContext, FrameType + + blocks_in = [ + TrackedTextBlock( + first_seen=1.0, + last_seen=2.0, + 
frame_indices=[0, 1], + text_snapshots=["def hello():\n return 'world'"], + frame_type=FrameType.CODE_EDITOR, + best_text="def hello():\n return 'world'", + best_confidence=0.9, + ), + TrackedTextBlock( + first_seen=3.0, + last_seen=4.0, + frame_indices=[2], + text_snapshots=["$ python hello.py\nHello World output"], + frame_type=FrameType.TERMINAL, + best_text="$ python hello.py\nHello World output", + best_confidence=0.8, + ), + ] + code_blocks = _extract_code_blocks(blocks_in) + self.assertEqual(len(code_blocks), 2) + self.assertEqual(code_blocks[0].context, CodeContext.EDITOR) + self.assertEqual(code_blocks[1].context, CodeContext.TERMINAL) + + def test_extract_code_blocks_skips_non_code_frames(self): + from skill_seekers.cli.video_visual import _extract_code_blocks, TrackedTextBlock + from skill_seekers.cli.video_models import FrameType + + blocks_in = [ + TrackedTextBlock( + first_seen=1.0, + last_seen=2.0, + frame_indices=[0], + text_snapshots=["This is a long slide text with lots of content here"], + frame_type=FrameType.SLIDE, + best_text="This is a long slide text with lots of content here", + best_confidence=0.9, + ), + ] + code_blocks = _extract_code_blocks(blocks_in) + self.assertEqual(len(code_blocks), 0) + + def test_extract_visual_data_returns_tuple(self): + """Verify extract_visual_data returns (keyframes, code_blocks) tuple.""" + from skill_seekers.cli.video_visual import extract_visual_data, HAS_OPENCV + + if not HAS_OPENCV: + with self.assertRaises(RuntimeError): + extract_visual_data("test.mp4", [], "/tmp/test") + else: + # If opencv is available, at least verify the signature + import inspect + + sig = inspect.signature(extract_visual_data) + # Check the return annotation + self.assertIn("tuple", str(sig.return_annotation).lower()) + + def test_extract_text_from_frame_returns_tuple(self): + """Verify extract_text_from_frame returns (raw_results, flat_text) tuple.""" + from skill_seekers.cli.video_visual import extract_text_from_frame, 
HAS_EASYOCR + + if not HAS_EASYOCR: + with self.assertRaises(RuntimeError): + extract_text_from_frame("frame.png") + else: + import inspect + + sig = inspect.signature(extract_text_from_frame) + self.assertIn("tuple", str(sig.return_annotation).lower()) + + +# ============================================================================= +# Test: Output Formatting (Phase 4) +# ============================================================================= + + +class TestOutputFormatting(unittest.TestCase): + """Test type-aware output formatting in reference markdown.""" + + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_reference_md_code_block_formatting(self): + """Test that code editor OCR is wrapped in fenced code blocks.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + CodeBlock, + CodeContext, + FrameType, + KeyFrame, + SegmentContentType, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSegment, + VideoSourceType, + ) + + config = { + "name": "test_video", + "output": os.path.join(self.temp_dir, "test_video"), + } + converter = VideoToSkillConverter(config) + + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test123", + source_type=VideoSourceType.YOUTUBE, + title="Code Tutorial", + duration=60.0, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + transcript="Some code content.", + content="### Intro (00:00 - 01:00)\n\nSome code content.", + content_type=SegmentContentType.LIVE_CODING, + confidence=0.9, + keyframes=[ + KeyFrame( + timestamp=5.0, + image_path="/nonexistent/frame.jpg", + frame_type=FrameType.CODE_EDITOR, + ocr_text="def hello():\n return 'world'", + ), + ], + detected_code_blocks=[ + CodeBlock( + code="def hello():\n return 'world'", + 
language="python", + source_frame=5.0, + context=CodeContext.EDITOR, + confidence=0.9, + ), + ], + has_code_on_screen=True, + ), + ], + ), + ], + total_duration_seconds=60.0, + total_segments=1, + ) + + ref_md = converter._generate_reference_md(converter.result.videos[0]) + # OCR text should be in a fenced code block with language hint + self.assertIn("```python", ref_md) + self.assertIn("def hello():", ref_md) + # Detected code subsection should exist + self.assertIn("#### Detected Code", ref_md) + + def test_reference_md_slide_formatting(self): + """Test that slide OCR is formatted as blockquotes.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + FrameType, + KeyFrame, + SegmentContentType, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSegment, + VideoSourceType, + ) + + config = { + "name": "test_video", + "output": os.path.join(self.temp_dir, "test_video"), + } + converter = VideoToSkillConverter(config) + + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test456", + source_type=VideoSourceType.YOUTUBE, + title="Slide Presentation", + duration=60.0, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + content="### Slides\n\nPresentation content.", + content_type=SegmentContentType.SLIDES, + confidence=0.9, + keyframes=[ + KeyFrame( + timestamp=5.0, + image_path="/nonexistent/frame.jpg", + frame_type=FrameType.SLIDE, + ocr_text="Title\n\nSubtitle", + ), + ], + ), + ], + ), + ], + total_duration_seconds=60.0, + total_segments=1, + ) + + ref_md = converter._generate_reference_md(converter.result.videos[0]) + self.assertIn("> Title", ref_md) + self.assertIn("> Subtitle", ref_md) + # Should NOT be in a fenced code block + self.assertNotIn("```", ref_md) + + def test_skill_md_code_block_count(self): + """Test that SKILL.md overview includes code block count.""" + 
from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + CodeBlock, + CodeContext, + KeyFrame, + SegmentContentType, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSegment, + VideoSourceType, + ) + + config = { + "name": "test_video", + "output": os.path.join(self.temp_dir, "test_video"), + } + converter = VideoToSkillConverter(config) + + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test789", + source_type=VideoSourceType.YOUTUBE, + title="Code Tutorial", + duration=60.0, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + content="### Code\n\nSome content.", + content_type=SegmentContentType.LIVE_CODING, + confidence=0.9, + keyframes=[ + KeyFrame( + timestamp=5.0, + image_path="/nonexistent/frame.jpg", + ocr_text="print('hi')", + ), + ], + detected_code_blocks=[ + CodeBlock( + code="print('hi')", + language="python", + source_frame=5.0, + context=CodeContext.EDITOR, + confidence=0.9, + ), + ], + ), + ], + ), + ], + total_duration_seconds=60.0, + total_segments=1, + total_code_blocks=1, + ) + + # Expect the overview to report the single code block set up above + skill_md = converter._generate_skill_md() + self.assertIn("1 code blocks detected", skill_md) + + +# ============================================================================= +# Test: Y-Bucket Consensus Engine (Phase A) +# ============================================================================= + + +class TestYBucketConsensus(unittest.TestCase): + """Test the Y-bucket consensus engine for multi-frame OCR.""" + + def test_single_frame_single_region(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + # One frame carrying a single high-confidence region + engine.add_frame( + 0, + 1.0, + [OCRRegion(text="hello world", confidence=0.9, bbox=(10, 100, 200, 120))], + ) + buckets = 
engine.build_consensus() + self.assertEqual(len(buckets), 1) + self.assertEqual(buckets[0].consensus_text, "hello world") + self.assertAlmostEqual(buckets[0].consensus_confidence, 0.9) + + def test_consensus_from_multiple_frames(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + # Frame 0: low confidence garbled text + engine.add_frame( + 0, + 1.0, + [OCRRegion(text="Dlctionary", confidence=0.3, bbox=(10, 100, 200, 120))], + ) + # Frame 1: medium confidence + engine.add_frame( + 1, + 1.5, + [OCRRegion(text="Dictionary", confidence=0.62, bbox=(10, 102, 200, 122))], + ) + # Frame 2: good confidence + engine.add_frame( + 2, + 2.0, + [OCRRegion(text="Dictionary", confidence=0.85, bbox=(10, 101, 200, 121))], + ) + # The three bboxes differ by at most 2 units vertically, so one bucket is + # expected; two of the three readings agree on "Dictionary" + buckets = engine.build_consensus() + self.assertEqual(len(buckets), 1) + self.assertEqual(buckets[0].consensus_text, "Dictionary") + self.assertGreater(buckets[0].consensus_confidence, 0.5) + + def test_multiple_lines_tracked(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + # Two regions in the same frame, 50 units apart vertically (more than y_tolerance) + engine.add_frame( + 0, + 1.0, + [ + OCRRegion(text="line one", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text="line two", confidence=0.8, bbox=(10, 150, 200, 170)), + ], + ) + buckets = engine.build_consensus() + self.assertEqual(len(buckets), 2) + texts = [b.consensus_text for b in buckets] + self.assertIn("line one", texts) + self.assertIn("line two", texts) + + def test_low_confidence_single_observation_empty(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + # A single observation with confidence 0.2 + engine.add_frame( + 0, + 1.0, + [OCRRegion(text="garbled", confidence=0.2, bbox=(10, 100, 200, 120))], + ) + buckets = 
engine.build_consensus() + self.assertEqual(len(buckets), 1) + # The bucket still exists, but its consensus text is expected to be empty + self.assertEqual(buckets[0].consensus_text, "") + + def test_get_consensus_text_joins_lines(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + engine.add_frame( + 0, + 1.0, + [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text=" return 'world'", confidence=0.8, bbox=(10, 140, 250, 160)), + ], + ) + engine.build_consensus() + text = engine.get_consensus_text() + self.assertIn("def hello():", text) + self.assertIn("return 'world'", text) + # The two lines should be separated by a newline in the joined text + self.assertIn("\n", text) + + def test_reset_clears_state(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine() + engine.add_frame(0, 1.0, [OCRRegion(text="test", confidence=0.9, bbox=(10, 100, 200, 120))]) + engine.reset() + # After reset() the engine should report empty text and zero confidence + self.assertEqual(engine.get_consensus_text(), "") + self.assertEqual(engine.get_consensus_confidence(), 0.0) + + def test_get_bucket_y_centers(self): + from skill_seekers.cli.video_visual import YBucketConsensusEngine + from skill_seekers.cli.video_models import OCRRegion + + engine = YBucketConsensusEngine(y_tolerance=15.0) + engine.add_frame( + 0, + 1.0, + [ + OCRRegion(text="a", confidence=0.9, bbox=(0, 100, 100, 120)), + OCRRegion(text="b", confidence=0.9, bbox=(0, 200, 100, 220)), + ], + ) + centers = engine.get_bucket_y_centers() + self.assertEqual(len(centers), 2) + # 110.0 and 210.0 are the midpoints of the bbox y-ranges (100-120 and 200-220) + self.assertIn(110.0, centers) + self.assertIn(210.0, centers) + + +# ============================================================================= +# Test: Text Group Lifecycle (Phase B) +# ============================================================================= + + +class TestTextGroupLifecycle(unittest.TestCase): + """Test text group assignment and edit detection.""" + + def 
test_single_block_creates_group(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType, OCRRegion + + tracker = TextBlockTracker() + regions = [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text=" return 'world'", confidence=0.8, bbox=(10, 140, 250, 160)), + ] + tracker.update( + 0, + 1.0, + "def hello():\n return 'world'", + 0.85, + FrameType.CODE_EDITOR, + ocr_regions=regions, + ) + tracker.finalize() + groups = tracker.get_text_groups() + # A single code-editor frame should produce exactly one group, labelled "TG-001" + self.assertEqual(len(groups), 1) + self.assertEqual(groups[0].group_id, "TG-001") + self.assertEqual(len(groups[0].appearances), 1) + + def test_same_text_reappears_same_group(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType, OCRRegion + + tracker = TextBlockTracker() + regions = [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text=" return 'world'", confidence=0.8, bbox=(10, 140, 250, 160)), + ] + text = "def hello():\n return 'world'" + + # First appearance + tracker.update(0, 1.0, text, 0.85, FrameType.CODE_EDITOR, ocr_regions=regions) + # Break with non-code frame + tracker.update(1, 5.0, "webcam", 0.5, FrameType.WEBCAM) + # Re-appear + tracker.update(2, 10.0, text, 0.85, FrameType.CODE_EDITOR, ocr_regions=regions) + + tracker.finalize() + groups = tracker.get_text_groups() + # Identical text on both sides of the webcam break: one group, two appearances + self.assertEqual(len(groups), 1) + self.assertEqual(len(groups[0].appearances), 2) + + def test_different_text_creates_new_group(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType, OCRRegion + + tracker = TextBlockTracker() + regions_a = [ + OCRRegion(text="def func_a():", confidence=0.9, bbox=(10, 100, 200, 120)), + ] + regions_b = [ + OCRRegion(text="class TotallyDifferent:", confidence=0.9, bbox=(10, 100, 300, 120)), + ] + + tracker.update(0, 1.0, "def 
func_a():", 0.9, FrameType.CODE_EDITOR, ocr_regions=regions_a) + tracker.update(1, 5.0, "webcam", 0.5, FrameType.WEBCAM) + tracker.update( + 2, 10.0, "class TotallyDifferent:", 0.9, FrameType.CODE_EDITOR, ocr_regions=regions_b + ) + + tracker.finalize() + groups = tracker.get_text_groups() + # Unrelated text after the break is expected to start a second group + self.assertEqual(len(groups), 2) + + def test_edit_detected_between_appearances(self): + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType, OCRRegion + + tracker = TextBlockTracker() + regions_v1 = [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text=" return 'world'", confidence=0.8, bbox=(10, 140, 250, 160)), + ] + regions_v2 = [ + OCRRegion(text="def hello():", confidence=0.9, bbox=(10, 100, 200, 120)), + OCRRegion(text=" return 'hello world'", confidence=0.8, bbox=(10, 140, 250, 160)), + ] + + # First version + tracker.update( + 0, + 1.0, + "def hello():\n return 'world'", + 0.85, + FrameType.CODE_EDITOR, + ocr_regions=regions_v1, + ) + tracker.update(1, 5.0, "webcam", 0.5, FrameType.WEBCAM) + # Modified version + tracker.update( + 2, + 10.0, + "def hello():\n return 'hello world'", + 0.85, + FrameType.CODE_EDITOR, + ocr_regions=regions_v2, + ) + + tracker.finalize() + groups = tracker.get_text_groups() + # The changed second line should stay in the same group and be recorded as an edit + self.assertEqual(len(groups), 1) + self.assertGreaterEqual(len(groups[0].edits), 1) + + def test_tracker_y_bucket_matching(self): + """Test that y-bucket matching works for consecutive code frames.""" + from skill_seekers.cli.video_visual import TextBlockTracker + from skill_seekers.cli.video_models import FrameType, OCRRegion + + tracker = TextBlockTracker() + # Two frames with same y-coordinates but slightly different text + regions_1 = [ + OCRRegion(text="Dlctionary", confidence=0.3, bbox=(10, 100, 200, 120)), + OCRRegion(text="var x = 1", confidence=0.7, bbox=(10, 140, 200, 160)), + ] + regions_2 = [ + OCRRegion(text="Dictionary", confidence=0.8, bbox=(10, 101, 
200, 121)), + OCRRegion(text="var x = 1", confidence=0.9, bbox=(10, 141, 200, 161)), + ] + + tracker.update( + 0, 1.0, "Dlctionary\nvar x = 1", 0.5, FrameType.CODE_EDITOR, ocr_regions=regions_1 + ) + tracker.update( + 1, 2.0, "Dictionary\nvar x = 1", 0.85, FrameType.CODE_EDITOR, ocr_regions=regions_2 + ) + + blocks = tracker.finalize() + # Should be one block (matched by y-bucket overlap) + self.assertEqual(len(blocks), 1) + # Both frames should have been folded into that block + self.assertEqual(len(blocks[0].frame_indices), 2) + + def test_compute_edit_no_changes(self): + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + # Identical old and new line lists: no edit object is expected + result = tracker._compute_edit(["line1", "line2"], ["line1", "line2"], 1.0) + self.assertIsNone(result) + + def test_compute_edit_with_additions(self): + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + # Second list has an extra line, which should be reported as added + result = tracker._compute_edit(["line1"], ["line1", "line2"], 1.0) + self.assertIsNotNone(result) + self.assertIn("line2", result.added_lines) + + def test_compute_edit_with_removals(self): + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + # Second list is missing a line, which should be reported as removed + result = tracker._compute_edit(["line1", "line2"], ["line1"], 1.0) + self.assertIsNotNone(result) + self.assertIn("line2", result.removed_lines) + + +# ============================================================================= +# Test: Text Group Timeline (Phase C) +# ============================================================================= + + +class TestTextGroupTimeline(unittest.TestCase): + """Test TextGroupTimeline data structure.""" + + def test_timeline_serialization(self): + from skill_seekers.cli.video_models import TextGroup, TextGroupTimeline, FrameType + + tg = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0), (10.0, 15.0)], + consensus_lines=[ + {"y_center": 110.0, "text": "def hello():", "confidence": 0.9}, + {"y_center": 150.0, "text": " return 'world'", "confidence": 0.8}, + ], + edits=[], + 
frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline( + text_groups=[tg], + total_code_time=9.0, + total_groups=1, + total_edits=0, + ) + + # Round-trip through to_dict()/from_dict() should preserve the group + d = timeline.to_dict() + self.assertEqual(len(d["text_groups"]), 1) + self.assertEqual(d["total_code_time"], 9.0) + + timeline2 = TextGroupTimeline.from_dict(d) + self.assertEqual(len(timeline2.text_groups), 1) + self.assertEqual(timeline2.text_groups[0].group_id, "TG-001") + + def test_get_groups_at_time(self): + from skill_seekers.cli.video_models import TextGroup, TextGroupTimeline, FrameType + + # Two groups whose appearance windows overlap between t=3.0 and t=5.0 + tg1 = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0)], + consensus_lines=[{"text": "code1", "y_center": 100.0, "confidence": 0.9}], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + tg2 = TextGroup( + group_id="TG-002", + appearances=[(3.0, 8.0)], + consensus_lines=[{"text": "code2", "y_center": 100.0, "confidence": 0.9}], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline(text_groups=[tg1, tg2]) + + # At t=4, both should be active + active = timeline.get_groups_at_time(4.0) + self.assertEqual(len(active), 2) + + # At t=0, none active + active = timeline.get_groups_at_time(0.0) + self.assertEqual(len(active), 0) + + # At t=6, only TG-002 + active = timeline.get_groups_at_time(6.0) + self.assertEqual(len(active), 1) + self.assertEqual(active[0].group_id, "TG-002") + + def test_text_group_full_text(self): + from skill_seekers.cli.video_models import TextGroup, FrameType + + tg = TextGroup( + group_id="TG-001", + consensus_lines=[ + {"y_center": 100.0, "text": "line one", "confidence": 0.9}, + {"y_center": 120.0, "text": "", "confidence": 0.0}, + {"y_center": 140.0, "text": "line three", "confidence": 0.8}, + ], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + # The middle entry has empty text and is expected to be left out of full_text + self.assertEqual(tg.full_text, "line one\nline three") + + def test_text_group_serialization(self): + from skill_seekers.cli.video_models import TextGroup, TextGroupEdit, FrameType + + edit = TextGroupEdit( + 
timestamp=5.0, + added_lines=["new line"], + removed_lines=[], + modified_lines=[{"line_num": 0, "old": "x", "new": "y"}], + ) + tg = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0)], + consensus_lines=[{"y_center": 100.0, "text": "code", "confidence": 0.9}], + edits=[edit], + detected_language="python", + frame_type=FrameType.CODE_EDITOR, + ) + + # Serialized form should carry the id, language and the single edit + d = tg.to_dict() + self.assertEqual(d["group_id"], "TG-001") + self.assertEqual(d["detected_language"], "python") + self.assertEqual(len(d["edits"]), 1) + + # ...and from_dict() should rebuild them, including the nested edit + tg2 = TextGroup.from_dict(d) + self.assertEqual(tg2.group_id, "TG-001") + self.assertEqual(tg2.detected_language, "python") + self.assertEqual(len(tg2.edits), 1) + self.assertEqual(tg2.edits[0].added_lines, ["new line"]) + + def test_code_block_text_group_id(self): + from skill_seekers.cli.video_models import CodeBlock, CodeContext + + cb = CodeBlock( + code="print('hi')", + language="python", + context=CodeContext.EDITOR, + confidence=0.9, + text_group_id="TG-001", + ) + # text_group_id should survive a to_dict()/from_dict() round-trip + d = cb.to_dict() + self.assertEqual(d["text_group_id"], "TG-001") + + cb2 = CodeBlock.from_dict(d) + self.assertEqual(cb2.text_group_id, "TG-001") + + def test_video_info_timeline_serialization(self): + from skill_seekers.cli.video_models import ( + VideoInfo, + VideoSourceType, + TextGroupTimeline, + TextGroup, + FrameType, + ) + + tg = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0)], + consensus_lines=[{"y_center": 100.0, "text": "code", "confidence": 0.9}], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline(text_groups=[tg], total_groups=1) + + info = VideoInfo( + video_id="test", + source_type=VideoSourceType.YOUTUBE, + text_group_timeline=timeline, + ) + # The nested timeline should be serialized as a dict and restored by from_dict() + d = info.to_dict() + self.assertIsNotNone(d["text_group_timeline"]) + self.assertEqual(len(d["text_group_timeline"]["text_groups"]), 1) + + info2 = VideoInfo.from_dict(d) + self.assertIsNotNone(info2.text_group_timeline) + self.assertEqual(len(info2.text_group_timeline.text_groups), 1) + + def 
test_video_info_no_timeline_serialization(self): + from skill_seekers.cli.video_models import VideoInfo, VideoSourceType + + info = VideoInfo(video_id="test", source_type=VideoSourceType.YOUTUBE) + # With no timeline set, the key is still present in the dict but holds None + d = info.to_dict() + self.assertIsNone(d["text_group_timeline"]) + + info2 = VideoInfo.from_dict(d) + self.assertIsNone(info2.text_group_timeline) + + def test_extract_visual_data_returns_3_tuple(self): + """Verify extract_visual_data returns (keyframes, code_blocks, timeline) tuple.""" + from skill_seekers.cli.video_visual import extract_visual_data, HAS_OPENCV + + # Without OpenCV the call itself is expected to raise RuntimeError; with + # OpenCV only the return annotation is inspected and no video is processed + if not HAS_OPENCV: + with self.assertRaises(RuntimeError): + extract_visual_data("test.mp4", [], "/tmp/test") + else: + import inspect + + sig = inspect.signature(extract_visual_data) + self.assertIn("tuple", str(sig.return_annotation).lower()) + self.assertIn("TextGroupTimeline", str(sig.return_annotation)) + + +# ============================================================================= +# Test: Audio-Visual Alignment (Phase D) +# ============================================================================= + + +class TestAudioVisualAlignment(unittest.TestCase): + """Test audio-visual alignment building and rendering.""" + + def test_alignment_serialization(self): + from skill_seekers.cli.video_models import AudioVisualAlignment + + av = AudioVisualAlignment( + text_group_id="TG-001", + start_time=1.0, + end_time=5.0, + on_screen_code="def hello():\n return 'world'", + transcript_during="Now let's define a hello function", + language="python", + ) + # Round-trip through to_dict()/from_dict() should preserve id, language and transcript + d = av.to_dict() + self.assertEqual(d["text_group_id"], "TG-001") + self.assertEqual(d["language"], "python") + + av2 = AudioVisualAlignment.from_dict(d) + self.assertEqual(av2.text_group_id, "TG-001") + self.assertEqual(av2.language, "python") + self.assertIn("hello function", av2.transcript_during) + + def test_build_audio_visual_alignments(self): + from skill_seekers.cli.video_scraper import _build_audio_visual_alignments + from 
skill_seekers.cli.video_models import ( + TextGroup, + TextGroupTimeline, + TranscriptSegment, + TranscriptSource, + FrameType, + ) + + tg = TextGroup( + group_id="TG-001", + appearances=[(10.0, 20.0)], + consensus_lines=[ + {"y_center": 100.0, "text": "def hello():", "confidence": 0.9}, + ], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline(text_groups=[tg]) + + # Four segments: one ending before the (10.0, 20.0) appearance, two inside + # it, and one starting after it + transcript = [ + TranscriptSegment( + text="Before code", start=5.0, end=9.0, source=TranscriptSource.YOUTUBE_MANUAL + ), + TranscriptSegment( + text="Now we define hello", + start=10.0, + end=15.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="and it returns world", + start=15.0, + end=20.0, + source=TranscriptSource.YOUTUBE_MANUAL, + ), + TranscriptSegment( + text="After code", start=21.0, end=25.0, source=TranscriptSource.YOUTUBE_MANUAL + ), + ] + + alignments = _build_audio_visual_alignments(timeline, transcript) + self.assertEqual(len(alignments), 1) + self.assertEqual(alignments[0].text_group_id, "TG-001") + self.assertIn("define hello", alignments[0].transcript_during) + self.assertIn("returns world", alignments[0].transcript_during) + # Before and after should not be included + self.assertNotIn("Before code", alignments[0].transcript_during) + self.assertNotIn("After code", alignments[0].transcript_during) + + def test_build_alignments_no_overlap(self): + from skill_seekers.cli.video_scraper import _build_audio_visual_alignments + from skill_seekers.cli.video_models import ( + TextGroup, + TextGroupTimeline, + TranscriptSegment, + TranscriptSource, + FrameType, + ) + + tg = TextGroup( + group_id="TG-001", + appearances=[(100.0, 110.0)], + consensus_lines=[{"y_center": 100.0, "text": "code", "confidence": 0.9}], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline(text_groups=[tg]) + + # The only transcript segment (0.0-5.0) ends long before the group appears at 100.0 + transcript = [ + TranscriptSegment( + text="Unrelated", start=0.0, end=5.0, source=TranscriptSource.YOUTUBE_MANUAL + ), + ] 
+ + alignments = _build_audio_visual_alignments(timeline, transcript) + self.assertEqual(len(alignments), 0) + + def test_reference_md_code_timeline_section(self): + """Test that Code Timeline section renders correctly.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + FrameType, + TextGroup, + TextGroupTimeline, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSegment, + SegmentContentType, + VideoSourceType, + ) + + config = {"name": "test_video", "output": os.path.join(tempfile.mkdtemp(), "test_video")} + converter = VideoToSkillConverter(config) + + tg = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0)], + consensus_lines=[ + {"y_center": 100.0, "text": "def hello():", "confidence": 0.9}, + {"y_center": 140.0, "text": " return 'world'", "confidence": 0.8}, + ], + edits=[], + frame_type=FrameType.CODE_EDITOR, + ) + timeline = TextGroupTimeline( + text_groups=[tg], total_code_time=4.0, total_groups=1, total_edits=0 + ) + + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test", + source_type=VideoSourceType.YOUTUBE, + title="Test", + duration=60.0, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + text_group_timeline=timeline, + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + content="### Intro\n\nContent.", + content_type=SegmentContentType.LIVE_CODING, + ), + ], + ), + ], + total_duration_seconds=60.0, + total_segments=1, + ) + + ref_md = converter._generate_reference_md(converter.result.videos[0]) + self.assertIn("## Code Timeline", ref_md) + self.assertIn("TG-001", ref_md) + self.assertIn("def hello():", ref_md) + self.assertIn("return 'world'", ref_md) + + def test_reference_md_audio_visual_section(self): + """Test that Audio-Visual Alignment section renders correctly.""" + from skill_seekers.cli.video_scraper import VideoToSkillConverter + from skill_seekers.cli.video_models import ( + 
AudioVisualAlignment, + TranscriptSource, + VideoInfo, + VideoScraperResult, + VideoSegment, + SegmentContentType, + VideoSourceType, + ) + + config = {"name": "test_video", "output": os.path.join(tempfile.mkdtemp(), "test_video")} + converter = VideoToSkillConverter(config) + + converter.result = VideoScraperResult( + videos=[ + VideoInfo( + video_id="test", + source_type=VideoSourceType.YOUTUBE, + title="Test", + duration=60.0, + transcript_source=TranscriptSource.YOUTUBE_MANUAL, + audio_visual_alignments=[ + AudioVisualAlignment( + text_group_id="TG-001", + start_time=1.0, + end_time=5.0, + on_screen_code="def hello():\n return 'world'", + transcript_during="Now we write a hello function", + language="python", + ), + ], + segments=[ + VideoSegment( + index=0, + start_time=0.0, + end_time=60.0, + duration=60.0, + content="### Intro\n\nContent.", + content_type=SegmentContentType.LIVE_CODING, + ), + ], + ), + ], + total_duration_seconds=60.0, + total_segments=1, + ) + + ref_md = converter._generate_reference_md(converter.result.videos[0]) + self.assertIn("## Audio-Visual Alignment", ref_md) + self.assertIn("TG-001", ref_md) + self.assertIn("def hello():", ref_md) + self.assertIn("hello function", ref_md) + self.assertIn("**Narrator:**", ref_md) + + +# ============================================================================= +# Phase E-G Tests: Dark Theme, Multi-Engine OCR, Claude Vision +# ============================================================================= + + +class TestDarkThemePreprocessing(unittest.TestCase): + """Tests for dark theme detection and frame preprocessing.""" + + def test_detect_theme_dark(self): + """Dark image (median < 128) returns 'dark'.""" + import numpy as np + + from skill_seekers.cli.video_visual import _detect_theme + + # Simulate a dark IDE background (median ~30) + dark_img = np.full((100, 200), 30, dtype=np.uint8) + self.assertEqual(_detect_theme(dark_img), "dark") + + def test_detect_theme_light(self): + """Light image 
(median >= 128) returns 'light'.""" + import numpy as np + + from skill_seekers.cli.video_visual import _detect_theme + + # Simulate a light background (median ~220) + light_img = np.full((100, 200), 220, dtype=np.uint8) + self.assertEqual(_detect_theme(light_img), "light") + + def test_preprocess_inverts_dark_frame(self): + """Verify dark code frame gets inverted to produce lighter output.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _preprocess_frame_for_ocr + + # Create a dark frame (simulating dark-theme IDE) + dark_frame = np.full((100, 200, 3), 30, dtype=np.uint8) + # Add some "text" pixels (bright on dark) + dark_frame[40:60, 20:180] = 200 + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, dark_frame) + + try: + result_path = _preprocess_frame_for_ocr(tmp_path, FrameType.CODE_EDITOR) + self.assertNotEqual(result_path, tmp_path) + + result_img = cv2.imread(result_path, cv2.IMREAD_GRAYSCALE) + self.assertIsNotNone(result_img) + + # After inversion + binarization, the output should have higher + # median brightness (white background with dark text) + original_gray = cv2.imread(tmp_path, cv2.IMREAD_GRAYSCALE) + self.assertGreater(float(np.median(result_img)), float(np.median(original_gray))) + + os.unlink(result_path) + finally: + os.unlink(tmp_path) + + def test_preprocess_keeps_light_frame_orientation(self): + """Verify light code frame is binarized but not double-inverted.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _preprocess_frame_for_ocr + + # Create a light frame (white background, dark text) + light_frame = np.full((100, 200, 3), 240, dtype=np.uint8) + 
light_frame[40:60, 20:180] = 30 # dark text + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, light_frame) + + try: + result_path = _preprocess_frame_for_ocr(tmp_path, FrameType.CODE_EDITOR) + # Register removal of the preprocessed copy up front so it is deleted even + # when an assertion below fails. The exists() guard keeps this safe if the + # path was already removed by the finally clause. + self.addCleanup(lambda: os.path.exists(result_path) and os.unlink(result_path)) + self.assertNotEqual(result_path, tmp_path) + + result_img = cv2.imread(result_path, cv2.IMREAD_GRAYSCALE) + self.assertIsNotNone(result_img) + + # Light frame should still have high median (white background preserved) + self.assertGreater(float(np.median(result_img)), 128) + finally: + os.unlink(tmp_path) + + +class TestMultiEngineOCR(unittest.TestCase): + """Tests for multi-engine OCR ensemble voting.""" + + def test_tesseract_ocr_returns_correct_format(self): + """Verify _run_tesseract_ocr returns (bbox, text, confidence) tuples.""" + try: + import pytesseract # noqa: F401 + import cv2 + import numpy as np + except ImportError: + self.skipTest("pytesseract or OpenCV not available") + + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _run_tesseract_ocr + + # Create a simple white image with black text + img = np.full((100, 400), 255, dtype=np.uint8) + cv2.putText(img, "def hello():", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 0, 2) + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, img) + + try: + results = _run_tesseract_ocr(tmp_path, FrameType.CODE_EDITOR) + # Results should be a list of tuples + self.assertIsInstance(results, list) + # Only the shape of each item is checked, not the recognized text + for item in results: + self.assertEqual(len(item), 3) + bbox, text, conf = item + self.assertIsInstance(bbox, list) + self.assertIsInstance(text, str) + self.assertIsInstance(conf, float) + self.assertGreaterEqual(conf, 0.0) + self.assertLessEqual(conf, 1.0) + finally: + os.unlink(tmp_path) + + def test_multi_engine_picks_higher_confidence(self): + """Mock both engines: higher confidence result wins.""" + from 
skill_seekers.cli.video_visual import _pick_better_ocr_result + + # Same bbox for both candidates; only the text and confidence differ + result_high = ([[0, 0], [100, 0], [100, 20], [0, 20]], "def foo():", 0.9) + result_low = ([[0, 0], [100, 0], [100, 20], [0, 20]], "deff fo()", 0.4) + + winner = _pick_better_ocr_result(result_high, result_low) + self.assertEqual(winner[1], "def foo():") + self.assertEqual(winner[2], 0.9) + + def test_multi_engine_code_token_preference(self): + """Result with code tokens preferred over garbage.""" + from skill_seekers.cli.video_visual import _pick_better_ocr_result + + # Garbage has higher confidence but no code tokens + garbage = ([[0, 0], [100, 0], [100, 20], [0, 20]], "chitd Icrate", 0.8) + code = ([[0, 0], [100, 0], [100, 20], [0, 20]], "def create():", 0.6) + + # The lower-confidence candidate is expected to win here + winner = _pick_better_ocr_result(garbage, code) + self.assertEqual(winner[1], "def create():") + + def test_multi_engine_single_engine_fallback(self): + """When one engine returns nothing, use the other.""" + from skill_seekers.cli.video_visual import _merge_by_y_bucket + + easy_results = [ + ([[0, 0], [100, 0], [100, 20], [0, 20]], "line one", 0.8), + ([[0, 30], [100, 30], [100, 50], [0, 50]], "line two", 0.7), + ] + + merged = _merge_by_y_bucket(easy_results, []) + # Should return easy_results when tess is empty + # (the function won't be called with both empty — that's handled upstream) + self.assertEqual(len(merged), 2) + + +class TestClaudeVisionOCR(unittest.TestCase): + """Tests for Claude Vision API OCR fallback.""" + + def test_vision_ocr_no_api_key(self): + """Returns empty when ANTHROPIC_API_KEY is not set.""" + from unittest.mock import patch + + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _ocr_with_claude_vision + + # patch.dict(..., clear=True) empties os.environ inside the with-block and + # restores the original contents on exit + with patch.dict(os.environ, {}, clear=True): + # Ensure no ANTHROPIC_API_KEY + os.environ.pop("ANTHROPIC_API_KEY", None) + text, conf = _ocr_with_claude_vision("/fake/path.png", FrameType.CODE_EDITOR) + self.assertEqual(text, "") + self.assertEqual(conf, 0.0) + + 
def test_vision_ocr_success(self): + """Mock anthropic client returns extracted code.""" + import sys + from unittest.mock import MagicMock, patch + + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _ocr_with_claude_vision + + # Create a minimal image file + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp.write(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) + tmp_path = tmp.name + + try: + mock_response = MagicMock() + mock_content = MagicMock() + mock_content.text = "def hello():\n return 'world'" + mock_response.content = [mock_content] + + mock_client = MagicMock() + mock_client.messages.create.return_value = mock_response + + mock_anthropic = MagicMock() + mock_anthropic.Anthropic.return_value = mock_client + + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), + patch.dict(sys.modules, {"anthropic": mock_anthropic}), + ): + text, conf = _ocr_with_claude_vision(tmp_path, FrameType.CODE_EDITOR) + + self.assertIn("def hello():", text) + self.assertEqual(conf, 0.95) + finally: + os.unlink(tmp_path) + + def test_vision_fallback_on_low_confidence(self): + """Vision API is only called when multi-engine conf < 0.5.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _ocr_with_claude_vision + + # Without API key, vision always returns empty — simulating no-fallback + os.environ.pop("ANTHROPIC_API_KEY", None) + text, conf = _ocr_with_claude_vision("/fake.png", FrameType.CODE_EDITOR) + self.assertEqual(text, "") + self.assertEqual(conf, 0.0) + + +class TestRegionDetection(unittest.TestCase): + """Tests for IDE panel detection and region-based classification.""" + + def test_single_panel_no_dividers(self): + """A uniform frame produces a single full-frame region.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_visual import classify_frame_regions + 
+ # Uniform dark frame — no dividers + img = np.full((400, 800, 3), 35, dtype=np.uint8) + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, img) + + try: + regions = classify_frame_regions(tmp_path) + self.assertEqual(len(regions), 1) + x1, y1, x2, y2, _ft = regions[0] + self.assertEqual((x1, y1), (0, 0)) + self.assertEqual((x2, y2), (800, 400)) + finally: + os.unlink(tmp_path) + + def test_vertical_divider_splits_panels(self): + """A bright vertical line creates two separate panels.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_visual import classify_frame_regions + + # Dark frame with a bright vertical divider at x=400 + img = np.full((600, 800, 3), 35, dtype=np.uint8) + img[:, 398:402] = 200 # 4px bright vertical line + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, img) + + try: + regions = classify_frame_regions(tmp_path) + # Should detect at least 2 panels (left and right of divider) + self.assertGreaterEqual(len(regions), 2) + finally: + os.unlink(tmp_path) + + def test_find_code_bbox_merges_regions(self): + """_find_code_bbox merges multiple code panels into one box.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _find_code_bbox + + regions = [ + (0, 0, 200, 600, FrameType.CODE_EDITOR), + (200, 0, 800, 600, FrameType.WEBCAM), + (800, 0, 1000, 600, FrameType.CODE_EDITOR), + ] + bbox = _find_code_bbox(regions) + self.assertIsNotNone(bbox) + self.assertEqual(bbox, (0, 0, 1000, 600)) + + def test_find_code_bbox_returns_none_for_no_code(self): + """_find_code_bbox returns None when no code regions exist.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _find_code_bbox + + regions = [ + (0, 0, 800, 600, FrameType.WEBCAM), + (800, 
0, 1200, 600, FrameType.DIAGRAM), + ] + self.assertIsNone(_find_code_bbox(regions)) + + def test_small_panels_filtered_out(self): + """Panels smaller than minimum size thresholds are excluded.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_visual import classify_frame_regions + + # Create frame with many thin vertical dividers creating tiny panels + img = np.full((400, 800, 3), 35, dtype=np.uint8) + # Add dividers at x=50, x=100 — creates panels < 200px wide + img[:, 48:52] = 200 + img[:, 98:102] = 200 + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, img) + + try: + regions = classify_frame_regions(tmp_path) + # Tiny panels (< 200px wide) should be filtered out + for x1, _y1, x2, _y2, _ft in regions: + self.assertGreaterEqual(x2 - x1, 200) + finally: + os.unlink(tmp_path) + + def test_crop_code_region(self): + """_crop_code_region saves a cropped version of the frame.""" + try: + import cv2 + import numpy as np + except ImportError: + self.skipTest("OpenCV not available") + + from skill_seekers.cli.video_visual import _crop_code_region + + img = np.full((600, 1000, 3), 100, dtype=np.uint8) + # Mark code region with distinct color + img[100:500, 200:800] = 50 + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp_path = tmp.name + cv2.imwrite(tmp_path, img) + + try: + cropped = _crop_code_region(tmp_path, (200, 100, 800, 500)) + self.assertTrue(os.path.exists(cropped)) + cropped_img = cv2.imread(cropped) + self.assertEqual(cropped_img.shape[:2], (400, 600)) + os.unlink(cropped) + finally: + os.unlink(tmp_path) + + +class TestPerPanelOCR(unittest.TestCase): + """Tests for per-panel sub-section OCR tracking.""" + + def test_get_code_panels_returns_individual_panels(self): + """_get_code_panels returns separate bboxes instead of merging.""" + from skill_seekers.cli.video_models import 
FrameType + from skill_seekers.cli.video_visual import _get_code_panels + + regions = [ + (0, 0, 500, 1080, FrameType.CODE_EDITOR), + (500, 0, 1000, 1080, FrameType.CODE_EDITOR), + (1000, 0, 1920, 1080, FrameType.OTHER), + ] + + panels = _get_code_panels(regions) + self.assertEqual(len(panels), 2) + self.assertEqual(panels[0], (0, 0, 500, 1080)) + self.assertEqual(panels[1], (500, 0, 1000, 1080)) + + def test_get_code_panels_includes_terminals(self): + """_get_code_panels returns terminal panels too.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _get_code_panels + + regions = [ + (0, 0, 960, 540, FrameType.CODE_EDITOR), + (0, 540, 960, 1080, FrameType.TERMINAL), + (960, 0, 1920, 1080, FrameType.OTHER), + ] + + panels = _get_code_panels(regions) + self.assertEqual(len(panels), 2) + + def test_get_code_panels_filters_narrow_panels(self): + """_get_code_panels drops panels narrower than min_width.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _get_code_panels + + regions = [ + (0, 0, 500, 1080, FrameType.CODE_EDITOR), # 500px wide — kept + (500, 0, 1400, 1080, FrameType.CODE_EDITOR), # 900px wide — kept + (1400, 0, 1650, 1080, FrameType.CODE_EDITOR), # 250px wide — dropped + (1650, 0, 1920, 1080, FrameType.CODE_EDITOR), # 270px wide — dropped + ] + + panels = _get_code_panels(regions) + self.assertEqual(len(panels), 2) + self.assertEqual(panels[0], (0, 0, 500, 1080)) + self.assertEqual(panels[1], (500, 0, 1400, 1080)) + + def test_get_code_panels_custom_min_width(self): + """_get_code_panels respects custom min_width.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import _get_code_panels + + regions = [ + (0, 0, 200, 1080, FrameType.CODE_EDITOR), # 200px + (200, 0, 500, 1080, FrameType.CODE_EDITOR), # 300px + ] + + # Default min_width=300 drops the 200px panel + 
self.assertEqual(len(_get_code_panels(regions)), 1) + # Custom min_width=100 keeps both + self.assertEqual(len(_get_code_panels(regions, min_width=100)), 2) + + def test_frame_subsection_serialization(self): + """FrameSubSection to_dict/from_dict round-trips correctly.""" + from skill_seekers.cli.video_models import ( + FrameSubSection, + FrameType, + OCRRegion, + ) + + ss = FrameSubSection( + bbox=(100, 200, 500, 600), + frame_type=FrameType.CODE_EDITOR, + ocr_text="def hello():\n pass", + ocr_regions=[OCRRegion(text="def hello():", confidence=0.9, bbox=(100, 200, 400, 220))], + ocr_confidence=0.9, + panel_id="panel_0_0", + ) + + data = ss.to_dict() + restored = FrameSubSection.from_dict(data) + self.assertEqual(restored.bbox, (100, 200, 500, 600)) + self.assertEqual(restored.frame_type, FrameType.CODE_EDITOR) + self.assertEqual(restored.ocr_text, "def hello():\n pass") + self.assertEqual(len(restored.ocr_regions), 1) + self.assertAlmostEqual(restored.ocr_confidence, 0.9) + self.assertEqual(restored.panel_id, "panel_0_0") + + def test_keyframe_with_sub_sections(self): + """KeyFrame serialization preserves sub_sections.""" + from skill_seekers.cli.video_models import ( + FrameSubSection, + FrameType, + KeyFrame, + ) + + kf = KeyFrame( + timestamp=10.0, + image_path="/tmp/frame.jpg", + frame_type=FrameType.CODE_EDITOR, + sub_sections=[ + FrameSubSection( + bbox=(0, 0, 500, 1080), + frame_type=FrameType.CODE_EDITOR, + ocr_text="panel 1 code", + panel_id="panel_0_0", + ), + FrameSubSection( + bbox=(500, 0, 1000, 1080), + frame_type=FrameType.CODE_EDITOR, + ocr_text="panel 2 code", + panel_id="panel_0_1", + ), + ], + ) + + data = kf.to_dict() + self.assertEqual(len(data["sub_sections"]), 2) + + restored = KeyFrame.from_dict(data) + self.assertEqual(len(restored.sub_sections), 2) + self.assertEqual(restored.sub_sections[0].ocr_text, "panel 1 code") + self.assertEqual(restored.sub_sections[1].panel_id, "panel_0_1") + + def test_tracker_panel_position_matching(self): + 
"""Two calls with overlapping x-range bbox match the same block.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + code = "def hello():\n return 'world'\n# some code here" + + # First frame — left panel + tracker.update( + frame_index=0, + timestamp=1.0, + ocr_text=code, + confidence=0.8, + frame_type=FrameType.CODE_EDITOR, + panel_bbox=(0, 0, 500, 1080), + ) + + # Second frame — same left panel (slightly shifted) + tracker.update( + frame_index=1, + timestamp=2.0, + ocr_text=code + "\n# added line", + confidence=0.85, + frame_type=FrameType.CODE_EDITOR, + panel_bbox=(0, 0, 510, 1080), + ) + + blocks = tracker.finalize() + # Should match as one block due to x-range overlap + self.assertEqual(len(blocks), 1) + self.assertEqual(len(blocks[0].frame_indices), 2) + + def test_tracker_separate_panels_tracked_separately(self): + """Two calls with non-overlapping bboxes create separate blocks.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + left_code = "def left_func():\n return 'left'\n# left panel code" + right_code = "def right_func():\n return 'right'\n# right panel code" + + # Frame 0: left panel + tracker.update( + frame_index=0, + timestamp=1.0, + ocr_text=left_code, + confidence=0.8, + frame_type=FrameType.CODE_EDITOR, + panel_bbox=(0, 0, 500, 1080), + ) + + # Frame 0: right panel (same frame, different panel) + tracker.update( + frame_index=0, + timestamp=1.0, + ocr_text=right_code, + confidence=0.8, + frame_type=FrameType.CODE_EDITOR, + panel_bbox=(520, 0, 1020, 1080), + ) + + blocks = tracker.finalize() + self.assertEqual(len(blocks), 2) + # Verify they tracked different content + texts = {b.best_text for b in blocks} + self.assertIn(left_code, texts) + self.assertIn(right_code, texts) + + +class TestTextGroupPanelId(unittest.TestCase): + """Tests for 
panel_id propagation to TextGroup.""" + + def test_text_group_inherits_panel_id(self): + """Panel ID propagates from TrackedTextBlock to TextGroup.""" + from skill_seekers.cli.video_models import FrameType + from skill_seekers.cli.video_visual import TextBlockTracker + + tracker = TextBlockTracker() + code = "class MyClass:\n def method(self):\n pass" + + tracker.update( + frame_index=0, + timestamp=1.0, + ocr_text=code, + confidence=0.8, + frame_type=FrameType.CODE_EDITOR, + panel_bbox=(0, 0, 500, 1080), + ) + + # Complete blocks and assign text groups + tracker.finalize() + groups = tracker.get_text_groups() + + # TrackedTextBlock should have panel_bbox set + blocks = tracker._completed_blocks + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].panel_bbox, (0, 0, 500, 1080)) + + # The text group should exist (but panel_id propagation depends + # on panel_id being set on the block, which requires the extraction + # loop to set it — here we verify the mechanism works) + self.assertTrue(len(groups) >= 1) + + def test_text_group_panel_id_serialization(self): + """TextGroup panel_id survives to_dict/from_dict.""" + from skill_seekers.cli.video_models import FrameType, TextGroup + + group = TextGroup( + group_id="TG-001", + appearances=[(1.0, 5.0)], + consensus_lines=[{"y_center": 100.0, "text": "hello", "confidence": 0.9}], + frame_type=FrameType.CODE_EDITOR, + panel_id="panel_0_1", + ) + + data = group.to_dict() + self.assertEqual(data["panel_id"], "panel_0_1") + + restored = TextGroup.from_dict(data) + self.assertEqual(restored.panel_id, "panel_0_1") + + +# ============================================================================= +# Video Enhancement Tests +# ============================================================================= + + +class TestVideoEnhanceSourceDetection(unittest.TestCase): + """Test video source detection in utils and enhance_skill.""" + + def test_utils_detect_video_source(self): + """_determine_source_metadata classifies 
video_ files as video_tutorial.""" + from skill_seekers.cli.utils import read_reference_files + + # Create a temp skill dir with a video reference file + with tempfile.TemporaryDirectory() as tmpdir: + refs_dir = os.path.join(tmpdir, "references") + os.makedirs(refs_dir) + video_ref = os.path.join(refs_dir, "video_my_tutorial.md") + with open(video_ref, "w") as f: + f.write("# Test Video\n\nSome content") + + references = read_reference_files(tmpdir) + self.assertIn("video_my_tutorial.md", references) + self.assertEqual(references["video_my_tutorial.md"]["source"], "video_tutorial") + self.assertEqual(references["video_my_tutorial.md"]["confidence"], "high") + + def test_utils_non_video_not_detected(self): + """Regular reference files are not classified as video_tutorial.""" + from skill_seekers.cli.utils import read_reference_files + + with tempfile.TemporaryDirectory() as tmpdir: + refs_dir = os.path.join(tmpdir, "references") + os.makedirs(refs_dir) + ref = os.path.join(refs_dir, "api_reference.md") + with open(ref, "w") as f: + f.write("# API Reference\n\nSome content") + + references = read_reference_files(tmpdir) + self.assertIn("api_reference.md", references) + self.assertNotEqual(references["api_reference.md"]["source"], "video_tutorial") + + +class TestVideoEnhancementPrompt(unittest.TestCase): + """Test video-specific enhancement prompt building.""" + + def test_is_video_source_true(self): + """_is_video_source returns True for video_tutorial references.""" + from unittest.mock import MagicMock + + from skill_seekers.cli.enhance_skill import SkillEnhancer + + # Mock the enhancer (skip API key requirement) + enhancer = MagicMock(spec=SkillEnhancer) + enhancer._is_video_source = SkillEnhancer._is_video_source.__get__(enhancer) + + refs = { + "video_tutorial.md": {"source": "video_tutorial", "confidence": "high"}, + } + self.assertTrue(enhancer._is_video_source(refs)) + + def test_is_video_source_false(self): + """_is_video_source returns False for non-video 
references.""" + from unittest.mock import MagicMock + + from skill_seekers.cli.enhance_skill import SkillEnhancer + + enhancer = MagicMock(spec=SkillEnhancer) + enhancer._is_video_source = SkillEnhancer._is_video_source.__get__(enhancer) + + refs = { + "api.md": {"source": "documentation", "confidence": "high"}, + } + self.assertFalse(enhancer._is_video_source(refs)) + + def test_video_prompt_contains_key_instructions(self): + """Video enhancement prompt contains video-specific instructions.""" + from unittest.mock import MagicMock, PropertyMock + + from skill_seekers.cli.enhance_skill import SkillEnhancer + + enhancer = MagicMock(spec=SkillEnhancer) + enhancer._build_video_enhancement_prompt = ( + SkillEnhancer._build_video_enhancement_prompt.__get__(enhancer) + ) + type(enhancer).skill_dir = PropertyMock( + return_value=type("P", (), {"name": "test-tutorial"})() + ) + + refs = { + "video_test.md": { + "source": "video_tutorial", + "confidence": "high", + "content": "# Test\n\n## Segment 1\nTranscript here\n```\nsome code\n```", + "size": 100, + }, + } + + prompt = enhancer._build_video_enhancement_prompt(refs, "# test\n") + + # Check key video-specific sections are present + self.assertIn("OCR Code Reconstruction", prompt) + self.assertIn("Language Detection", prompt) + self.assertIn("Code Timeline", prompt) + self.assertIn("Audio-Visual Alignment", prompt) + self.assertIn("line numbers", prompt.lower()) + self.assertIn("UI chrome", prompt) + self.assertIn("GDScript", prompt) + self.assertIn("video_test.md", prompt) + + def test_video_prompt_dispatched_automatically(self): + """_build_enhancement_prompt dispatches to video prompt when video source detected.""" + from unittest.mock import MagicMock, PropertyMock + + from skill_seekers.cli.enhance_skill import SkillEnhancer + + enhancer = MagicMock(spec=SkillEnhancer) + enhancer._is_video_source = SkillEnhancer._is_video_source.__get__(enhancer) + enhancer._build_enhancement_prompt = 
SkillEnhancer._build_enhancement_prompt.__get__( + enhancer + ) + enhancer._build_video_enhancement_prompt = ( + SkillEnhancer._build_video_enhancement_prompt.__get__(enhancer) + ) + type(enhancer).skill_dir = PropertyMock(return_value=type("P", (), {"name": "my-video"})()) + + refs = { + "video_tutorial.md": { + "source": "video_tutorial", + "confidence": "high", + "content": "# Video\n\nContent here", + "size": 50, + }, + } + + prompt = enhancer._build_enhancement_prompt(refs, "# SKILL\n") + + # Should use video prompt (has VIDEO TUTORIAL in header) + self.assertIn("VIDEO TUTORIAL", prompt) + self.assertIn("OCR Code Reconstruction", prompt) + + +class TestVideoWorkflowAutoInjection(unittest.TestCase): + """Test that video scraper auto-injects video-tutorial workflow.""" + + def test_workflow_auto_injected(self): + """When no workflow specified, video-tutorial is injected.""" + import argparse + + args = argparse.Namespace( + enhance_level=2, + enhance_workflow=None, + enhance_stage=None, + var=None, + workflow_dry_run=False, + api_key=None, + ) + + # Simulate the auto-injection logic from video_scraper main() + if not getattr(args, "enhance_workflow", None): + args.enhance_workflow = ["video-tutorial"] + + self.assertEqual(args.enhance_workflow, ["video-tutorial"]) + + def test_workflow_not_overridden(self): + """When user specifies workflow, it is NOT overridden.""" + import argparse + + args = argparse.Namespace( + enhance_level=2, + enhance_workflow=["custom-workflow"], + enhance_stage=None, + var=None, + workflow_dry_run=False, + api_key=None, + ) + + # Simulate the auto-injection logic + if not getattr(args, "enhance_workflow", None): + args.enhance_workflow = ["video-tutorial"] + + self.assertEqual(args.enhance_workflow, ["custom-workflow"]) + + def test_video_tutorial_yaml_exists(self): + """video-tutorial.yaml workflow file is bundled.""" + from importlib.resources import files as importlib_files + + try: + pkg = importlib_files("skill_seekers.workflows") 
+ yaml_content = pkg.joinpath("video-tutorial.yaml").read_text(encoding="utf-8") + self.assertIn("video-tutorial", yaml_content) + self.assertIn("ocr_code_cleanup", yaml_content) + self.assertIn("video_scraping", yaml_content) + except Exception: + # If package not installed in editable mode, check file directly + import pathlib + + yaml_path = ( + pathlib.Path(__file__).parent.parent + / "src" + / "skill_seekers" + / "workflows" + / "video-tutorial.yaml" + ) + self.assertTrue(yaml_path.exists(), "video-tutorial.yaml not found") + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index 6d7bf71..ce357a1 100644 --- a/uv.lock +++ b/uv.lock @@ -250,6 +250,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/00/3ed12264094ec91f534fae429945efbaa9f8c666f3aa7061cc3b2a26a0cd/authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0", size = 244115, upload-time = "2026-02-06T14:04:12.141Z" }, ] +[[package]] +name = "av" +version = "16.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203, upload-time = "2026-01-11T07:31:33.772Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/51/2217a9249409d2e88e16e3f16f7c0def9fd3e7ffc4238b2ec211f9935bdb/av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295", size = 26942590, upload-time = "2026-01-09T20:17:58.588Z" }, + { url = "https://files.pythonhosted.org/packages/bf/cd/a7070f4febc76a327c38808e01e2ff6b94531fe0b321af54ea3915165338/av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd", size = 21507910, upload-time = 
"2026-01-09T20:18:02.309Z" }, + { url = "https://files.pythonhosted.org/packages/ae/30/ec812418cd9b297f0238fe20eb0747d8a8b68d82c5f73c56fe519a274143/av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed", size = 38738309, upload-time = "2026-01-09T20:18:04.701Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b8/6c5795bf1f05f45c5261f8bce6154e0e5e86b158a6676650ddd77c28805e/av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619", size = 40293006, upload-time = "2026-01-09T20:18:07.238Z" }, + { url = "https://files.pythonhosted.org/packages/a7/44/5e183bcb9333fc3372ee6e683be8b0c9b515a506894b2d32ff465430c074/av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15", size = 40123516, upload-time = "2026-01-09T20:18:09.955Z" }, + { url = "https://files.pythonhosted.org/packages/12/1d/b5346d582a3c3d958b4d26a2cc63ce607233582d956121eb20d2bbe55c2e/av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3", size = 41463289, upload-time = "2026-01-09T20:18:12.459Z" }, + { url = "https://files.pythonhosted.org/packages/fa/31/acc946c0545f72b8d0d74584cb2a0ade9b7dfe2190af3ef9aa52a2e3c0b1/av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff", size = 31754959, upload-time = "2026-01-09T20:18:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747, upload-time = "2026-01-09T20:18:16.976Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232, upload-time = "2026-01-09T20:18:19.266Z" }, + { url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737, upload-time = "2026-01-09T20:18:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822, upload-time = "2026-01-09T20:18:24.275Z" }, + { url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604, upload-time = "2026-01-09T20:18:26.866Z" }, + { url = "https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955, upload-time = "2026-01-09T20:18:29.461Z" }, + { url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339, upload-time = "2026-01-09T20:18:32.249Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785, upload-time = "2026-01-09T20:18:34.486Z" }, + { url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147, upload-time = "2026-01-09T20:18:36.716Z" }, + { url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197, upload-time = "2026-01-11T09:57:31.937Z" }, + { url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337, upload-time = "2026-01-11T09:57:35.792Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720, upload-time = "2026-01-11T09:57:39.039Z" }, + { url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396, upload-time = "2026-01-11T09:57:42.115Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045, upload-time = "2026-01-11T09:57:45.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/2a/63797a4dde34283dd8054219fcb29294ba1c25d68ba8c8c8a6ae53c62c45/av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587", size = 26916715, upload-time = "2026-01-11T09:57:47.682Z" }, + { url = "https://files.pythonhosted.org/packages/d2/c4/0b49cf730d0ae8cda925402f18ae814aef351f5772d14da72dd87ff66448/av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e", size = 21452167, upload-time = "2026-01-11T09:57:50.606Z" }, + { url = "https://files.pythonhosted.org/packages/51/23/408806503e8d5d840975aad5699b153aaa21eb6de41ade75248a79b7a37f/av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7", size = 39215659, upload-time = "2026-01-11T09:57:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/c4/19/a8528d5bba592b3903f44c28dab9cc653c95fcf7393f382d2751a1d1523e/av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a", size = 40874970, upload-time = "2026-01-11T09:57:56.802Z" }, + { url = "https://files.pythonhosted.org/packages/e8/24/2dbcdf0e929ad56b7df078e514e7bd4ca0d45cba798aff3c8caac097d2f7/av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99", size = 40530345, upload-time = "2026-01-11T09:58:00.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/27/ae91b41207f34e99602d1c72ab6ffd9c51d7c67e3fbcd4e3a6c0e54f882c/av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c", size = 41972163, upload-time = "2026-01-11T09:58:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7a/22158fb923b2a9a00dfab0e96ef2e8a1763a94dd89e666a5858412383d46/av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06", size = 31729230, upload-time = "2026-01-11T09:58:07.254Z" }, + { url = "https://files.pythonhosted.org/packages/7f/f1/878f8687d801d6c4565d57ebec08449c46f75126ebca8e0fed6986599627/av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2", size = 27008769, upload-time = "2026-01-11T09:58:10.266Z" }, + { url = "https://files.pythonhosted.org/packages/30/f1/bd4ce8c8b5cbf1d43e27048e436cbc9de628d48ede088a1d0a993768eb86/av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0", size = 21590588, upload-time = "2026-01-11T09:58:12.629Z" }, + { url = "https://files.pythonhosted.org/packages/1d/dd/c81f6f9209201ff0b5d5bed6da6c6e641eef52d8fbc930d738c3f4f6f75d/av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560", size = 40638029, upload-time = "2026-01-11T09:58:15.399Z" }, + { url = "https://files.pythonhosted.org/packages/15/4d/07edff82b78d0459a6e807e01cd280d3180ce832efc1543de80d77676722/av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97", size = 41970776, upload-time = "2026-01-11T09:58:19.075Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/9d/1f48b354b82fa135d388477cd1b11b81bdd4384bd6a42a60808e2ec2d66b/av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5", size = 41764751, upload-time = "2026-01-11T09:58:22.788Z" }, + { url = "https://files.pythonhosted.org/packages/2f/c7/a509801e98db35ec552dd79da7bdbcff7104044bfeb4c7d196c1ce121593/av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c", size = 43034355, upload-time = "2026-01-11T09:58:26.125Z" }, + { url = "https://files.pythonhosted.org/packages/36/8b/e5f530d9e8f640da5f5c5f681a424c65f9dd171c871cd255d8a861785a6e/av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c", size = 31947047, upload-time = "2026-01-11T09:58:31.867Z" }, + { url = "https://files.pythonhosted.org/packages/df/18/8812221108c27d19f7e5f486a82c827923061edf55f906824ee0fcaadf50/av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4", size = 26916179, upload-time = "2026-01-11T09:58:36.506Z" }, + { url = "https://files.pythonhosted.org/packages/38/ef/49d128a9ddce42a2766fe2b6595bd9c49e067ad8937a560f7838a541464e/av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9", size = 21460168, upload-time = "2026-01-11T09:58:39.231Z" }, + { url = "https://files.pythonhosted.org/packages/e6/a9/b310d390844656fa74eeb8c2750e98030877c75b97551a23a77d3f982741/av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7", size = 39210194, upload-time = "2026-01-11T09:58:42.138Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/7b/e65aae179929d0f173af6e474ad1489b5b5ad4c968a62c42758d619e54cf/av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142", size = 40811675, upload-time = "2026-01-11T09:58:45.871Z" }, + { url = "https://files.pythonhosted.org/packages/54/3f/5d7edefd26b6a5187d6fac0f5065ee286109934f3dea607ef05e53f05b31/av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f", size = 40543942, upload-time = "2026-01-11T09:58:49.759Z" }, + { url = "https://files.pythonhosted.org/packages/1b/24/f8b17897b67be0900a211142f5646a99d896168f54d57c81f3e018853796/av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043", size = 41924336, upload-time = "2026-01-11T09:58:53.344Z" }, + { url = "https://files.pythonhosted.org/packages/1c/cf/d32bc6bbbcf60b65f6510c54690ed3ae1c4ca5d9fafbce835b6056858686/av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88", size = 31735077, upload-time = "2026-01-11T09:58:56.684Z" }, + { url = "https://files.pythonhosted.org/packages/53/f4/9b63dc70af8636399bd933e9df4f3025a0294609510239782c1b746fc796/av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205", size = 27014423, upload-time = "2026-01-11T09:58:59.703Z" }, + { url = "https://files.pythonhosted.org/packages/d1/da/787a07a0d6ed35a0888d7e5cfb8c2ffa202f38b7ad2c657299fac08eb046/av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5", size = 21595536, upload-time = "2026-01-11T09:59:02.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/f4/9a7d8651a611be6e7e3ab7b30bb43779899c8cac5f7293b9fb634c44a3f3/av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2", size = 40642490, upload-time = "2026-01-11T09:59:05.842Z" }, + { url = "https://files.pythonhosted.org/packages/6b/e4/eb79bc538a94b4ff93cd4237d00939cba797579f3272490dd0144c165a21/av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad", size = 41976905, upload-time = "2026-01-11T09:59:09.169Z" }, + { url = "https://files.pythonhosted.org/packages/5e/f5/f6db0dd86b70167a4d55ee0d9d9640983c570d25504f2bde42599f38241e/av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5", size = 41770481, upload-time = "2026-01-11T09:59:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/33651d658e45e16ab7671ea5fcf3d20980ea7983234f4d8d0c63c65581a5/av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649", size = 43036824, upload-time = "2026-01-11T09:59:16.507Z" }, + { url = "https://files.pythonhosted.org/packages/83/41/7f13361db54d7e02f11552575c0384dadaf0918138f4eaa82ea03a9f9580/av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700", size = 31948164, upload-time = "2026-01-11T09:59:19.501Z" }, +] + [[package]] name = "azure-core" version = "1.38.0" @@ -871,6 +928,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/c3/e90f4a4feae6410f914f8ebac129b9ae7a8c92eb60a638012dde42030a9d/cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c", size = 3438528, upload-time = "2025-10-15T23:18:26.227Z" }, ] +[[package]] +name = "ctranslate2" +version = "4.7.1" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyyaml" }, + { name = "setuptools" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/e0/b69c40c3d739b213a78d327071240590792071b4f890e34088b03b95bb1e/ctranslate2-4.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9017a355dd7c6d29dc3bca6e9fc74827306c61b702c66bb1f6b939655e7de3fa", size = 1255773, upload-time = "2026-02-04T06:11:04.769Z" }, + { url = "https://files.pythonhosted.org/packages/51/29/e5c2fc1253e3fb9b2c86997f36524bba182a8ed77fb4f8fe8444a5649191/ctranslate2-4.7.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:6abcd0552285e7173475836f9d133e04dfc3e42ca8e6930f65eaa4b8b13a47fa", size = 11914945, upload-time = "2026-02-04T06:11:06.853Z" }, + { url = "https://files.pythonhosted.org/packages/03/25/e7fe847d3f02c84d2e9c5e8312434fbeab5af3d8916b6c8e2bdbe860d052/ctranslate2-4.7.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8492cba605319e0d7f2760180957d5a2a435dfdebcef1a75d2ade740e6b9fb0b", size = 16547973, upload-time = "2026-02-04T06:11:09.021Z" }, + { url = "https://files.pythonhosted.org/packages/68/75/074ed22bc340c2e26c09af6bf85859b586516e4e2d753b20189936d0dcf7/ctranslate2-4.7.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:688bd82482b5d057eff5bc1e727f11bb9a1277b7e4fce8ab01fd3bb70e69294b", size = 38636471, upload-time = "2026-02-04T06:11:12.146Z" }, + { url = "https://files.pythonhosted.org/packages/76/b6/9baf8a565f6dcdbfbc9cfd179dd6214529838cda4e91e89b616045a670f0/ctranslate2-4.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3b39a5f4e3c87ac91976996458a64ba08a7cbf974dc0be4e6df83a9e040d4bd2", size = 18842389, upload-time = 
"2026-02-04T06:11:15.154Z" }, + { url = "https://files.pythonhosted.org/packages/da/25/41920ccee68e91cb6fa0fc9e8078ab2b7839f2c668f750dc123144cb7c6e/ctranslate2-4.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f74200bab9996b14a57cf6f7cb27d0921ceedc4acc1e905598e3e85b4d75b1ec", size = 1256943, upload-time = "2026-02-04T06:11:17.781Z" }, + { url = "https://files.pythonhosted.org/packages/79/22/bc81fcc9f10ba4da3ffd1a9adec15cfb73cb700b3bbe69c6c8b55d333316/ctranslate2-4.7.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:59b427eb3ac999a746315b03a63942fddd351f511db82ba1a66880d4dea98e25", size = 11916445, upload-time = "2026-02-04T06:11:19.938Z" }, + { url = "https://files.pythonhosted.org/packages/0a/a7/494a66bb02c7926331cadfff51d5ce81f5abfb1e8d05d7f2459082f31b48/ctranslate2-4.7.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:95f0c1051c180669d2a83a44b44b518b2d1683de125f623bbc81ad5dd6f6141c", size = 16696997, upload-time = "2026-02-04T06:11:22.697Z" }, + { url = "https://files.pythonhosted.org/packages/ed/4e/b48f79fd36e5d3c7e12db383aa49814c340921a618ef7364bd0ced670644/ctranslate2-4.7.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed92d9ab0ac6bc7005942be83d68714c80adb0897ab17f98157294ee0374347", size = 38836379, upload-time = "2026-02-04T06:11:26.325Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/8c01ac52e1f26fc4dbe985a35222ae7cd365bbf7ee5db5fd5545d8926f91/ctranslate2-4.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:67d9ad9b69933fbfeee7dcec899b2cd9341d5dca4fdfb53e8ba8c109dc332ee1", size = 18843315, upload-time = "2026-02-04T06:11:29.441Z" }, + { url = "https://files.pythonhosted.org/packages/fc/0f/581de94b64c5f2327a736270bc7e7a5f8fe5cf1ed56a2203b52de4d8986a/ctranslate2-4.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4c0cbd46a23b8dc37ccdbd9b447cb5f7fadc361c90e9df17d82ca84b1f019986", size = 1257089, upload-time = "2026-02-04T06:11:32.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/e9/d55b0e436362f9fe26bd98fefd2dd5d81926121f1d7f799c805e6035bb26/ctranslate2-4.7.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:5b141ddad1da5f84cf3c2a569a56227a37de649a555d376cbd9b80e8f0373dd8", size = 11918502, upload-time = "2026-02-04T06:11:33.986Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ce/9f29f0b0bb4280c2ebafb3ddb6cdff8ef1c2e185ee020c0ec0ecba7dc934/ctranslate2-4.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d00a62544db4a3caaa58a3c50d39b25613c042b430053ae32384d94eb1d40990", size = 16859601, upload-time = "2026-02-04T06:11:36.227Z" }, + { url = "https://files.pythonhosted.org/packages/b3/86/428d270fd72117d19fb48ed3211aa8a3c8bd7577373252962cb634e0fd01/ctranslate2-4.7.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:722b93a89647974cbd182b4c7f87fefc7794fff7fc9cbd0303b6447905cc157e", size = 38995338, upload-time = "2026-02-04T06:11:42.789Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f4/d23dbfb9c62cb642c114a30f05d753ba61d6ffbfd8a3a4012fe85a073bcb/ctranslate2-4.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:d0f734dc3757118094663bdaaf713f5090c55c1927fb330a76bb8b84173940e8", size = 18844949, upload-time = "2026-02-04T06:11:45.436Z" }, + { url = "https://files.pythonhosted.org/packages/34/6d/eb49ba05db286b4ea9d5d3fcf5f5cd0a9a5e218d46349618d5041001e303/ctranslate2-4.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6b2abf2929756e3ec6246057b56df379995661560a2d776af05f9d97f63afcf5", size = 1256960, upload-time = "2026-02-04T06:11:47.487Z" }, + { url = "https://files.pythonhosted.org/packages/45/5a/b9cce7b00d89fc6fdeaf27587aa52d0597b465058563e93ff50910553bdd/ctranslate2-4.7.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:857ef3959d6b1c40dc227c715a36db33db2d097164996d6c75b6db8e30828f52", size = 11918645, upload-time = "2026-02-04T06:11:49.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/03/c0db0a5276599fb44ceafa2f2cb1afd5628808ec406fe036060a39693680/ctranslate2-4.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:393a9e7e989034660526a2c0e8bb65d1924f43d9a5c77d336494a353d16ba2a4", size = 16860452, upload-time = "2026-02-04T06:11:52.276Z" }, + { url = "https://files.pythonhosted.org/packages/0b/03/4e3728ce29d192ee75ed9a2d8589bf4f19edafe5bed3845187de51b179a3/ctranslate2-4.7.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a3d0682f2b9082e31c73d75b45f16cde77355ab76d7e8356a24c3cb2480a6d3", size = 38995174, upload-time = "2026-02-04T06:11:55.477Z" }, + { url = "https://files.pythonhosted.org/packages/9b/15/6e8e87c6a201d69803a79ac2e29623ce7c2cc9cd1df9db99810cca714373/ctranslate2-4.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:baa6d2b10f57933d8c11791e8522659217918722d07bbef2389a443801125fe7", size = 18844953, upload-time = "2026-02-04T06:11:58.519Z" }, + { url = "https://files.pythonhosted.org/packages/fd/73/8a6b7ba18cad0c8667ee221ddab8c361cb70926440e5b8dd0e81924c28ac/ctranslate2-4.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d5dfb076566551f4959dfd0706f94c923c1931def9b7bb249a2caa6ab23353a0", size = 1257560, upload-time = "2026-02-04T06:12:00.926Z" }, + { url = "https://files.pythonhosted.org/packages/70/c2/8817ca5d6c1b175b23a12f7c8b91484652f8718a76353317e5919b038733/ctranslate2-4.7.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:eecdb4ed934b384f16e8c01b185b082d6b5ffc7dcbb0b6a6eb48cd465282d957", size = 11918995, upload-time = "2026-02-04T06:12:02.875Z" }, + { url = "https://files.pythonhosted.org/packages/ac/33/b8eb3acc67bbca4d9872fc9ff94db78e6167a7ba5cd932f585d1560effc7/ctranslate2-4.7.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1aa6796edcc3c8d163c9e39c429d50076d266d68980fed9d1b2443f617c67e9e", size = 16844162, upload-time = "2026-02-04T06:12:05.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/11/6474893b07121057035069a0a483fe1cd8c47878213f282afb4c0c6fc275/ctranslate2-4.7.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24c0482c51726430fb83724451921c0e539d769c8618dcfd46b1645e7f75960d", size = 38966728, upload-time = "2026-02-04T06:12:07.923Z" }, + { url = "https://files.pythonhosted.org/packages/94/88/8fc7ff435c5e783e5fad9586d839d463e023988dbbbad949d442092d01f1/ctranslate2-4.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:76db234c0446a23d20dd8eeaa7a789cc87d1d05283f48bf3152bae9fa0a69844", size = 19100788, upload-time = "2026-02-04T06:12:10.592Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b3/f100013a76a98d64e67c721bd4559ea4eeb54be3e4ac45f4d801769899af/ctranslate2-4.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:058c9db2277dc8b19ecc86c7937628f69022f341844b9081d2ab642965d88fc6", size = 1280179, upload-time = "2026-02-04T06:12:12.596Z" }, + { url = "https://files.pythonhosted.org/packages/39/22/b77f748015667a5e2ca54a5ee080d7016fce34314f0e8cf904784549305a/ctranslate2-4.7.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:5abcf885062c7f28a3f9a46be8d185795e8706ac6230ad086cae0bc82917df31", size = 11940166, upload-time = "2026-02-04T06:12:14.054Z" }, + { url = "https://files.pythonhosted.org/packages/7d/78/6d7fd52f646c6ba3343f71277a9bbef33734632949d1651231948b0f0359/ctranslate2-4.7.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9950acb04a002d5c60ae90a1ddceead1a803af1f00cadd9b1a1dc76e1f017481", size = 16849483, upload-time = "2026-02-04T06:12:17.082Z" }, + { url = "https://files.pythonhosted.org/packages/40/27/58769ff15ac31b44205bd7a8aeca80cf7357c657ea5df1b94ce0f5c83771/ctranslate2-4.7.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1dcc734e92e3f1ceeaa0c42bbfd009352857be179ecd4a7ed6cccc086a202f58", size = 38949393, upload-time = "2026-02-04T06:12:21.302Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/5c/9fa0ad6462b62efd0fb5ac1100eee47bc96ecc198ff4e237c731e5473616/ctranslate2-4.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:dfb7657bdb7b8211c8f9ecb6f3b70bc0db0e0384d01a8b1808cb66fe7199df59", size = 19123451, upload-time = "2026-02-04T06:12:24.115Z" }, +] + [[package]] name = "cuda-bindings" version = "12.9.4" @@ -978,6 +1078,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, ] +[[package]] +name = "easyocr" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ninja" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "opencv-python-headless" }, + { name = "pillow" }, + { name = "pyclipper" }, + { name = "python-bidi" }, + { name = "pyyaml" }, + { name = "scikit-image", version = "0.25.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-image", version = "0.26.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "shapely" }, + { name = "torch" }, + { name = "torchvision" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/bb/84/4a2cab0e6adde6a85e7ba543862e5fc0250c51f3ac721a078a55cdcff250/easyocr-1.7.2-py3-none-any.whl", hash = "sha256:5be12f9b0e595d443c9c3d10b0542074b50f0ec2d98b141a109cd961fd1c177c", size = 2870178, upload-time = "2024-09-24T11:34:43.554Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.1" @@ -1006,6 +1131,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/8b/c8050e556f5d7a1f33a93c2c94379a0bae23c58a79ad9709d7e052d0c3b8/fastapi-0.128.4-py3-none-any.whl", hash = "sha256:9321282cee605fd2075ccbc95c0f2e549d675c59de4a952bba202cd1730ac66b", size = 103684, upload-time = "2026-02-07T08:14:07.939Z" }, ] +[[package]] +name = "faster-whisper" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "av" }, + { name = "ctranslate2" }, + { name = "huggingface-hub" }, + { name = "onnxruntime" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/99/49ee85903dee060d9f08297b4a342e5e0bcfca2f027a07b4ee0a38ab13f9/faster_whisper-1.2.1-py3-none-any.whl", hash = "sha256:79a66ad50688c0b794dd501dc340a736992a6342f7f95e5811be60b5224a26a7", size = 1118909, upload-time = "2025-10-31T11:35:47.794Z" }, +] + [[package]] name = "ffmpeg-python" version = "0.2.0" @@ -1757,6 +1898,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "imageio" +version = "2.37.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.11'" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/6f/606be632e37bf8d05b253e8626c2291d74c691ddc7bcdf7d6aaf33b32f6a/imageio-2.37.2.tar.gz", hash = "sha256:0212ef2727ac9caa5ca4b2c75ae89454312f440a756fcfc8ef1993e718f50f8a", size = 389600, upload-time = "2025-11-04T14:29:39.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/fe/301e0936b79bcab4cacc7548bf2853fc28dced0a578bab1f7ef53c9aa75b/imageio-2.37.2-py3-none-any.whl", hash = "sha256:ad9adfb20335d718c03de457358ed69f141021a333c40a53e57273d8a5bd0b9b", size = 317646, upload-time = "2025-11-04T14:29:37.948Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.1" @@ -2112,6 +2267,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/8e/063e09c5e8a3dcd77e2a8f0bff3f71c1c52a9d238da1bcafd2df3281da17/langsmith-0.6.9-py3-none-any.whl", hash = "sha256:86ba521e042397f6fbb79d63991df9d5f7b6a6dd6a6323d4f92131291478dcff", size = 319228, upload-time = "2026-02-05T20:10:54.248Z" }, ] +[[package]] +name = "lazy-loader" +version = "0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6b/c875b30a1ba490860c93da4cabf479e03f584eba06fe5963f6f6644653d8/lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1", size = 15431, upload-time = "2024-04-05T13:03:12.261Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097, upload-time = "2024-04-05T13:03:10.514Z" }, +] + [[package]] name = "librt" version = "0.7.8" @@ -3029,6 +3196,32 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] +[[package]] +name = "ninja" +version = "1.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/73/79a0b22fc731989c708068427579e840a6cf4e937fe7ae5c5d0b7356ac22/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978", size = 242558, upload-time = "2025-08-11T15:10:19.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/74/d02409ed2aa865e051b7edda22ad416a39d81a84980f544f8de717cab133/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1", size = 310125, upload-time = "2025-08-11T15:09:50.971Z" }, + { url = "https://files.pythonhosted.org/packages/8e/de/6e1cd6b84b412ac1ef327b76f0641aeb5dcc01e9d3f9eee0286d0c34fd93/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630", size = 177467, upload-time = "2025-08-11T15:09:52.767Z" }, + { url = "https://files.pythonhosted.org/packages/c8/83/49320fb6e58ae3c079381e333575fdbcf1cca3506ee160a2dcce775046fa/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c", size = 187834, upload-time = "2025-08-11T15:09:54.115Z" }, + { url = "https://files.pythonhosted.org/packages/56/c7/ba22748fb59f7f896b609cd3e568d28a0a367a6d953c24c461fe04fc4433/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e", size = 202736, upload-time = "2025-08-11T15:09:55.745Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/22/d1de07632b78ac8e6b785f41fa9aad7a978ec8c0a1bf15772def36d77aac/ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988", size = 179034, upload-time = "2025-08-11T15:09:57.394Z" }, + { url = "https://files.pythonhosted.org/packages/ed/de/0e6edf44d6a04dabd0318a519125ed0415ce437ad5a1ec9b9be03d9048cf/ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa", size = 180716, upload-time = "2025-08-11T15:09:58.696Z" }, + { url = "https://files.pythonhosted.org/packages/54/28/938b562f9057aaa4d6bfbeaa05e81899a47aebb3ba6751e36c027a7f5ff7/ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1", size = 146843, upload-time = "2025-08-11T15:10:00.046Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fb/d06a3838de4f8ab866e44ee52a797b5491df823901c54943b2adb0389fbb/ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2", size = 154402, upload-time = "2025-08-11T15:10:01.657Z" }, + { url = "https://files.pythonhosted.org/packages/31/bf/0d7808af695ceddc763cf251b84a9892cd7f51622dc8b4c89d5012779f06/ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f", size = 552388, upload-time = "2025-08-11T15:10:03.349Z" }, + { url = "https://files.pythonhosted.org/packages/9d/70/c99d0c2c809f992752453cce312848abb3b1607e56d4cd1b6cded317351a/ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714", size = 472501, upload-time = "2025-08-11T15:10:04.735Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/43/c217b1153f0e499652f5e0766da8523ce3480f0a951039c7af115e224d55/ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72", size = 638280, upload-time = "2025-08-11T15:10:06.512Z" }, + { url = "https://files.pythonhosted.org/packages/8c/45/9151bba2c8d0ae2b6260f71696330590de5850e5574b7b5694dce6023e20/ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db", size = 642420, upload-time = "2025-08-11T15:10:08.35Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/95752eb635bb8ad27d101d71bef15bc63049de23f299e312878fc21cb2da/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5", size = 585106, upload-time = "2025-08-11T15:10:09.818Z" }, + { url = "https://files.pythonhosted.org/packages/c1/31/aa56a1a286703800c0cbe39fb4e82811c277772dc8cd084f442dd8e2938a/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96", size = 707138, upload-time = "2025-08-11T15:10:11.366Z" }, + { url = "https://files.pythonhosted.org/packages/34/6f/5f5a54a1041af945130abdb2b8529cbef0cdcbbf9bcf3f4195378319d29a/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200", size = 581758, upload-time = "2025-08-11T15:10:13.295Z" }, + { url = "https://files.pythonhosted.org/packages/95/97/51359c77527d45943fe7a94d00a3843b81162e6c4244b3579fe8fc54cb9c/ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9", size = 267201, upload-time = "2025-08-11T15:10:15.158Z" }, + { url = "https://files.pythonhosted.org/packages/29/45/c0adfbfb0b5895aa18cec400c535b4f7ff3e52536e0403602fc1a23f7de9/ninja-1.13.0-py3-none-win_amd64.whl", hash = 
"sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e", size = 309975, upload-time = "2025-08-11T15:10:16.697Z" }, + { url = "https://files.pythonhosted.org/packages/df/93/a7b983643d1253bb223234b5b226e69de6cda02b76cdca7770f684b795f5/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9", size = 290806, upload-time = "2025-08-11T15:10:18.018Z" }, +] + [[package]] name = "nltk" version = "3.9.2" @@ -3391,6 +3584,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/4b/7c1a00c2c3fbd004253937f7520f692a9650767aa73894d7a34f0d65d3f4/openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183", size = 1067558, upload-time = "2025-12-19T03:28:43.727Z" }, ] +[[package]] +name = "opencv-python" +version = "4.13.0.92" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/6f/5a28fef4c4a382be06afe3938c64cc168223016fa520c5abaf37e8862aa5/opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19", size = 46247052, upload-time = "2026-02-05T07:01:25.046Z" }, + { url = "https://files.pythonhosted.org/packages/08/ac/6c98c44c650b8114a0fb901691351cfb3956d502e8e9b5cd27f4ee7fbf2f/opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9", size = 32568781, upload-time = "2026-02-05T07:01:41.379Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/51/82fed528b45173bf629fa44effb76dff8bc9f4eeaee759038362dfa60237/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a", size = 47685527, upload-time = "2026-02-05T06:59:11.24Z" }, + { url = "https://files.pythonhosted.org/packages/db/07/90b34a8e2cf9c50fe8ed25cac9011cde0676b4d9d9c973751ac7616223a2/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf", size = 70460872, upload-time = "2026-02-05T06:59:19.162Z" }, + { url = "https://files.pythonhosted.org/packages/02/6d/7a9cc719b3eaf4377b9c2e3edeb7ed3a81de41f96421510c0a169ca3cfd4/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616", size = 46708208, upload-time = "2026-02-05T06:59:15.419Z" }, + { url = "https://files.pythonhosted.org/packages/fd/55/b3b49a1b97aabcfbbd6c7326df9cb0b6fa0c0aefa8e89d500939e04aa229/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5", size = 72927042, upload-time = "2026-02-05T06:59:23.389Z" }, + { url = "https://files.pythonhosted.org/packages/fb/17/de5458312bcb07ddf434d7bfcb24bb52c59635ad58c6e7c751b48949b009/opencv_python-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:372fe164a3148ac1ca51e5f3ad0541a4a276452273f503441d718fab9c5e5f59", size = 30932638, upload-time = "2026-02-05T07:02:14.98Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a5/1be1516390333ff9be3a9cb648c9f33df79d5096e5884b5df71a588af463/opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5", size = 40212062, upload-time = "2026-02-05T07:02:12.724Z" }, +] + +[[package]] +name = "opencv-python-headless" 
+version = "4.13.0.92" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/42/2310883be3b8826ac58c3f2787b9358a2d46923d61f88fedf930bc59c60c/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1a7d040ac656c11b8c38677cc8cccdc149f98535089dbe5b081e80a4e5903209", size = 46247192, upload-time = "2026-02-05T07:01:35.187Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1e/6f9e38005a6f7f22af785df42a43139d0e20f169eb5787ce8be37ee7fcc9/opencv_python_headless-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:3e0a6f0a37994ec6ce5f59e936be21d5d6384a4556f2d2da9c2f9c5dc948394c", size = 32568914, upload-time = "2026-02-05T07:01:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/21/76/9417a6aef9def70e467a5bf560579f816148a4c658b7d525581b356eda9e/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c8cfc8e87ed452b5cecb9419473ee5560a989859fe1d10d1ce11ae87b09a2cb", size = 33703709, upload-time = "2026-02-05T10:24:46.469Z" }, + { url = "https://files.pythonhosted.org/packages/92/ce/bd17ff5772938267fd49716e94ca24f616ff4cb1ff4c6be13085108037be/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0525a3d2c0b46c611e2130b5fdebc94cf404845d8fa64d2f3a3b679572a5bd22", size = 56016764, upload-time = "2026-02-05T10:26:48.904Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b4/b7bcbf7c874665825a8c8e1097e93ea25d1f1d210a3e20d4451d01da30aa/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = 
"sha256:eb60e36b237b1ebd40a912da5384b348df8ed534f6f644d8e0b4f103e272ba7d", size = 35010236, upload-time = "2026-02-05T10:28:11.031Z" }, + { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c3/52cfea47cd33e53e8c0fbd6e7c800b457245c1fda7d61660b4ffe9596a7f/opencv_python_headless-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:a7cf08e5b191f4ebb530791acc0825a7986e0d0dee2a3c491184bd8599848a4b", size = 30812232, upload-time = "2026-02-05T07:02:29.594Z" }, + { url = "https://files.pythonhosted.org/packages/4a/90/b338326131ccb2aaa3c2c85d00f41822c0050139a4bfe723cfd95455bd2d/opencv_python_headless-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:77a82fe35ddcec0f62c15f2ba8a12ecc2ed4207c17b0902c7a3151ae29f37fb6", size = 40070414, upload-time = "2026-02-05T07:02:26.448Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.39.1" @@ -4193,6 +4424,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/2b/e18ee7c5ee508a82897f021c1981533eca2940b5f072fc6ed0906c03a7a7/pybase64-1.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:debf737e09b8bf832ba86f5ecc3d3dbd0e3021d6cd86ba4abe962d6a5a77adb3", size = 36134, upload-time = "2025-12-06T13:26:47.35Z" }, ] +[[package]] +name = "pyclipper" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/21/3c06205bb407e1f79b73b7b4dfb3950bd9537c4f625a68ab5cc41177f5bc/pyclipper-1.4.0.tar.gz", hash = "sha256:9882bd889f27da78add4dd6f881d25697efc740bf840274e749988d25496c8e1", size = 54489, upload-time = "2025-12-01T13:15:35.015Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/8a/9f/a10173d32ecc2ce19a04d018163f3ca22a04c0c6ad03b464dcd32f9152a8/pyclipper-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bafad70d2679c187120e8c44e1f9a8b06150bad8c0aecf612ad7dfbfa9510f73", size = 264510, upload-time = "2025-12-01T13:14:46.551Z" }, + { url = "https://files.pythonhosted.org/packages/e0/c2/5490ddc4a1f7ceeaa0258f4266397e720c02db515b2ca5bc69b85676f697/pyclipper-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b74a9dd44b22a7fd35d65fb1ceeba57f3817f34a97a28c3255556362e491447", size = 139498, upload-time = "2025-12-01T13:14:48.31Z" }, + { url = "https://files.pythonhosted.org/packages/3b/0a/bea9102d1d75634b1a5702b0e92982451a1eafca73c4845d3dbe27eba13d/pyclipper-1.4.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a4d2736fb3c42e8eb1d38bf27a720d1015526c11e476bded55138a977c17d9d", size = 970974, upload-time = "2025-12-01T13:14:49.799Z" }, + { url = "https://files.pythonhosted.org/packages/8b/1b/097f8776d5b3a10eb7b443b632221f4ed825d892e79e05682f4b10a1a59c/pyclipper-1.4.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3b3630051b53ad2564cb079e088b112dd576e3d91038338ad1cc7915e0f14dc", size = 943315, upload-time = "2025-12-01T13:14:51.266Z" }, + { url = "https://files.pythonhosted.org/packages/fd/4d/17d6a3f1abf0f368d58f2309e80ee3761afb1fd1342f7780ab32ba4f0b1d/pyclipper-1.4.0-cp310-cp310-win32.whl", hash = "sha256:8d42b07a2f6cfe2d9b87daf345443583f00a14e856927782fde52f3a255e305a", size = 95286, upload-time = "2025-12-01T13:14:52.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/ca/b30138427ed122ec9b47980b943164974a2ec606fa3f71597033b9a9f9a6/pyclipper-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:6a97b961f182b92d899ca88c1bb3632faea2e00ce18d07c5f789666ebb021ca4", size = 104227, upload-time = "2025-12-01T13:14:54.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/e3/64cf7794319b088c288706087141e53ac259c7959728303276d18adc665d/pyclipper-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:adcb7ca33c5bdc33cd775e8b3eadad54873c802a6d909067a57348bcb96e7a2d", size = 264281, upload-time = "2025-12-01T13:14:55.47Z" }, + { url = "https://files.pythonhosted.org/packages/34/cd/44ec0da0306fa4231e76f1c2cb1fa394d7bde8db490a2b24d55b39865f69/pyclipper-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd24849d2b94ec749ceac7c34c9f01010d23b6e9d9216cf2238b8481160e703d", size = 139426, upload-time = "2025-12-01T13:14:56.683Z" }, + { url = "https://files.pythonhosted.org/packages/ad/88/d8f6c6763ea622fe35e19c75d8b39ed6c55191ddc82d65e06bc46b26cb8e/pyclipper-1.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1b6c8d75ba20c6433c9ea8f1a0feb7e4d3ac06a09ad1fd6d571afc1ddf89b869", size = 989649, upload-time = "2025-12-01T13:14:58.28Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e9/ea7d68c8c4af3842d6515bedcf06418610ad75f111e64c92c1d4785a1513/pyclipper-1.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:58e29d7443d7cc0e83ee9daf43927730386629786d00c63b04fe3b53ac01462c", size = 962842, upload-time = "2025-12-01T13:15:00.044Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b7/0b4a272d8726e51ab05e2b933d8cc47f29757fb8212e38b619e170e6015c/pyclipper-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a8d2b5fb75ebe57e21ce61e79a9131edec2622ff23cc665e4d1d1f201bc1a801", size = 95098, upload-time = "2025-12-01T13:15:01.359Z" }, + { url = "https://files.pythonhosted.org/packages/3a/76/4901de2919198bb2bd3d989f86d4a1dff363962425bb2d63e24e6c990042/pyclipper-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:e9b973467d9c5fa9bc30bb6ac95f9f4d7c3d9fc25f6cf2d1cc972088e5955c01", size = 104362, upload-time = "2025-12-01T13:15:02.439Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/1b/7a07b68e0842324d46c03e512d8eefa9cb92ba2a792b3b4ebf939dafcac3/pyclipper-1.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:222ac96c8b8281b53d695b9c4fedc674f56d6d4320ad23f1bdbd168f4e316140", size = 265676, upload-time = "2025-12-01T13:15:04.15Z" }, + { url = "https://files.pythonhosted.org/packages/6b/dd/8bd622521c05d04963420ae6664093f154343ed044c53ea260a310c8bb4d/pyclipper-1.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f3672dbafbb458f1b96e1ee3e610d174acb5ace5bd2ed5d1252603bb797f2fc6", size = 140458, upload-time = "2025-12-01T13:15:05.76Z" }, + { url = "https://files.pythonhosted.org/packages/7a/06/6e3e241882bf7d6ab23d9c69ba4e85f1ec47397cbbeee948a16cf75e21ed/pyclipper-1.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1f807e2b4760a8e5c6d6b4e8c1d71ef52b7fe1946ff088f4fa41e16a881a5ca", size = 978235, upload-time = "2025-12-01T13:15:06.993Z" }, + { url = "https://files.pythonhosted.org/packages/cf/f4/3418c1cd5eea640a9fa2501d4bc0b3655fa8d40145d1a4f484b987990a75/pyclipper-1.4.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ce1f83c9a4e10ea3de1959f0ae79e9a5bd41346dff648fee6228ba9eaf8b3872", size = 961388, upload-time = "2025-12-01T13:15:08.467Z" }, + { url = "https://files.pythonhosted.org/packages/ac/94/c85401d24be634af529c962dd5d781f3cb62a67cd769534df2cb3feee97a/pyclipper-1.4.0-cp312-cp312-win32.whl", hash = "sha256:3ef44b64666ebf1cb521a08a60c3e639d21b8c50bfbe846ba7c52a0415e936f4", size = 95169, upload-time = "2025-12-01T13:15:10.098Z" }, + { url = "https://files.pythonhosted.org/packages/97/77/dfea08e3b230b82ee22543c30c35d33d42f846a77f96caf7c504dd54fab1/pyclipper-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:d1e5498d883b706a4ce636247f0d830c6eb34a25b843a1b78e2c969754ca9037", size = 104619, upload-time = "2025-12-01T13:15:11.592Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/d0/cbce7d47de1e6458f66a4d999b091640134deb8f2c7351eab993b70d2e10/pyclipper-1.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d49df13cbb2627ccb13a1046f3ea6ebf7177b5504ec61bdef87d6a704046fd6e", size = 264342, upload-time = "2025-12-01T13:15:12.697Z" }, + { url = "https://files.pythonhosted.org/packages/ce/cc/742b9d69d96c58ac156947e1b56d0f81cbacbccf869e2ac7229f2f86dc4e/pyclipper-1.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37bfec361e174110cdddffd5ecd070a8064015c99383d95eb692c253951eee8a", size = 139839, upload-time = "2025-12-01T13:15:13.911Z" }, + { url = "https://files.pythonhosted.org/packages/db/48/dd301d62c1529efdd721b47b9e5fb52120fcdac5f4d3405cfc0d2f391414/pyclipper-1.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:14c8bdb5a72004b721c4e6f448d2c2262d74a7f0c9e3076aeff41e564a92389f", size = 972142, upload-time = "2025-12-01T13:15:15.477Z" }, + { url = "https://files.pythonhosted.org/packages/07/bf/d493fd1b33bb090fa64e28c1009374d5d72fa705f9331cd56517c35e381e/pyclipper-1.4.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f2a50c22c3a78cb4e48347ecf06930f61ce98cf9252f2e292aa025471e9d75b1", size = 952789, upload-time = "2025-12-01T13:15:17.042Z" }, + { url = "https://files.pythonhosted.org/packages/cf/88/b95ea8ea21ddca34aa14b123226a81526dd2faaa993f9aabd3ed21231604/pyclipper-1.4.0-cp313-cp313-win32.whl", hash = "sha256:c9a3faa416ff536cee93417a72bfb690d9dea136dc39a39dbbe1e5dadf108c9c", size = 94817, upload-time = "2025-12-01T13:15:18.724Z" }, + { url = "https://files.pythonhosted.org/packages/ba/42/0a1920d276a0e1ca21dc0d13ee9e3ba10a9a8aa3abac76cd5e5a9f503306/pyclipper-1.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:d4b2d7c41086f1927d14947c563dfc7beed2f6c0d9af13c42fe3dcdc20d35832", size = 104007, upload-time = "2025-12-01T13:15:19.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/20/04d58c70f3ccd404f179f8dd81d16722a05a3bf1ab61445ee64e8218c1f8/pyclipper-1.4.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:7c87480fc91a5af4c1ba310bdb7de2f089a3eeef5fe351a3cedc37da1fcced1c", size = 265167, upload-time = "2025-12-01T13:15:20.844Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2e/a570c1abe69b7260ca0caab4236ce6ea3661193ebf8d1bd7f78ccce537a5/pyclipper-1.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81d8bb2d1fb9d66dc7ea4373b176bb4b02443a7e328b3b603a73faec088b952e", size = 139966, upload-time = "2025-12-01T13:15:22.036Z" }, + { url = "https://files.pythonhosted.org/packages/e8/3b/e0859e54adabdde8a24a29d3f525ebb31c71ddf2e8d93edce83a3c212ffc/pyclipper-1.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:773c0e06b683214dcfc6711be230c83b03cddebe8a57eae053d4603dd63582f9", size = 968216, upload-time = "2025-12-01T13:15:23.18Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6b/e3c4febf0a35ae643ee579b09988dd931602b5bf311020535fd9e5b7e715/pyclipper-1.4.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9bc45f2463d997848450dbed91c950ca37c6cf27f84a49a5cad4affc0b469e39", size = 954198, upload-time = "2025-12-01T13:15:24.522Z" }, + { url = "https://files.pythonhosted.org/packages/fc/74/728efcee02e12acb486ce9d56fa037120c9bf5b77c54bbdbaa441c14a9d9/pyclipper-1.4.0-cp314-cp314-win32.whl", hash = "sha256:0b8c2105b3b3c44dbe1a266f64309407fe30bf372cf39a94dc8aaa97df00da5b", size = 96951, upload-time = "2025-12-01T13:15:25.79Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d7/7f4354e69f10a917e5c7d5d72a499ef2e10945312f5e72c414a0a08d2ae4/pyclipper-1.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:6c317e182590c88ec0194149995e3d71a979cfef3b246383f4e035f9d4a11826", size = 106782, upload-time = "2025-12-01T13:15:26.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/60/fc32c7a3d7f61a970511ec2857ecd09693d8ac80d560ee7b8e67a6d268c9/pyclipper-1.4.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:f160a2c6ba036f7eaf09f1f10f4fbfa734234af9112fb5187877efed78df9303", size = 269880, upload-time = "2025-12-01T13:15:28.117Z" }, + { url = "https://files.pythonhosted.org/packages/49/df/c4a72d3f62f0ba03ec440c4fff56cd2d674a4334d23c5064cbf41c9583f6/pyclipper-1.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:a9f11ad133257c52c40d50de7a0ca3370a0cdd8e3d11eec0604ad3c34ba549e9", size = 141706, upload-time = "2025-12-01T13:15:30.134Z" }, + { url = "https://files.pythonhosted.org/packages/c5/0b/cf55df03e2175e1e2da9db585241401e0bc98f76bee3791bed39d0313449/pyclipper-1.4.0-cp314-cp314t-win32.whl", hash = "sha256:bbc827b77442c99deaeee26e0e7f172355ddb097a5e126aea206d447d3b26286", size = 105308, upload-time = "2025-12-01T13:15:31.225Z" }, + { url = "https://files.pythonhosted.org/packages/8f/dc/53df8b6931d47080b4fe4ee8450d42e660ee1c5c1556c7ab73359182b769/pyclipper-1.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:29dae3e0296dff8502eeb7639fcfee794b0eec8590ba3563aee28db269da6b04", size = 117608, upload-time = "2025-12-01T13:15:32.69Z" }, + { url = "https://files.pythonhosted.org/packages/18/59/81050abdc9e5b90ffc2c765738c5e40e9abd8e44864aaa737b600f16c562/pyclipper-1.4.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98b2a40f98e1fc1b29e8a6094072e7e0c7dfe901e573bf6cfc6eb7ce84a7ae87", size = 126495, upload-time = "2025-12-01T13:15:33.743Z" }, +] + [[package]] name = "pycparser" version = "2.23" @@ -4539,6 +4813,90 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] +[[package]] +name = "python-bidi" +version = 
"0.6.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/e3/c0c8bf6fca79ac946a28d57f116e3b9e5b10a4469b6f70bf73f3744c49bf/python_bidi-0.6.7.tar.gz", hash = "sha256:c10065081c0e137975de5d9ba2ff2306286dbf5e0c586d4d5aec87c856239b41", size = 45503, upload-time = "2025-10-22T09:52:49.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/c3/cdbece686fab47d4d04f2c15d372b3d3f3308da2e535657bf4bbd5afef50/python_bidi-0.6.7-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:94dbfd6a6ec0ae64b5262290bf014d6063f9ac8688bda9ec668dc175378d2c80", size = 274857, upload-time = "2025-10-22T09:51:57.298Z" }, + { url = "https://files.pythonhosted.org/packages/aa/19/1cd52f04345717613eafe8b23dd1ce8799116f7cc54b23aaefa27db298d6/python_bidi-0.6.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8274ff02d447cca026ba00f56070ba15f95e184b2d028ee0e4b6c9813d2aaf9", size = 264682, upload-time = "2025-10-22T09:51:48.203Z" }, + { url = "https://files.pythonhosted.org/packages/c7/39/f46dae8bd298ffecaf169ea8871c1e63c6116e1b0178ca4eab2cb99d1c13/python_bidi-0.6.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24afff65c581a5d6f658a9ec027d6719d19a1d8a4401000fdb22d2eeb677b8e3", size = 293680, upload-time = "2025-10-22T09:50:57.091Z" }, + { url = "https://files.pythonhosted.org/packages/96/ed/c4e2c684bf8f226de4d0070780073fc7f3f97def3ad06f11b4c021bfa965/python_bidi-0.6.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8678c2272e7bd60a75f781409e900c9ddb9f01f55c625d83ae0d49dfc6a2674f", size = 302625, upload-time = "2025-10-22T09:51:05.378Z" }, + { url = "https://files.pythonhosted.org/packages/83/fa/3b5be9187515a4c28ad358c2f2785f968d4de090389f08a11c826ae1c17f/python_bidi-0.6.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cd82e65b5aeb31bd73534e61ece1cab625f4bcbdc13bc4ddc5f8cbfb37c24a", size = 441183, upload-time = 
"2025-10-22T09:51:14.014Z" }, + { url = "https://files.pythonhosted.org/packages/d7/c7/023028ca45e674b67abee29a049fb3b7aac74873181940a1d34ad27e23cd/python_bidi-0.6.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dde1c3f3edb1f0095dcbf79cf8a0bb768f9539e809d0ad010d78200eea97d42a", size = 326788, upload-time = "2025-10-22T09:51:22.58Z" }, + { url = "https://files.pythonhosted.org/packages/d3/30/0753601fdad405e806c89cfa9603ff75241f8c7196cfe2cb37c43e34cdbd/python_bidi-0.6.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c463ae15e94b1c6a8a50bd671d6166b0b0d779fd1e56cbf46d8a4a84c9aa2d0", size = 302036, upload-time = "2025-10-22T09:51:40.341Z" }, + { url = "https://files.pythonhosted.org/packages/c6/38/e83901206c7161e4fa14f52d1244eb54bad2b9a959be62af7b472cded20a/python_bidi-0.6.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f9fa1257e075eeeed67d21f95e411036b7ca2b5c78f757d4ac66485c191720a", size = 315484, upload-time = "2025-10-22T09:51:32.285Z" }, + { url = "https://files.pythonhosted.org/packages/98/89/cd73185ad92990261b050a30753a693ad22a72ad5dc61b4e3845c58eff75/python_bidi-0.6.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9adeec7cab0f2c2c291bd7faf9fa3fa233365fd0bf1c1c27a6ddd6cc563d4b32", size = 474003, upload-time = "2025-10-22T09:52:06.535Z" }, + { url = "https://files.pythonhosted.org/packages/9f/38/03fd74c68cae08d08a32a4bc2031300a882a7ceab39b7e7fc5a5e37f5b7c/python_bidi-0.6.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3b96744e4709f4445788a3645cea7ef8d7520ccd4fa8bbbfb3b650702e12c1e6", size = 567114, upload-time = "2025-10-22T09:52:17.534Z" }, + { url = "https://files.pythonhosted.org/packages/98/44/e196002ba8317d48ebab4750092a61287574195a3f685232059aa776edf4/python_bidi-0.6.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8860d67dc04dc530b8b4f588f38b7341a76f2ec44a45685a2d54e9dcffa5d15a", size = 493810, upload-time = "2025-10-22T09:52:28.683Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/e2/1d495515d3fea0ecdd8bbb50e573282826ba074bceb2c0430206f94cde68/python_bidi-0.6.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a4319f478ab1b90bbbe9921606ecb7baa0ebf0b332e821d41c3abdf1a30f0c35", size = 465208, upload-time = "2025-10-22T09:52:39.411Z" }, + { url = "https://files.pythonhosted.org/packages/89/c7/fc5b25d017677793435c415c7884f9c60ce7705bd35565280cca3be69fa9/python_bidi-0.6.7-cp310-cp310-win32.whl", hash = "sha256:8d4e621caadfdbc73d36eabdb2f392da850d28c58b020738411d09dda6208509", size = 157426, upload-time = "2025-10-22T09:52:58.114Z" }, + { url = "https://files.pythonhosted.org/packages/85/be/bd323950b98d40ab45f97630c3bfb5ed3a7416b2f71c250bcc1ed1267eb0/python_bidi-0.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:fd87d112eda1f0528074e1f7c0312881816cb75854133021124269a27c6c48dc", size = 161038, upload-time = "2025-10-22T09:52:50.44Z" }, + { url = "https://files.pythonhosted.org/packages/ec/de/c30a13ad95239507af472a5fc2cadd2e5e172055068f12ac39b37922c7f8/python_bidi-0.6.7-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a8892a7da0f617135fe9c92dc7070d13a0f96ab3081f9db7ff5b172a3905bd78", size = 274420, upload-time = "2025-10-22T09:51:58.262Z" }, + { url = "https://files.pythonhosted.org/packages/ad/9f/be5efef7eea5f1e2a6415c4052a988f594dcf5a11a15103f2718d324a35b/python_bidi-0.6.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:06650a164e63e94dc8a291cc9d415b4027cb1cce125bc9b02dac0f34d535ed47", size = 264586, upload-time = "2025-10-22T09:51:49.255Z" }, + { url = "https://files.pythonhosted.org/packages/87/ec/2c374b6de35870817ffb3512c0666ea8c3794ef923b5586c69451e0e5395/python_bidi-0.6.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6df7be07af867ec1d121c92ea827efad4d77b25457c06eeab477b601e82b2340", size = 293672, upload-time = "2025-10-22T09:50:58.504Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/1a/722d7d7128bdc9a530351a0d2fdf2ff5f4af66a865a6bca925f99832e2cc/python_bidi-0.6.7-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:73a88dc333efc42281bd800d5182c8625c6e11d109fc183fe3d7a11d48ab1150", size = 302643, upload-time = "2025-10-22T09:51:06.419Z" }, + { url = "https://files.pythonhosted.org/packages/24/d7/5b9b593dd58fc745233d8476e9f4e0edd437547c78c58340619868470349/python_bidi-0.6.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f24189dc3aea3a0a94391a047076e1014306b39ba17d7a38ebab510553cd1a97", size = 441692, upload-time = "2025-10-22T09:51:15.39Z" }, + { url = "https://files.pythonhosted.org/packages/08/b9/16e7a1db5f022da6654e89875d231ec2e044d42ef7b635feeff61cee564c/python_bidi-0.6.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a507fe6928a27a308e04ebf2065719b7850d1bf9ff1924f4e601ef77758812bd", size = 326933, upload-time = "2025-10-22T09:51:23.631Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a6/45aaec301292c6a07a9cc3168f5d1a92c8adc2ef36a3cd1f227b9caa980c/python_bidi-0.6.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbbffb948a32f9783d1a28bc0c53616f0a76736ed1e7c1d62e3e99a8dfaab869", size = 302034, upload-time = "2025-10-22T09:51:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/71/a3/7e42cce6e153c21b4e5cc96d429a5910909823f6fedd174b64ff67bc76a7/python_bidi-0.6.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f7e507e1e798ebca77ddc9774fd405107833315ad802cfdaa1ab07b6d9154fc8", size = 315738, upload-time = "2025-10-22T09:51:33.409Z" }, + { url = "https://files.pythonhosted.org/packages/43/7c/a5e4c0acc8e6ca61953b4add0576f0483f63b809b5389154e5da13927b0b/python_bidi-0.6.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:849a57d39feaf897955d0b19bbf4796bea53d1bcdf83b82e0a7b059167eb2049", size = 473968, upload-time = "2025-10-22T09:52:07.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/aa/a18bc3cbab7a0e598cbe7b89f2c0913aedcc66dcafce9a4c357465c87859/python_bidi-0.6.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:5ebc19f24e65a1f5c472e26d88e78b9d316e293bc6f205f32de4c4e99276336e", size = 567038, upload-time = "2025-10-22T09:52:18.594Z" }, + { url = "https://files.pythonhosted.org/packages/92/46/fc6c54a8b5bfbee50e650f885ddef4f8c4f92880467ea0bc2bf133747048/python_bidi-0.6.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:24388c77cb00b8aa0f9c84beb7e3e523a3dac4f786ece64a1d8175a07b24da72", size = 493970, upload-time = "2025-10-22T09:52:29.815Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f1/2c15f5b938b2e087e4e950cc14dcead5bedbaabfc6c576dac15739bc0c91/python_bidi-0.6.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:19737d217088ef27014f98eac1827c5913e6fb1dea96332ed84ede61791070d9", size = 465161, upload-time = "2025-10-22T09:52:40.517Z" }, + { url = "https://files.pythonhosted.org/packages/56/d7/73a70a1fb819152485521b8dfe627e14ba9d3d5a65213244ab099adf3600/python_bidi-0.6.7-cp311-cp311-win32.whl", hash = "sha256:95c9de7ebc55ffb777548f2ecaf4b96b0fa0c92f42bf4d897b9f4cd164ec7394", size = 157033, upload-time = "2025-10-22T09:52:59.228Z" }, + { url = "https://files.pythonhosted.org/packages/68/84/06999dc54ea047fe33209af7150df4202ab7ad52deeb66b2c2040ac07884/python_bidi-0.6.7-cp311-cp311-win_amd64.whl", hash = "sha256:898db0ea3e4aaa95b7fecba02a7560dfbf368f9d85053f2875f6d610c4d4ec2c", size = 161282, upload-time = "2025-10-22T09:52:51.467Z" }, + { url = "https://files.pythonhosted.org/packages/e5/03/5b2f3e73501d0f41ebc2b075b49473047c6cdfc3465cf890263fc69e3915/python_bidi-0.6.7-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:11c51579e01f768446a7e13a0059fea1530936a707abcbeaad9467a55cb16073", size = 272536, upload-time = "2025-10-22T09:51:59.721Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/77/c6048e938a73e5a7c6fa3d5e3627a5961109daa728c2e7d050567cecdc26/python_bidi-0.6.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47deaada8949af3a790f2cd73b613f9bfa153b4c9450f91c44a60c3109a81f73", size = 263258, upload-time = "2025-10-22T09:51:50.328Z" }, + { url = "https://files.pythonhosted.org/packages/57/56/ed4dc501cab7de70ce35cd435c86278e4eb1caf238c80bc72297767c9219/python_bidi-0.6.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b38ddfab41d10e780edb431edc30aec89bee4ce43d718e3896e99f33dae5c1d3", size = 292700, upload-time = "2025-10-22T09:50:59.628Z" }, + { url = "https://files.pythonhosted.org/packages/77/6a/1bf06d7544c940ffddd97cd0e02c55348a92163c5495fa18e34217dfbebe/python_bidi-0.6.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2a93b0394cc684d64356b0475858c116f1e335ffbaba388db93bf47307deadfa", size = 300881, upload-time = "2025-10-22T09:51:07.507Z" }, + { url = "https://files.pythonhosted.org/packages/22/1d/ce7577a8f50291c06e94f651ac5de0d1678fc2642af26a5dad9901a0244f/python_bidi-0.6.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec1694134961b71ac05241ac989b49ccf08e232b5834d5fc46f8a7c3bb1c13a9", size = 439125, upload-time = "2025-10-22T09:51:16.559Z" }, + { url = "https://files.pythonhosted.org/packages/a3/87/4cf6dcd58e22f0fd904e7a161c6b73a5f9d17d4d49073fcb089ba62f1469/python_bidi-0.6.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8047c33b85f7790474a1f488bef95689f049976a4e1c6f213a8d075d180a93e4", size = 325816, upload-time = "2025-10-22T09:51:25.12Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0a/4028a088e29ce8f1673e85ec9f64204fc368355c3207e6a71619c2b4579a/python_bidi-0.6.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9de35eb5987da27dd81e371c52142dd8e924bd61c1006003071ea05a735587", size = 300550, upload-time = "2025-10-22T09:51:42.739Z" }, + { 
url = "https://files.pythonhosted.org/packages/1f/05/cac15eba462d5a2407ac4ef1c792c45a948652b00c6bd81eaab3834a62d2/python_bidi-0.6.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a99d898ad1a399d9c8cab5561b3667fd24f4385820ac90c3340aa637aa5adfc9", size = 313017, upload-time = "2025-10-22T09:51:34.905Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b1/3ba91b9ea60fa54a9aa730a5fe432bd73095d55be371244584fc6818eae1/python_bidi-0.6.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5debaab33562fdfc79ffdbd8d9c51cf07b8529de0e889d8cd145d78137aab21e", size = 472798, upload-time = "2025-10-22T09:52:09.079Z" }, + { url = "https://files.pythonhosted.org/packages/50/40/4bf5fb7255e35c218174f322a4d4c80b63b2604d73adc6e32f843e700824/python_bidi-0.6.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c11c62a3cdb9d1426b1536de9e3446cb09c7d025bd4df125275cae221f214899", size = 565234, upload-time = "2025-10-22T09:52:19.703Z" }, + { url = "https://files.pythonhosted.org/packages/bd/81/ad23fb85bff69d0a25729cd3834254b87c3c7caa93d657c8f8edcbed08f6/python_bidi-0.6.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6c051f2d28ca542092d01da8b5fe110fb6191ff58d298a54a93dc183bece63bf", size = 491844, upload-time = "2025-10-22T09:52:31.216Z" }, + { url = "https://files.pythonhosted.org/packages/65/85/103baaf142b2838f583b71904a2454fa31bd2a912ff505c25874f45d6c3e/python_bidi-0.6.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95867a07c5dee0ea2340fe1d0e4f6d9f5c5687d473193b6ee6f86fa44aac45d1", size = 463753, upload-time = "2025-10-22T09:52:41.943Z" }, + { url = "https://files.pythonhosted.org/packages/54/c3/6a5c3b9f42a6b188430c83a7e70a76bc7c0db3354302fce7c8ed94a0c062/python_bidi-0.6.7-cp312-cp312-win32.whl", hash = "sha256:4c73cd980d45bb967799c7f0fc98ea93ae3d65b21ef2ba6abef6a057720bf483", size = 155820, upload-time = "2025-10-22T09:53:00.254Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/c4/683216398ee3abf6b9bb0f26ae15c696fabbe36468ba26d5271f0c11b343/python_bidi-0.6.7-cp312-cp312-win_amd64.whl", hash = "sha256:d524a4ba765bae9b950706472a77a887a525ed21144fe4b41f6190f6e57caa2c", size = 159966, upload-time = "2025-10-22T09:52:52.547Z" }, + { url = "https://files.pythonhosted.org/packages/25/a5/8ad0a448d42fd5d01dd127c1dc5ab974a8ea6e20305ac89a3356dacd3bdf/python_bidi-0.6.7-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c061207212cd1db27bf6140b96dcd0536246f1e13e99bb5d03f4632f8e2ad7f", size = 272129, upload-time = "2025-10-22T09:52:00.761Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c0/a13981fc0427a0d35e96fc4e31fbb0f981b28d0ce08416f98f42d51ea3bc/python_bidi-0.6.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a2eb8fca918c7381531035c3aae31c29a1c1300ab8a63cad1ec3a71331096c78", size = 263174, upload-time = "2025-10-22T09:51:51.401Z" }, + { url = "https://files.pythonhosted.org/packages/9c/32/74034239d0bca32c315cac5c3ec07ef8eb44fa0e8cea1585cad85f5b8651/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:414004fe9cba33d288ff4a04e1c9afe6a737f440595d01b5bbed00d750296bbd", size = 292496, upload-time = "2025-10-22T09:51:00.708Z" }, + { url = "https://files.pythonhosted.org/packages/83/fa/d6c853ed2668b1c12d66e71d4f843d0710d1ccaecc17ce09b35d2b1382a7/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5013ba963e9da606c4c03958cc737ebd5f8b9b8404bd71ab0d580048c746f875", size = 300727, upload-time = "2025-10-22T09:51:09.152Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8d/55685bddfc1fbfa6e28e1c0be7df4023e504de7d2ac1355a3fa610836bc1/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad5f0847da00687f52d2b81828e8d887bdea9eb8686a9841024ea7a0e153028e", size = 438823, upload-time = "2025-10-22T09:51:17.844Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/54/db9e70443f89e3ec6fa70dcd16809c3656d1efe7946076dcd59832f722df/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26a8fe0d532b966708fc5f8aea0602107fde4745a8a5ae961edd3cf02e807d07", size = 325721, upload-time = "2025-10-22T09:51:26.132Z" }, + { url = "https://files.pythonhosted.org/packages/55/c5/98ac9c00f17240f9114c756791f0cd9ba59a5d4b5d84fd1a6d0d50604e82/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6323e943c7672b271ad9575a2232508f17e87e81a78d7d10d6e93040e210eddf", size = 300493, upload-time = "2025-10-22T09:51:43.783Z" }, + { url = "https://files.pythonhosted.org/packages/0b/cb/382538dd7c656eb50408802b9a9466dbd3432bea059410e65a6c14bc79f9/python_bidi-0.6.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:349b89c3110bd25aa56d79418239ca4785d4bcc7a596e63bb996a9696fc6a907", size = 312889, upload-time = "2025-10-22T09:51:36.011Z" }, + { url = "https://files.pythonhosted.org/packages/50/8d/dbc784cecd9b2950ba99c8fef0387ae588837e4e2bfd543be191d18bf9f6/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e7cad66317f12f0fd755fe41ee7c6b06531d2189a9048a8f37addb5109f7e3e3", size = 472798, upload-time = "2025-10-22T09:52:10.446Z" }, + { url = "https://files.pythonhosted.org/packages/83/e6/398d59075265717d2950622ede1d366aff88ffcaa67a30b85709dea72206/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49639743f1230648fd4fb47547f8a48ada9c5ca1426b17ac08e3be607c65394c", size = 564974, upload-time = "2025-10-22T09:52:22.416Z" }, + { url = "https://files.pythonhosted.org/packages/7c/8e/2b939be0651bc2b69c234dc700723a26b93611d5bdd06b253d67d9da3557/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4636d572b357ab9f313c5340915c1cf51e3e54dd069351e02b6b76577fd1a854", size = 491711, upload-time = "2025-10-22T09:52:32.322Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/05/f53739ab2ce2eee0c855479a31b64933f6ff6164f3ddc611d04e4b79d922/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7310312a68fdb1a8249cf114acb5435aa6b6a958b15810f053c1df5f98476e4", size = 463536, upload-time = "2025-10-22T09:52:43.142Z" }, + { url = "https://files.pythonhosted.org/packages/77/c6/800899e2764f723c2ea9172eabcc1a31ffb8b4bb71ea5869158fd83bd437/python_bidi-0.6.7-cp313-cp313-win32.whl", hash = "sha256:ec985386bc3cd54155f2ef0434fccbfd743617ed6fc1a84dae2ab1de6062e0c6", size = 155786, upload-time = "2025-10-22T09:53:01.357Z" }, + { url = "https://files.pythonhosted.org/packages/30/ba/a811c12c1a4b8fa7c0c0963d92c042284c2049b1586615af6b1774b786d9/python_bidi-0.6.7-cp313-cp313-win_amd64.whl", hash = "sha256:f57726b5a90d818625e6996f5116971b7a4ceb888832337d0e2cf43d1c362a90", size = 159863, upload-time = "2025-10-22T09:52:53.537Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/cda302126e878be162bf183eb0bd6dc47ca3e680fb52111e49c62a8ea1eb/python_bidi-0.6.7-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:b0bee27fb596a0f518369c275a965d0448c39a0730e53a030b311bb10562d4d5", size = 271899, upload-time = "2025-10-22T09:52:01.758Z" }, + { url = "https://files.pythonhosted.org/packages/4d/4b/9c15ca0fe795a5c55a39daa391524ac74e26d9187493632d455257771023/python_bidi-0.6.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6c19ab378fefb1f09623f583fcfa12ed42369a998ddfbd39c40908397243c56b", size = 262235, upload-time = "2025-10-22T09:51:52.379Z" }, + { url = "https://files.pythonhosted.org/packages/0f/5e/25b25be64bff05272aa28d8bef2fbbad8415db3159a41703eb2e63dc9824/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:630cee960ba9e3016f95a8e6f725a621ddeff6fd287839f5693ccfab3f3a9b5c", size = 471983, upload-time = "2025-10-22T09:52:12.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/78/a9363f5da1b10d9211514b96ea47ecc95c797ed5ac566684bfece0666082/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:0dbb4bbae212cca5bcf6e522fe8f572aff7d62544557734c2f810ded844d9eea", size = 565016, upload-time = "2025-10-22T09:52:23.515Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ed/37dcb7d3dc250ecdff8120b026c37fcdbeada4111e4d7148c053180bcf54/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1dd0a5ec0d8710905cebb4c9e5018aa8464395a33cb32a3a6c2a951bf1984fe5", size = 491180, upload-time = "2025-10-22T09:52:33.505Z" }, + { url = "https://files.pythonhosted.org/packages/40/a3/50d1f6060a7a500768768f5f8735cb68deba36391248dbf13d5d2c9c0885/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4ea928c31c7364098f853f122868f6f2155d6840661f7ea8b2ccfdf6084eb9f4", size = 463126, upload-time = "2025-10-22T09:52:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/d2/47/712cd7d1068795c57fdf6c4acca00716688aa8b4e353b30de2ed8f599fd6/python_bidi-0.6.7-cp314-cp314-win32.whl", hash = "sha256:f7c055a50d068b3a924bd33a327646346839f55bcb762a26ec3fde8ea5d40564", size = 155793, upload-time = "2025-10-22T09:53:02.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e8/1f86bf699b20220578351f9b7b635ed8b6e84dd51ad3cca08b89513ae971/python_bidi-0.6.7-cp314-cp314-win_amd64.whl", hash = "sha256:8a17631e3e691eec4ae6a370f7b035cf0a5767f4457bd615d11728c23df72e43", size = 159821, upload-time = "2025-10-22T09:52:54.95Z" }, + { url = "https://files.pythonhosted.org/packages/b8/4e/6135798d84b62eea70c0f9435301c2a4ba854e87be93a3fcd1d935266d24/python_bidi-0.6.7-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c9a679b24f5c6f366a0dec75745e1abeae2f597f033d0d54c74cbe62e7e6ae28", size = 276275, upload-time = "2025-10-22T09:52:05.078Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/83/2123596d43e552af9e2806e361646fa579f34a1d1e9e2c1707a0ab6a02dd/python_bidi-0.6.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:05fe5971110013610f0db40505d0b204edc756e92eafac1372a464f8b9162b11", size = 266951, upload-time = "2025-10-22T09:51:56.216Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8c/8d1e1501717227a6d52fc7b9c47a3de61486b024fbdd4821bfad724c0699/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17572944e6d8fb616d111fc702c759da2bf7cedab85a3e4fa2af0c9eb95ed438", size = 295745, upload-time = "2025-10-22T09:51:04.438Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ff/ef04e7f9067c2c5d862b9f8d9a192486c500c8aa295f0fb756c25ab47fc8/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3b63d19f3f56ff7f99bce5ca9ef8c811dbf0f509d8e84c1bc06105ed26a49528", size = 304123, upload-time = "2025-10-22T09:51:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/be/72/b973895e257a7d4cc8365ab094612f6ee885df863a4964d8865b9f534b67/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1350033431d75be749273236dcfc808e54404cd6ece6204cdb1bc4ccc163455", size = 442484, upload-time = "2025-10-22T09:51:21.575Z" }, + { url = "https://files.pythonhosted.org/packages/c1/1a/68ca9d10bc309828e8cdb2d57a30dd7e5753ac8520c8d7a0322daeb9eef7/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c5fb99f774748de283fadf915106f130b74be1bade934b7f73a7a8488b95da1", size = 329149, upload-time = "2025-10-22T09:51:31.232Z" }, + { url = "https://files.pythonhosted.org/packages/03/40/ab450c06167a7de596d99b1ba5cee2c605b3ff184baccf08210ede706b1b/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d28e2bdcadf5b6161bb4ee9313ce41eac746ba57e744168bf723a415a11af05", size = 303529, 
upload-time = "2025-10-22T09:51:46.997Z" }, + { url = "https://files.pythonhosted.org/packages/ec/c5/585b5c413e3b77a32500fb877ea30aa23c45a6064dbd7fe77d87b72cd90b/python_bidi-0.6.7-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3777ae3e088e94df854fbcbd8d59f9239b74aac036cb6bbd19f8035c8e42478", size = 317753, upload-time = "2025-10-22T09:51:39.272Z" }, + { url = "https://files.pythonhosted.org/packages/f9/05/b7b4b447890d614ccb40633f4d65f334bcf9fe3ad13be33aaa54dcbc34f3/python_bidi-0.6.7-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:77bb4cbadf4121db395189065c58c9dd5d1950257cc1983004e6df4a3e2f97ad", size = 476054, upload-time = "2025-10-22T09:52:15.856Z" }, + { url = "https://files.pythonhosted.org/packages/ca/94/64f6d2c09c4426918345b54ca8902f94b663eadd744c9dd89070f546c9bc/python_bidi-0.6.7-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:f1fe71c203f66bc169a393964d5702f9251cfd4d70279cb6453fdd42bd2e675f", size = 568365, upload-time = "2025-10-22T09:52:27.556Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d2/c39a6b82aa0fcedac7cbe6078b78bb9089b43d903f8e00859e42b504bb8e/python_bidi-0.6.7-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:d87ed09e5c9b6d2648e8856a4e556147b9d3cd4d63905fa664dd6706bc414256", size = 495292, upload-time = "2025-10-22T09:52:38.306Z" }, + { url = "https://files.pythonhosted.org/packages/0a/8d/a80f37ab92118e305d7b574306553599f81534c50b4eb23ef34ebe09c09c/python_bidi-0.6.7-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:766d5f5a686eb99b53168a7bdfb338035931a609bdbbcb537cef9e050a86f359", size = 467159, upload-time = "2025-10-22T09:52:48.603Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -5063,6 +5421,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/5b/632a58724221ef03d78ab65062e82a1010e1bef8e8e0b9d7c6d7b8044841/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:473b32699f4200e69801bf5abf93f1a4ecd432a70984df164fc22ccf39c4a6f3", size = 531885, upload-time = "2025-11-19T15:18:27.146Z" }, ] +[[package]] +name = "scenedetect" +version = "0.6.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "platformdirs" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/b1/800d4c1d4da24cd673b921c0b5ffd5bbdcaa2a7f4f4dd86dd2c202a673c6/scenedetect-0.6.7.tar.gz", hash = "sha256:1a2c73b57de2e1656f7896edc8523de7217f361179a8966e947f79d33e40830f", size = 164213, upload-time = "2025-08-25T03:37:24.124Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/e9/05a20eaeed21d2e0761fc4d3819f1f5013a49945133323ba0ce7be8be291/scenedetect-0.6.7-py3-none-any.whl", hash = "sha256:935571453142f5d7d44a8d9bb713fdd89bdb69efdbce92c7dfe09d52c523ac2b", size = 130834, upload-time = "2025-08-25T03:37:22.8Z" }, +] + +[package.optional-dependencies] +opencv = [ + { name = "opencv-python" }, +] + [[package]] name = "schedule" version = "1.2.2" @@ -5072,6 +5451,120 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/a7/84c96b61fd13205f2cafbe263cdb2745965974bdf3e0078f121dfeca5f02/schedule-1.2.2-py3-none-any.whl", hash = "sha256:5bef4a2a0183abf44046ae0d164cadcac21b1db011bdd8102e4a0c1e91e06a7d", size = 12220, upload-time = "2024-05-25T18:41:59.121Z" }, ] +[[package]] +name = "scikit-image" +version = "0.25.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "imageio", marker = "python_full_version < '3.11'" }, + { name = "lazy-loader", marker = "python_full_version < '3.11'" }, + { name = 
"networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "pillow", marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tifffile", version = "2025.5.10", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/a8/3c0f256012b93dd2cb6fda9245e9f4bff7dc0486880b248005f15ea2255e/scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde", size = 22693594, upload-time = "2025-02-18T18:05:24.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/cb/016c63f16065c2d333c8ed0337e18a5cdf9bc32d402e4f26b0db362eb0e2/scikit_image-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d3278f586793176599df6a4cf48cb6beadae35c31e58dc01a98023af3dc31c78", size = 13988922, upload-time = "2025-02-18T18:04:11.069Z" }, + { url = "https://files.pythonhosted.org/packages/30/ca/ff4731289cbed63c94a0c9a5b672976603118de78ed21910d9060c82e859/scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5c311069899ce757d7dbf1d03e32acb38bb06153236ae77fcd820fd62044c063", size = 13192698, upload-time = "2025-02-18T18:04:15.362Z" }, + { url = "https://files.pythonhosted.org/packages/39/6d/a2aadb1be6d8e149199bb9b540ccde9e9622826e1ab42fe01de4c35ab918/scikit_image-0.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be455aa7039a6afa54e84f9e38293733a2622b8c2fb3362b822d459cc5605e99", size = 14153634, upload-time = "2025-02-18T18:04:18.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/08/916e7d9ee4721031b2f625db54b11d8379bd51707afaa3e5a29aecf10bc4/scikit_image-0.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c464b90e978d137330be433df4e76d92ad3c5f46a22f159520ce0fdbea8a09", size = 14767545, upload-time = "2025-02-18T18:04:22.556Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ee/c53a009e3997dda9d285402f19226fbd17b5b3cb215da391c4ed084a1424/scikit_image-0.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:60516257c5a2d2f74387c502aa2f15a0ef3498fbeaa749f730ab18f0a40fd054", size = 12812908, upload-time = "2025-02-18T18:04:26.364Z" }, + { url = "https://files.pythonhosted.org/packages/c4/97/3051c68b782ee3f1fb7f8f5bb7d535cf8cb92e8aae18fa9c1cdf7e15150d/scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17", size = 14003057, upload-time = "2025-02-18T18:04:30.395Z" }, + { url = "https://files.pythonhosted.org/packages/19/23/257fc696c562639826065514d551b7b9b969520bd902c3a8e2fcff5b9e17/scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0", size = 13180335, upload-time = "2025-02-18T18:04:33.449Z" }, + { url = "https://files.pythonhosted.org/packages/ef/14/0c4a02cb27ca8b1e836886b9ec7c9149de03053650e9e2ed0625f248dd92/scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173", size = 14144783, upload-time = "2025-02-18T18:04:36.594Z" }, + { url = "https://files.pythonhosted.org/packages/dd/9b/9fb556463a34d9842491d72a421942c8baff4281025859c84fcdb5e7e602/scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641", size = 14785376, upload-time = "2025-02-18T18:04:39.856Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/ec/b57c500ee85885df5f2188f8bb70398481393a69de44a00d6f1d055f103c/scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b", size = 12791698, upload-time = "2025-02-18T18:04:42.868Z" }, + { url = "https://files.pythonhosted.org/packages/35/8c/5df82881284459f6eec796a5ac2a0a304bb3384eec2e73f35cfdfcfbf20c/scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb", size = 13986000, upload-time = "2025-02-18T18:04:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e6/93bebe1abcdce9513ffec01d8af02528b4c41fb3c1e46336d70b9ed4ef0d/scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed", size = 13235893, upload-time = "2025-02-18T18:04:51.049Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/eda616e33f67129e5979a9eb33c710013caa3aa8a921991e6cc0b22cea33/scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d", size = 14178389, upload-time = "2025-02-18T18:04:54.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b5/b75527c0f9532dd8a93e8e7cd8e62e547b9f207d4c11e24f0006e8646b36/scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824", size = 15003435, upload-time = "2025-02-18T18:04:57.586Z" }, + { url = "https://files.pythonhosted.org/packages/34/e3/49beb08ebccda3c21e871b607c1cb2f258c3fa0d2f609fed0a5ba741b92d/scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2", size = 12899474, upload-time = "2025-02-18T18:05:01.166Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/7c/9814dd1c637f7a0e44342985a76f95a55dd04be60154247679fd96c7169f/scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da", size = 13921841, upload-time = "2025-02-18T18:05:03.963Z" }, + { url = "https://files.pythonhosted.org/packages/84/06/66a2e7661d6f526740c309e9717d3bd07b473661d5cdddef4dd978edab25/scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc", size = 13196862, upload-time = "2025-02-18T18:05:06.986Z" }, + { url = "https://files.pythonhosted.org/packages/4e/63/3368902ed79305f74c2ca8c297dfeb4307269cbe6402412668e322837143/scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341", size = 14117785, upload-time = "2025-02-18T18:05:10.69Z" }, + { url = "https://files.pythonhosted.org/packages/cd/9b/c3da56a145f52cd61a68b8465d6a29d9503bc45bc993bb45e84371c97d94/scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147", size = 14977119, upload-time = "2025-02-18T18:05:13.871Z" }, + { url = "https://files.pythonhosted.org/packages/8a/97/5fcf332e1753831abb99a2525180d3fb0d70918d461ebda9873f66dcc12f/scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f", size = 12885116, upload-time = "2025-02-18T18:05:17.844Z" }, + { url = "https://files.pythonhosted.org/packages/10/cc/75e9f17e3670b5ed93c32456fda823333c6279b144cd93e2c03aa06aa472/scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd", size = 13862801, upload-time = "2025-02-18T18:05:20.783Z" }, +] + +[[package]] +name = 
"scikit-image" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "imageio", marker = "python_full_version >= '3.11'" }, + { name = "lazy-loader", marker = "python_full_version >= '3.11'" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "pillow", marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tifffile", version = "2026.2.24", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/b4/2528bb43c67d48053a7a649a9666432dc307d66ba02e3a6d5c40f46655df/scikit_image-0.26.0.tar.gz", hash = "sha256:f5f970ab04efad85c24714321fcc91613fcb64ef2a892a13167df2f3e59199fa", size = 22729739, upload-time = "2025-12-20T17:12:21.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/16/8a407688b607f86f81f8c649bf0d68a2a6d67375f18c2d660aba20f5b648/scikit_image-0.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b1ede33a0fb3731457eaf53af6361e73dd510f449dac437ab54573b26788baf0", size = 12355510, upload-time = "2025-12-20T17:10:31.628Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f9/7efc088ececb6f6868fd4475e16cfafc11f242ce9ab5fc3557d78b5da0d4/scikit_image-0.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7af7aa331c6846bd03fa28b164c18d0c3fd419dbb888fb05e958ac4257a78fdd", size = 12056334, 
upload-time = "2025-12-20T17:10:34.559Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1e/bc7fb91fb5ff65ef42346c8b7ee8b09b04eabf89235ab7dbfdfd96cbd1ea/scikit_image-0.26.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ea6207d9e9d21c3f464efe733121c0504e494dbdc7728649ff3e23c3c5a4953", size = 13297768, upload-time = "2025-12-20T17:10:37.733Z" }, + { url = "https://files.pythonhosted.org/packages/a5/2a/e71c1a7d90e70da67b88ccc609bd6ae54798d5847369b15d3a8052232f9d/scikit_image-0.26.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74aa5518ccea28121f57a95374581d3b979839adc25bb03f289b1bc9b99c58af", size = 13711217, upload-time = "2025-12-20T17:10:40.935Z" }, + { url = "https://files.pythonhosted.org/packages/d4/59/9637ee12c23726266b91296791465218973ce1ad3e4c56fc81e4d8e7d6e1/scikit_image-0.26.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d5c244656de905e195a904e36dbc18585e06ecf67d90f0482cbde63d7f9ad59d", size = 14337782, upload-time = "2025-12-20T17:10:43.452Z" }, + { url = "https://files.pythonhosted.org/packages/e7/5c/a3e1e0860f9294663f540c117e4bf83d55e5b47c281d475cc06227e88411/scikit_image-0.26.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:21a818ee6ca2f2131b9e04d8eb7637b5c18773ebe7b399ad23dcc5afaa226d2d", size = 14805997, upload-time = "2025-12-20T17:10:45.93Z" }, + { url = "https://files.pythonhosted.org/packages/d3/c6/2eeacf173da041a9e388975f54e5c49df750757fcfc3ee293cdbbae1ea0a/scikit_image-0.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:9490360c8d3f9a7e85c8de87daf7c0c66507960cf4947bb9610d1751928721c7", size = 11878486, upload-time = "2025-12-20T17:10:48.246Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a4/a852c4949b9058d585e762a66bf7e9a2cd3be4795cd940413dfbfbb0ce79/scikit_image-0.26.0-cp311-cp311-win_arm64.whl", hash = "sha256:0baa0108d2d027f34d748e84e592b78acc23e965a5de0e4bb03cf371de5c0581", size = 11346518, upload-time = "2025-12-20T17:10:50.575Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/e8/e13757982264b33a1621628f86b587e9a73a13f5256dad49b19ba7dc9083/scikit_image-0.26.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d454b93a6fa770ac5ae2d33570f8e7a321bb80d29511ce4b6b78058ebe176e8c", size = 12376452, upload-time = "2025-12-20T17:10:52.796Z" }, + { url = "https://files.pythonhosted.org/packages/e3/be/f8dd17d0510f9911f9f17ba301f7455328bf13dae416560126d428de9568/scikit_image-0.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3409e89d66eff5734cd2b672d1c48d2759360057e714e1d92a11df82c87cba37", size = 12061567, upload-time = "2025-12-20T17:10:55.207Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/c70120a6880579fb42b91567ad79feb4772f7be72e8d52fec403a3dde0c6/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c717490cec9e276afb0438dd165b7c3072d6c416709cc0f9f5a4c1070d23a44", size = 13084214, upload-time = "2025-12-20T17:10:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a2/70401a107d6d7466d64b466927e6b96fcefa99d57494b972608e2f8be50f/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df650e79031634ac90b11e64a9eedaf5a5e06fcd09bcd03a34be01745744466", size = 13561683, upload-time = "2025-12-20T17:10:59.49Z" }, + { url = "https://files.pythonhosted.org/packages/13/a5/48bdfd92794c5002d664e0910a349d0a1504671ef5ad358150f21643c79a/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cefd85033e66d4ea35b525bb0937d7f42d4cdcfed2d1888e1570d5ce450d3932", size = 14112147, upload-time = "2025-12-20T17:11:02.083Z" }, + { url = "https://files.pythonhosted.org/packages/ee/b5/ac71694da92f5def5953ca99f18a10fe98eac2dd0a34079389b70b4d0394/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f5bf622d7c0435884e1e141ebbe4b2804e16b2dd23ae4c6183e2ea99233be70", size = 14661625, upload-time = "2025-12-20T17:11:04.528Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/4d/a3cc1e96f080e253dad2251bfae7587cf2b7912bcd76fd43fd366ff35a87/scikit_image-0.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:abed017474593cd3056ae0fe948d07d0747b27a085e92df5474f4955dd65aec0", size = 11911059, upload-time = "2025-12-20T17:11:06.61Z" }, + { url = "https://files.pythonhosted.org/packages/35/8a/d1b8055f584acc937478abf4550d122936f420352422a1a625eef2c605d8/scikit_image-0.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d57e39ef67a95d26860c8caf9b14b8fb130f83b34c6656a77f191fa6d1d04d8", size = 11348740, upload-time = "2025-12-20T17:11:09.118Z" }, + { url = "https://files.pythonhosted.org/packages/4f/48/02357ffb2cca35640f33f2cfe054a4d6d5d7a229b88880a64f1e45c11f4e/scikit_image-0.26.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a2e852eccf41d2d322b8e60144e124802873a92b8d43a6f96331aa42888491c7", size = 12346329, upload-time = "2025-12-20T17:11:11.599Z" }, + { url = "https://files.pythonhosted.org/packages/67/b9/b792c577cea2c1e94cda83b135a656924fc57c428e8a6d302cd69aac1b60/scikit_image-0.26.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:98329aab3bc87db352b9887f64ce8cdb8e75f7c2daa19927f2e121b797b678d5", size = 12031726, upload-time = "2025-12-20T17:11:13.871Z" }, + { url = "https://files.pythonhosted.org/packages/07/a9/9564250dfd65cb20404a611016db52afc6268b2b371cd19c7538ea47580f/scikit_image-0.26.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:915bb3ba66455cf8adac00dc8fdf18a4cd29656aec7ddd38cb4dda90289a6f21", size = 13094910, upload-time = "2025-12-20T17:11:16.2Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b8/0d8eeb5a9fd7d34ba84f8a55753a0a3e2b5b51b2a5a0ade648a8db4a62f7/scikit_image-0.26.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b36ab5e778bf50af5ff386c3ac508027dc3aaeccf2161bdf96bde6848f44d21b", size = 13660939, upload-time = "2025-12-20T17:11:18.464Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/d6/91d8973584d4793d4c1a847d388e34ef1218d835eeddecfc9108d735b467/scikit_image-0.26.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:09bad6a5d5949c7896c8347424c4cca899f1d11668030e5548813ab9c2865dcb", size = 14138938, upload-time = "2025-12-20T17:11:20.919Z" }, + { url = "https://files.pythonhosted.org/packages/39/9a/7e15d8dc10d6bbf212195fb39bdeb7f226c46dd53f9c63c312e111e2e175/scikit_image-0.26.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aeb14db1ed09ad4bee4ceb9e635547a8d5f3549be67fc6c768c7f923e027e6cd", size = 14752243, upload-time = "2025-12-20T17:11:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/8f/58/2b11b933097bc427e42b4a8b15f7de8f24f2bac1fd2779d2aea1431b2c31/scikit_image-0.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:ac529eb9dbd5954f9aaa2e3fe9a3fd9661bfe24e134c688587d811a0233127f1", size = 11906770, upload-time = "2025-12-20T17:11:25.297Z" }, + { url = "https://files.pythonhosted.org/packages/ad/ec/96941474a18a04b69b6f6562a5bd79bd68049fa3728d3b350976eccb8b93/scikit_image-0.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:a2d211bc355f59725efdcae699b93b30348a19416cc9e017f7b2fb599faf7219", size = 11342506, upload-time = "2025-12-20T17:11:27.399Z" }, + { url = "https://files.pythonhosted.org/packages/03/e5/c1a9962b0cf1952f42d32b4a2e48eed520320dbc4d2ff0b981c6fa508b6b/scikit_image-0.26.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9eefb4adad066da408a7601c4c24b07af3b472d90e08c3e7483d4e9e829d8c49", size = 12663278, upload-time = "2025-12-20T17:11:29.358Z" }, + { url = "https://files.pythonhosted.org/packages/ae/97/c1a276a59ce8e4e24482d65c1a3940d69c6b3873279193b7ebd04e5ee56b/scikit_image-0.26.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6caec76e16c970c528d15d1c757363334d5cb3069f9cea93d2bead31820511f3", size = 12405142, upload-time = "2025-12-20T17:11:31.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/4a/f1cbd1357caef6c7993f7efd514d6e53d8fd6f7fe01c4714d51614c53289/scikit_image-0.26.0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a07200fe09b9d99fcdab959859fe0f7db8df6333d6204344425d476850ce3604", size = 12942086, upload-time = "2025-12-20T17:11:33.683Z" }, + { url = "https://files.pythonhosted.org/packages/5b/6f/74d9fb87c5655bd64cf00b0c44dc3d6206d9002e5f6ba1c9aeb13236f6bf/scikit_image-0.26.0-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92242351bccf391fc5df2d1529d15470019496d2498d615beb68da85fe7fdf37", size = 13265667, upload-time = "2025-12-20T17:11:36.11Z" }, + { url = "https://files.pythonhosted.org/packages/a7/73/faddc2413ae98d863f6fa2e3e14da4467dd38e788e1c23346cf1a2b06b97/scikit_image-0.26.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:52c496f75a7e45844d951557f13c08c81487c6a1da2e3c9c8a39fcde958e02cc", size = 14001966, upload-time = "2025-12-20T17:11:38.55Z" }, + { url = "https://files.pythonhosted.org/packages/02/94/9f46966fa042b5d57c8cd641045372b4e0df0047dd400e77ea9952674110/scikit_image-0.26.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:20ef4a155e2e78b8ab973998e04d8a361d49d719e65412405f4dadd9155a61d9", size = 14359526, upload-time = "2025-12-20T17:11:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b4/2840fe38f10057f40b1c9f8fb98a187a370936bf144a4ac23452c5ef1baf/scikit_image-0.26.0-cp313-cp313t-win_amd64.whl", hash = "sha256:c9087cf7d0e7f33ab5c46d2068d86d785e70b05400a891f73a13400f1e1faf6a", size = 12287629, upload-time = "2025-12-20T17:11:43.11Z" }, + { url = "https://files.pythonhosted.org/packages/22/ba/73b6ca70796e71f83ab222690e35a79612f0117e5aaf167151b7d46f5f2c/scikit_image-0.26.0-cp313-cp313t-win_arm64.whl", hash = "sha256:27d58bc8b2acd351f972c6508c1b557cfed80299826080a4d803dd29c51b707e", size = 11647755, upload-time = "2025-12-20T17:11:45.279Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/44/6b744f92b37ae2833fd423cce8f806d2368859ec325a699dc30389e090b9/scikit_image-0.26.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:63af3d3a26125f796f01052052f86806da5b5e54c6abef152edb752683075a9c", size = 12365810, upload-time = "2025-12-20T17:11:47.357Z" }, + { url = "https://files.pythonhosted.org/packages/40/f5/83590d9355191f86ac663420fec741b82cc547a4afe7c4c1d986bf46e4db/scikit_image-0.26.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ce00600cd70d4562ed59f80523e18cdcc1fae0e10676498a01f73c255774aefd", size = 12075717, upload-time = "2025-12-20T17:11:49.483Z" }, + { url = "https://files.pythonhosted.org/packages/72/48/253e7cf5aee6190459fe136c614e2cbccc562deceb4af96e0863f1b8ee29/scikit_image-0.26.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6381edf972b32e4f54085449afde64365a57316637496c1325a736987083e2ab", size = 13161520, upload-time = "2025-12-20T17:11:51.58Z" }, + { url = "https://files.pythonhosted.org/packages/73/c3/cec6a3cbaadfdcc02bd6ff02f3abfe09eaa7f4d4e0a525a1e3a3f4bce49c/scikit_image-0.26.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6624a76c6085218248154cc7e1500e6b488edcd9499004dd0d35040607d7505", size = 13684340, upload-time = "2025-12-20T17:11:53.708Z" }, + { url = "https://files.pythonhosted.org/packages/d4/0d/39a776f675d24164b3a267aa0db9f677a4cb20127660d8bf4fd7fef66817/scikit_image-0.26.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f775f0e420faac9c2aa6757135f4eb468fb7b70e0b67fa77a5e79be3c30ee331", size = 14203839, upload-time = "2025-12-20T17:11:55.89Z" }, + { url = "https://files.pythonhosted.org/packages/ee/25/2514df226bbcedfe9b2caafa1ba7bc87231a0c339066981b182b08340e06/scikit_image-0.26.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede4d6d255cc5da9faeb2f9ba7fedbc990abbc652db429f40a16b22e770bb578", size = 14770021, upload-time = "2025-12-20T17:11:58.014Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/5b/0671dc91c0c79340c3fe202f0549c7d3681eb7640fe34ab68a5f090a7c7f/scikit_image-0.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:0660b83968c15293fd9135e8d860053ee19500d52bf55ca4fb09de595a1af650", size = 12023490, upload-time = "2025-12-20T17:12:00.013Z" }, + { url = "https://files.pythonhosted.org/packages/65/08/7c4cb59f91721f3de07719085212a0b3962e3e3f2d1818cbac4eeb1ea53e/scikit_image-0.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:b8d14d3181c21c11170477a42542c1addc7072a90b986675a71266ad17abc37f", size = 11473782, upload-time = "2025-12-20T17:12:01.983Z" }, + { url = "https://files.pythonhosted.org/packages/49/41/65c4258137acef3d73cb561ac55512eacd7b30bb4f4a11474cad526bc5db/scikit_image-0.26.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:cde0bbd57e6795eba83cb10f71a677f7239271121dc950bc060482834a668ad1", size = 12686060, upload-time = "2025-12-20T17:12:03.886Z" }, + { url = "https://files.pythonhosted.org/packages/e7/32/76971f8727b87f1420a962406388a50e26667c31756126444baf6668f559/scikit_image-0.26.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:163e9afb5b879562b9aeda0dd45208a35316f26cc7a3aed54fd601604e5cf46f", size = 12422628, upload-time = "2025-12-20T17:12:05.921Z" }, + { url = "https://files.pythonhosted.org/packages/37/0d/996febd39f757c40ee7b01cdb861867327e5c8e5f595a634e8201462d958/scikit_image-0.26.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:724f79fd9b6cb6f4a37864fe09f81f9f5d5b9646b6868109e1b100d1a7019e59", size = 12962369, upload-time = "2025-12-20T17:12:07.912Z" }, + { url = "https://files.pythonhosted.org/packages/48/b4/612d354f946c9600e7dea012723c11d47e8d455384e530f6daaaeb9bf62c/scikit_image-0.26.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3268f13310e6857508bd87202620df996199a016a1d281b309441d227c822394", size = 13272431, upload-time = "2025-12-20T17:12:10.255Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/6e/26c00b466e06055a086de2c6e2145fe189ccdc9a1d11ccc7de020f2591ad/scikit_image-0.26.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fac96a1f9b06cd771cbbb3cd96c5332f36d4efd839b1d8b053f79e5887acde62", size = 14016362, upload-time = "2025-12-20T17:12:12.793Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/00a90402e1775634043c2a0af8a3c76ad450866d9fa444efcc43b553ba2d/scikit_image-0.26.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2c1e7bd342f43e7a97e571b3f03ba4c1293ea1a35c3f13f41efdc8a81c1dc8f2", size = 14364151, upload-time = "2025-12-20T17:12:14.909Z" }, + { url = "https://files.pythonhosted.org/packages/da/ca/918d8d306bd43beacff3b835c6d96fac0ae64c0857092f068b88db531a7c/scikit_image-0.26.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b702c3bb115e1dcf4abf5297429b5c90f2189655888cbed14921f3d26f81d3a4", size = 12413484, upload-time = "2025-12-20T17:12:17.046Z" }, + { url = "https://files.pythonhosted.org/packages/dc/cd/4da01329b5a8d47ff7ec3c99a2b02465a8017b186027590dc7425cee0b56/scikit_image-0.26.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0608aa4a9ec39e0843de10d60edb2785a30c1c47819b67866dd223ebd149acaf", size = 11769501, upload-time = "2025-12-20T17:12:19.339Z" }, +] + [[package]] name = "scikit-learn" version = "1.7.2" @@ -5342,6 +5835,74 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, ] +[[package]] +name = "shapely" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/89/c3548aa9b9812a5d143986764dededfa48d817714e947398bdda87c77a72/shapely-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7ae48c236c0324b4e139bea88a306a04ca630f49be66741b340729d380d8f52f", size = 1825959, upload-time = "2025-09-24T13:50:00.682Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8a/7ebc947080442edd614ceebe0ce2cdbd00c25e832c240e1d1de61d0e6b38/shapely-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eba6710407f1daa8e7602c347dfc94adc02205ec27ed956346190d66579eb9ea", size = 1629196, upload-time = "2025-09-24T13:50:03.447Z" }, + { url = "https://files.pythonhosted.org/packages/c8/86/c9c27881c20d00fc409e7e059de569d5ed0abfcec9c49548b124ebddea51/shapely-2.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef4a456cc8b7b3d50ccec29642aa4aeda959e9da2fe9540a92754770d5f0cf1f", size = 2951065, upload-time = "2025-09-24T13:50:05.266Z" }, + { url = "https://files.pythonhosted.org/packages/50/8a/0ab1f7433a2a85d9e9aea5b1fbb333f3b09b309e7817309250b4b7b2cc7a/shapely-2.1.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e38a190442aacc67ff9f75ce60aec04893041f16f97d242209106d502486a142", size = 3058666, upload-time = "2025-09-24T13:50:06.872Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c6/5a30ffac9c4f3ffd5b7113a7f5299ccec4713acd5ee44039778a7698224e/shapely-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:40d784101f5d06a1fd30b55fc11ea58a61be23f930d934d86f19a180909908a4", size = 3966905, upload-time = "2025-09-24T13:50:09.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/72/e92f3035ba43e53959007f928315a68fbcf2eeb4e5ededb6f0dc7ff1ecc3/shapely-2.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f6f6cd5819c50d9bcf921882784586aab34a4bd53e7553e175dece6db513a6f0", size = 4129260, upload-time = "2025-09-24T13:50:11.183Z" }, + { url = "https://files.pythonhosted.org/packages/42/24/605901b73a3d9f65fa958e63c9211f4be23d584da8a1a7487382fac7fdc5/shapely-2.1.2-cp310-cp310-win32.whl", hash = "sha256:fe9627c39c59e553c90f5bc3128252cb85dc3b3be8189710666d2f8bc3a5503e", size = 1544301, upload-time = "2025-09-24T13:50:12.521Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/6db795b8dd3919851856bd2ddd13ce434a748072f6fdee42ff30cbd3afa3/shapely-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:1d0bfb4b8f661b3b4ec3565fa36c340bfb1cda82087199711f86a88647d26b2f", size = 1722074, upload-time = "2025-09-24T13:50:13.909Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8d/1ff672dea9ec6a7b5d422eb6d095ed886e2e523733329f75fdcb14ee1149/shapely-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91121757b0a36c9aac3427a651a7e6567110a4a67c97edf04f8d55d4765f6618", size = 1820038, upload-time = "2025-09-24T13:50:15.628Z" }, + { url = "https://files.pythonhosted.org/packages/4f/ce/28fab8c772ce5db23a0d86bf0adaee0c4c79d5ad1db766055fa3dab442e2/shapely-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a9c722ba774cf50b5d4541242b4cce05aafd44a015290c82ba8a16931ff63d", size = 1626039, upload-time = "2025-09-24T13:50:16.881Z" }, + { url = "https://files.pythonhosted.org/packages/70/8b/868b7e3f4982f5006e9395c1e12343c66a8155c0374fdc07c0e6a1ab547d/shapely-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cc4f7397459b12c0b196c9efe1f9d7e92463cbba142632b4cc6d8bbbbd3e2b09", size = 3001519, upload-time = "2025-09-24T13:50:18.606Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/02/58b0b8d9c17c93ab6340edd8b7308c0c5a5b81f94ce65705819b7416dba5/shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:136ab87b17e733e22f0961504d05e77e7be8c9b5a8184f685b4a91a84efe3c26", size = 3110842, upload-time = "2025-09-24T13:50:21.77Z" }, + { url = "https://files.pythonhosted.org/packages/af/61/8e389c97994d5f331dcffb25e2fa761aeedfb52b3ad9bcdd7b8671f4810a/shapely-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:16c5d0fc45d3aa0a69074979f4f1928ca2734fb2e0dde8af9611e134e46774e7", size = 4021316, upload-time = "2025-09-24T13:50:23.626Z" }, + { url = "https://files.pythonhosted.org/packages/d3/d4/9b2a9fe6039f9e42ccf2cb3e84f219fd8364b0c3b8e7bbc857b5fbe9c14c/shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2", size = 4178586, upload-time = "2025-09-24T13:50:25.443Z" }, + { url = "https://files.pythonhosted.org/packages/16/f6/9840f6963ed4decf76b08fd6d7fed14f8779fb7a62cb45c5617fa8ac6eab/shapely-2.1.2-cp311-cp311-win32.whl", hash = "sha256:2fa78b49485391224755a856ed3b3bd91c8455f6121fee0db0e71cefb07d0ef6", size = 1543961, upload-time = "2025-09-24T13:50:26.968Z" }, + { url = "https://files.pythonhosted.org/packages/38/1e/3f8ea46353c2a33c1669eb7327f9665103aa3a8dfe7f2e4ef714c210b2c2/shapely-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:c64d5c97b2f47e3cd9b712eaced3b061f2b71234b3fc263e0fcf7d889c6559dc", size = 1722856, upload-time = "2025-09-24T13:50:28.497Z" }, + { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, upload-time = "2025-09-24T13:50:32.291Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" }, + { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, + { url = "https://files.pythonhosted.org/packages/c3/90/98ef257c23c46425dc4d1d31005ad7c8d649fe423a38b917db02c30f1f5a/shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8", size = 1832644, upload-time = "2025-09-24T13:50:44.886Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ab/0bee5a830d209adcd3a01f2d4b70e587cdd9fd7380d5198c064091005af8/shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a", size = 1642887, upload-time = "2025-09-24T13:50:46.735Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5e/7d7f54ba960c13302584c73704d8c4d15404a51024631adb60b126a4ae88/shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e", size = 2970931, upload-time = "2025-09-24T13:50:48.374Z" }, + { url = "https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6", size = 3082855, upload-time = "2025-09-24T13:50:50.037Z" }, + { url = "https://files.pythonhosted.org/packages/44/2b/578faf235a5b09f16b5f02833c53822294d7f21b242f8e2d0cf03fb64321/shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af", size = 3979960, upload-time = "2025-09-24T13:50:51.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/04/167f096386120f692cc4ca02f75a17b961858997a95e67a3cb6a7bbd6b53/shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd", size = 4142851, upload-time = "2025-09-24T13:50:53.49Z" }, + { url = "https://files.pythonhosted.org/packages/48/74/fb402c5a6235d1c65a97348b48cdedb75fb19eca2b1d66d04969fc1c6091/shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350", size = 1541890, upload-time = "2025-09-24T13:50:55.337Z" }, + { url = "https://files.pythonhosted.org/packages/41/47/3647fe7ad990af60ad98b889657a976042c9988c2807cf322a9d6685f462/shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715", size = 1722151, upload-time = "2025-09-24T13:50:57.153Z" }, + { url = "https://files.pythonhosted.org/packages/3c/49/63953754faa51ffe7d8189bfbe9ca34def29f8c0e34c67cbe2a2795f269d/shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40", size = 1834130, upload-time = "2025-09-24T13:50:58.49Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ee/dce001c1984052970ff60eb4727164892fb2d08052c575042a47f5a9e88f/shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b", size = 1642802, upload-time = "2025-09-24T13:50:59.871Z" }, + { url = "https://files.pythonhosted.org/packages/da/e7/fc4e9a19929522877fa602f705706b96e78376afb7fad09cad5b9af1553c/shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801", size = 3018460, upload-time = "2025-09-24T13:51:02.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/18/7519a25db21847b525696883ddc8e6a0ecaa36159ea88e0fef11466384d0/shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0", size = 3095223, upload-time = "2025-09-24T13:51:04.472Z" }, + { url = "https://files.pythonhosted.org/packages/48/de/b59a620b1f3a129c3fecc2737104a0a7e04e79335bd3b0a1f1609744cf17/shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c", size = 4030760, upload-time = "2025-09-24T13:51:06.455Z" }, + { url = "https://files.pythonhosted.org/packages/96/b3/c6655ee7232b417562bae192ae0d3ceaadb1cc0ffc2088a2ddf415456cc2/shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99", size = 4170078, upload-time = "2025-09-24T13:51:08.584Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8e/605c76808d73503c9333af8f6cbe7e1354d2d238bda5f88eea36bfe0f42a/shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf", size = 1559178, upload-time = "2025-09-24T13:51:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/36/f7/d317eb232352a1f1444d11002d477e54514a4a6045536d49d0c59783c0da/shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c", size = 1739756, upload-time = "2025-09-24T13:51:12.105Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c4/3ce4c2d9b6aabd27d26ec988f08cb877ba9e6e96086eff81bfea93e688c7/shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223", size = 1831290, upload-time = "2025-09-24T13:51:13.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/b9/f6ab8918fc15429f79cb04afa9f9913546212d7fb5e5196132a2af46676b/shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c", size = 1641463, upload-time = "2025-09-24T13:51:14.972Z" }, + { url = "https://files.pythonhosted.org/packages/a5/57/91d59ae525ca641e7ac5551c04c9503aee6f29b92b392f31790fcb1a4358/shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df", size = 2970145, upload-time = "2025-09-24T13:51:16.961Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cb/4948be52ee1da6927831ab59e10d4c29baa2a714f599f1f0d1bc747f5777/shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf", size = 3073806, upload-time = "2025-09-24T13:51:18.712Z" }, + { url = "https://files.pythonhosted.org/packages/03/83/f768a54af775eb41ef2e7bec8a0a0dbe7d2431c3e78c0a8bdba7ab17e446/shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4", size = 3980803, upload-time = "2025-09-24T13:51:20.37Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cb/559c7c195807c91c79d38a1f6901384a2878a76fbdf3f1048893a9b7534d/shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc", size = 4133301, upload-time = "2025-09-24T13:51:21.887Z" }, + { url = "https://files.pythonhosted.org/packages/80/cd/60d5ae203241c53ef3abd2ef27c6800e21afd6c94e39db5315ea0cbafb4a/shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566", size = 1583247, upload-time = "2025-09-24T13:51:23.401Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/d4/135684f342e909330e50d31d441ace06bf83c7dc0777e11043f99167b123/shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c", size = 1773019, upload-time = "2025-09-24T13:51:24.873Z" }, + { url = "https://files.pythonhosted.org/packages/a3/05/a44f3f9f695fa3ada22786dc9da33c933da1cbc4bfe876fe3a100bafe263/shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a", size = 1834137, upload-time = "2025-09-24T13:51:26.665Z" }, + { url = "https://files.pythonhosted.org/packages/52/7e/4d57db45bf314573427b0a70dfca15d912d108e6023f623947fa69f39b72/shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076", size = 1642884, upload-time = "2025-09-24T13:51:28.029Z" }, + { url = "https://files.pythonhosted.org/packages/5a/27/4e29c0a55d6d14ad7422bf86995d7ff3f54af0eba59617eb95caf84b9680/shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1", size = 3018320, upload-time = "2025-09-24T13:51:29.903Z" }, + { url = "https://files.pythonhosted.org/packages/9f/bb/992e6a3c463f4d29d4cd6ab8963b75b1b1040199edbd72beada4af46bde5/shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0", size = 3094931, upload-time = "2025-09-24T13:51:32.699Z" }, + { url = "https://files.pythonhosted.org/packages/9c/16/82e65e21070e473f0ed6451224ed9fa0be85033d17e0c6e7213a12f59d12/shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26", size = 4030406, upload-time = "2025-09-24T13:51:34.189Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/75/c24ed871c576d7e2b64b04b1fe3d075157f6eb54e59670d3f5ffb36e25c7/shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0", size = 4169511, upload-time = "2025-09-24T13:51:36.297Z" }, + { url = "https://files.pythonhosted.org/packages/b1/f7/b3d1d6d18ebf55236eec1c681ce5e665742aab3c0b7b232720a7d43df7b6/shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735", size = 1602607, upload-time = "2025-09-24T13:51:37.757Z" }, + { url = "https://files.pythonhosted.org/packages/9a/f6/f09272a71976dfc138129b8faf435d064a811ae2f708cb147dccdf7aacdb/shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9", size = 1796682, upload-time = "2025-09-24T13:51:39.233Z" }, +] + [[package]] name = "shellingham" version = "1.5.4" @@ -5412,6 +5973,8 @@ all = [ { name = "uvicorn" }, { name = "voyageai" }, { name = "weaviate-client" }, + { name = "youtube-transcript-api" }, + { name = "yt-dlp" }, ] all-cloud = [ { name = "azure-storage-blob" }, @@ -5468,6 +6031,18 @@ s3 = [ sentence-transformers = [ { name = "sentence-transformers" }, ] +video = [ + { name = "youtube-transcript-api" }, + { name = "yt-dlp" }, +] +video-full = [ + { name = "easyocr" }, + { name = "faster-whisper" }, + { name = "opencv-python-headless" }, + { name = "scenedetect", extra = ["opencv"] }, + { name = "youtube-transcript-api" }, + { name = "yt-dlp" }, +] weaviate = [ { name = "weaviate-client" }, ] @@ -5504,8 +6079,10 @@ requires-dist = [ { name = "chromadb", marker = "extra == 'chroma'", specifier = ">=0.4.0" }, { name = "chromadb", marker = "extra == 'rag-upload'", specifier = ">=0.4.0" }, { name = "click", specifier = ">=8.3.0" }, + { name = "easyocr", marker = "extra == 'video-full'", specifier = ">=1.7.0" }, { name = "fastapi", marker = "extra == 'all'", specifier = 
">=0.109.0" }, { name = "fastapi", marker = "extra == 'embedding'", specifier = ">=0.109.0" }, + { name = "faster-whisper", marker = "extra == 'video-full'", specifier = ">=1.0.0" }, { name = "gitpython", specifier = ">=3.1.40" }, { name = "google-cloud-storage", marker = "extra == 'all'", specifier = ">=2.10.0" }, { name = "google-cloud-storage", marker = "extra == 'all-cloud'", specifier = ">=2.10.0" }, @@ -5531,6 +6108,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'all'", specifier = ">=1.0.0" }, { name = "openai", marker = "extra == 'all-llms'", specifier = ">=1.0.0" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.0.0" }, + { name = "opencv-python-headless", marker = "extra == 'video-full'", specifier = ">=4.9.0" }, { name = "pathspec", specifier = ">=0.12.1" }, { name = "pillow", specifier = ">=11.0.0" }, { name = "pydantic", specifier = ">=2.12.3" }, @@ -5544,6 +6122,7 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=1.1.1" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "requests", specifier = ">=2.32.5" }, + { name = "scenedetect", extras = ["opencv"], marker = "extra == 'video-full'", specifier = ">=0.6.4" }, { name = "schedule", specifier = ">=1.2.0" }, { name = "sentence-transformers", marker = "extra == 'all'", specifier = ">=2.3.0" }, { name = "sentence-transformers", marker = "extra == 'embedding'", specifier = ">=2.3.0" }, @@ -5562,8 +6141,14 @@ requires-dist = [ { name = "weaviate-client", marker = "extra == 'all'", specifier = ">=3.25.0" }, { name = "weaviate-client", marker = "extra == 'rag-upload'", specifier = ">=3.25.0" }, { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = ">=3.25.0" }, + { name = "youtube-transcript-api", marker = "extra == 'all'", specifier = ">=1.2.0" }, + { name = "youtube-transcript-api", marker = "extra == 'video'", specifier = ">=1.2.0" }, + { name = "youtube-transcript-api", marker = "extra == 'video-full'", specifier = ">=1.2.0" }, + { name = 
"yt-dlp", marker = "extra == 'all'", specifier = ">=2024.12.0" }, + { name = "yt-dlp", marker = "extra == 'video'", specifier = ">=2024.12.0" }, + { name = "yt-dlp", marker = "extra == 'video-full'", specifier = ">=2024.12.0" }, ] -provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "chroma", "weaviate", "sentence-transformers", "rag-upload", "all-cloud", "embedding", "all"] +provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "video", "video-full", "chroma", "weaviate", "sentence-transformers", "rag-upload", "all-cloud", "embedding", "all"] [package.metadata.requires-dev] dev = [ @@ -5735,6 +6320,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] +[[package]] +name = "tifffile" +version = "2025.5.10" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/d0/18fed0fc0916578a4463f775b0fbd9c5fed2392152d039df2fb533bfdd5d/tifffile-2025.5.10.tar.gz", hash = "sha256:018335d34283aa3fd8c263bae5c3c2b661ebc45548fde31504016fcae7bf1103", size = 365290, upload-time = "2025-05-10T19:22:34.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/06/bd0a6097da704a7a7c34a94cfd771c3ea3c2f405dd214e790d22c93f6be1/tifffile-2025.5.10-py3-none-any.whl", hash = "sha256:e37147123c0542d67bc37ba5cdd67e12ea6fbe6e86c52bee037a9eb6a064e5ad", size = 226533, upload-time = "2025-05-10T19:22:27.279Z" }, +] + +[[package]] +name = "tifffile" +version = "2026.2.24" +source = { registry = 
"https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/1c/19fc653e2b05ec0defae511b03b330ca60c95f2c47fcaaf21c52c6e84aa8/tifffile-2026.2.24.tar.gz", hash = "sha256:d73cfa6d7a8f5775a1e3c9f3bfca77c992946639fb41a5bbe888878cb6964dc6", size = 387373, upload-time = "2026-02-24T23:59:11.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/fe/80250dc06cd4a3a5afe7059875a8d53e97a78528c5dd9ea8c3f981fb897a/tifffile-2026.2.24-py3-none-any.whl", hash = "sha256:38ef6258c2bd8dd3551c7480c6d75a36c041616262e6cd55a50dd16046b71863", size = 243223, upload-time = "2026-02-24T23:59:10.131Z" }, +] + [[package]] name = "tiktoken" version = "0.12.0" @@ -5950,6 +6568,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816, upload-time = "2026-01-21T16:22:05.312Z" }, ] +[[package]] +name = "torchvision" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "torch" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/ae/cbf727421eb73f1cf907fbe5788326a08f111b3f6b6ddca15426b53fec9a/torchvision-0.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:a95c47abb817d4e90ea1a8e57bd0d728e3e6b533b3495ae77d84d883c4d11f56", size = 1874919, upload-time = "2026-01-21T16:27:47.617Z" }, + { url = "https://files.pythonhosted.org/packages/64/68/dc7a224f606d53ea09f9a85196a3921ec3a801b0b1d17e84c73392f0c029/torchvision-0.25.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:acc339aba4a858192998c2b91f635827e40d9c469d9cf1455bafdda6e4c28ea4", size = 2343220, upload-time = "2026-01-21T16:27:44.26Z" }, + { url = "https://files.pythonhosted.org/packages/f9/fa/8cce5ca7ffd4da95193232493703d20aa06303f37b119fd23a65df4f239a/torchvision-0.25.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0d9a3f925a081dd2ebb0b791249b687c2ef2c2717d027946654607494b9b64b6", size = 8068106, upload-time = "2026-01-21T16:27:37.805Z" }, + { url = "https://files.pythonhosted.org/packages/8b/b9/a53bcf8f78f2cd89215e9ded70041765d50ef13bf301f9884ec6041a9421/torchvision-0.25.0-cp310-cp310-win_amd64.whl", hash = "sha256:b57430fbe9e9b697418a395041bb615124d9c007710a2712fda6e35fb310f264", size = 3697295, upload-time = "2026-01-21T16:27:36.574Z" }, + { url = "https://files.pythonhosted.org/packages/3e/be/c704bceaf11c4f6b19d64337a34a877fcdfe3bd68160a8c9ae9bea4a35a3/torchvision-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db74a551946b75d19f9996c419a799ffdf6a223ecf17c656f90da011f1d75b20", size = 1874923, upload-time = "2026-01-21T16:27:46.574Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e9/f143cd71232430de1f547ceab840f68c55e127d72558b1061a71d0b193cd/torchvision-0.25.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f49964f96644dbac2506dffe1a0a7ec0f2bf8cf7a588c3319fed26e6329ffdf3", size = 2344808, upload-time = "2026-01-21T16:27:43.191Z" }, + { url = "https://files.pythonhosted.org/packages/43/ae/ad5d6165797de234c9658752acb4fce65b78a6a18d82efdf8367c940d8da/torchvision-0.25.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:153c0d2cbc34b7cf2da19d73450f24ba36d2b75ec9211b9962b5022fb9e4ecee", size = 8070752, 
upload-time = "2026-01-21T16:27:33.748Z" }, + { url = "https://files.pythonhosted.org/packages/23/19/55b28aecdc7f38df57b8eb55eb0b14a62b470ed8efeb22cdc74224df1d6a/torchvision-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:ea580ffd6094cc01914ad32f8c8118174f18974629af905cea08cb6d5d48c7b7", size = 4038722, upload-time = "2026-01-21T16:27:41.355Z" }, + { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload-time = "2026-01-21T16:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload-time = "2026-01-21T16:27:40.125Z" }, + { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload-time = "2026-01-21T16:27:21.074Z" }, + { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload-time = "2026-01-21T16:27:11.08Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5b/1562a04a6a5a4cf8cf40016a0cdeda91ede75d6962cff7f809a85ae966a5/torchvision-0.25.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:24e11199e4d84ba9c5ee7825ebdf1cd37ce8deec225117f10243cae984ced3ec", size = 1874918, upload-time = "2026-01-21T16:27:39.02Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/b1/3d6c42f62c272ce34fcce609bb8939bdf873dab5f1b798fd4e880255f129/torchvision-0.25.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f271136d2d2c0b7a24c5671795c6e4fd8da4e0ea98aeb1041f62bc04c4370ef", size = 2309106, upload-time = "2026-01-21T16:27:30.624Z" }, + { url = "https://files.pythonhosted.org/packages/c7/60/59bb9c8b67cce356daeed4cb96a717caa4f69c9822f72e223a0eae7a9bd9/torchvision-0.25.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:855c0dc6d37f462482da7531c6788518baedca1e0847f3df42a911713acdfe52", size = 8071522, upload-time = "2026-01-21T16:27:29.392Z" }, + { url = "https://files.pythonhosted.org/packages/32/a5/9a9b1de0720f884ea50dbf9acb22cbe5312e51d7b8c4ac6ba9b51efd9bba/torchvision-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:cef0196be31be421f6f462d1e9da1101be7332d91984caa6f8022e6c78a5877f", size = 4321911, upload-time = "2026-01-21T16:27:35.195Z" }, + { url = "https://files.pythonhosted.org/packages/52/99/dca81ed21ebaeff2b67cc9f815a20fdaa418b69f5f9ea4c6ed71721470db/torchvision-0.25.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8f8061284395ce31bcd460f2169013382ccf411148ceb2ee38e718e9860f5a7", size = 1896209, upload-time = "2026-01-21T16:27:32.159Z" }, + { url = "https://files.pythonhosted.org/packages/28/cc/2103149761fdb4eaed58a53e8437b2d716d48f05174fab1d9fcf1e2a2244/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:146d02c9876858420adf41f3189fe90e3d6a409cbfa65454c09f25fb33bf7266", size = 2310735, upload-time = "2026-01-21T16:27:22.327Z" }, + { url = "https://files.pythonhosted.org/packages/76/ad/f4c985ad52ddd3b22711c588501be1b330adaeaf6850317f66751711b78c/torchvision-0.25.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c4d395cb2c4a2712f6eb93a34476cdf7aae74bb6ea2ea1917f858e96344b00aa", size = 8089557, upload-time = "2026-01-21T16:27:27.666Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/cc/0ea68b5802e5e3c31f44b307e74947bad5a38cc655231d845534ed50ddb8/torchvision-0.25.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5e6b449e9fa7d642142c0e27c41e5a43b508d57ed8e79b7c0a0c28652da8678c", size = 4344260, upload-time = "2026-01-21T16:27:17.018Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1f/fa839532660e2602b7e704d65010787c5bb296258b44fa8b9c1cd6175e7d/torchvision-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:620a236288d594dcec7634c754484542dc0a5c1b0e0b83a34bda5e91e9b7c3a1", size = 1896193, upload-time = "2026-01-21T16:27:24.785Z" }, + { url = "https://files.pythonhosted.org/packages/80/ed/d51889da7ceaf5ff7a0574fb28f9b6b223df19667265395891f81b364ab3/torchvision-0.25.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b5e7f50002a8145a98c5694a018e738c50e2972608310c7e88e1bd4c058f6ce", size = 2309331, upload-time = "2026-01-21T16:27:19.97Z" }, + { url = "https://files.pythonhosted.org/packages/90/a5/f93fcffaddd8f12f9e812256830ec9c9ca65abbf1bc369379f9c364d1ff4/torchvision-0.25.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:632db02300e83793812eee4f61ae6a2686dab10b4cfd628b620dc47747aa9d03", size = 8088713, upload-time = "2026-01-21T16:27:15.281Z" }, + { url = "https://files.pythonhosted.org/packages/1f/eb/d0096eed5690d962853213f2ee00d91478dfcb586b62dbbb449fb8abc3a6/torchvision-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:d1abd5ed030c708f5dbf4812ad5f6fbe9384b63c40d6bd79f8df41a4a759a917", size = 4325058, upload-time = "2026-01-21T16:27:26.165Z" }, + { url = "https://files.pythonhosted.org/packages/97/36/96374a4c7ab50dea9787ce987815614ccfe988a42e10ac1a2e3e5b60319a/torchvision-0.25.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ad9a8a5877782944d99186e4502a614770fe906626d76e9cd32446a0ac3075f2", size = 1896207, upload-time = "2026-01-21T16:27:23.383Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/e2/7abb10a867db79b226b41da419b63b69c0bd5b82438c4a4ed50e084c552f/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:40a122c3cf4d14b651f095e0f672b688dde78632783fc5cd3d4d5e4f6a828563", size = 2310741, upload-time = "2026-01-21T16:27:18.712Z" }, + { url = "https://files.pythonhosted.org/packages/08/e6/0927784e6ffc340b6676befde1c60260bd51641c9c574b9298d791a9cda4/torchvision-0.25.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:846890161b825b38aa85fc37fb3ba5eea74e7091ff28bab378287111483b6443", size = 8089772, upload-time = "2026-01-21T16:27:14.048Z" }, + { url = "https://files.pythonhosted.org/packages/b6/37/e7ca4ec820d434c0f23f824eb29f0676a0c3e7a118f1514f5b949c3356da/torchvision-0.25.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f07f01d27375ad89d72aa2b3f2180f07da95dd9d2e4c758e015c0acb2da72977", size = 4425879, upload-time = "2026-01-21T16:27:12.579Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -6726,6 +7385,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, ] +[[package]] +name = "youtube-transcript-api" +version = "1.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/43/4104185a2eaa839daa693b30e15c37e7e58795e8e09ec414f22b3db54bec/youtube_transcript_api-1.2.4.tar.gz", hash = "sha256:b72d0e96a335df599d67cee51d49e143cff4f45b84bcafc202ff51291603ddcd", size = 469839, upload-time = "2026-01-29T09:09:17.088Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/95/129ea37efd6cd6ed00f62baae6543345c677810b8a3bf0026756e1d3cf3c/youtube_transcript_api-1.2.4-py3-none-any.whl", hash = 
"sha256:03878759356da5caf5edac77431780b91448fb3d8c21d4496015bdc8a7bc43ff", size = 485227, upload-time = "2026-01-29T09:09:15.427Z" }, +] + +[[package]] +name = "yt-dlp" +version = "2026.2.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/d9/55ffff25204733e94a507552ad984d5a8a8e4f9d1f0d91763e6b1a41c79b/yt_dlp-2026.2.21.tar.gz", hash = "sha256:4407dfc1a71fec0dee5ef916a8d4b66057812939b509ae45451fa8fb4376b539", size = 3116630, upload-time = "2026-02-21T20:40:53.522Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/40/664c99ee36d80d84ce7a96cd98aebcb3d16c19e6c3ad3461d2cf5424040e/yt_dlp-2026.2.21-py3-none-any.whl", hash = "sha256:0d8408f5b6d20487f5caeb946dfd04f9bcd2f1a3a125b744a0a982b590e449f7", size = 3313392, upload-time = "2026-02-21T20:40:51.514Z" }, +] + [[package]] name = "zipp" version = "3.23.0"