feat: add EPUB input support (#310)
Adds EPUB as a first-class input source for skill generation.

- EpubToSkillConverter (epub_scraper.py, ~1200 lines) following the PDF scraper pattern
- Extraction of Dublin Core metadata, spine items, code blocks, tables, and images
- DRM detection (Adobe ADEPT, Apple FairPlay, Readium LCP) with fail-fast
- EPUB 3 NCX TOC bug workaround (ignore_ncx=True)
- ebooklib as an optional dependency: pip install skill-seekers[epub]
- Wired into the create command with .epub auto-detection
- 104 tests, all passing

Review fixes: removed 3 empty test stubs, fixed SVG double-counting in _extract_images(), and added logger.debug to a bare except/pass.

Based on PR #310 by @christianbaumann.

Co-authored-by: Christian Baumann <mail@chriss-baumann.de>
This commit is contained in:
@@ -114,6 +114,11 @@ docx = [
|
||||
"python-docx>=1.1.0",
|
||||
]
|
||||
|
||||
# EPUB (.epub) support
|
||||
epub = [
|
||||
"ebooklib>=0.18",
|
||||
]
|
||||
|
||||
# Video processing (lightweight: YouTube transcripts + metadata)
|
||||
video = [
|
||||
"yt-dlp>=2024.12.0",
|
||||
@@ -178,6 +183,7 @@ embedding = [
|
||||
all = [
|
||||
"mammoth>=1.6.0",
|
||||
"python-docx>=1.1.0",
|
||||
"ebooklib>=0.18",
|
||||
"yt-dlp>=2024.12.0",
|
||||
"youtube-transcript-api>=1.2.0",
|
||||
"mcp>=1.25,<2",
|
||||
@@ -222,6 +228,7 @@ skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
|
||||
skill-seekers-github = "skill_seekers.cli.github_scraper:main"
|
||||
skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
|
||||
skill-seekers-word = "skill_seekers.cli.word_scraper:main"
|
||||
skill-seekers-epub = "skill_seekers.cli.epub_scraper:main"
|
||||
skill-seekers-video = "skill_seekers.cli.video_scraper:main"
|
||||
skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
|
||||
skill-seekers-enhance = "skill_seekers.cli.enhance_command:main"
|
||||
|
||||
Reference in New Issue
Block a user