feat: add EPUB input support (#310)

Adds EPUB as a first-class input source for skill generation.

- EpubToSkillConverter (epub_scraper.py, ~1200 lines) following PDF scraper pattern
- Dublin Core metadata, spine items, code blocks, tables, images extraction
- DRM detection (Adobe ADEPT, Apple FairPlay, Readium LCP) with fail-fast
- EPUB 3 NCX TOC bug workaround (ignore_ncx=True)
- ebooklib as optional dep: pip install skill-seekers[epub]
- Wired into create command with .epub auto-detection
- 104 tests, all passing

Review fixes: removed 3 empty test stubs, fixed SVG double-counting in
_extract_images(), added logger.debug to bare except pass.

Based on PR #310 by @christianbaumann.
Co-authored-by: Christian Baumann <mail@chriss-baumann.de>
This commit is contained in:
yusyus
2026-03-15 02:34:41 +03:00
committed by GitHub
parent 83b9a695ba
commit 2e30970dfb
16 changed files with 4502 additions and 9 deletions

View File

@@ -63,6 +63,9 @@ class SourceDetector:
if source.endswith(".docx"):
return cls._detect_word(source)
if source.endswith(".epub"):
return cls._detect_epub(source)
# Video file extensions
VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv")
if source.lower().endswith(VIDEO_EXTENSIONS):
@@ -99,6 +102,7 @@ class SourceDetector:
" Local: skill-seekers create ./my-project\n"
" PDF: skill-seekers create tutorial.pdf\n"
" DOCX: skill-seekers create document.docx\n"
" EPUB: skill-seekers create ebook.epub\n"
" Video: skill-seekers create https://youtube.com/watch?v=...\n"
" Video: skill-seekers create recording.mp4\n"
" Config: skill-seekers create configs/react.json"
@@ -128,6 +132,14 @@ class SourceDetector:
type="word", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_epub(cls, source: str) -> SourceInfo:
"""Detect EPUB file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="epub", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_video_file(cls, source: str) -> SourceInfo:
"""Detect local video file source."""
@@ -277,6 +289,13 @@ class SourceDetector:
if not os.path.isfile(file_path):
raise ValueError(f"Path is not a file: {file_path}")
elif source_info.type == "epub":
file_path = source_info.parsed["file_path"]
if not os.path.exists(file_path):
raise ValueError(f"EPUB file does not exist: {file_path}")
if not os.path.isfile(file_path):
raise ValueError(f"Path is not a file: {file_path}")
elif source_info.type == "video":
if source_info.parsed.get("source_kind") == "file":
file_path = source_info.parsed["file_path"]