feat: add EPUB input support (#310)
Adds EPUB as a first-class input source for skill generation. - EpubToSkillConverter (epub_scraper.py, ~1200 lines) following PDF scraper pattern - Dublin Core metadata, spine items, code blocks, tables, images extraction - DRM detection (Adobe ADEPT, Apple FairPlay, Readium LCP) with fail-fast - EPUB 3 NCX TOC bug workaround (ignore_ncx=True) - ebooklib as optional dep: pip install skill-seekers[epub] - Wired into create command with .epub auto-detection - 104 tests, all passing Review fixes: removed 3 empty test stubs, fixed SVG double-counting in _extract_images(), added logger.debug to bare except pass. Based on PR #310 by @christianbaumann. Co-authored-by: Christian Baumann <mail@chriss-baumann.de>
This commit is contained in:
@@ -63,6 +63,9 @@ class SourceDetector:
|
||||
if source.endswith(".docx"):
|
||||
return cls._detect_word(source)
|
||||
|
||||
if source.endswith(".epub"):
|
||||
return cls._detect_epub(source)
|
||||
|
||||
# Video file extensions
|
||||
VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv")
|
||||
if source.lower().endswith(VIDEO_EXTENSIONS):
|
||||
@@ -99,6 +102,7 @@ class SourceDetector:
|
||||
" Local: skill-seekers create ./my-project\n"
|
||||
" PDF: skill-seekers create tutorial.pdf\n"
|
||||
" DOCX: skill-seekers create document.docx\n"
|
||||
" EPUB: skill-seekers create ebook.epub\n"
|
||||
" Video: skill-seekers create https://youtube.com/watch?v=...\n"
|
||||
" Video: skill-seekers create recording.mp4\n"
|
||||
" Config: skill-seekers create configs/react.json"
|
||||
@@ -128,6 +132,14 @@ class SourceDetector:
|
||||
type="word", parsed={"file_path": source}, suggested_name=name, raw_input=source
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _detect_epub(cls, source: str) -> SourceInfo:
|
||||
"""Detect EPUB file source."""
|
||||
name = os.path.splitext(os.path.basename(source))[0]
|
||||
return SourceInfo(
|
||||
type="epub", parsed={"file_path": source}, suggested_name=name, raw_input=source
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _detect_video_file(cls, source: str) -> SourceInfo:
|
||||
"""Detect local video file source."""
|
||||
@@ -277,6 +289,13 @@ class SourceDetector:
|
||||
if not os.path.isfile(file_path):
|
||||
raise ValueError(f"Path is not a file: {file_path}")
|
||||
|
||||
elif source_info.type == "epub":
|
||||
file_path = source_info.parsed["file_path"]
|
||||
if not os.path.exists(file_path):
|
||||
raise ValueError(f"EPUB file does not exist: {file_path}")
|
||||
if not os.path.isfile(file_path):
|
||||
raise ValueError(f"Path is not a file: {file_path}")
|
||||
|
||||
elif source_info.type == "video":
|
||||
if source_info.parsed.get("source_kind") == "file":
|
||||
file_path = source_info.parsed["file_path"]
|
||||
|
||||
Reference in New Issue
Block a user