From 2e30970dfbfeb4e06b2e3139f7ff1855c8a8ea0d Mon Sep 17 00:00:00 2001
From: yusyus <yusufkaraaslan.yk@pm.me>
Date: Sun, 15 Mar 2026 02:34:41 +0300
Subject: [PATCH] feat: add EPUB input support (#310)

Adds EPUB as a first-class input source for skill generation.

- EpubToSkillConverter (epub_scraper.py, ~1200 lines) following PDF scraper pattern
- Dublin Core metadata, spine items, code blocks, tables, images extraction
- DRM detection (Adobe ADEPT, Apple FairPlay, Readium LCP) with fail-fast
- EPUB 3 NCX TOC bug workaround (ignore_ncx=True)
- ebooklib as optional dep: pip install skill-seekers[epub]
- Wired into create command with .epub auto-detection
- 104 tests, all passing

Review fixes: removed 3 empty test stubs, fixed SVG double-counting in
_extract_images(), added logger.debug to bare except pass.

Based on PR #310 by @christianbaumann.
Co-authored-by: Christian Baumann <mail@chriss-baumann.de>
---
 CHANGELOG.md                                  |   12 +
 CLAUDE.md                                     |   12 +-
 .../plans/2026-03-14-epub-input-support.md    | 1160 ++++++++++++
 ...03-14-epub-input-support-affected-files.md |  271 +++
 pyproject.toml                                |    7 +
 src/skill_seekers/cli/arguments/create.py     |   18 +
 src/skill_seekers/cli/arguments/epub.py       |   66 +
 src/skill_seekers/cli/create_command.py       |   40 +
 src/skill_seekers/cli/epub_scraper.py         | 1206 ++++++++++++
 src/skill_seekers/cli/main.py                 |    2 +
 src/skill_seekers/cli/parsers/__init__.py     |    2 +
 src/skill_seekers/cli/parsers/epub_parser.py  |   32 +
 src/skill_seekers/cli/source_detector.py      |   19 +
 tests/test_cli_parsers.py                     |   10 +-
 tests/test_epub_scraper.py                    | 1626 +++++++++++++++++
 uv.lock                                       |   28 +-
 16 files changed, 4502 insertions(+), 9 deletions(-)
 create mode 100644 docs/agents/plans/2026-03-14-epub-input-support.md
 create mode 100644 docs/agents/research/2026-03-14-epub-input-support-affected-files.md
 create mode 100644 src/skill_seekers/cli/arguments/epub.py
 create mode 100644 src/skill_seekers/cli/epub_scraper.py
 create mode 100644 src/skill_seekers/cli/parsers/epub_parser.py
 create mode 100644 tests/test_epub_scraper.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 12818d0..220d09e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,18 @@ All notable changes to Skill Seeker will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Added
+- **EPUB (.epub) input support** via `skill-seekers create book.epub` or `skill-seekers epub --epub book.epub`
+  - Extracts chapters, metadata (Dublin Core), code blocks, images, and tables from EPUB 2 and EPUB 3 files
+  - DRM detection with clear error messages (Adobe ADEPT, Apple FairPlay, Readium LCP)
+  - Font obfuscation correctly identified as non-DRM
+  - EPUB 3 TOC bug workaround (`ignore_ncx` option)
+  - `--help-epub` flag for EPUB-specific help
+  - Optional dependency: `pip install "skill-seekers[epub]"` (ebooklib)
+  - 104 tests across 14 test classes
+
 ## [3.2.0] - 2026-03-01
 
 **Theme:** Video source support, Word document support, Pinecone adaptor, and quality improvements. 94 files changed, +23,500 lines since v3.1.3. **2,540 tests passing.**
diff --git a/CLAUDE.md b/CLAUDE.md
index 83edfda..3615cf0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## 🎯 Project Overview
 
-**Skill Seekers** is the **universal documentation preprocessor** for AI systems. It transforms documentation websites, GitHub repositories, and PDFs into production-ready formats for **16+ platforms**: RAG pipelines (LangChain, LlamaIndex, Haystack), vector databases (Pinecone, Chroma, Weaviate, FAISS, Qdrant), AI coding assistants (Cursor, Windsurf, Cline, Continue.dev), and LLM platforms (Claude, Gemini, OpenAI).
+**Skill Seekers** is the **universal documentation preprocessor** for AI systems. It transforms documentation websites, GitHub repositories, PDFs, and EPUBs into production-ready formats for **16+ platforms**: RAG pipelines (LangChain, LlamaIndex, Haystack), vector databases (Pinecone, Chroma, Weaviate, FAISS, Qdrant), AI coding assistants (Cursor, Windsurf, Cline, Continue.dev), and LLM platforms (Claude, Gemini, OpenAI).
 
 **Current Version:** v3.1.3
 **Python Version:** 3.10+ required
@@ -222,6 +222,7 @@ src/skill_seekers/
 │   ├── dependency_analyzer.py        # Dependency graph analysis
 │   ├── signal_flow_analyzer.py       # C3.10 Signal flow analysis (Godot)
 │   ├── pdf_scraper.py                # PDF extraction
+│   ├── epub_scraper.py               # EPUB extraction
 │   └── adaptors/                     # ⭐ Platform adaptor pattern
 │       ├── __init__.py               # Factory: get_adaptor()
 │       ├── base_adaptor.py           # Abstract base
@@ -397,7 +398,7 @@ The unified CLI modifies `sys.argv` and calls existing `main()` functions to mai
 # Transforms to: doc_scraper.main() with modified sys.argv
 ```
 
-**Subcommands:** create, scrape, github, pdf, unified, codebase, enhance, enhance-status, package, upload, estimate, install, install-agent, patterns, how-to-guides
+**Subcommands:** create, scrape, github, pdf, epub, unified, codebase, enhance, enhance-status, package, upload, estimate, install, install-agent, patterns, how-to-guides
 
 ### NEW: Unified `create` Command
 
@@ -409,6 +410,7 @@ skill-seekers create https://docs.react.dev/         # → Web scraping
 skill-seekers create facebook/react                  # → GitHub analysis
 skill-seekers create ./my-project                    # → Local codebase
 skill-seekers create tutorial.pdf                    # → PDF extraction
+skill-seekers create book.epub                       # → EPUB extraction
 skill-seekers create configs/react.json              # → Multi-source
 
 # Progressive help system
@@ -417,6 +419,7 @@ skill-seekers create --help-web       # Shows web-specific options
 skill-seekers create --help-github    # Shows GitHub-specific options
 skill-seekers create --help-local     # Shows local analysis options
 skill-seekers create --help-pdf       # Shows PDF extraction options
+skill-seekers create --help-epub      # Shows EPUB extraction options
 skill-seekers create --help-advanced  # Shows advanced/rare options
 skill-seekers create --help-all       # Shows all 120+ flags
 
@@ -685,6 +688,7 @@ pytest tests/ -v -m ""
 - `test_unified.py` - Multi-source scraping
 - `test_github_scraper.py` - GitHub analysis
 - `test_pdf_scraper.py` - PDF extraction
+- `test_epub_scraper.py` - EPUB extraction
 - `test_install_multiplatform.py` - Multi-platform packaging
 - `test_integration.py` - End-to-end workflows
 - `test_install_skill.py` - One-command install
@@ -741,6 +745,7 @@ skill-seekers-resume = "skill_seekers.cli.resume_command:main"                #
 skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
 skill-seekers-github = "skill_seekers.cli.github_scraper:main"
 skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
+skill-seekers-epub = "skill_seekers.cli.epub_scraper:main"
 skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
 skill-seekers-codebase = "skill_seekers.cli.codebase_scraper:main"           # C2.x Local codebase analysis
 skill-seekers-enhance = "skill_seekers.cli.enhance_skill_local:main"
@@ -1754,6 +1759,7 @@ This section helps you quickly locate the right files when implementing common c
 | GitHub scraping | `src/skill_seekers/cli/github_scraper.py` | ~56KB | Repo analysis + metadata |
 | GitHub API | `src/skill_seekers/cli/github_fetcher.py` | ~17KB | Rate limit handling |
 | PDF extraction | `src/skill_seekers/cli/pdf_scraper.py` | Medium | PyMuPDF + OCR |
+| EPUB extraction | `src/skill_seekers/cli/epub_scraper.py` | Medium | ebooklib + BeautifulSoup |
 | Code analysis | `src/skill_seekers/cli/code_analyzer.py` | ~65KB | Multi-language AST parsing |
 | Pattern detection | `src/skill_seekers/cli/pattern_recognizer.py` | Medium | C3.1 - 10 GoF patterns |
 | Test extraction | `src/skill_seekers/cli/test_example_extractor.py` | Medium | C3.2 - 5 categories |
@@ -1777,7 +1783,7 @@ This section helps you quickly locate the right files when implementing common c
 2. **Arguments:** `src/skill_seekers/cli/arguments/create.py`
    - Three tiers of arguments:
      - `UNIVERSAL_ARGUMENTS` (13 flags) - Work for all sources
-     - Source-specific dicts (`WEB_ARGUMENTS`, `GITHUB_ARGUMENTS`, etc.)
+     - Source-specific dicts (`WEB_ARGUMENTS`, `GITHUB_ARGUMENTS`, `EPUB_ARGUMENTS`, etc.)
      - `ADVANCED_ARGUMENTS` - Rare/advanced options
    - `add_create_arguments(parser, mode)` - Multi-mode argument addition
 
diff --git a/docs/agents/plans/2026-03-14-epub-input-support.md b/docs/agents/plans/2026-03-14-epub-input-support.md
new file mode 100644
index 0000000..2e20478
--- /dev/null
+++ b/docs/agents/plans/2026-03-14-epub-input-support.md
@@ -0,0 +1,1160 @@
+---
+date: 2026-03-14T19:30:35.172407+00:00
+git_commit: 7c90a4b9c9bccac8341b0769550d77aae3b4e524
+branch: development
+topic: "Add EPUB Input Support"
+tags: [plan, epub, scraper, input-format]
+status: complete
+---
+
+# Add EPUB Input Support — Implementation Plan
+
+## Overview
+
+Add `.epub` as an input format for Skill Seekers, enabling `skill-seekers create book.epub` and `skill-seekers epub --epub book.epub`. Follows the established Word/PDF scraper pattern: source detection → routing → extraction → categorize → build skill.
+
+**Authoritative reference**: [W3C EPUB 3.3 Specification](https://www.w3.org/TR/epub-33/) (also covers EPUB 2 backward compatibility).
+
+## Current State Analysis
+
+The codebase has a consistent multi-layer architecture for document input formats. PDF and Word (.docx) serve as direct analogs. The Word scraper (`word_scraper.py`) is the closest pattern match since both Word and EPUB produce HTML/XHTML that is parsed with BeautifulSoup.
+
+### Key Discoveries:
+- Word scraper converts `.docx` → HTML (via mammoth) → BeautifulSoup parse → intermediate JSON → SKILL.md (`word_scraper.py:96-235`)
+- EPUB files contain XHTML natively (per W3C spec §5), so the mammoth conversion step is unnecessary — BeautifulSoup can parse EPUB XHTML content directly
+- Source detection uses file extension matching (`source_detector.py:57-65`)
+- Optional dependencies use a guard pattern with `try/except ImportError` and a `_check_*_deps()` function (`word_scraper.py:21-40`)
+- The `ebooklib` library (v0.18+) provides `epub.read_epub()` returning an `EpubBook` with spine iteration, metadata access via `get_metadata('DC', key)`, and item content via `get_content()`/`get_body_content()`
+- ebooklib has a known bug: EPUB 3 files read TOC from NCX instead of NAV (issue #200); workaround: `options={"ignore_ncx": True}`
+- ebooklib loads entire EPUB into memory — acceptable for typical books but relevant for edge cases
+
+## Desired End State
+
+Running `skill-seekers create book.epub` produces:
+```
+output/book/
+├── SKILL.md              # Main skill file with metadata, concepts, code examples
+├── references/
+│   ├── index.md          # Category index with statistics
+│   └── book.md           # Chapter content (or multiple files if categorized)
+├── scripts/
+└── assets/
+    └── *.png|*.jpg       # Extracted images
+```
+
+### CLI Output Mockup
+
+```
+$ skill-seekers create programming-rust.epub
+
+ℹ️  Detected source type: epub
+ℹ️  Routing to epub scraper...
+
+🔍 Extracting from EPUB: programming-rust.epub
+   Title: Programming Rust, 2nd Edition
+   Author: Jim Blandy, Jason Orendorff
+   Language: en
+   Chapters: 23 (spine items)
+
+📄 Processing chapters...
+   Chapter 1/23: Why Rust? (2 sections, 1 code block)
+   Chapter 2/23: A Tour of Rust (5 sections, 12 code blocks)
+   ...
+   Chapter 23/23: Macros (4 sections, 8 code blocks)
+
+📊 Extraction complete:
+   Sections: 142
+   Code blocks: 287 (Rust: 245, Shell: 28, TOML: 14)
+   Images: 34
+   Tables: 12
+
+💾 Saved extracted data to: output/programming-rust_extracted.json
+
+📋 Categorizing content...
+✅ Created 1 category (single EPUB source)
+   - programming-rust: 142 sections
+
+📝 Generating reference files...
+   Generated: output/programming-rust/references/programming-rust.md
+   Generated: output/programming-rust/references/index.md
+
+✅ Skill built successfully: output/programming-rust/
+
+📦 Next step: Package with: skill-seekers package output/programming-rust/
+```
+
+### Verification:
+- [x] `skill-seekers create book.epub` produces valid output directory
+- [x] `skill-seekers epub --epub book.epub --name mybook` works standalone
+- [x] `skill-seekers create book.epub --dry-run` shows config without processing
+- [x] All ~2,540+ existing tests still pass (982 passed, 1 pre-existing failure)
+- [x] New test suite has 100+ tests covering happy path, errors, and edge cases (107 tests, 14 classes)
+
+## What We're NOT Doing
+
+- DRM decryption (detect and error gracefully with clear message)
+- EPUB writing/creation (read-only)
+- Media overlay / audio / video extraction (ignore gracefully)
+- Fixed-layout OCR (detect and warn; extract whatever text exists in XHTML)
+- `--chapter-range` flag (can be added later)
+- Unified scraper (`unified_scraper.py`) EPUB support (separate future task)
+- MCP tool for EPUB (separate future task)
+
+## Implementation Approach
+
+Follow the Word scraper pattern exactly, with EPUB-specific extraction logic:
+
+1. **Phase 1**: Core `epub_scraper.py` — the `EpubToSkillConverter` class
+2. **Phase 2**: CLI integration — source detection, arguments, parser, routing, entry points
+3. **Phase 3**: Comprehensive test suite — 100+ tests across 11 test classes
+4. **Phase 4**: Documentation updates
+
+---
+
+## Phase 1: Core EPUB Scraper
+
+### Overview
+Create `epub_scraper.py` with `EpubToSkillConverter` class following the Word scraper pattern. This is the bulk of new code.
+
+### Changes Required:
+
+#### [x] 1. Optional dependency in pyproject.toml
+**File**: `pyproject.toml`
+**Changes**: Add `epub` optional dependency group and include in `all` group
+
+```toml
+# After the docx group (~line 115)
+# EPUB (.epub) support
+epub = [
+    "ebooklib>=0.18",
+]
+```
+
+Add `"ebooklib>=0.18",` to the `all` group (~line 178).
+
+#### [x] 2. Create `src/skill_seekers/cli/epub_scraper.py`
+**File**: `src/skill_seekers/cli/epub_scraper.py` (new)
+**Changes**: Full EPUB scraper module
+
+**Structure** (following `word_scraper.py` pattern):
+
+```python
+"""
+EPUB Documentation to Skill Converter
+
+Converts EPUB e-books into skills.
+Uses ebooklib for EPUB parsing, BeautifulSoup for XHTML content extraction.
+
+Usage:
+    skill-seekers epub --epub book.epub --name myskill
+    skill-seekers epub --from-json book_extracted.json
+"""
+
+import argparse
+import json
+import logging
+import os
+import re
+import sys
+from pathlib import Path
+
+# Optional dependency guard
+try:
+    import ebooklib
+    from ebooklib import epub
+    EPUB_AVAILABLE = True
+except ImportError:
+    EPUB_AVAILABLE = False
+
+# BeautifulSoup is a core dependency (always available)
+from bs4 import BeautifulSoup, Comment
+
+logger = logging.getLogger(__name__)
+
+
+def _check_epub_deps():
+    """Raise RuntimeError if ebooklib is not installed."""
+    if not EPUB_AVAILABLE:
+        raise RuntimeError(
+            "ebooklib is required for EPUB support.\n"
+            'Install with: pip install "skill-seekers[epub]"\n'
+            "Or: pip install ebooklib"
+        )
+
+
+def infer_description_from_epub(metadata: dict | None = None, name: str = "") -> str:
+    """Infer skill description from EPUB metadata."""
+    if metadata:
+        if metadata.get("description") and len(metadata["description"]) > 20:
+            desc = metadata["description"].strip()
+            if len(desc) > 150:
+                desc = desc[:147] + "..."
+            return f"Use when {desc.lower()}"
+        if metadata.get("title") and len(metadata["title"]) > 10:
+            return f"Use when working with {metadata['title'].lower()}"
+    return (
+        f"Use when referencing {name} documentation"
+        if name
+        else "Use when referencing this documentation"
+    )
+```
+
+**`EpubToSkillConverter` class methods:**
+
+```python
+class EpubToSkillConverter:
+    def __init__(self, config: dict):
+        self.config = config
+        self.name = config["name"]
+        self.epub_path = config.get("epub_path", "")
+        self.description = config.get(
+            "description", f"Use when referencing {self.name} documentation"
+        )
+        self.skill_dir = f"output/{self.name}"
+        self.data_file = f"output/{self.name}_extracted.json"
+        self.categories = config.get("categories", {})
+        self.extracted_data = None
+
+    def extract_epub(self) -> bool:
+        """Extract content from EPUB file.
+
+        Workflow:
+        1. Check dependencies (ebooklib)
+        2. Detect DRM via META-INF/encryption.xml (fail fast)
+        3. Read EPUB via ebooklib with ignore_ncx=True (EPUB 3 TOC bug workaround)
+        4. Extract Dublin Core metadata (title, creator, language, publisher, date, description, subject)
+        5. Iterate spine items in reading order
+        6. For each ITEM_DOCUMENT: parse XHTML with BeautifulSoup
+        7. Split by h1/h2 heading boundaries into sections
+        8. Extract code blocks from <pre>/<code> elements
+        9. Extract images from EpubImage items
+        10. Detect code languages via LanguageDetector
+        11. Save intermediate JSON to {name}_extracted.json
+
+        Returns True on success.
+        Raises RuntimeError for DRM-protected files.
+        Raises FileNotFoundError for missing files.
+        Raises ValueError for invalid EPUB files.
+        """
+```
+
+**DRM detection** (per W3C spec §4.2.6.3.2):
+
+```python
+def _detect_drm(self, book) -> bool:
+    """Detect DRM by checking for encryption.xml with non-font-obfuscation entries.
+
+    Per W3C EPUB 3.3 spec: encryption.xml is present when resources are encrypted.
+    Font obfuscation (IDPF algorithm http://www.idpf.org/2008/embedding or
+    Adobe algorithm http://ns.adobe.com/pdf/enc#RC) is NOT DRM — only font mangling.
+
+    Actual DRM uses algorithms like:
+    - Adobe ADEPT: http://ns.adobe.com/adept namespace
+    - Apple FairPlay: http://itunes.apple.com/dataenc
+    - Readium LCP: http://readium.org/2014/01/lcp
+    """
+```
+
+**Metadata extraction** (per W3C spec §5.2, Dublin Core):
+
+```python
+def _extract_metadata(self, book) -> dict:
+    """Extract Dublin Core metadata from EPUB.
+
+    Per W3C EPUB 3.3 spec: required elements are dc:identifier, dc:title, dc:language.
+    Optional: dc:creator, dc:contributor, dc:date, dc:description, dc:publisher,
+    dc:subject, dc:rights, dc:type, dc:coverage, dc:source, dc:relation, dc:format.
+
+    ebooklib API: book.get_metadata('DC', key) returns list of (value, attrs) tuples.
+    """
+    def _get_one(key):
+        data = book.get_metadata('DC', key)
+        return data[0][0] if data else None
+
+    def _get_list(key):
+        data = book.get_metadata('DC', key)
+        return [x[0] for x in data] if data else []
+
+    return {
+        "title": _get_one('title') or "Untitled",
+        "author": ", ".join(_get_list('creator')) or None,
+        "language": _get_one('language') or "en",
+        "publisher": _get_one('publisher'),
+        "date": _get_one('date'),
+        "description": _get_one('description'),
+        "subject": ", ".join(_get_list('subject')) or None,
+        "rights": _get_one('rights'),
+        "identifier": _get_one('identifier'),
+    }
+```
+
+**Content extraction** (per W3C spec §5 — XHTML Content Documents use XML serialization of HTML5):
+
+```python
+def _extract_spine_content(self, book) -> list[dict]:
+    """Extract content from spine items in reading order.
+
+    Per W3C EPUB 3.3 spec §3.4.8: spine defines ordered list of content documents.
+    Linear="yes" (default) items form the primary reading order.
+    Linear="no" items are auxiliary (footnotes, glossary).
+
+    Per spec §5: XHTML content documents use XML syntax of HTML5.
+    Parse with BeautifulSoup, split by h1/h2 heading boundaries.
+    """
+    sections = []
+    section_number = 0
+
+    for item_id, linear in book.spine:
+        item = book.get_item_with_id(item_id)
+        if not item or item.get_type() != ebooklib.ITEM_DOCUMENT:
+            continue
+
+        soup = BeautifulSoup(item.get_content(), 'html.parser')
+
+        # Remove scripts, styles, comments (not useful for text extraction)
+        for tag in soup(['script', 'style']):
+            tag.decompose()
+        for comment in soup.find_all(string=lambda t: isinstance(t, Comment)):
+            comment.extract()
+
+        body = soup.find('body')
+        if not body:
+            continue
+
+        # Split by h1/h2 heading boundaries (same as word_scraper)
+        # Each heading starts a new section
+        ...
+```
+
+**Image extraction** (per W3C spec §3.3 — core media types include JPEG, PNG, GIF, WebP, SVG):
+
+```python
+def _extract_images(self, book) -> list[dict]:
+    """Extract images from EPUB manifest.
+
+    Per W3C EPUB 3.3 spec §3.3: core image media types are
+    image/gif, image/jpeg, image/png, image/svg+xml, image/webp.
+
+    ebooklib API: book.get_items_of_type(ebooklib.ITEM_IMAGE)
+    returns EpubImage items with get_content() (bytes) and media_type.
+
+    SVG images (ITEM_VECTOR) handled separately.
+    """
+```
+
+**The remaining methods** (`categorize_content`, `build_skill`, `_generate_reference_file`, `_generate_index`, `_generate_skill_md`, `_format_key_concepts`, `_format_patterns_from_content`, `_sanitize_filename`) follow the Word scraper pattern exactly — they operate on the same intermediate JSON structure.
+
+**`main()` function** (following `word_scraper.py:923-1059`):
+
+```python
+def main():
+    from .arguments.epub import add_epub_arguments
+
+    parser = argparse.ArgumentParser(
+        description="Convert EPUB e-book to skill",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    add_epub_arguments(parser)
+    args = parser.parse_args()
+
+    # Logging setup
+    if getattr(args, "quiet", False):
+        logging.getLogger().setLevel(logging.WARNING)
+    elif getattr(args, "verbose", False):
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Dry run
+    if getattr(args, "dry_run", False):
+        source = args.epub or args.from_json or "(none)"
+        print(f"\n{'=' * 60}")
+        print("DRY RUN: EPUB Extraction")
+        print(f"{'=' * 60}")
+        print(f"Source:         {source}")
+        print(f"Name:           {getattr(args, 'name', None) or '(auto-detect)'}")
+        print(f"Enhance level:  {getattr(args, 'enhance_level', 0)}")
+        print(f"\n✅ Dry run complete")
+        return
+
+    # Validate inputs
+    if not (args.epub or args.from_json):
+        parser.error("Must specify --epub or --from-json")
+
+    # From-JSON workflow
+    if args.from_json:
+        name = Path(args.from_json).stem.replace("_extracted", "")
+        config = {
+            "name": name,
+            "description": args.description or f"Use when referencing {name} documentation",
+        }
+        converter = EpubToSkillConverter(config)
+        converter.load_extracted_data(args.from_json)
+        converter.build_skill()
+        return
+
+    # Direct EPUB workflow
+    name = args.name or Path(args.epub).stem
+    config = {
+        "name": name,
+        "epub_path": args.epub,
+        "description": args.description or f"Use when referencing {name} documentation",
+    }
+
+    try:
+        converter = EpubToSkillConverter(config)
+        if not converter.extract_epub():
+            print("\n❌ EPUB extraction failed", file=sys.stderr)
+            sys.exit(1)
+        converter.build_skill()
+
+        # Enhancement workflow integration
+        from skill_seekers.cli.workflow_runner import run_workflows
+        run_workflows(args)
+
+        # Traditional enhancement
+        if getattr(args, "enhance_level", 0) > 0:
+            # Same pattern as word_scraper.py and pdf_scraper.py
+            ...
+
+    except RuntimeError as e:
+        print(f"\n❌ Error: {e}", file=sys.stderr)
+        sys.exit(1)
+```
+
+### Success Criteria:
+
+#### Automated Verification:
+- [x] `ruff check src/skill_seekers/cli/epub_scraper.py` passes
+- [x] `ruff format --check src/skill_seekers/cli/epub_scraper.py` passes
+- [ ] `mypy src/skill_seekers/cli/epub_scraper.py` passes (continue-on-error)
+- [x] `pip install -e ".[epub]"` installs successfully
+
+#### Manual Verification:
+- [x] Verify `import ebooklib` works after install
+- [x] Review epub_scraper.py structure matches word_scraper.py pattern
+
+**Implementation Note**: Once this phase is complete and all automated verification passes, pause here for manual confirmation from the human before proceeding to the next phase.
+
+---
+
+## Phase 2: CLI Integration
+
+### Overview
+Wire the EPUB scraper into the CLI: source detection, argument definitions, parser registration, create command routing, and entry points.
+
+### Changes Required:
+
+#### [x] 1. Source detection
+**File**: `src/skill_seekers/cli/source_detector.py`
+**Changes**: Add `.epub` extension detection, `_detect_epub()` method, validation, and error message update
+
+Add after the `.docx` check (line 64):
+```python
+if source.endswith(".epub"):
+    return cls._detect_epub(source)
+```
+
+Add `_detect_epub()` method (following `_detect_word()` at line 124):
+```python
+@classmethod
+def _detect_epub(cls, source: str) -> SourceInfo:
+    """Detect EPUB file source."""
+    name = os.path.splitext(os.path.basename(source))[0]
+    return SourceInfo(
+        type="epub", parsed={"file_path": source}, suggested_name=name, raw_input=source
+    )
+```
+
+Add epub validation in `validate_source()` (after word block at line 278):
+```python
+elif source_info.type == "epub":
+    file_path = source_info.parsed["file_path"]
+    if not os.path.exists(file_path):
+        raise ValueError(f"EPUB file does not exist: {file_path}")
+    if not os.path.isfile(file_path):
+        raise ValueError(f"Path is not a file: {file_path}")
+```
+
+Add EPUB example to the ValueError message (line 94):
+```python
+"  EPUB:  skill-seekers create ebook.epub\n"
+```
+
+#### [x] 2. Argument definitions
+**File**: `src/skill_seekers/cli/arguments/epub.py` (new)
+**Changes**: EPUB-specific argument definitions
+
+```python
+"""EPUB-specific CLI arguments."""
+
+import argparse
+from typing import Any
+
+from .common import add_all_standard_arguments
+
+EPUB_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "epub": {
+        "flags": ("--epub",),
+        "kwargs": {
+            "type": str,
+            "help": "Direct EPUB file path",
+            "metavar": "PATH",
+        },
+    },
+    "from_json": {
+        "flags": ("--from-json",),
+        "kwargs": {
+            "type": str,
+            "help": "Build skill from extracted JSON",
+            "metavar": "FILE",
+        },
+    },
+}
+
+
+def add_epub_arguments(parser: argparse.ArgumentParser) -> None:
+    """Add EPUB-specific arguments to parser."""
+    add_all_standard_arguments(parser)
+
+    # Override enhance-level default to 0 for EPUB
+    for action in parser._actions:
+        if hasattr(action, "dest") and action.dest == "enhance_level":
+            action.default = 0
+            action.help = (
+                "AI enhancement level (auto-detects API vs LOCAL mode): "
+                "0=disabled (default for EPUB), 1=SKILL.md only, "
+                "2=+architecture/config, 3=full enhancement. "
+                "Mode selection: uses API if ANTHROPIC_API_KEY is set, "
+                "otherwise LOCAL (Claude Code)"
+            )
+
+    for arg_name, arg_def in EPUB_ARGUMENTS.items():
+        parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
+```
+
+#### [x] 3. Create command argument integration
+**File**: `src/skill_seekers/cli/arguments/create.py`
+**Changes**: Add EPUB_ARGUMENTS dict, register in helper functions, add mode handling
+
+Add after WORD_ARGUMENTS (~line 411):
+```python
+# EPUB specific (from epub.py)
+EPUB_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "epub": {
+        "flags": ("--epub",),
+        "kwargs": {
+            "type": str,
+            "help": "EPUB file path",
+            "metavar": "PATH",
+        },
+    },
+}
+```
+
+Add to `get_source_specific_arguments()` (line 595):
+```python
+"epub": EPUB_ARGUMENTS,
+```
+
+Add to `add_create_arguments()` (after word block at line 678):
+```python
+if mode in ["epub", "all"]:
+    for arg_name, arg_def in EPUB_ARGUMENTS.items():
+        parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
+```
+
+#### [x] 4. Parser class
+**File**: `src/skill_seekers/cli/parsers/epub_parser.py` (new)
+**Changes**: Subcommand parser for standalone `skill-seekers epub` command
+
+```python
+"""Parser for epub subcommand."""
+
+from .base import SubcommandParser
+from skill_seekers.cli.arguments.epub import add_epub_arguments
+
+
+class EpubParser(SubcommandParser):
+    """Parser for EPUB extraction command."""
+
+    @property
+    def name(self) -> str:
+        return "epub"
+
+    @property
+    def help(self) -> str:
+        return "Extract from EPUB e-book (.epub)"
+
+    @property
+    def description(self) -> str:
+        return "Extract content from EPUB e-book (.epub) and generate skill"
+
+    def add_arguments(self, parser):
+        add_epub_arguments(parser)
+```
+
+#### [x] 5. Parser registration
+**File**: `src/skill_seekers/cli/parsers/__init__.py`
+**Changes**: Import and register EpubParser
+
+Add import (after WordParser import, line 15):
+```python
+from .epub_parser import EpubParser
+```
+
+Add to PARSERS list (after `WordParser()`, line 46):
+```python
+EpubParser(),
+```
+
+#### [x] 6. CLI dispatcher
+**File**: `src/skill_seekers/cli/main.py`
+**Changes**: Add epub to COMMAND_MODULES dict and module docstring
+
+Add to COMMAND_MODULES (after "word" entry, line 52):
+```python
+"epub": "skill_seekers.cli.epub_scraper",
+```
+
+Add to module docstring (after "word" line, line 15):
+```python
+#    epub                 Extract from EPUB e-book (.epub)
+```
+
+#### [x] 7. Create command routing
+**File**: `src/skill_seekers/cli/create_command.py`
+**Changes**: Add `_route_epub()` method, routing case, help flag, and epilog example
+
+Add to `_route_to_scraper()` (after word case, line 136):
+```python
+elif self.source_info.type == "epub":
+    return self._route_epub()
+```
+
+Add `_route_epub()` method (after `_route_word()`, line 352):
+```python
+def _route_epub(self) -> int:
+    """Route to EPUB scraper (epub_scraper.py)."""
+    from skill_seekers.cli import epub_scraper
+
+    argv = ["epub_scraper"]
+    file_path = self.source_info.parsed["file_path"]
+    argv.extend(["--epub", file_path])
+    self._add_common_args(argv)
+
+    logger.debug(f"Calling epub_scraper with argv: {argv}")
+    original_argv = sys.argv
+    try:
+        sys.argv = argv
+        return epub_scraper.main()
+    finally:
+        sys.argv = original_argv
+```
+
+Add to epilog (line 543, after DOCX example):
+```text
+  EPUB:     skill-seekers create ebook.epub
+```
+
+Add to Source Auto-Detection section:
+```text
+  • file.epub → EPUB extraction
+```
+
+Add `--help-epub` flag and handler (after `--help-word` at line 592):
+```python
+parser.add_argument(
+    "--help-epub", action="store_true", help=argparse.SUPPRESS, dest="_help_epub"
+)
+```
+
+Add handler block (after `_help_word` block at line 654):
+```python
+elif args._help_epub:
+    parser_epub = argparse.ArgumentParser(
+        prog="skill-seekers create",
+        description="Create skill from EPUB e-book (.epub)",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    add_create_arguments(parser_epub, mode="epub")
+    parser_epub.print_help()
+    return 0
+```
+
+#### [x] 8. Entry point
+**File**: `pyproject.toml`
+**Changes**: Add standalone entry point
+
+Add after `skill-seekers-word` (line 224):
+```toml
+skill-seekers-epub = "skill_seekers.cli.epub_scraper:main"
+```
+
+#### [x] 9. Positional argument handling in main.py
+**File**: `src/skill_seekers/cli/main.py`
+**Changes**: None required — "input_file" is already in the positional list at line 153. Verify that `_reconstruct_argv` handles epub correctly through the standard delegation path.
+
+### Success Criteria:
+
+#### Automated Verification:
+- [x] `ruff check src/skill_seekers/cli/source_detector.py src/skill_seekers/cli/arguments/epub.py src/skill_seekers/cli/parsers/epub_parser.py src/skill_seekers/cli/create_command.py` passes
+- [x] `ruff format --check src/skill_seekers/cli/` passes
+- [x] `pip install -e ".[epub]"` installs with all entry points
+- [x] `skill-seekers epub --help` shows EPUB-specific help
+- [x] `skill-seekers create --help-epub` shows EPUB arguments (via standalone entry point `skill-seekers-create`)
+- [x] `skill-seekers create nonexistent.epub` gives clear error about missing file
+- [x] Existing tests still pass: `pytest tests/ -v -x -m "not slow and not integration"` (875 passed, 1 pre-existing unrelated failure in test_git_sources_e2e)
+
+#### Manual Verification:
+- [x] `skill-seekers --help` lists `epub` command
+- [x] `skill-seekers create book.epub --dry-run` shows dry run output
+
+**Implementation Note**: After completing this phase and all automated verification passes, pause here for manual confirmation from the human before proceeding to the next phase.
+
+---
+
+## Phase 3: Comprehensive Test Suite
+
+### Overview
+Create `tests/test_epub_scraper.py` with 100+ tests across 14 test classes, covering happy path, negative cases, edge cases, and CLI integration.
+
+### Changes Required:
+
+#### [x] 1. Create test file
+**File**: `tests/test_epub_scraper.py` (new)
+**Changes**: Comprehensive test suite following `test_word_scraper.py` patterns
+
+```python
+"""
+Tests for EPUB scraper (epub_scraper.py).
+
+Covers: initialization, extraction, categorization, skill building,
+code blocks, tables, images, error handling, JSON workflow, CLI arguments,
+helper functions, source detection, DRM detection, and edge cases.
+
+Tests use mock data and do not require actual EPUB files or ebooklib installed.
+"""
+
+import json
+import os
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch, PropertyMock
+
+
+# Conditional import (same pattern as test_word_scraper.py)
+try:
+    from skill_seekers.cli.epub_scraper import (
+        EpubToSkillConverter,
+        infer_description_from_epub,
+        _score_code_quality,
+        _check_epub_deps,
+        EPUB_AVAILABLE,
+    )
+    IMPORT_OK = True
+except ImportError:
+    IMPORT_OK = False
+```
+
+**Helper factory function:**
+
+```python
+def _make_sample_extracted_data(
+    num_sections=2,
+    include_code=False,
+    include_tables=False,
+    include_images=False,
+) -> dict:
+    """Create minimal extracted_data dict for testing."""
+    sections = []
+    total_code = 0
+    total_images = 0
+    languages = {}
+
+    for i in range(1, num_sections + 1):
+        section = {
+            "section_number": i,
+            "heading": f"Chapter {i}",
+            "heading_level": "h1",
+            "text": f"Content of chapter {i}. This is sample text.",
+            "headings": [{"level": "h2", "text": f"Section {i}.1"}],
+            "code_samples": [],
+            "tables": [],
+            "images": [],
+        }
+
+        if include_code:
+            section["code_samples"] = [
+                {"code": f"def func_{i}():\n    return {i}", "language": "python", "quality_score": 7.5},
+                {"code": f"console.log({i})", "language": "javascript", "quality_score": 4.0},
+            ]
+            total_code += 2
+            languages["python"] = languages.get("python", 0) + 1
+            languages["javascript"] = languages.get("javascript", 0) + 1
+
+        if include_tables:
+            section["tables"] = [
+                {"headers": ["Name", "Value"], "rows": [["key", "val"]]}
+            ]
+
+        if include_images:
+            section["images"] = [
+                {"index": 1, "data": b"\x89PNG\r\n\x1a\n", "width": 100, "height": 100}
+            ]
+            total_images += 1
+
+        sections.append(section)
+
+    return {
+        "source_file": "test.epub",
+        "metadata": {
+            "title": "Test Book",
+            "author": "Test Author",
+            "language": "en",
+            "publisher": "Test Publisher",
+            "date": "2024-01-01",
+            "description": "A test book for unit testing",
+            "subject": "Testing, Unit Tests",
+            "rights": "Copyright 2024",
+            "identifier": "urn:uuid:12345",
+        },
+        "total_sections": num_sections,
+        "total_code_blocks": total_code,
+        "total_images": total_images,
+        "languages_detected": languages,
+        "pages": sections,
+    }
+```
+
+### Test Classes and Methods:
+
+#### [x] Class 1: `TestEpubToSkillConverterInit` (8 tests)
+
+**Happy path:**
+- `test_init_with_name_and_epub_path` — basic config with name + epub_path
+- `test_init_with_full_config` — config with all fields (name, epub_path, description, categories)
+- `test_default_description_uses_name` — description defaults to "Use when referencing {name} documentation"
+- `test_skill_dir_uses_name` — skill_dir is `output/{name}`
+- `test_data_file_uses_name` — data_file is `output/{name}_extracted.json`
+
+**Negative:**
+- `test_init_requires_name` — missing "name" key raises KeyError
+- `test_init_empty_name` — empty string name still works (no crash)
+
+**Edge case:**
+- `test_init_with_special_characters_in_name` — name with spaces/dashes sanitized for paths
+
+#### [x] Class 2: `TestEpubExtraction` (12 tests)
+
+**Happy path:**
+- `test_extract_basic_epub` — mock ebooklib, verify sections extracted in spine order
+- `test_extract_metadata` — verify Dublin Core metadata extraction (title, creator, language, etc.)
+- `test_extract_multiple_chapters` — multiple spine items produce multiple sections
+- `test_extract_code_blocks` — `<pre><code>` elements extracted with language detection
+- `test_extract_images` — ITEM_IMAGE items extracted with correct content
+- `test_heading_boundary_splitting` — h1/h2 boundaries create new sections
+
+**Negative:**
+- `test_extract_missing_file_raises_error` — FileNotFoundError for nonexistent path
+- `test_extract_invalid_epub_raises_error` — ValueError for corrupted/non-EPUB file
+- `test_extract_deps_not_installed` — RuntimeError with install instructions when ebooklib missing
+
+**Edge cases:**
+- `test_extract_empty_spine` — EPUB with no spine items produces empty sections list
+- `test_extract_spine_item_no_body` — XHTML without `<body>` tag skipped gracefully
+- `test_extract_non_linear_spine_items` — linear="no" items still extracted (included but flagged)
+
+#### [x] Class 3: `TestEpubDrmDetection` (6 tests)
+
+**Happy path:**
+- `test_no_drm_detected` — normal EPUB without encryption.xml returns False
+
+**Negative:**
+- `test_drm_detected_adobe_adept` — encryption.xml with Adobe namespace raises RuntimeError
+- `test_drm_detected_apple_fairplay` — encryption.xml with Apple namespace raises RuntimeError
+- `test_drm_detected_readium_lcp` — encryption.xml with Readium namespace raises RuntimeError
+
+**Edge cases:**
+- `test_font_obfuscation_not_drm` — encryption.xml with only IDPF font obfuscation algorithm (`http://www.idpf.org/2008/embedding`) is NOT DRM, extraction proceeds
+- `test_drm_error_message_is_clear` — error message mentions DRM and suggests removing protection
+
+#### [x] Class 4: `TestEpubCategorization` (8 tests)
+
+**Happy path:**
+- `test_single_source_creates_one_category` — single EPUB creates category named after file
+- `test_keyword_categorization` — sections matched to categories by keyword scoring
+- `test_no_categories_uses_default` — no category config creates single "content" category
+
+**Negative:**
+- `test_categorize_empty_sections` — empty sections list produces empty categories
+- `test_categorize_no_keyword_matches` — unmatched sections go to "other" category
+
+**Edge cases:**
+- `test_categorize_single_section` — one section creates one category
+- `test_categorize_many_sections` — 50+ sections categorized correctly
+- `test_categorize_preserves_section_order` — sections maintain original order within categories
+
+#### [x] Class 5: `TestEpubSkillBuilding` (10 tests)
+
+**Happy path:**
+- `test_build_creates_directory_structure` — output/{name}/, references/, scripts/, assets/ created
+- `test_build_generates_skill_md` — SKILL.md created with YAML frontmatter
+- `test_build_generates_reference_files` — reference markdown files created per category
+- `test_build_generates_index` — references/index.md created with category links
+- `test_skill_md_contains_metadata` — SKILL.md includes title, author, language from metadata
+- `test_skill_md_yaml_frontmatter` — frontmatter has name and description fields
+
+**Negative:**
+- `test_build_without_extracted_data_fails` — calling build_skill() before extraction raises error
+
+**Edge cases:**
+- `test_build_overwrites_existing_output` — re-running build overwrites existing files
+- `test_build_with_long_name` — name > 64 chars truncated in YAML frontmatter
+- `test_build_with_unicode_content` — Unicode text (CJK, Arabic, emoji) preserved correctly
+
+#### [x] Class 6: `TestEpubCodeBlocks` (8 tests)
+
+**Happy path:**
+- `test_code_blocks_included_in_reference_files` — code samples appear in reference markdown
+- `test_code_blocks_in_skill_md_top_15` — SKILL.md shows top 15 code examples by quality
+- `test_code_language_grouped` — code examples grouped by language in SKILL.md
+
+**Edge cases:**
+- `test_empty_code_block` — `<pre><code></code></pre>` with no content skipped
+- `test_code_block_with_html_entities` — `&lt;`, `&gt;`, `&amp;` decoded to `<`, `>`, `&`
+- `test_code_block_with_syntax_highlighting_spans` — `<span class="keyword">` stripped, plain text preserved
+- `test_code_block_language_from_class` — `class="language-python"`, `class="code-rust"` detected
+- `test_code_quality_scoring` — scoring heuristic produces expected ranges (0-10)
+
+#### [x] Class 7: `TestEpubTables` (5 tests)
+
+**Happy path:**
+- `test_tables_in_reference_files` — tables rendered as markdown in reference files
+- `test_table_with_headers` — headers from `<thead>` used correctly
+
+**Edge cases:**
+- `test_table_no_thead` — first row used as headers when no `<thead>`
+- `test_empty_table` — empty `<table>` element handled gracefully
+- `test_table_with_colspan_rowspan` — complex tables don't crash (data may be imperfect)
+
+#### [x] Class 8: `TestEpubImages` (7 tests)
+
+**Happy path:**
+- `test_images_saved_to_assets` — image bytes written to assets/ directory
+- `test_image_references_in_markdown` — markdown `![Image](../assets/...)` references correct
+
+**Negative:**
+- `test_image_with_zero_bytes` — empty image content skipped
+
+**Edge cases:**
+- `test_svg_images_handled` — SVG items (ITEM_VECTOR) extracted or skipped gracefully
+- `test_image_filename_conflicts` — duplicate filenames disambiguated
+- `test_cover_image_identified` — cover image (ITEM_COVER) extracted
+- `test_many_images` — 100+ images extracted without error
+
+#### [x] Class 9: `TestEpubErrorHandling` (10 tests)
+
+**Negative / error cases:**
+- `test_missing_epub_file_raises_error` — FileNotFoundError for nonexistent path
+- `test_not_a_file_raises_error` — ValueError when path is a directory
+- `test_not_epub_extension_raises_error` — ValueError for .txt, .pdf, .doc files
+- `test_corrupted_zip_raises_error` — ValueError or RuntimeError for corrupted ZIP
+- `test_missing_container_xml` — ValueError for ZIP without META-INF/container.xml
+- `test_missing_opf_file` — ValueError when container.xml points to nonexistent OPF
+- `test_drm_protected_raises_error` — RuntimeError with clear DRM message
+- `test_empty_epub_raises_error` — ValueError for EPUB with no content documents
+- `test_ebooklib_not_installed_error` — RuntimeError with install instructions
+- `test_malformed_xhtml_handled_gracefully` — unclosed tags, invalid entities don't crash (BeautifulSoup tolerant parsing)
+
+#### [x] Class 10: `TestEpubJSONWorkflow` (6 tests)
+
+**Happy path:**
+- `test_load_extracted_json` — load previously extracted JSON
+- `test_build_from_json` — full workflow: load JSON → categorize → build
+- `test_json_round_trip` — extract → save JSON → load JSON → build produces same output
+
+**Negative:**
+- `test_load_invalid_json` — malformed JSON raises appropriate error
+- `test_load_nonexistent_json` — FileNotFoundError for missing file
+
+**Edge case:**
+- `test_json_with_missing_fields` — partial JSON (missing optional fields) still works
+
+#### [x] Class 11: `TestEpubCLIArguments` (8 tests)
+
+**Happy path:**
+- `test_epub_flag_accepted` — `--epub path.epub` parsed correctly
+- `test_from_json_flag_accepted` — `--from-json data.json` parsed correctly
+- `test_name_flag_accepted` — `--name mybook` parsed correctly
+- `test_enhance_level_default_zero` — enhance-level defaults to 0 for EPUB
+- `test_dry_run_flag` — `--dry-run` flag parsed correctly
+
+**Negative:**
+- `test_no_args_shows_error` — no `--epub` or `--from-json` shows error
+
+**Integration:**
+- `test_verbose_flag` — `--verbose` accepted
+- `test_quiet_flag` — `--quiet` accepted
+
+#### [x] Class 12: `TestEpubHelperFunctions` (6 tests)
+
+- `test_infer_description_from_metadata_description` — uses description field
+- `test_infer_description_from_metadata_title` — falls back to title
+- `test_infer_description_fallback` — falls back to name-based template
+- `test_infer_description_empty_metadata` — empty dict returns fallback
+- `test_score_code_quality_ranges` — scoring returns 0-10
+- `test_sanitize_filename` — special characters cleaned
+
+#### [x] Class 13: `TestEpubSourceDetection` (6 tests)
+
+- `test_epub_detected_as_epub_type` — `.epub` extension detected correctly
+- `test_epub_suggested_name` — filename stem used as suggested name
+- `test_epub_validation_missing_file` — validation raises ValueError for missing file
+- `test_epub_validation_not_a_file` — validation raises ValueError for directory
+- `test_epub_with_path` — `./books/test.epub` detected with correct file_path
+- `test_pdf_still_detected` — regression test: `.pdf` still detected as pdf type
+
+#### [x] Class 14: `TestEpubEdgeCases` (8 tests)
+
+**Per W3C EPUB 3.3 spec edge cases:**
+- `test_epub2_vs_epub3` — both versions parse successfully (ebooklib handles both)
+- `test_epub_no_toc` — EPUB without table of contents extracts using spine order
+- `test_epub_empty_chapters` — chapters with no text content skipped gracefully
+- `test_epub_single_chapter` — book with one spine item produces valid output
+- `test_epub_unicode_content` — CJK, Arabic, Cyrillic, emoji text preserved
+- `test_epub_large_section_count` — 100+ sections processed without error
+- `test_epub_nested_headings` — h3/h4/h5/h6 become sub-headings within sections
+- `test_fixed_layout_detected` — fixed-layout EPUB produces warning but still extracts text
+
+**Total: ~108 test methods across 14 classes**
+
+### Success Criteria:
+
+#### Automated Verification:
+- [x] `pytest tests/test_epub_scraper.py -v` — all 107 tests pass
+- [x] `pytest tests/ -v -x -m "not slow and not integration"` — 982 passed (1 pre-existing unrelated failure in test_git_sources_e2e)
+- [x] `ruff check tests/test_epub_scraper.py` passes
+- [x] `ruff format --check tests/test_epub_scraper.py` passes
+- [x] Test count >= 100 methods (107 tests across 14 classes)
+
+#### Manual Verification:
+- [x] Review test coverage includes: happy path, negative, edge cases, CLI, source detection, DRM, JSON workflow
+- [x] Verify no tests require actual EPUB files or ebooklib installed (all use mocks/skipTest guards)
+
+**Implementation Note**: After completing this phase and all automated verification passes, pause here for manual confirmation from the human before proceeding to the next phase.
+
+---
+
+## Phase 4: Documentation
+
+### Overview
+Update CLAUDE.md and CHANGELOG.md to reflect the new EPUB support.
+
+### Changes Required:
+
+#### [x] 1. Update CLAUDE.md
+**File**: `CLAUDE.md`
+**Changes**:
+
+Add to Commands section (after pdf line):
+```
+skill-seekers epub --epub book.epub --name myskill
+```
+
+Add to "Unified create" examples:
+```
+skill-seekers create book.epub
+```
+
+Add to Key source files table:
+```
+| Core scraping | `cli/epub_scraper.py` |
+```
+
+Add to "Adding things → New create command flags" section:
+```
+- Source-specific → `EPUB_ARGUMENTS`
+```
+
+#### [x] 2. Update CHANGELOG.md
+**File**: `CHANGELOG.md`
+**Changes**: Add entry for EPUB support under next version
+
+```markdown
+### Added
+- EPUB (.epub) input support via `skill-seekers create book.epub` or `skill-seekers epub --epub book.epub`
+- Extracts chapters, metadata, code blocks, images, and tables from EPUB 2 and EPUB 3 files
+- DRM detection with clear error messages
+- Optional dependency: `pip install "skill-seekers[epub]"`
+```
+
+### Success Criteria:
+
+#### Automated Verification:
+- [x] `ruff check` passes on any modified files
+- [x] `pytest tests/ -v -x -m "not slow and not integration"` — all tests still pass (982 passed, 1 pre-existing failure)
+
+#### Manual Verification:
+- [x] CLAUDE.md accurately reflects new commands
+- [x] CHANGELOG.md entry is clear and complete
+
+**Implementation Note**: After completing this phase and all automated verification passes, pause here for manual confirmation from the human before proceeding.
+
+---
+
+## Testing Strategy
+
+### Unit Tests (Phase 3 — ~108 tests):
+
+**By category:**
+| Category | Count | What's tested |
+|----------|-------|---------------|
+| Initialization | 8 | Config parsing, defaults, edge cases |
+| Extraction | 12 | Spine iteration, metadata, headings, code, images |
+| DRM detection | 6 | Adobe, Apple, Readium, font obfuscation (not DRM) |
+| Categorization | 8 | Single/multi category, keywords, empty, ordering |
+| Skill building | 10 | Directory structure, SKILL.md, references, index |
+| Code blocks | 8 | Extraction, quality, language detection, HTML entities |
+| Tables | 5 | Headers, no-thead fallback, empty, colspan |
+| Images | 7 | Save, references, SVG, conflicts, cover, many |
+| Error handling | 10 | Missing file, corrupt, DRM, no deps, malformed XHTML |
+| JSON workflow | 6 | Load, build, round-trip, invalid, missing fields |
+| CLI arguments | 8 | Flags, defaults, dry-run, verbose/quiet |
+| Helper functions | 6 | Description inference, quality scoring, filename sanitization |
+| Source detection | 6 | Detection, validation, regression |
+| Edge cases | 8 | EPUB 2/3, no TOC, empty chapters, Unicode, fixed-layout |
+
+### Integration Tests:
+- Full extract → categorize → build workflow with mock ebooklib
+- JSON round-trip (extract → save → load → build)
+
+### Manual Testing Steps:
+1. `pip install -e ".[epub]"` — verify install
+2. `skill-seekers create book.epub` with a real EPUB file — verify output directory structure
+3. `skill-seekers epub --epub book.epub --dry-run` — verify dry run output
+4. `skill-seekers create drm-book.epub` — verify DRM error message
+5. `skill-seekers create nonexistent.epub` — verify file-not-found error
+6. Open generated `SKILL.md` — verify content quality and structure
+
+## Performance Considerations
+
+- ebooklib loads entire EPUB into memory. For typical books (<50MB), this is fine
+- For very large EPUBs (100MB+), memory usage may spike. No mitigation needed for v1 — document as known limitation
+- BeautifulSoup parsing of XHTML is fast. No performance concerns expected
+
+## Migration Notes
+
+- No migration needed — this is a new feature with no existing data to migrate
+- Optional dependency (`ebooklib`) means existing installs are unaffected
+- No breaking changes to any existing commands or APIs
+
+## References
+
+- [W3C EPUB 3.3 Specification](https://www.w3.org/TR/epub-33/) — authoritative source of truth
+- [W3C EPUB Reading Systems 3.3](https://www.w3.org/TR/epub-rs-33/) — reading system requirements
+- [ebooklib GitHub](https://github.com/aerkalov/ebooklib) — Python EPUB library
+- [ebooklib PyPI](https://pypi.org/project/EbookLib/) — v0.20, Python 3.9-3.13
+- [Research document](../research/2026-03-14-epub-input-support-affected-files.md) — affected files analysis
+- Similar implementation: `src/skill_seekers/cli/word_scraper.py` — closest analog
+- Similar tests: `tests/test_word_scraper.py` — test pattern template
diff --git a/docs/agents/research/2026-03-14-epub-input-support-affected-files.md b/docs/agents/research/2026-03-14-epub-input-support-affected-files.md
new file mode 100644
index 0000000..058b246
--- /dev/null
+++ b/docs/agents/research/2026-03-14-epub-input-support-affected-files.md
@@ -0,0 +1,271 @@
+---
+date: 2026-03-14T12:54:24.700367+00:00
+git_commit: 7c90a4b9c9bccac8341b0769550d77aae3b4e524
+branch: development
+topic: "What files would be affected to add .epub support for input"
+tags: [research, codebase, epub, input-format, scraper]
+status: complete
+---
+
+# Research: What files would be affected to add .epub support for input
+
+## Research Question
+
+What files would be affected to add .epub support for input.
+
+## Summary
+
+Adding `.epub` input support follows an established pattern already used for PDF and Word (.docx) formats. The codebase has a consistent multi-layer architecture for document input formats: source detection, argument definitions, parser registration, create command routing, standalone scraper module, and tests. Based on analysis of the existing PDF and Word implementations, **16 existing files would need modification** and **4 new files would need to be created**.
+
+## Detailed Findings
+
+### New Files to Create (4 files)
+
+| File | Purpose |
+|------|---------|
+| `src/skill_seekers/cli/epub_scraper.py` | Core EPUB extraction and skill building logic (analog: `word_scraper.py` at ~750 lines) |
+| `src/skill_seekers/cli/arguments/epub.py` | EPUB-specific argument definitions (analog: `arguments/word.py`) |
+| `src/skill_seekers/cli/parsers/epub_parser.py` | Subcommand parser class (analog: `parsers/word_parser.py`) |
+| `tests/test_epub_scraper.py` | Test suite (analog: `test_word_scraper.py` at ~750 lines, 130+ tests) |
+
+### Existing Files to Modify (16 files)
+
+#### 1. Source Detection Layer
+
+**`src/skill_seekers/cli/source_detector.py`** (4 locations)
+
+- **`SourceDetector.detect()`** (line ~60): Add `.epub` extension check, following the `.docx` pattern at line 63-64:
+  ```python
+  if source.endswith(".epub"):
+      return cls._detect_epub(source)
+  ```
+
+- **New method `_detect_epub()`**: Add detection method (following `_detect_word()` at lines 124-129):
+  ```python
+  @classmethod
+  def _detect_epub(cls, source: str) -> SourceInfo:
+      name = os.path.splitext(os.path.basename(source))[0]
+      return SourceInfo(
+          type="epub", parsed={"file_path": source}, suggested_name=name, raw_input=source
+      )
+  ```
+
+- **`validate_source()`** (line ~250): Add epub validation block (following the word block at lines 273-278)
+
+- **Error message** (line ~94): Add EPUB example to the `ValueError` help text
+
+#### 2. CLI Dispatcher
+
+**`src/skill_seekers/cli/main.py`** (2 locations)
+
+- **`COMMAND_MODULES` dict** (line ~46): Add epub entry:
+  ```python
+  "epub": "skill_seekers.cli.epub_scraper",
+  ```
+
+- **Module docstring** (line ~1): Add `epub` to the commands list
+
+#### 3. Create Command Routing
+
+**`src/skill_seekers/cli/create_command.py`** (4 locations)
+
+- **`_route_to_scraper()`** (line ~121): Add `elif self.source_info.type == "epub":` routing case
+
+- **New `_route_epub()` method**: Following the `_route_word()` pattern at lines 331-352:
+  ```python
+  def _route_epub(self) -> int:
+      from skill_seekers.cli import epub_scraper
+      argv = ["epub_scraper"]
+      file_path = self.source_info.parsed["file_path"]
+      argv.extend(["--epub", file_path])
+      self._add_common_args(argv)
+      # epub-specific args here
+      ...
+  ```
+
+- **`main()` epilog** (line ~537): Add EPUB example and source auto-detection entry
+
+- **Progressive help** (line ~590): Add `--help-epub` flag and handler block
+
+#### 4. Argument Definitions
+
+**`src/skill_seekers/cli/arguments/create.py`** (3 locations)
+
+- **New `EPUB_ARGUMENTS` dict** (~line 401): Define epub-specific arguments (e.g., `--epub` file path flag), following the `WORD_ARGUMENTS` pattern at lines 402-411
+
+- **`get_source_specific_arguments()`** (line 595): Add `"epub": EPUB_ARGUMENTS` to the `source_args` dict
+
+- **`add_create_arguments()`** (line 676): Add epub mode block:
+  ```python
+  if mode in ["epub", "all"]:
+      for arg_name, arg_def in EPUB_ARGUMENTS.items():
+          parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
+  ```
+
+#### 5. Parser Registration
+
+**`src/skill_seekers/cli/parsers/__init__.py`** (2 locations)
+
+- **Import** (line ~15): Add `from .epub_parser import EpubParser`
+
+- **`PARSERS` list** (line ~46): Add `EpubParser()` entry (near `WordParser()` and `PDFParser()`)
+
+#### 6. Package Configuration
+
+**`pyproject.toml`** (3 locations)
+
+- **`[project.optional-dependencies]`** (line ~111): Add `epub` optional dependency group:
+  ```toml
+  epub = [
+      "ebooklib>=0.18",
+  ]
+  ```
+
+- **`all` optional dependency group** (line ~178): Add epub dependency to the combined `all` group
+
+- **`[project.scripts]`** (line ~224): Add standalone entry point:
+  ```toml
+  skill-seekers-epub = "skill_seekers.cli.epub_scraper:main"
+  ```
+
+#### 7. Argument Commons
+
+**`src/skill_seekers/cli/arguments/common.py`**
+
+- No changes strictly required, but `add_all_standard_arguments()` is called by the new `arguments/epub.py` (no modification needed — it's used as-is)
+
+#### 8. Documentation / Configuration
+
+**`CLAUDE.md`** (2 locations)
+
+- **Commands section**: Add `epub` to the list of subcommands
+- **Key source files table**: Add `epub_scraper.py` entry
+
+**`CONTRIBUTING.md`** — Potentially update with epub format mention
+
+**`CHANGELOG.md`** — New feature entry
+
+### Files NOT Affected
+
+These files do **not** need changes:
+
+- **`unified_scraper.py`** — Multi-source configs could add epub support later but it's not required for basic input support
+- **Platform adaptors** (`adaptors/*.py`) — Adaptors work on the output side (packaging), not input
+- **Enhancement system** (`enhance_skill.py`, `enhance_skill_local.py`) — Works generically on SKILL.md
+- **MCP server** (`mcp/server_fastmcp.py`) — Operates on completed skills
+- **`pdf_extractor_poc.py`** — PDF-specific extraction; epub needs its own extractor
+
+## Code References
+
+### Pattern to Follow (Word .docx implementation)
+
+- `src/skill_seekers/cli/word_scraper.py:1-750` — Full scraper with `WordToSkillConverter` class
+- `src/skill_seekers/cli/arguments/word.py:1-75` — Argument definitions with `add_word_arguments()`
+- `src/skill_seekers/cli/parsers/word_parser.py:1-33` — Parser class extending `SubcommandParser`
+- `tests/test_word_scraper.py:1-750` — Comprehensive test suite with 130+ tests
+
+### Key Integration Points
+
+- `src/skill_seekers/cli/source_detector.py:57-65` — File extension detection order
+- `src/skill_seekers/cli/source_detector.py:124-129` — `_detect_word()` method (template for `_detect_epub()`)
+- `src/skill_seekers/cli/create_command.py:121-143` — `_route_to_scraper()` dispatch
+- `src/skill_seekers/cli/create_command.py:331-352` — `_route_word()` (template for `_route_epub()`)
+- `src/skill_seekers/cli/arguments/create.py:401-411` — `WORD_ARGUMENTS` dict (template)
+- `src/skill_seekers/cli/arguments/create.py:595-604` — `get_source_specific_arguments()` mapping
+- `src/skill_seekers/cli/arguments/create.py:676-678` — `add_create_arguments()` mode handling
+- `src/skill_seekers/cli/parsers/__init__.py:35-59` — `PARSERS` registry list
+- `src/skill_seekers/cli/main.py:46-70` — `COMMAND_MODULES` dict
+- `pyproject.toml:111-115` — Optional dependency group pattern (docx)
+- `pyproject.toml:213-246` — Script entry points
+
+### Data Flow Architecture
+
+The epub scraper would follow the same three-step pipeline as Word/PDF:
+
+1. **Extract** — Parse `.epub` file → sections with text, headings, code, images → save to `output/{name}_extracted.json`
+2. **Categorize** — Group sections by chapters/keywords
+3. **Build** — Generate `SKILL.md`, `references/*.md`, `references/index.md`, `assets/`
+
+The intermediate JSON format uses the same structure as Word/PDF:
+```python
+{
+    "source_file": str,
+    "metadata": {"title", "author", "created", ...},
+    "total_sections": int,
+    "total_code_blocks": int,
+    "total_images": int,
+    "languages_detected": {str: int},
+    "pages": [  # sections
+        {
+            "section_number": int,
+            "heading": str,
+            "text": str,
+            "code_samples": [...],
+            "images": [...],
+            "headings": [...]
+        }
+    ]
+}
+```
+
+## Architecture Documentation
+
+### Document Input Format Pattern
+
+Each input format follows a consistent architecture:
+
+```
+[source_detector.py] → detect type by extension
+        ↓
+[create_command.py] → route to scraper
+        ↓
+[{format}_scraper.py] → extract → categorize → build skill
+        ↓
+[output/{name}/] → SKILL.md + references/ + assets/
+```
+
+Supporting files per format:
+- `arguments/{format}.py` — CLI argument definitions
+- `parsers/{format}_parser.py` — Subcommand parser class
+- `tests/test_{format}_scraper.py` — Test suite
+
+### Dependency Guard Pattern
+
+The Word scraper uses an optional dependency guard that epub should replicate:
+
+```python
+try:
+    import ebooklib
+    from ebooklib import epub
+    EPUB_AVAILABLE = True
+except ImportError:
+    EPUB_AVAILABLE = False
+
+def _check_epub_deps():
+    if not EPUB_AVAILABLE:
+        raise RuntimeError(
+            "ebooklib is required for EPUB support.\n"
+            'Install with: pip install "skill-seekers[epub]"\n'
+            "Or: pip install ebooklib"
+        )
+```
+
+## Summary Table
+
+| Category | Files | Action |
+|----------|-------|--------|
+| New files | 4 | Create from scratch |
+| Source detection | 1 | Add epub detection + validation |
+| CLI dispatcher | 1 | Add command module mapping |
+| Create command | 1 | Add routing + help + examples |
+| Arguments | 1 | Add EPUB_ARGUMENTS + register in helpers |
+| Parser registry | 1 | Import + register EpubParser |
+| Package config | 1 | Add deps + entry point |
+| Documentation | 2+ | Update CLAUDE.md, CHANGELOG |
+| **Total** | **8+ modified, 4 new** | |
+
+## Open Questions
+
+- Should epub support reuse any of the existing HTML parsing from `word_scraper.py` (which uses mammoth to convert to HTML then parses with BeautifulSoup)? EPUB internally contains XHTML files, so BeautifulSoup parsing would be directly applicable.
+- Should the epub scraper support DRM-protected files, or only DRM-free epub files?
+- Should epub-specific arguments include options like `--chapter-range` (similar to PDF's `--pages`)?
diff --git a/pyproject.toml b/pyproject.toml
index 6e4eac3..5b10fed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -114,6 +114,11 @@ docx = [
     "python-docx>=1.1.0",
 ]
 
+# EPUB (.epub) support
+epub = [
+    "ebooklib>=0.18",
+]
+
 # Video processing (lightweight: YouTube transcripts + metadata)
 video = [
     "yt-dlp>=2024.12.0",
@@ -178,6 +183,7 @@ embedding = [
 all = [
     "mammoth>=1.6.0",
     "python-docx>=1.1.0",
+    "ebooklib>=0.18",
     "yt-dlp>=2024.12.0",
     "youtube-transcript-api>=1.2.0",
     "mcp>=1.25,<2",
@@ -222,6 +228,7 @@ skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
 skill-seekers-github = "skill_seekers.cli.github_scraper:main"
 skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
 skill-seekers-word = "skill_seekers.cli.word_scraper:main"
+skill-seekers-epub = "skill_seekers.cli.epub_scraper:main"
 skill-seekers-video = "skill_seekers.cli.video_scraper:main"
 skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
 skill-seekers-enhance = "skill_seekers.cli.enhance_command:main"
diff --git a/src/skill_seekers/cli/arguments/create.py b/src/skill_seekers/cli/arguments/create.py
index e2ed9f9..094590a 100644
--- a/src/skill_seekers/cli/arguments/create.py
+++ b/src/skill_seekers/cli/arguments/create.py
@@ -410,6 +410,18 @@ WORD_ARGUMENTS: dict[str, dict[str, Any]] = {
     },
 }
 
+# EPUB specific (from epub.py)
+EPUB_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "epub": {
+        "flags": ("--epub",),
+        "kwargs": {
+            "type": str,
+            "help": "EPUB file path",
+            "metavar": "PATH",
+        },
+    },
+}
+
 # Video specific (from video.py)
 VIDEO_ARGUMENTS: dict[str, dict[str, Any]] = {
     "video_url": {
@@ -598,6 +610,7 @@ def get_source_specific_arguments(source_type: str) -> dict[str, dict[str, Any]]
         "local": LOCAL_ARGUMENTS,
         "pdf": PDF_ARGUMENTS,
         "word": WORD_ARGUMENTS,
+        "epub": EPUB_ARGUMENTS,
         "video": VIDEO_ARGUMENTS,
         "config": CONFIG_ARGUMENTS,
     }
@@ -636,6 +649,7 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default")
     - 'local': Universal + local-specific
     - 'pdf': Universal + pdf-specific
     - 'word': Universal + word-specific
+    - 'epub': Universal + epub-specific
     - 'video': Universal + video-specific
     - 'advanced': Advanced/rare arguments
     - 'all': All 120+ arguments
@@ -677,6 +691,10 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default")
         for arg_name, arg_def in WORD_ARGUMENTS.items():
             parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
 
+    if mode in ["epub", "all"]:
+        for arg_name, arg_def in EPUB_ARGUMENTS.items():
+            parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
+
     if mode in ["video", "all"]:
         for arg_name, arg_def in VIDEO_ARGUMENTS.items():
             parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
diff --git a/src/skill_seekers/cli/arguments/epub.py b/src/skill_seekers/cli/arguments/epub.py
new file mode 100644
index 0000000..d41eda4
--- /dev/null
+++ b/src/skill_seekers/cli/arguments/epub.py
@@ -0,0 +1,66 @@
+"""EPUB command argument definitions.
+
+This module defines ALL arguments for the epub command in ONE place.
+Both epub_scraper.py (standalone) and parsers/epub_parser.py (unified CLI)
+import and use these definitions.
+
+Shared arguments (name, description, output, enhance-level, api-key,
+dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
+via ``add_all_standard_arguments()``.
+"""
+
+import argparse
+from typing import Any
+
+from .common import add_all_standard_arguments
+
+# EPUB-specific argument definitions as data structure
+# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
+#       verbose, quiet, workflow args) are registered by add_all_standard_arguments().
+EPUB_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "epub": {
+        "flags": ("--epub",),
+        "kwargs": {
+            "type": str,
+            "help": "Direct EPUB file path",
+            "metavar": "PATH",
+        },
+    },
+    "from_json": {
+        "flags": ("--from-json",),
+        "kwargs": {
+            "type": str,
+            "help": "Build skill from extracted JSON",
+            "metavar": "FILE",
+        },
+    },
+}
+
+
+def add_epub_arguments(parser: argparse.ArgumentParser) -> None:
+    """Add all epub command arguments to a parser.
+
+    Registers shared args (name, description, output, enhance-level, api-key,
+    dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
+    then adds EPUB-specific args on top.
+
+    The default for --enhance-level is overridden to 0 (disabled) for EPUB.
+    """
+    # Shared universal args first
+    add_all_standard_arguments(parser)
+
+    # Override enhance-level default to 0 for EPUB
+    for action in parser._actions:
+        if hasattr(action, "dest") and action.dest == "enhance_level":
+            action.default = 0
+            action.help = (
+                "AI enhancement level (auto-detects API vs LOCAL mode): "
+                "0=disabled (default for EPUB), 1=SKILL.md only, 2=+architecture/config, 3=full enhancement. "
+                "Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
+            )
+
+    # EPUB-specific args
+    for arg_name, arg_def in EPUB_ARGUMENTS.items():
+        flags = arg_def["flags"]
+        kwargs = arg_def["kwargs"]
+        parser.add_argument(*flags, **kwargs)
diff --git a/src/skill_seekers/cli/create_command.py b/src/skill_seekers/cli/create_command.py
index 8d57647..d7374e4 100644
--- a/src/skill_seekers/cli/create_command.py
+++ b/src/skill_seekers/cli/create_command.py
@@ -134,6 +134,8 @@ class CreateCommand:
             return self._route_pdf()
         elif self.source_info.type == "word":
             return self._route_word()
+        elif self.source_info.type == "epub":
+            return self._route_epub()
         elif self.source_info.type == "video":
             return self._route_video()
         elif self.source_info.type == "config":
@@ -351,6 +353,29 @@ class CreateCommand:
         finally:
             sys.argv = original_argv
 
+    def _route_epub(self) -> int:
+        """Route to EPUB scraper (epub_scraper.py)."""
+        from skill_seekers.cli import epub_scraper
+
+        # Reconstruct argv for epub_scraper
+        argv = ["epub_scraper"]
+
+        # Add EPUB file
+        file_path = self.source_info.parsed["file_path"]
+        argv.extend(["--epub", file_path])
+
+        # Add universal arguments
+        self._add_common_args(argv)
+
+        # Call epub_scraper with modified argv
+        logger.debug(f"Calling epub_scraper with argv: {argv}")
+        original_argv = sys.argv
+        try:
+            sys.argv = argv
+            return epub_scraper.main()
+        finally:
+            sys.argv = original_argv
+
     def _route_video(self) -> int:
         """Route to video scraper (video_scraper.py)."""
         from skill_seekers.cli import video_scraper
@@ -541,6 +566,7 @@ Examples:
   Local:    skill-seekers create ./my-project -p comprehensive
   PDF:      skill-seekers create tutorial.pdf --ocr
   DOCX:     skill-seekers create document.docx
+  EPUB:     skill-seekers create ebook.epub
   Video:    skill-seekers create https://youtube.com/watch?v=...
   Video:    skill-seekers create recording.mp4
   Config:   skill-seekers create configs/react.json
@@ -551,6 +577,7 @@ Source Auto-Detection:
   • ./path → local codebase
   • file.pdf → PDF extraction
   • file.docx → Word document extraction
+  • file.epub → EPUB extraction
   • youtube.com/... → Video transcript extraction
   • file.mp4 → Video file extraction
   • file.json → multi-source config
@@ -560,6 +587,7 @@ Progressive Help (13 → 120+ flags):
   --help-github    GitHub repository options
   --help-local     Local codebase analysis
   --help-pdf       PDF extraction options
+  --help-epub      EPUB extraction options
   --help-video     Video extraction options
   --help-advanced  Rare/advanced options
   --help-all       All options + compatibility
@@ -591,6 +619,9 @@ Common Workflows:
     parser.add_argument(
         "--help-word", action="store_true", help=argparse.SUPPRESS, dest="_help_word"
     )
+    parser.add_argument(
+        "--help-epub", action="store_true", help=argparse.SUPPRESS, dest="_help_epub"
+    )
     parser.add_argument(
         "--help-video", action="store_true", help=argparse.SUPPRESS, dest="_help_video"
     )
@@ -652,6 +683,15 @@ Common Workflows:
         add_create_arguments(parser_word, mode="word")
         parser_word.print_help()
         return 0
+    elif args._help_epub:
+        parser_epub = argparse.ArgumentParser(
+            prog="skill-seekers create",
+            description="Create skill from EPUB e-book (.epub)",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+        )
+        add_create_arguments(parser_epub, mode="epub")
+        parser_epub.print_help()
+        return 0
     elif args._help_video:
         parser_video = argparse.ArgumentParser(
             prog="skill-seekers create",
diff --git a/src/skill_seekers/cli/epub_scraper.py b/src/skill_seekers/cli/epub_scraper.py
new file mode 100644
index 0000000..545831f
--- /dev/null
+++ b/src/skill_seekers/cli/epub_scraper.py
@@ -0,0 +1,1206 @@
+#!/usr/bin/env python3
+"""
+EPUB Documentation to Skill Converter
+
+Converts EPUB e-books into skills.
+Uses ebooklib for EPUB parsing, BeautifulSoup for XHTML content extraction.
+
+Usage:
+    skill-seekers epub --epub book.epub --name myskill
+    skill-seekers epub --from-json book_extracted.json
+"""
+
+import argparse
+import json
+import logging
+import os
+import re
+import sys
+from pathlib import Path
+
+# Optional dependency guard
+try:
+    import ebooklib
+    from ebooklib import epub
+
+    EPUB_AVAILABLE = True
+except ImportError:
+    EPUB_AVAILABLE = False
+
+# BeautifulSoup is a core dependency (always available)
+from bs4 import BeautifulSoup, Comment
+
+logger = logging.getLogger(__name__)
+
+
+def _check_epub_deps():
+    """Raise RuntimeError if ebooklib is not installed."""
+    if not EPUB_AVAILABLE:
+        raise RuntimeError(
+            "ebooklib is required for EPUB support.\n"
+            'Install with: pip install "skill-seekers[epub]"\n'
+            "Or: pip install ebooklib"
+        )
+
+
+def infer_description_from_epub(metadata: dict | None = None, name: str = "") -> str:
+    """Infer skill description from EPUB metadata.
+
+    Args:
+        metadata: EPUB Dublin Core metadata dict
+        name: Skill name for fallback
+
+    Returns:
+        Description string suitable for "Use when..." format
+    """
+    if metadata:
+        if metadata.get("description") and len(metadata["description"]) > 20:
+            desc = metadata["description"].strip()
+            if len(desc) > 150:
+                desc = desc[:147] + "..."
+            return f"Use when {desc.lower()}"
+        if metadata.get("title") and len(metadata["title"]) > 10:
+            return f"Use when working with {metadata['title'].lower()}"
+    return (
+        f"Use when referencing {name} documentation"
+        if name
+        else "Use when referencing this documentation"
+    )
+
+
+class EpubToSkillConverter:
+    """Convert EPUB e-book to Claude skill."""
+
+    def __init__(self, config):
+        self.config = config
+        self.name = config["name"]
+        self.epub_path = config.get("epub_path", "")
+        self.description = (
+            config.get("description") or f"Use when referencing {self.name} documentation"
+        )
+
+        # Paths
+        self.skill_dir = f"output/{self.name}"
+        self.data_file = f"output/{self.name}_extracted.json"
+
+        # Categories config
+        self.categories = config.get("categories", {})
+
+        # Extracted data
+        self.extracted_data = None
+
+    def extract_epub(self):
+        """Extract content from EPUB file.
+
+        Workflow:
+        1. Check dependencies (ebooklib)
+        2. Read EPUB via ebooklib with ignore_ncx=True (EPUB 3 TOC bug workaround)
+        3. Detect DRM via encryption.xml (fail fast)
+        4. Extract Dublin Core metadata
+        5. Iterate spine items in reading order
+        6. For each ITEM_DOCUMENT: parse XHTML with BeautifulSoup
+        7. Split by h1/h2 heading boundaries into sections
+        8. Extract code blocks from <pre>/<code> elements
+        9. Extract images from EpubImage items
+        10. Detect code languages via LanguageDetector
+        11. Save intermediate JSON to {name}_extracted.json
+
+        Returns True on success.
+        Raises RuntimeError for DRM-protected files.
+        Raises FileNotFoundError for missing files.
+        Raises ValueError for invalid EPUB files.
+        """
+        _check_epub_deps()
+
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        print(f"\n🔍 Extracting from EPUB: {self.epub_path}")
+
+        if not os.path.exists(self.epub_path):
+            raise FileNotFoundError(f"EPUB file not found: {self.epub_path}")
+
+        if not os.path.isfile(self.epub_path):
+            raise ValueError(f"Path is not a file: {self.epub_path}")
+
+        if not self.epub_path.lower().endswith(".epub"):
+            raise ValueError(f"Not an EPUB file (expected .epub): {self.epub_path}")
+
+        # Read EPUB with ignore_ncx=True to work around EPUB 3 TOC bug
+        try:
+            book = epub.read_epub(self.epub_path, options={"ignore_ncx": True})
+        except Exception as e:
+            raise ValueError(f"Failed to read EPUB file: {e}") from e
+
+        # DRM detection (fail fast)
+        if self._detect_drm(book):
+            raise RuntimeError(
+                f"EPUB file appears to be DRM-protected: {self.epub_path}\n"
+                "Skill Seekers cannot process DRM-protected files.\n"
+                "Please use a DRM-free version of the e-book."
+            )
+
+        # Extract Dublin Core metadata
+        metadata = self._extract_metadata(book)
+
+        print(f"   Title: {metadata.get('title', 'Unknown')}")
+        print(f"   Author: {metadata.get('author', 'Unknown')}")
+        print(f"   Language: {metadata.get('language', 'Unknown')}")
+
+        # Update description from metadata if not set explicitly
+        if not self.config.get("description"):
+            self.description = infer_description_from_epub(metadata, self.name)
+
+        # Extract content from spine items
+        sections = self._extract_spine_content(book)
+
+        spine_count = sum(1 for _, _ in book.spine)
+        print(f"   Chapters: {spine_count} (spine items)")
+
+        # No sections is not fatal: log a warning and continue with empty output
+        if not sections:
+            logger.warning("No sections extracted from EPUB")
+
+        # Extract images
+        images_extracted = self._extract_images(book)
+
+        # Detect languages for code samples
+        detector = LanguageDetector(min_confidence=0.15)
+        languages_detected: dict[str, int] = {}
+        total_code_blocks = 0
+
+        for section in sections:
+            for code_sample in section.get("code_samples", []):
+                lang = code_sample.get("language", "")
+                if lang:
+                    languages_detected[lang] = languages_detected.get(lang, 0) + 1
+                total_code_blocks += 1
+
+        # Detect languages for samples without language
+        for section in sections:
+            for code_sample in section.get("code_samples", []):
+                if not code_sample.get("language"):
+                    code = code_sample.get("code", "")
+                    if code:
+                        lang, confidence = detector.detect_from_code(code)
+                        if lang and confidence >= 0.3:
+                            code_sample["language"] = lang
+                            languages_detected[lang] = languages_detected.get(lang, 0) + 1
+
+        result_data = {
+            "source_file": self.epub_path,
+            "metadata": metadata,
+            "total_sections": len(sections),
+            "total_code_blocks": total_code_blocks,
+            "total_images": images_extracted,
+            "languages_detected": languages_detected,
+            "pages": sections,  # "pages" key for pipeline compatibility
+        }
+
+        # Save extracted data
+        os.makedirs(os.path.dirname(self.data_file), exist_ok=True)
+        with open(self.data_file, "w", encoding="utf-8") as f:
+            json.dump(result_data, f, indent=2, ensure_ascii=False, default=str)
+
+        print(f"\n💾 Saved extracted data to: {self.data_file}")
+        self.extracted_data = result_data
+        print(
+            f"✅ Extracted {len(sections)} sections, "
+            f"{total_code_blocks} code blocks, "
+            f"{images_extracted} images"
+        )
+        return True
+
+    def _detect_drm(self, book) -> bool:
+        """Detect DRM by checking for encryption.xml with non-font-obfuscation entries.
+
+        Per W3C EPUB 3.3 spec: encryption.xml is present when resources are encrypted.
+        Font obfuscation (IDPF algorithm http://www.idpf.org/2008/embedding or
+        Adobe algorithm http://ns.adobe.com/pdf/enc#RC) is NOT DRM.
+
+        Actual DRM uses algorithms like:
+        - Adobe ADEPT: http://ns.adobe.com/adept namespace
+        - Apple FairPlay: http://itunes.apple.com/dataenc
+        - Readium LCP: http://readium.org/2014/01/lcp
+        """
+        # Font obfuscation URIs — these are NOT DRM
+        font_obfuscation_uris = {
+            "http://www.idpf.org/2008/embedding",
+            "http://ns.adobe.com/pdf/enc#RC",
+        }
+
+        # Known DRM namespace patterns
+        drm_patterns = [
+            "http://ns.adobe.com/adept",
+            "http://itunes.apple.com/dataenc",
+            "http://readium.org/2014/01/lcp",
+        ]
+
+        try:
+            # Look for META-INF/encryption.xml in the EPUB items
+            for item in book.get_items():
+                if hasattr(item, "file_name") and item.file_name == "META-INF/encryption.xml":
+                    content = item.get_content()
+                    if isinstance(content, bytes):
+                        content = content.decode("utf-8", errors="ignore")
+
+                    # Check for DRM namespace patterns
+                    for pattern in drm_patterns:
+                        if pattern in content:
+                            return True
+
+                    # html.parser lowercases names and keeps prefixes ("enc:encryptionmethod"),
+                    soup = BeautifulSoup(content, "html.parser")
+                    enc_methods = soup.find_all(
+                        lambda tag: tag.name.rsplit(":", 1)[-1] == "encryptionmethod"
+                    )
+                    for method in enc_methods:
+                        algorithm = method.get("Algorithm", method.get("algorithm", ""))
+                        if algorithm and algorithm not in font_obfuscation_uris:
+                            return True
+        except Exception:
+            # If we can't check for DRM, proceed anyway
+            logger.debug("Could not check for DRM, proceeding with extraction")
+
+        return False
+
+    def _extract_metadata(self, book) -> dict:
+        """Extract Dublin Core metadata from EPUB.
+
+        Per W3C EPUB 3.3 spec: required elements are dc:identifier, dc:title, dc:language.
+        Optional: dc:creator, dc:contributor, dc:date, dc:description, dc:publisher,
+        dc:subject, dc:rights, dc:type, dc:coverage, dc:source, dc:relation, dc:format.
+
+        ebooklib API: book.get_metadata('DC', key) returns list of (value, attrs) tuples.
+        """
+
+        def _get_one(key):
+            data = book.get_metadata("DC", key)
+            return data[0][0] if data else None
+
+        def _get_list(key):
+            data = book.get_metadata("DC", key)
+            return [x[0] for x in data] if data else []
+
+        return {
+            "title": _get_one("title") or "Untitled",
+            "author": ", ".join(_get_list("creator")) or None,
+            "language": _get_one("language") or "en",
+            "publisher": _get_one("publisher"),
+            "date": _get_one("date"),
+            "description": _get_one("description"),
+            "subject": ", ".join(_get_list("subject")) or None,
+            "rights": _get_one("rights"),
+            "identifier": _get_one("identifier"),
+        }
+
+    def _extract_spine_content(self, book) -> list[dict]:
+        """Extract content from spine items in reading order.
+
+        Per W3C EPUB 3.3 spec: spine defines ordered list of content documents.
+        Linear="yes" (default) items form the primary reading order.
+        Linear="no" items are auxiliary (footnotes, glossary).
+
+        Parse with BeautifulSoup, split by h1/h2 heading boundaries.
+        """
+        sections = []
+        section_number = 0
+
+        for item_id, linear in book.spine:
+            item = book.get_item_with_id(item_id)
+            if not item or item.get_type() != ebooklib.ITEM_DOCUMENT:
+                continue
+
+            try:
+                content = item.get_content()
+                if isinstance(content, bytes):
+                    content = content.decode("utf-8", errors="ignore")
+            except Exception:
+                logger.debug(f"Could not read spine item {item_id}, skipping")
+                continue
+
+            soup = BeautifulSoup(content, "html.parser")
+
+            # Remove scripts, styles, comments
+            for tag in soup(["script", "style"]):
+                tag.decompose()
+            for comment in soup.find_all(string=lambda t: isinstance(t, Comment)):
+                comment.extract()
+
+            body = soup.find("body")
+            if not body:
+                # Some EPUBs have content directly without a body tag
+                body = soup
+
+            # Split by h1/h2 heading boundaries
+            current_heading = None
+            current_heading_level = None
+            current_elements = []
+
+            for elem in body.children:
+                if not hasattr(elem, "name") or elem.name is None:
+                    continue
+
+                if elem.name in ("h1", "h2"):
+                    # Flush previous section
+                    if current_heading is not None or current_elements:
+                        section_number += 1
+                        section = _build_section(
+                            section_number,
+                            current_heading,
+                            current_heading_level,
+                            current_elements,
+                        )
+                        sections.append(section)
+                    current_heading = elem.get_text(strip=True)
+                    current_heading_level = elem.name
+                    current_elements = []
+                else:
+                    current_elements.append(elem)
+
+            # Flush last section
+            if current_heading is not None or current_elements:
+                section_number += 1
+                section = _build_section(
+                    section_number,
+                    current_heading,
+                    current_heading_level,
+                    current_elements,
+                )
+                sections.append(section)
+
+        return sections
+
+    def _extract_images(self, book) -> int:
+        """Extract images from EPUB manifest.
+
+        Per W3C EPUB 3.3 spec: core image media types are
+        image/gif, image/jpeg, image/png, image/svg+xml, image/webp.
+
+        Returns count of images found (images are stored in extracted_data sections).
+        """
+        image_count = 0
+        seen_ids: set[int] = set()  # Track items already counted to avoid duplicates
+        try:
+            for item in book.get_items_of_type(ebooklib.ITEM_IMAGE):
+                image_count += 1
+                seen_ids.add(id(item))
+        except Exception:
+            logger.debug("Could not enumerate images in EPUB")
+
+        # Also count SVG items not already included in ITEM_IMAGE
+        try:
+            for item in book.get_items():
+                if (
+                    id(item) not in seen_ids
+                    and hasattr(item, "media_type")
+                    and item.media_type == "image/svg+xml"
+                ):
+                    image_count += 1
+        except Exception:
+            logger.debug("Could not enumerate SVG images in EPUB")
+
+        return image_count
+
+    def load_extracted_data(self, json_path):
+        """Load previously extracted data from JSON."""
+        print(f"\n📂 Loading extracted data from: {json_path}")
+        with open(json_path, encoding="utf-8") as f:
+            self.extracted_data = json.load(f)
+        total = self.extracted_data.get("total_sections", len(self.extracted_data.get("pages", [])))
+        print(f"✅ Loaded {total} sections")
+        return True
+
+    def categorize_content(self):
+        """Group sections by category: one per EPUB file, otherwise by keyword score."""
+        print("\n📋 Categorizing content...")
+
+        categorized = {}
+        sections = self.extracted_data.get("pages", [])
+
+        # For single EPUB source, use single category with all sections
+        if self.epub_path:
+            epub_basename = Path(self.epub_path).stem
+            category_key = self._sanitize_filename(epub_basename)
+            categorized[category_key] = {
+                "title": epub_basename,
+                "pages": sections,
+            }
+            print("✅ Created 1 category (single EPUB source)")
+            print(f"   - {epub_basename}: {len(sections)} sections")
+            return categorized
+
+        # Keyword-based categorization (multi-source scenario)
+        if self.categories:
+            first_value = next(iter(self.categories.values()), None)
+            if isinstance(first_value, list) and first_value and isinstance(first_value[0], dict):
+                # Already categorized format
+                for cat_key, pages in self.categories.items():
+                    categorized[cat_key] = {
+                        "title": cat_key.replace("_", " ").title(),
+                        "pages": pages,
+                    }
+            else:
+                # Keyword-based categorization
+                for cat_key in self.categories:
+                    categorized[cat_key] = {
+                        "title": cat_key.replace("_", " ").title(),
+                        "pages": [],
+                    }
+
+                for section in sections:
+                    text = (section.get("text") or "").lower()
+                    heading_text = (section.get("heading") or "").lower()
+
+                    scores = {}
+                    for cat_key, keywords in self.categories.items():
+                        if isinstance(keywords, list):
+                            score = sum(
+                                1
+                                for kw in keywords
+                                if isinstance(kw, str)
+                                and (kw.lower() in text or kw.lower() in heading_text)
+                            )
+                        else:
+                            score = 0
+                        if score > 0:
+                            scores[cat_key] = score
+
+                    if scores:
+                        best_cat = max(scores, key=scores.get)
+                        categorized[best_cat]["pages"].append(section)
+                    else:
+                        if "other" not in categorized:
+                            categorized["other"] = {"title": "Other", "pages": []}
+                        categorized["other"]["pages"].append(section)
+        else:
+            # No categorization - single category
+            categorized["content"] = {"title": "Content", "pages": sections}
+
+        print(f"✅ Created {len(categorized)} categories")
+        for _cat_key, cat_data in categorized.items():
+            print(f"   - {cat_data['title']}: {len(cat_data['pages'])} sections")
+
+        return categorized
+
+    def build_skill(self):
+        """Build complete skill structure."""
+        print(f"\n🏗️  Building skill: {self.name}")
+
+        # Create directories
+        os.makedirs(f"{self.skill_dir}/references", exist_ok=True)
+        os.makedirs(f"{self.skill_dir}/scripts", exist_ok=True)
+        os.makedirs(f"{self.skill_dir}/assets", exist_ok=True)
+
+        # Categorize content
+        categorized = self.categorize_content()
+
+        # Generate reference files
+        print("\n📝 Generating reference files...")
+        total_sections = len(categorized)
+        section_num = 1
+        for cat_key, cat_data in categorized.items():
+            self._generate_reference_file(cat_key, cat_data, section_num, total_sections)
+            section_num += 1
+
+        # Generate index
+        self._generate_index(categorized)
+
+        # Generate SKILL.md
+        self._generate_skill_md(categorized)
+
+        print(f"\n✅ Skill built successfully: {self.skill_dir}/")
+        print(f"\n📦 Next step: Package with: skill-seekers package {self.skill_dir}/")
+
+    def _generate_reference_file(self, _cat_key, cat_data, section_num, total_sections):
+        """Generate a reference markdown file for a category."""
+        sections = cat_data["pages"]
+
+        # Use epub basename for filename
+        epub_basename = ""
+        if self.epub_path:
+            epub_basename = Path(self.epub_path).stem
+
+        if sections:
+            section_nums = [s.get("section_number", i + 1) for i, s in enumerate(sections)]
+
+            if total_sections == 1:
+                filename = (
+                    f"{self.skill_dir}/references/{epub_basename}.md"
+                    if epub_basename
+                    else f"{self.skill_dir}/references/main.md"
+                )
+            else:
+                sec_range = f"s{min(section_nums)}-s{max(section_nums)}"
+                base_name = epub_basename if epub_basename else "section"
+                filename = f"{self.skill_dir}/references/{base_name}_{sec_range}.md"
+        else:
+            filename = f"{self.skill_dir}/references/section_{section_num:02d}.md"
+
+        with open(filename, "w", encoding="utf-8") as f:
+            f.write(f"# {cat_data['title']}\n\n")
+
+            for section in sections:
+                sec_num = section.get("section_number", "?")
+                heading = section.get("heading", "")
+                heading_level = section.get("heading_level", "h1")
+
+                f.write(f"---\n\n**📄 Source: Section {sec_num}**\n\n")
+
+                # Add heading
+                if heading:
+                    md_level = "#" * (int(heading_level[1]) + 1) if heading_level else "##"
+                    f.write(f"{md_level} {heading}\n\n")
+
+                # Add sub-headings (h3+) found within the section
+                for sub_heading in section.get("headings", []):
+                    sub_level = sub_heading.get("level", "h3")
+                    sub_text = sub_heading.get("text", "")
+                    if sub_text:
+                        sub_md = "#" * (int(sub_level[1]) + 1) if sub_level else "###"
+                        f.write(f"{sub_md} {sub_text}\n\n")
+
+                # Add text content
+                if section.get("text"):
+                    f.write(f"{section['text']}\n\n")
+
+                # Add code samples
+                code_list = section.get("code_samples", [])
+                if code_list:
+                    f.write("### Code Examples\n\n")
+                    for code in code_list:
+                        lang = code.get("language", "")
+                        f.write(f"```{lang}\n{code['code']}\n```\n\n")
+
+                # Add tables as markdown
+                tables = section.get("tables", [])
+                if tables:
+                    f.write("### Tables\n\n")
+                    for table in tables:
+                        headers = table.get("headers", [])
+                        rows = table.get("rows", [])
+                        if headers:
+                            f.write("| " + " | ".join(str(h) for h in headers) + " |\n")
+                            f.write("| " + " | ".join("---" for _ in headers) + " |\n")
+                        for row in rows:
+                            f.write("| " + " | ".join(str(c) for c in row) + " |\n")
+                        f.write("\n")
+
+                # Add images
+                images = section.get("images", [])
+                if images:
+                    assets_dir = os.path.join(self.skill_dir, "assets")
+                    os.makedirs(assets_dir, exist_ok=True)
+
+                    f.write("### Images\n\n")
+                    for img in images:
+                        img_index = img.get("index", 0)
+                        img_data = img.get("data", b"")
+                        img_filename = f"section_{sec_num}_img_{img_index}.png"
+                        img_path = os.path.join(assets_dir, img_filename)
+
+                        if isinstance(img_data, (bytes, bytearray)):
+                            with open(img_path, "wb") as img_file:
+                                img_file.write(img_data)
+                            f.write(f"![Image {img_index}](../assets/{img_filename})\n\n")
+
+                f.write("---\n\n")
+
+        print(f"   Generated: {filename}")
+
+    def _generate_index(self, categorized):
+        """Write ``references/index.md`` for the generated skill.
+
+        The index holds one markdown link per category (with its section count
+        and section-number range), followed by document-wide statistics and,
+        when present in the extracted metadata, the author and date.
+
+        Args:
+            categorized: Mapping of category key to a dict holding ``title`` and
+                ``pages`` (the list of section dicts in that category).
+        """
+        index_path = f"{self.skill_dir}/references/index.md"
+        stem = Path(self.epub_path).stem if self.epub_path else ""
+        category_count = len(categorized)
+
+        with open(index_path, "w", encoding="utf-8") as out:
+            out.write(f"# {self.name.title()} Documentation Reference\n\n")
+            out.write("## Categories\n\n")
+
+            for position, category in enumerate(categorized.values(), start=1):
+                pages = category["pages"]
+
+                if not pages:
+                    # Empty category: fall back to the positional file name.
+                    target = f"section_{position:02d}.md"
+                    span_label = "N/A"
+                else:
+                    numbers = [
+                        page.get("section_number", offset + 1)
+                        for offset, page in enumerate(pages)
+                    ]
+                    first, last = min(numbers), max(numbers)
+                    span_label = f"Sections {first}-{last}"
+                    if category_count == 1:
+                        target = f"{stem}.md" if stem else "main.md"
+                    else:
+                        prefix = stem if stem else "section"
+                        target = f"{prefix}_s{first}-s{last}.md"
+
+                out.write(
+                    f"- [{category['title']}]({target}) "
+                    f"({len(pages)} sections, {span_label})\n"
+                )
+
+            data = self.extracted_data
+            stat_lines = [
+                "\n## Statistics\n\n",
+                f"- Total sections: {data.get('total_sections', 0)}\n",
+                f"- Code blocks: {data.get('total_code_blocks', 0)}\n",
+                f"- Images: {data.get('total_images', 0)}\n",
+            ]
+
+            # Optional metadata lines, emitted only for non-empty values.
+            metadata = data.get("metadata", {})
+            for key, label in (("author", "Author"), ("date", "Date")):
+                if metadata.get(key):
+                    stat_lines.append(f"- {label}: {metadata[key]}\n")
+            out.writelines(stat_lines)
+
+        print(f"   Generated: {index_path}")
+
+    def _generate_skill_md(self, categorized):
+        """Generate the main ``SKILL.md`` file.
+
+        Writes, in order: YAML frontmatter (name, description), the title and
+        description, document metadata, usage hints, a per-category section
+        overview, key concepts, quick-reference patterns, the highest-scoring
+        code examples (top 15 overall, at most 5 shown per language), a summary
+        of the first 5 tables, statistics, and a navigation list of the
+        reference files.
+
+        Args:
+            categorized: Mapping of category key to a dict holding ``title`` and
+                ``pages`` (the list of section dicts in that category).
+        """
+        filename = f"{self.skill_dir}/SKILL.md"
+
+        skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]
+        # The frontmatter value is written as a one-line plain YAML scalar, so
+        # collapse newlines / whitespace runs before truncating to 1024 chars.
+        desc = " ".join(self.description.split())[:1024]
+
+        with open(filename, "w", encoding="utf-8") as f:
+            # YAML frontmatter
+            f.write("---\n")
+            f.write(f"name: {skill_name}\n")
+            f.write(f"description: {desc}\n")
+            f.write("---\n\n")
+
+            f.write(f"# {self.name.title()} Documentation Skill\n\n")
+            f.write(f"{self.description}\n\n")
+
+            # Document metadata
+            metadata = self.extracted_data.get("metadata", {})
+            if any(v for v in metadata.values() if v):
+                f.write("## 📋 Document Information\n\n")
+                if metadata.get("title"):
+                    f.write(f"**Title:** {metadata['title']}\n\n")
+                if metadata.get("author"):
+                    f.write(f"**Author:** {metadata['author']}\n\n")
+                if metadata.get("language"):
+                    f.write(f"**Language:** {metadata['language']}\n\n")
+                if metadata.get("publisher"):
+                    f.write(f"**Publisher:** {metadata['publisher']}\n\n")
+                if metadata.get("date"):
+                    f.write(f"**Date:** {metadata['date']}\n\n")
+
+            # When to Use
+            f.write("## 💡 When to Use This Skill\n\n")
+            f.write("Use this skill when you need to:\n")
+            f.write(f"- Understand {self.name} concepts and fundamentals\n")
+            f.write("- Look up API references and technical specifications\n")
+            f.write("- Find code examples and implementation patterns\n")
+            f.write("- Review tutorials, guides, and best practices\n")
+            f.write("- Explore the complete documentation structure\n\n")
+
+            # Section Overview
+            total_sections = self.extracted_data.get("total_sections", 0)
+            f.write("## 📖 Section Overview\n\n")
+            f.write(f"**Total Sections:** {total_sections}\n\n")
+            f.write("**Content Breakdown:**\n\n")
+            for _cat_key, cat_data in categorized.items():
+                section_count = len(cat_data["pages"])
+                f.write(f"- **{cat_data['title']}**: {section_count} sections\n")
+            f.write("\n")
+
+            # Key Concepts from headings
+            f.write(self._format_key_concepts())
+
+            # Quick Reference patterns
+            f.write("## ⚡ Quick Reference\n\n")
+            f.write(self._format_patterns_from_content())
+
+            # Code examples (top 15, grouped by language)
+            all_code = []
+            for section in self.extracted_data.get("pages", []):
+                all_code.extend(section.get("code_samples", []))
+
+            all_code.sort(key=lambda x: x.get("quality_score", 0), reverse=True)
+            top_code = all_code[:15]
+
+            if top_code:
+                f.write("## 📝 Code Examples\n\n")
+                f.write("*High-quality examples extracted from documentation*\n\n")
+
+                by_lang: dict[str, list] = {}
+                for code in top_code:
+                    # Samples with no detected language carry "" (not a missing
+                    # key), so fall back on falsy values rather than on absence.
+                    by_lang.setdefault(code.get("language") or "unknown", []).append(code)
+
+                for lang in sorted(by_lang):
+                    examples = by_lang[lang]
+                    # "unknown" is only a display label, not a fence language tag.
+                    fence_lang = "" if lang == "unknown" else lang
+                    f.write(f"### {lang.title()} Examples ({len(examples)})\n\n")
+                    for i, code in enumerate(examples[:5], 1):
+                        quality = code.get("quality_score", 0)
+                        code_text = code.get("code", "")
+                        f.write(f"**Example {i}** (Quality: {quality:.1f}/10):\n\n")
+                        f.write(f"```{fence_lang}\n")
+                        if len(code_text) <= 500:
+                            f.write(code_text)
+                        else:
+                            f.write(code_text[:500] + "\n...")
+                        f.write("\n```\n\n")
+
+            # Table Summary (first 5 tables)
+            all_tables = []
+            for section in self.extracted_data.get("pages", []):
+                for table in section.get("tables", []):
+                    all_tables.append((section.get("heading", ""), table))
+
+            if all_tables:
+                f.write("## 📊 Table Summary\n\n")
+                f.write(f"*{len(all_tables)} table(s) found in document*\n\n")
+                for section_heading, table in all_tables[:5]:
+                    if section_heading:
+                        f.write(f"**From section: {section_heading}**\n\n")
+                    headers = table.get("headers", [])
+                    rows = table.get("rows", [])
+                    if headers:
+                        f.write("| " + " | ".join(str(h) for h in headers) + " |\n")
+                        f.write("| " + " | ".join("---" for _ in headers) + " |\n")
+                        for row in rows[:5]:
+                            f.write("| " + " | ".join(str(c) for c in row) + " |\n")
+                        f.write("\n")
+
+            # Statistics
+            f.write("## 📊 Documentation Statistics\n\n")
+            f.write(f"- **Total Sections**: {total_sections}\n")
+            f.write(f"- **Code Blocks**: {self.extracted_data.get('total_code_blocks', 0)}\n")
+            f.write(f"- **Images/Diagrams**: {self.extracted_data.get('total_images', 0)}\n")
+            f.write(f"- **Tables**: {len(all_tables)}\n")
+
+            langs = self.extracted_data.get("languages_detected", {})
+            if langs:
+                f.write(f"- **Programming Languages**: {len(langs)}\n\n")
+                f.write("**Language Breakdown:**\n\n")
+                for lang, count in sorted(langs.items(), key=lambda x: x[1], reverse=True):
+                    f.write(f"- {lang}: {count} examples\n")
+                f.write("\n")
+
+            # Navigation: list the reference files under the same names that
+            # references/index.md links to (EPUB stem + section range, the bare
+            # stem / "main" for a single category, positional name when empty).
+            f.write("## 🗺️ Navigation\n\n")
+            f.write("**Reference Files:**\n\n")
+            epub_basename = Path(self.epub_path).stem if self.epub_path else ""
+            single_category = len(categorized) == 1
+            for position, cat_data in enumerate(categorized.values(), start=1):
+                pages = cat_data["pages"]
+                if not pages:
+                    ref_file = f"section_{position:02d}.md"
+                elif single_category:
+                    ref_file = f"{epub_basename}.md" if epub_basename else "main.md"
+                else:
+                    nums = [p.get("section_number", i + 1) for i, p in enumerate(pages)]
+                    base_name = epub_basename if epub_basename else "section"
+                    ref_file = f"{base_name}_s{min(nums)}-s{max(nums)}.md"
+                f.write(f"- `references/{ref_file}` - {cat_data['title']}\n")
+            f.write("\n")
+            f.write("See `references/index.md` for complete documentation structure.\n\n")
+
+            # Footer
+            f.write("---\n\n")
+            f.write("**Generated by Skill Seeker** | EPUB Scraper\n")
+
+        # Re-read the finished file only to report its line count.
+        with open(filename, encoding="utf-8") as f:
+            line_count = len(f.read().split("\n"))
+        print(f"   Generated: {filename} ({line_count} lines)")
+
+    def _format_key_concepts(self) -> str:
+        """Render the "Key Concepts" markdown block from section headings.
+
+        Collects each section's main heading and its sub-headings (ignoring
+        entries of three characters or fewer), then lists up to 10 ``h1``
+        headings as major topics and up to 15 ``h2`` headings as subtopics.
+
+        Returns:
+            The markdown block, or an empty string when no heading qualifies.
+        """
+        collected = []
+        for page in self.extracted_data.get("pages", []):
+            title = page.get("heading", "").strip()
+            if len(title) > 3:
+                collected.append((page.get("heading_level", "h1"), title))
+            for entry in page.get("headings", []):
+                label = entry.get("text", "").strip()
+                if len(label) > 3:
+                    collected.append((entry.get("level", "h3"), label))
+
+        if not collected:
+            return ""
+
+        parts = [
+            "## 🔑 Key Concepts\n\n",
+            "*Main topics covered in this documentation*\n\n",
+        ]
+        groups = (
+            ("h1", "**Major Topics:**\n\n", 10),
+            ("h2", "**Subtopics:**\n\n", 15),
+        )
+        for wanted_level, caption, limit in groups:
+            matching = [text for level, text in collected if level == wanted_level]
+            if not matching:
+                continue
+            parts.append(caption)
+            parts.extend(f"- {text}\n" for text in matching[:limit])
+            parts.append("\n")
+
+        return "".join(parts)
+
+    def _format_patterns_from_content(self) -> str:
+        """Summarise sections whose heading names a common documentation pattern.
+
+        Each section heading is lower-cased and tested against a fixed keyword
+        list; the first keyword that matches decides the section's pattern
+        type. Keywords must begin at a word boundary, so "api" matches
+        "API Reference" and "APIs" but not "Rapid" or "Capital".
+
+        Returns:
+            Markdown grouping up to three matching sections per pattern type
+            (types sorted alphabetically), or a one-line pointer to the
+            reference files when nothing matched.
+        """
+        pattern_keywords = [
+            "getting started",
+            "installation",
+            "configuration",
+            "usage",
+            "api",
+            "examples",
+            "tutorial",
+            "guide",
+            "best practices",
+            "troubleshooting",
+            "faq",
+        ]
+        # Compile once, outside the per-section loop. Only the start is anchored
+        # so plural forms ("guides", "faqs") still match.
+        matchers = [
+            (keyword, re.compile(rf"\b{re.escape(keyword)}")) for keyword in pattern_keywords
+        ]
+
+        patterns = []
+        for section in self.extracted_data.get("pages", []):
+            heading_text = section.get("heading", "").lower()
+            sec_num = section.get("section_number", 0)
+
+            for keyword, matcher in matchers:
+                if matcher.search(heading_text):
+                    patterns.append(
+                        {
+                            "type": keyword.title(),
+                            "heading": section.get("heading", ""),
+                            "section": sec_num,
+                        }
+                    )
+                    # First matching keyword wins; one pattern per section.
+                    break
+
+        if not patterns:
+            return "*See reference files for detailed content*\n\n"
+
+        by_type: dict[str, list] = {}
+        for pattern in patterns:
+            by_type.setdefault(pattern["type"], []).append(pattern)
+
+        content = "*Common documentation patterns found:*\n\n"
+        for ptype in sorted(by_type):
+            items = by_type[ptype]
+            content += f"**{ptype}** ({len(items)} sections):\n"
+            for item in items[:3]:
+                content += f"- {item['heading']} (section {item['section']})\n"
+            content += "\n"
+
+        return content
+
+    def _sanitize_filename(self, name):
+        """Return a lower-cased, filename-safe version of ``name``.
+
+        Characters other than word characters, whitespace and hyphens are
+        dropped; each run of hyphens/whitespace then becomes one underscore.
+        """
+        kept = re.sub(r"[^\w\s-]", "", name.lower())
+        return re.sub(r"[-\s]+", "_", kept)
+
+
+# ---------------------------------------------------------------------------
+# XHTML-to-sections helper (module-level for clarity)
+# ---------------------------------------------------------------------------
+
+
+def _build_section(
+    section_number: int,
+    heading: str | None,
+    heading_level: str | None,
+    elements: list,
+) -> dict:
+    """Build a section dict from a list of BeautifulSoup elements.
+
+    Each element is classified by its own tag name: ``h3``-``h6`` become
+    sub-headings, ``pre`` / stand-alone ``code`` become code samples, ``table``
+    becomes a table dict, ``img`` becomes an image placeholder, and anything
+    else contributes its flattened text. Elements without a tag name (e.g.
+    bare strings) are ignored.
+
+    Args:
+        section_number: 1-based section index
+        heading: Heading text (or None for preamble)
+        heading_level: 'h1', 'h2', etc. (None is stored as 'h1')
+        elements: List of BeautifulSoup Tag objects belonging to this section
+
+    Returns:
+        Section dict compatible with the intermediate JSON format, with keys
+        ``section_number``, ``heading``, ``heading_level``, ``text``,
+        ``headings``, ``code_samples``, ``tables`` and ``images``.
+    """
+    text_parts = []
+    code_samples = []
+    tables = []
+    sub_headings = []
+    images = []
+
+    for elem in elements:
+        if not hasattr(elem, "name") or elem.name is None:
+            continue
+
+        tag = elem.name
+
+        # Sub-headings (h3, h4, h5, h6) within the section
+        if tag in ("h3", "h4", "h5", "h6"):
+            sub_text = elem.get_text(strip=True)
+            if sub_text:
+                sub_headings.append({"level": tag, "text": sub_text})
+            continue
+
+        # Code blocks
+        if tag == "pre" or (tag == "code" and elem.find_parent("pre") is None):
+            # For <pre>, prefer the inner <code> (it carries the language class).
+            code_elem = elem.find("code") if tag == "pre" else elem
+            source_elem = code_elem if code_elem is not None else elem
+
+            code_text = source_elem.get_text().strip()
+            if code_text:
+                # Try to detect language from class attribute. Depending on the
+                # parser the attribute is a list of names or one space-separated
+                # string; normalise so we never iterate over single characters.
+                classes = source_elem.get("class", []) or []
+                if isinstance(classes, str):
+                    classes = classes.split()
+
+                lang = ""
+                for cls in classes:
+                    # "language-x", "lang-x" and "code-x" all name language x.
+                    if cls.startswith(("language-", "lang-", "code-")):
+                        lang = cls.split("-", 1)[1]
+                        break
+
+                quality_score = _score_code_quality(code_text)
+                code_samples.append(
+                    {"code": code_text, "language": lang, "quality_score": quality_score}
+                )
+            continue
+
+        # Tables
+        if tag == "table":
+            table_data = _extract_table_from_html(elem)
+            if table_data:
+                tables.append(table_data)
+            continue
+
+        # Images
+        if tag == "img":
+            src = elem.get("src", "")
+            if src:
+                images.append(
+                    {
+                        "index": len(images),
+                        "data": b"",  # EPUB images handled separately via manifest
+                        "width": _parse_dimension(elem.get("width")),
+                        "height": _parse_dimension(elem.get("height")),
+                    }
+                )
+            continue
+
+        # Regular text/paragraph content
+        text = elem.get_text(separator=" ", strip=True)
+        if text:
+            text_parts.append(text)
+
+    return {
+        "section_number": section_number,
+        "heading": heading or "",
+        "heading_level": heading_level or "h1",
+        "text": "\n\n".join(text_parts),
+        "headings": sub_headings,
+        "code_samples": code_samples,
+        "tables": tables,
+        "images": images,
+    }
+
+
+def _parse_dimension(value) -> int:
+    """Parse an HTML ``width``/``height`` attribute into whole pixels.
+
+    Accepts plain numbers with an optional ``px`` suffix ("640", "40px",
+    "12.5"). Anything else (missing, "100%", "auto", "2em") yields 0 instead
+    of raising, so one odd attribute cannot abort the whole extraction.
+    """
+    match = re.fullmatch(r"\s*(\d+)(?:\.\d+)?\s*(?:px)?\s*", str(value or ""))
+    return int(match.group(1)) if match else 0
+
+
+def _extract_table_from_html(table_elem) -> dict | None:
+    """Extract headers and rows from a BeautifulSoup <table> element.
+
+    Headers come from the first row of ``<thead>`` when present; otherwise the
+    first collected row is promoted to header. Rows are gathered from every
+    ``<tbody>`` that is a direct child of the table, or from the table itself
+    when it has none. Rows without cells are ignored.
+
+    Args:
+        table_elem: The ``<table>`` Tag to read.
+
+    Returns:
+        ``{"headers": [...], "rows": [[...], ...]}`` with cell text stripped,
+        or None when the table yields neither headers nor rows.
+    """
+    headers = []
+    header_row = None
+
+    # Try <thead> first for headers
+    thead = table_elem.find("thead")
+    if thead is not None:
+        header_row = thead.find("tr")
+        if header_row is not None:
+            headers = [th.get_text(strip=True) for th in header_row.find_all(["th", "td"])]
+
+    # Body rows: read all direct <tbody> children, not just the first one.
+    bodies = table_elem.find_all("tbody", recursive=False) or [table_elem]
+    rows = []
+    for body in bodies:
+        for row in body.find_all("tr"):
+            # Skip the header row we already captured. Compare by identity so
+            # a data row that merely repeats the header text is still kept.
+            if row is header_row:
+                continue
+            cells = [td.get_text(strip=True) for td in row.find_all(["td", "th"])]
+            if cells:
+                rows.append(cells)
+
+    # If no explicit thead, use first row as header
+    if not headers and rows:
+        headers = rows.pop(0)
+
+    if not headers and not rows:
+        return None
+
+    return {"headers": headers, "rows": rows}
+
+
+def _score_code_quality(code: str) -> float:
+    """Simple quality heuristic for code blocks (0-10 scale).
+
+    Starts from 5.0 and adjusts: +2.0 for 10 or more lines (+1.0 for 5-9),
+    +1.5 for a function/class definition keyword, +0.5 for an import-like
+    statement, +0.5 for a line indented by four spaces, +0.3 for common code
+    punctuation, and -2.0 for snippets shorter than 30 characters. The result
+    is clamped to the range 0.0-10.0; an empty string scores 0.0.
+    """
+    if not code:
+        return 0.0
+
+    score = 5.0
+    lines = code.strip().split("\n")
+    line_count = len(lines)
+
+    # More lines = more substantial
+    if line_count >= 10:
+        score += 2.0
+    elif line_count >= 5:
+        score += 1.0
+
+    # Has function/class definitions
+    if re.search(r"\b(def |class |function |func |fn )", code):
+        score += 1.5
+
+    # Has imports/require. "#include" sits outside the \b group: "#" is not a
+    # word character, so \b can never match right before it at a line start.
+    if re.search(r"\b(import |from .+ import|require\(|using )|#include", code):
+        score += 0.5
+
+    # Has indentation (common in Python, JS, etc.)
+    if re.search(r"^    ", code, re.MULTILINE):
+        score += 0.5
+
+    # Has assignment, operators, or common code syntax
+    if re.search(r"[=:{}()\[\]]", code):
+        score += 0.3
+
+    # Very short snippets get penalized
+    if len(code) < 30:
+        score -= 2.0
+
+    return min(10.0, max(0.0, score))
+
+
+def main():
+    """Command-line entry point: convert an EPUB (or extracted JSON) into a skill.
+
+    Parses the shared EPUB arguments, then runs one of three paths:
+    a dry run that only prints what would be processed, a build from a
+    previously extracted JSON file (``--from-json``), or a direct extraction
+    from an ``.epub`` file followed by the skill build, the workflow runner
+    and, when ``enhance_level`` is above 0, traditional enhancement.
+
+    Returns:
+        0 on success. Failures are printed to stderr and end the process
+        via ``sys.exit(1)``.
+    """
+    from .arguments.epub import add_epub_arguments
+
+    parser = argparse.ArgumentParser(
+        description="Convert EPUB e-book to skill",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    add_epub_arguments(parser)
+
+    args = parser.parse_args()
+
+    # Set logging level
+    if getattr(args, "quiet", False):
+        logging.getLogger().setLevel(logging.WARNING)
+    elif getattr(args, "verbose", False):
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Handle --dry-run
+    # NOTE: this returns before the input validation below, so a dry run
+    # without --epub/--from-json reports the source as "(none)" and succeeds.
+    if getattr(args, "dry_run", False):
+        source = getattr(args, "epub", None) or getattr(args, "from_json", None) or "(none)"
+        print(f"\n{'=' * 60}")
+        print("DRY RUN: EPUB Extraction")
+        print(f"{'=' * 60}")
+        print(f"Source:         {source}")
+        print(f"Name:           {getattr(args, 'name', None) or '(auto-detect)'}")
+        print(f"Enhance level:  {getattr(args, 'enhance_level', 0)}")
+        print(f"\n✅ Dry run complete")
+        return 0
+
+    # Validate inputs
+    if not (getattr(args, "epub", None) or getattr(args, "from_json", None)):
+        parser.error("Must specify --epub or --from-json")
+
+    # Build from JSON workflow
+    # (takes precedence over --epub when both are given; no workflows or
+    # enhancement are run on this path)
+    if getattr(args, "from_json", None):
+        # Default skill name: JSON file stem with any "_extracted" removed.
+        name = Path(args.from_json).stem.replace("_extracted", "")
+        config = {
+            "name": getattr(args, "name", None) or name,
+            "description": getattr(args, "description", None)
+            or f"Use when referencing {name} documentation",
+        }
+        try:
+            converter = EpubToSkillConverter(config)
+            converter.load_extracted_data(args.from_json)
+            converter.build_skill()
+        except Exception as e:
+            print(f"\n❌ Error: {e}", file=sys.stderr)
+            sys.exit(1)
+        return 0
+
+    # Direct EPUB mode
+    if not getattr(args, "name", None):
+        # Auto-detect name from filename
+        args.name = Path(args.epub).stem
+
+    config = {
+        "name": args.name,
+        "epub_path": args.epub,
+        # Pass None so extract_epub() can infer from EPUB metadata
+        "description": getattr(args, "description", None),
+    }
+
+    try:
+        converter = EpubToSkillConverter(config)
+
+        # Extract
+        if not converter.extract_epub():
+            print("\n❌ EPUB extraction failed - see error above", file=sys.stderr)
+            sys.exit(1)
+
+        # Build skill
+        converter.build_skill()
+
+        # Enhancement Workflow Integration
+        from skill_seekers.cli.workflow_runner import run_workflows
+
+        workflow_executed, workflow_names = run_workflows(args)
+        workflow_name = ", ".join(workflow_names) if workflow_names else None
+
+        # Traditional enhancement (complements workflow system)
+        if getattr(args, "enhance_level", 0) > 0:
+            import os
+
+            # API mode needs a key from --api-key or ANTHROPIC_API_KEY;
+            # without one the local enhancer is used.
+            api_key = getattr(args, "api_key", None) or os.environ.get("ANTHROPIC_API_KEY")
+            mode = "API" if api_key else "LOCAL"
+
+            print("\n" + "=" * 80)
+            print(f"🤖 Traditional AI Enhancement ({mode} mode, level {args.enhance_level})")
+            print("=" * 80)
+            if workflow_executed:
+                print(f"   Running after workflow: {workflow_name}")
+                print(
+                    "   (Workflow provides specialized analysis,"
+                    " enhancement provides general improvements)"
+                )
+            print("")
+
+            skill_dir = converter.skill_dir
+            if api_key:
+                try:
+                    from skill_seekers.cli.enhance_skill import enhance_skill_md
+
+                    enhance_skill_md(skill_dir, api_key)
+                    print("✅ API enhancement complete!")
+                except ImportError:
+                    # Only a failed import triggers the local fallback; other
+                    # errors propagate to the handlers at the end of main().
+                    print("❌ API enhancement not available. Falling back to LOCAL mode...")
+                    from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer
+
+                    enhancer = LocalSkillEnhancer(Path(skill_dir))
+                    enhancer.run(headless=True)
+                    print("✅ Local enhancement complete!")
+            else:
+                from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer
+
+                enhancer = LocalSkillEnhancer(Path(skill_dir))
+                enhancer.run(headless=True)
+                print("✅ Local enhancement complete!")
+
+    except RuntimeError as e:
+        # RuntimeError is reported as a plain message, without a traceback.
+        print(f"\n❌ Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        # Anything else is unexpected: print the traceback as well.
+        print(f"\n❌ Unexpected error during EPUB processing: {e}", file=sys.stderr)
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+
+    return 0
+
+
+# Allow running this module directly; main()'s return value is the exit status.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py
index 8092b5e..f509b8b 100644
--- a/src/skill_seekers/cli/main.py
+++ b/src/skill_seekers/cli/main.py
@@ -13,6 +13,7 @@ Commands:
     github               Scrape GitHub repository
     pdf                  Extract from PDF file
     word                 Extract from Word (.docx) file
+    epub                 Extract from EPUB e-book (.epub)
     video                Extract from video (YouTube or local)
     unified              Multi-source scraping (docs + GitHub + PDF)
     analyze              Analyze local codebase and extract code knowledge
@@ -50,6 +51,7 @@ COMMAND_MODULES = {
     "github": "skill_seekers.cli.github_scraper",
     "pdf": "skill_seekers.cli.pdf_scraper",
     "word": "skill_seekers.cli.word_scraper",
+    "epub": "skill_seekers.cli.epub_scraper",
     "video": "skill_seekers.cli.video_scraper",
     "unified": "skill_seekers.cli.unified_scraper",
     "enhance": "skill_seekers.cli.enhance_command",
diff --git a/src/skill_seekers/cli/parsers/__init__.py b/src/skill_seekers/cli/parsers/__init__.py
index f0aaf2b..06bf6b4 100644
--- a/src/skill_seekers/cli/parsers/__init__.py
+++ b/src/skill_seekers/cli/parsers/__init__.py
@@ -13,6 +13,7 @@ from .scrape_parser import ScrapeParser
 from .github_parser import GitHubParser
 from .pdf_parser import PDFParser
 from .word_parser import WordParser
+from .epub_parser import EpubParser
 from .video_parser import VideoParser
 from .unified_parser import UnifiedParser
 from .enhance_parser import EnhanceParser
@@ -45,6 +46,7 @@ PARSERS = [
     EnhanceStatusParser(),
     PDFParser(),
     WordParser(),
+    EpubParser(),
     VideoParser(),
     UnifiedParser(),
     EstimateParser(),
diff --git a/src/skill_seekers/cli/parsers/epub_parser.py b/src/skill_seekers/cli/parsers/epub_parser.py
new file mode 100644
index 0000000..9ef77da
--- /dev/null
+++ b/src/skill_seekers/cli/parsers/epub_parser.py
@@ -0,0 +1,32 @@
+"""EPUB subcommand parser.
+
+Uses shared argument definitions from arguments.epub to ensure
+consistency with the standalone epub_scraper module.
+"""
+
+from .base import SubcommandParser
+from skill_seekers.cli.arguments.epub import add_epub_arguments
+
+
+class EpubParser(SubcommandParser):
+    """Parser for the ``epub`` subcommand.
+
+    Supplies the subcommand's name and help texts and delegates argument
+    definitions to the shared ``add_epub_arguments`` helper.
+    """
+
+    @property
+    def name(self) -> str:
+        """Subcommand name: ``epub``."""
+        return "epub"
+
+    @property
+    def help(self) -> str:
+        """One-line help text for the subcommand."""
+        # NOTE(review): presumably shown in the CLI's command list - confirm
+        # against SubcommandParser.
+        return "Extract from EPUB e-book (.epub)"
+
+    @property
+    def description(self) -> str:
+        """Longer description text for the subcommand."""
+        return "Extract content from EPUB e-book (.epub) and generate skill"
+
+    def add_arguments(self, parser):
+        """Add epub-specific arguments.
+
+        Uses shared argument definitions to ensure consistency
+        with epub_scraper.py (standalone scraper).
+
+        Args:
+            parser: The parser object the shared EPUB arguments are added to.
+        """
+        add_epub_arguments(parser)
diff --git a/src/skill_seekers/cli/source_detector.py b/src/skill_seekers/cli/source_detector.py
index 724f342..572d753 100644
--- a/src/skill_seekers/cli/source_detector.py
+++ b/src/skill_seekers/cli/source_detector.py
@@ -63,6 +63,9 @@ class SourceDetector:
         if source.endswith(".docx"):
             return cls._detect_word(source)
 
+        if source.endswith(".epub"):
+            return cls._detect_epub(source)
+
         # Video file extensions
         VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv")
         if source.lower().endswith(VIDEO_EXTENSIONS):
@@ -99,6 +102,7 @@ class SourceDetector:
             "  Local:  skill-seekers create ./my-project\n"
             "  PDF:    skill-seekers create tutorial.pdf\n"
             "  DOCX:   skill-seekers create document.docx\n"
+            "  EPUB:   skill-seekers create ebook.epub\n"
             "  Video:  skill-seekers create https://youtube.com/watch?v=...\n"
             "  Video:  skill-seekers create recording.mp4\n"
             "  Config: skill-seekers create configs/react.json"
@@ -128,6 +132,14 @@ class SourceDetector:
             type="word", parsed={"file_path": source}, suggested_name=name, raw_input=source
         )
 
+    @classmethod
+    def _detect_epub(cls, source: str) -> SourceInfo:
+        """Build the ``epub`` SourceInfo; the suggested name is the file's base name sans extension."""
+        stem, _ext = os.path.splitext(os.path.basename(source))
+        return SourceInfo(
+            type="epub", parsed={"file_path": source}, suggested_name=stem, raw_input=source
+        )
+
     @classmethod
     def _detect_video_file(cls, source: str) -> SourceInfo:
         """Detect local video file source."""
@@ -277,6 +289,13 @@ class SourceDetector:
             if not os.path.isfile(file_path):
                 raise ValueError(f"Path is not a file: {file_path}")
 
+        elif source_info.type == "epub":
+            file_path = source_info.parsed["file_path"]
+            if not os.path.exists(file_path):
+                raise ValueError(f"EPUB file does not exist: {file_path}")
+            if not os.path.isfile(file_path):
+                raise ValueError(f"Path is not a file: {file_path}")
+
         elif source_info.type == "video":
             if source_info.parsed.get("source_kind") == "file":
                 file_path = source_info.parsed["file_path"]
diff --git a/tests/test_cli_parsers.py b/tests/test_cli_parsers.py
index 55339db..a9878a3 100644
--- a/tests/test_cli_parsers.py
+++ b/tests/test_cli_parsers.py
@@ -24,12 +24,12 @@ class TestParserRegistry:
 
     def test_all_parsers_registered(self):
         """Test that all parsers are registered."""
-        assert len(PARSERS) == 24, f"Expected 24 parsers, got {len(PARSERS)}"
+        assert len(PARSERS) == 25, f"Expected 25 parsers, got {len(PARSERS)}"
 
     def test_get_parser_names(self):
         """Test getting list of parser names."""
         names = get_parser_names()
-        assert len(names) == 24
+        assert len(names) == 25
         assert "scrape" in names
         assert "github" in names
         assert "package" in names
@@ -243,9 +243,9 @@ class TestBackwardCompatibility:
             assert cmd in names, f"Command '{cmd}' not found in parser registry!"
 
     def test_command_count_matches(self):
-        """Test that we have exactly 24 commands (includes create, workflows, word, video, and sync-config commands)."""
-        assert len(PARSERS) == 24
-        assert len(get_parser_names()) == 24
+        """Test that we have exactly 25 commands (includes create, workflows, word, epub, video, and sync-config)."""
+        assert len(PARSERS) == 25
+        assert len(get_parser_names()) == 25
 
 
 if __name__ == "__main__":
diff --git a/tests/test_epub_scraper.py b/tests/test_epub_scraper.py
new file mode 100644
index 0000000..92379bf
--- /dev/null
+++ b/tests/test_epub_scraper.py
@@ -0,0 +1,1626 @@
+"""
+Tests for EPUB scraper (epub_scraper.py).
+
+Covers: initialization, extraction, categorization, skill building,
+code blocks, tables, images, error handling, JSON workflow, CLI arguments,
+helper functions, source detection, DRM detection, and edge cases.
+
+Tests use mock data, not real EPUB files; extraction tests are skipped if ebooklib is missing.
+"""
+
+import json
+import os
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+
+# Conditional import (same pattern as test_word_scraper.py)
+try:
+    import ebooklib
+
+    EPUB_AVAILABLE = True
+except ImportError:
+    EPUB_AVAILABLE = False
+
+try:
+    from skill_seekers.cli.epub_scraper import (
+        EpubToSkillConverter,
+        _build_section,
+        _extract_table_from_html,
+        _score_code_quality,
+        infer_description_from_epub,
+    )
+
+    IMPORT_OK = True
+except ImportError:
+    IMPORT_OK = False
+
+
+def _make_sample_extracted_data(
+    num_sections=2,
+    include_code=False,
+    include_tables=False,
+    include_images=False,
+) -> dict:
+    """Create minimal extracted_data dict for testing."""
+    sections = []
+    total_code = 0
+    total_images = 0
+    languages = {}
+
+    for i in range(1, num_sections + 1):
+        section = {
+            "section_number": i,
+            "heading": f"Chapter {i}",
+            "heading_level": "h1",
+            "text": f"Content of chapter {i}. This is sample text.",
+            "headings": [{"level": "h2", "text": f"Section {i}.1"}],
+            "code_samples": [],
+            "tables": [],
+            "images": [],
+        }
+
+        if include_code:
+            section["code_samples"] = [
+                {
+                    "code": f"def func_{i}():\n    return {i}",
+                    "language": "python",
+                    "quality_score": 7.5,
+                },
+                {
+                    "code": f"console.log({i})",
+                    "language": "javascript",
+                    "quality_score": 4.0,
+                },
+            ]
+            total_code += 2
+            languages["python"] = languages.get("python", 0) + 1
+            languages["javascript"] = languages.get("javascript", 0) + 1
+
+        if include_tables:
+            section["tables"] = [{"headers": ["Name", "Value"], "rows": [["key", "val"]]}]
+
+        if include_images:
+            section["images"] = [
+                {"index": 0, "data": b"\x89PNG\r\n\x1a\n", "width": 100, "height": 100}
+            ]
+            total_images += 1
+
+        sections.append(section)
+
+    return {
+        "source_file": "test.epub",
+        "metadata": {
+            "title": "Test Book",
+            "author": "Test Author",
+            "language": "en",
+            "publisher": "Test Publisher",
+            "date": "2024-01-01",
+            "description": "A test book for unit testing",
+            "subject": "Testing, Unit Tests",
+            "rights": "Copyright 2024",
+            "identifier": "urn:uuid:12345",
+        },
+        "total_sections": num_sections,
+        "total_code_blocks": total_code,
+        "total_images": total_images,
+        "languages_detected": languages,
+        "pages": sections,
+    }
+
+
+# ============================================================================
+# Class 1: TestEpubToSkillConverterInit
+# ============================================================================
+
+
+class TestEpubToSkillConverterInit(unittest.TestCase):
+    """Test EpubToSkillConverter initialization."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_init_with_name_and_epub_path(self):
+        config = {"name": "test_skill", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.name, "test_skill")
+        self.assertEqual(converter.epub_path, "test.epub")
+
+    def test_init_with_full_config(self):
+        config = {
+            "name": "mybook",
+            "epub_path": "/path/to/book.epub",
+            "description": "Custom description",
+            "categories": {"ch1": ["intro"]},
+        }
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.name, "mybook")
+        self.assertEqual(converter.epub_path, "/path/to/book.epub")
+        self.assertEqual(converter.description, "Custom description")
+        self.assertEqual(converter.categories, {"ch1": ["intro"]})
+
+    def test_default_description_uses_name(self):
+        config = {"name": "test_skill"}
+        converter = EpubToSkillConverter(config)
+        self.assertIn("test_skill", converter.description)
+        self.assertTrue(converter.description.startswith("Use when referencing"))
+
+    def test_skill_dir_uses_name(self):
+        config = {"name": "mybook"}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.skill_dir, "output/mybook")
+
+    def test_data_file_uses_name(self):
+        config = {"name": "mybook"}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.data_file, "output/mybook_extracted.json")
+
+    def test_init_requires_name(self):
+        with self.assertRaises(KeyError):
+            EpubToSkillConverter({})
+
+    def test_init_empty_name(self):
+        config = {"name": ""}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.name, "")
+
+    def test_init_with_special_characters_in_name(self):
+        config = {"name": "my-book name_2024"}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter.name, "my-book name_2024")
+        self.assertIn("my-book name_2024", converter.skill_dir)
+
+
+# ============================================================================
+# Class 2: TestEpubExtraction
+# ============================================================================
+
+
+class TestEpubExtraction(unittest.TestCase):
+    """Test EPUB content extraction."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        if not EPUB_AVAILABLE:
+            self.skipTest("ebooklib not installed")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _make_mock_book(self, spine_content=None, metadata=None, images=None):
+        """Create a mock ebooklib EpubBook."""
+        book = MagicMock()
+
+        if metadata is None:
+            metadata = {
+                "title": [("Test Book", {})],
+                "creator": [("Test Author", {})],
+                "language": [("en", {})],
+                "publisher": [("Test Publisher", {})],
+                "date": [("2024-01-01", {})],
+                "description": [("A test book", {})],
+                "subject": [("Testing", {})],
+                "rights": [("Copyright 2024", {})],
+                "identifier": [("urn:uuid:12345", {})],
+            }
+
+        def get_metadata(ns, key):
+            if ns == "DC":
+                return metadata.get(key, [])
+            return []
+
+        book.get_metadata = get_metadata
+
+        # Spine items
+        if spine_content is None:
+            spine_content = [
+                (
+                    "ch1",
+                    "<html><body><h1>Chapter 1</h1><p>Content 1</p></body></html>",
+                ),
+            ]
+
+        spine_items = []
+        items_dict = {}
+        for item_id, content in spine_content:
+            item = MagicMock()
+            item.get_type.return_value = ebooklib.ITEM_DOCUMENT
+            item.get_content.return_value = content.encode("utf-8")
+            items_dict[item_id] = item
+            spine_items.append((item_id, "yes"))
+
+        book.spine = spine_items
+        book.get_item_with_id = lambda x: items_dict.get(x)
+
+        # Images
+        if images is None:
+            images = []
+        img_items = []
+        for img in images:
+            img_item = MagicMock()
+            img_item.media_type = img.get("media_type", "image/png")
+            img_item.get_content.return_value = img.get("data", b"\x89PNG")
+            img_item.file_name = img.get("file_name", "image.png")
+            img_items.append(img_item)
+
+        book.get_items_of_type = lambda t: img_items if t == ebooklib.ITEM_IMAGE else []
+
+        # All items (for DRM detection, SVG counting)
+        all_items = list(items_dict.values()) + img_items
+        book.get_items = lambda: all_items
+
+        return book
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_basic_epub(self, mock_isfile, mock_exists, mock_epub):
+        mock_book = self._make_mock_book()
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        result = converter.extract_epub()
+        self.assertTrue(result)
+        self.assertIsNotNone(converter.extracted_data)
+        self.assertGreaterEqual(len(converter.extracted_data["pages"]), 1)
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_metadata(self, mock_isfile, mock_exists, mock_epub):
+        mock_book = self._make_mock_book()
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        metadata = converter.extracted_data["metadata"]
+        self.assertEqual(metadata["title"], "Test Book")
+        self.assertEqual(metadata["author"], "Test Author")
+        self.assertEqual(metadata["language"], "en")
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_multiple_chapters(self, mock_isfile, mock_exists, mock_epub):
+        spine = [
+            ("ch1", "<html><body><h1>Chapter 1</h1><p>Text 1</p></body></html>"),
+            ("ch2", "<html><body><h1>Chapter 2</h1><p>Text 2</p></body></html>"),
+            ("ch3", "<html><body><h1>Chapter 3</h1><p>Text 3</p></body></html>"),
+        ]
+        mock_book = self._make_mock_book(spine_content=spine)
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        self.assertEqual(len(converter.extracted_data["pages"]), 3)
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_code_blocks(self, mock_isfile, mock_exists, mock_epub):
+        spine = [
+            (
+                "ch1",
+                "<html><body><h1>Code Chapter</h1>"
+                '<pre><code class="language-python">def hello():\n    print("hi")</code></pre>'
+                "</body></html>",
+            ),
+        ]
+        mock_book = self._make_mock_book(spine_content=spine)
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        code_samples = converter.extracted_data["pages"][0]["code_samples"]
+        self.assertGreaterEqual(len(code_samples), 1)
+        self.assertEqual(code_samples[0]["language"], "python")
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_images(self, mock_isfile, mock_exists, mock_epub):
+        images = [{"media_type": "image/png", "data": b"\x89PNG", "file_name": "fig1.png"}]
+        mock_book = self._make_mock_book(images=images)
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        self.assertGreaterEqual(converter.extracted_data["total_images"], 1)
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_heading_boundary_splitting(self, mock_isfile, mock_exists, mock_epub):
+        spine = [
+            (
+                "ch1",
+                "<html><body>"
+                "<h1>First Heading</h1><p>First content</p>"
+                "<h2>Second Heading</h2><p>Second content</p>"
+                "</body></html>",
+            ),
+        ]
+        mock_book = self._make_mock_book(spine_content=spine)
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        pages = converter.extracted_data["pages"]
+        self.assertEqual(len(pages), 2)
+        self.assertEqual(pages[0]["heading"], "First Heading")
+        self.assertEqual(pages[1]["heading"], "Second Heading")
+
+    def test_extract_missing_file_raises_error(self):
+        config = {"name": "test", "epub_path": "/nonexistent/book.epub"}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(FileNotFoundError):
+            converter.extract_epub()
+
+    def test_extract_invalid_extension_raises_error(self):
+        # Create a real file with the wrong extension
+        bad_file = os.path.join(self.temp_dir, "test.txt")
+        Path(bad_file).write_text("not an epub")
+
+        config = {"name": "test", "epub_path": bad_file}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(ValueError):
+            converter.extract_epub()
+
+    def test_extract_deps_not_installed(self):
+        from skill_seekers.cli.epub_scraper import _check_epub_deps
+
+        with patch("skill_seekers.cli.epub_scraper.EPUB_AVAILABLE", False):
+            with self.assertRaises(RuntimeError) as ctx:
+                _check_epub_deps()
+            self.assertIn("ebooklib", str(ctx.exception))
+            self.assertIn("pip install", str(ctx.exception))
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_empty_spine(self, mock_isfile, mock_exists, mock_epub):
+        mock_book = self._make_mock_book(spine_content=[])
+        mock_book.spine = []
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        converter.extract_epub()
+        self.assertEqual(len(converter.extracted_data["pages"]), 0)
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_extract_spine_item_no_body(self, mock_isfile, mock_exists, mock_epub):
+        spine = [
+            ("ch1", "<html><head><title>No Body</title></head></html>"),
+        ]
+        mock_book = self._make_mock_book(spine_content=spine)
+        mock_epub.read_epub.return_value = mock_book
+
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        # Should not crash — a missing <body> falls back to the soup
+        converter.extract_epub()
+        self.assertIsNotNone(converter.extracted_data)
+
+
+# ============================================================================
+# Class 3: TestEpubDrmDetection
+# ============================================================================
+
+
+class TestEpubDrmDetection(unittest.TestCase):
+    """Test DRM detection logic."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _make_converter(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        return EpubToSkillConverter(config)
+
+    def _make_book_with_encryption(self, encryption_xml_content):
+        """Create a mock book with META-INF/encryption.xml."""
+        book = MagicMock()
+        enc_item = MagicMock()
+        enc_item.file_name = "META-INF/encryption.xml"
+        enc_item.get_content.return_value = encryption_xml_content.encode("utf-8")
+        book.get_items.return_value = [enc_item]
+        return book
+
+    def test_no_drm_detected(self):
+        converter = self._make_converter()
+        book = MagicMock()
+        book.get_items.return_value = []
+        self.assertFalse(converter._detect_drm(book))
+
+    def test_drm_detected_adobe_adept(self):
+        converter = self._make_converter()
+        xml = '<encryption xmlns="http://ns.adobe.com/adept"><EncryptedData/></encryption>'
+        book = self._make_book_with_encryption(xml)
+        self.assertTrue(converter._detect_drm(book))
+
+    def test_drm_detected_apple_fairplay(self):
+        converter = self._make_converter()
+        xml = '<encryption><EncryptedData xmlns="http://itunes.apple.com/dataenc"/></encryption>'
+        book = self._make_book_with_encryption(xml)
+        self.assertTrue(converter._detect_drm(book))
+
+    def test_drm_detected_readium_lcp(self):
+        converter = self._make_converter()
+        xml = '<encryption xmlns="http://readium.org/2014/01/lcp"><EncryptedData/></encryption>'
+        book = self._make_book_with_encryption(xml)
+        self.assertTrue(converter._detect_drm(book))
+
+    def test_font_obfuscation_not_drm(self):
+        converter = self._make_converter()
+        xml = (
+            "<encryption>"
+            '<EncryptionMethod Algorithm="http://www.idpf.org/2008/embedding"/>'
+            "</encryption>"
+        )
+        book = self._make_book_with_encryption(xml)
+        self.assertFalse(converter._detect_drm(book))
+
+    def test_drm_error_message_is_clear(self):
+        converter = self._make_converter()
+        xml = '<encryption xmlns="http://ns.adobe.com/adept"><EncryptedData/></encryption>'
+        book = self._make_book_with_encryption(xml)
+        self.assertTrue(converter._detect_drm(book))
+        # The DRM error itself is raised by extract_epub, not _detect_drm,
+        # so this test only confirms that detection works.
+
+
+# ============================================================================
+# Class 4: TestEpubCategorization
+# ============================================================================
+
+
+class TestEpubCategorization(unittest.TestCase):
+    """Test content categorization."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_single_source_creates_one_category(self):
+        config = {"name": "test", "epub_path": "mybook.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=3)
+
+        categories = converter.categorize_content()
+        self.assertEqual(len(categories), 1)
+        self.assertIn("mybook", categories)
+
+    def test_keyword_categorization(self):
+        config = {
+            "name": "test",
+            "categories": {
+                "intro": ["introduction", "getting started"],
+                "advanced": ["advanced", "deep dive"],
+            },
+        }
+        converter = EpubToSkillConverter(config)
+        data = _make_sample_extracted_data(num_sections=2)
+        data["pages"][0]["heading"] = "Introduction to Testing"
+        data["pages"][1]["heading"] = "Advanced Techniques"
+        converter.extracted_data = data
+
+        categories = converter.categorize_content()
+        self.assertIn("intro", categories)
+        self.assertIn("advanced", categories)
+
+    def test_no_categories_uses_default(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=2)
+
+        categories = converter.categorize_content()
+        self.assertIn("content", categories)
+        self.assertEqual(categories["content"]["title"], "Content")
+
+    def test_categorize_empty_sections(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=0)
+
+        categories = converter.categorize_content()
+        self.assertIn("content", categories)
+        self.assertEqual(len(categories["content"]["pages"]), 0)
+
+    def test_categorize_no_keyword_matches(self):
+        config = {
+            "name": "test",
+            "categories": {"intro": ["zzzzz_no_match"]},
+        }
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=2)
+
+        categories = converter.categorize_content()
+        self.assertIn("other", categories)
+        self.assertEqual(len(categories["other"]["pages"]), 2)
+
+    def test_categorize_single_section(self):
+        config = {"name": "test", "epub_path": "book.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=1)
+
+        categories = converter.categorize_content()
+        total_pages = sum(len(c["pages"]) for c in categories.values())
+        self.assertEqual(total_pages, 1)
+
+    def test_categorize_many_sections(self):
+        config = {"name": "test", "epub_path": "book.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=50)
+
+        categories = converter.categorize_content()
+        total_pages = sum(len(c["pages"]) for c in categories.values())
+        self.assertEqual(total_pages, 50)
+
+    def test_categorize_preserves_section_order(self):
+        config = {"name": "test", "epub_path": "book.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=5)
+
+        categories = converter.categorize_content()
+        for cat_data in categories.values():
+            section_nums = [s["section_number"] for s in cat_data["pages"]]
+            self.assertEqual(section_nums, sorted(section_nums))
+
+
+# ============================================================================
+# Class 5: TestEpubSkillBuilding
+# ============================================================================
+
+
+class TestEpubSkillBuilding(unittest.TestCase):
+    """Test skill building (directory structure, SKILL.md, reference files)."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _make_converter(self, name="test_book", epub_path="test.epub"):
+        config = {"name": name, "epub_path": epub_path}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, name)
+        converter.data_file = os.path.join(self.temp_dir, f"{name}_extracted.json")
+        return converter
+
+    def test_build_creates_directory_structure(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_dir = Path(self.temp_dir) / "test_book"
+        self.assertTrue(skill_dir.exists())
+        self.assertTrue((skill_dir / "references").exists())
+        self.assertTrue((skill_dir / "scripts").exists())
+        self.assertTrue((skill_dir / "assets").exists())
+
+    def test_build_generates_skill_md(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_book" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+        content = skill_md.read_text()
+        self.assertIn("---", content)
+        self.assertIn("name:", content)
+        self.assertIn("description:", content)
+
+    def test_build_generates_reference_files(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test_book" / "references"
+        md_files = list(refs_dir.glob("*.md"))
+        # At least index.md + one reference file
+        self.assertGreaterEqual(len(md_files), 2)
+
+    def test_build_generates_index(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        index_path = Path(self.temp_dir) / "test_book" / "references" / "index.md"
+        self.assertTrue(index_path.exists())
+        content = index_path.read_text()
+        self.assertIn("Categories", content)
+        self.assertIn("Statistics", content)
+
+    def test_skill_md_contains_metadata(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_book" / "SKILL.md"
+        content = skill_md.read_text()
+        self.assertIn("Test Book", content)
+        self.assertIn("Test Author", content)
+
+    def test_skill_md_yaml_frontmatter(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_book" / "SKILL.md"
+        content = skill_md.read_text()
+        # YAML frontmatter starts and ends with ---
+        lines = content.split("\n")
+        self.assertEqual(lines[0], "---")
+        # Find closing ---
+        closing_idx = None
+        for i, line in enumerate(lines[1:], 1):
+            if line == "---":
+                closing_idx = i
+                break
+        self.assertIsNotNone(closing_idx)
+
+    def test_build_without_extracted_data_fails(self):
+        converter = self._make_converter()
+        converter.extracted_data = None
+        with self.assertRaises((AttributeError, TypeError)):
+            converter.build_skill()
+
+    def test_build_overwrites_existing_output(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data()
+
+        # Build once
+        converter.build_skill()
+        skill_md_1 = (Path(self.temp_dir) / "test_book" / "SKILL.md").read_text()
+
+        # Build again
+        converter.build_skill()
+        skill_md_2 = (Path(self.temp_dir) / "test_book" / "SKILL.md").read_text()
+
+        self.assertEqual(skill_md_1, skill_md_2)
+
+    def test_build_with_long_name(self):
+        long_name = "a" * 100
+        converter = self._make_converter(name=long_name)
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(converter.skill_dir) / "SKILL.md"
+        content = skill_md.read_text()
+        # Name in frontmatter is truncated to 64 chars
+        lines = content.split("\n")
+        for line in lines:
+            if line.startswith("name:"):
+                name_val = line.split(":", 1)[1].strip()
+                self.assertLessEqual(len(name_val), 64)
+                break
+
+    def test_build_with_unicode_content(self):
+        converter = self._make_converter()
+        data = _make_sample_extracted_data()
+        data["pages"][0]["heading"] = (
+            "Unicode: \u4e2d\u6587 \u0627\u0644\u0639\u0631\u0628\u064a\u0629 \U0001f600"
+        )
+        data["pages"][0]["text"] = (
+            "Content with CJK: \u4f60\u597d, Arabic: \u0645\u0631\u062d\u0628\u0627, Emoji: \U0001f680"
+        )
+        converter.extracted_data = data
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test_book" / "references"
+        md_files = list(refs_dir.glob("*.md"))
+        # Should have reference files
+        self.assertGreaterEqual(len(md_files), 1)
+        # Unicode should be preserved in at least one file
+        found_unicode = False
+        for f in md_files:
+            content = f.read_text(encoding="utf-8")
+            if "\u4e2d\u6587" in content or "\u4f60\u597d" in content:
+                found_unicode = True
+                break
+        self.assertTrue(found_unicode)
+
+
+# ============================================================================
+# Class 6: TestEpubCodeBlocks
+# ============================================================================
+
+
+class TestEpubCodeBlocks(unittest.TestCase):
+    """Test code block extraction and rendering."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def _make_converter(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        return converter
+
+    def test_code_blocks_included_in_reference_files(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data(include_code=True)
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test" / "references"
+        found_code = False
+        for f in refs_dir.glob("*.md"):
+            if f.name == "index.md":
+                continue
+            content = f.read_text()
+            if "```python" in content or "def func_" in content:
+                found_code = True
+                break
+        self.assertTrue(found_code)
+
+    def test_code_blocks_in_skill_md_top_15(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data(num_sections=10, include_code=True)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        content = skill_md.read_text()
+        self.assertIn("Code Examples", content)
+
+    def test_code_language_grouped(self):
+        converter = self._make_converter()
+        converter.extracted_data = _make_sample_extracted_data(num_sections=3, include_code=True)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        content = skill_md.read_text()
+        self.assertIn("Python Examples", content)
+        self.assertIn("Javascript Examples", content)
+
+    def test_empty_code_block(self):
+        from bs4 import BeautifulSoup
+
+        html = "<pre><code></code></pre>"
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Test", "h1", elements)
+        self.assertEqual(len(section["code_samples"]), 0)
+
+    def test_code_block_with_html_entities(self):
+        from bs4 import BeautifulSoup
+
+        html = "<pre><code>if (x &lt; 10 &amp;&amp; y &gt; 5) {}</code></pre>"
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Test", "h1", elements)
+        self.assertEqual(len(section["code_samples"]), 1)
+        code = section["code_samples"][0]["code"]
+        self.assertIn("<", code)
+        self.assertIn(">", code)
+        self.assertIn("&&", code)
+
+    def test_code_block_with_syntax_highlighting_spans(self):
+        from bs4 import BeautifulSoup
+
+        html = (
+            '<pre><code><span class="keyword">def</span> '
+            '<span class="name">foo</span>():</code></pre>'
+        )
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Test", "h1", elements)
+        self.assertEqual(len(section["code_samples"]), 1)
+        code = section["code_samples"][0]["code"]
+        self.assertIn("def", code)
+        self.assertIn("foo", code)
+        self.assertNotIn("<span", code)
+
+    def test_code_block_language_from_class(self):
+        from bs4 import BeautifulSoup
+
+        html = '<pre><code class="language-rust">fn main() {}</code></pre>'
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Test", "h1", elements)
+        self.assertEqual(section["code_samples"][0]["language"], "rust")
+
+    def test_code_quality_scoring(self):
+        # Short snippet
+        score_short = _score_code_quality("x")
+        self.assertLessEqual(score_short, 5.0)
+
+        # Substantial code
+        code = (
+            "def calculate_sum(numbers):\n"
+            "    total = 0\n"
+            "    for n in numbers:\n"
+            "        total += n\n"
+            "    return total\n"
+            "\n"
+            "result = calculate_sum([1, 2, 3])\n"
+        )
+        score_good = _score_code_quality(code)
+        self.assertGreater(score_good, score_short)
+        self.assertGreaterEqual(score_good, 0.0)
+        self.assertLessEqual(score_good, 10.0)
+
+
+# ============================================================================
+# Class 7: TestEpubTables
+# ============================================================================
+
+
+class TestEpubTables(unittest.TestCase):
+    """Test table extraction and rendering."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_tables_in_reference_files(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        converter.extracted_data = _make_sample_extracted_data(include_tables=True)
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test" / "references"
+        found_table = False
+        for f in refs_dir.glob("*.md"):
+            if f.name == "index.md":
+                continue
+            content = f.read_text()
+            if "| Name | Value |" in content:
+                found_table = True
+                break
+        self.assertTrue(found_table)
+
+    def test_table_with_headers(self):
+        from bs4 import BeautifulSoup
+
+        html = (
+            "<table><thead><tr><th>Name</th><th>Age</th></tr></thead>"
+            "<tbody><tr><td>Alice</td><td>30</td></tr></tbody></table>"
+        )
+        soup = BeautifulSoup(html, "html.parser")
+        table = soup.find("table")
+        result = _extract_table_from_html(table)
+        self.assertIsNotNone(result)
+        self.assertEqual(result["headers"], ["Name", "Age"])
+        self.assertEqual(result["rows"], [["Alice", "30"]])
+
+    def test_table_no_thead(self):
+        from bs4 import BeautifulSoup
+
+        html = (
+            "<table><tr><td>Header1</td><td>Header2</td></tr>"
+            "<tr><td>Val1</td><td>Val2</td></tr></table>"
+        )
+        soup = BeautifulSoup(html, "html.parser")
+        table = soup.find("table")
+        result = _extract_table_from_html(table)
+        self.assertIsNotNone(result)
+        self.assertEqual(result["headers"], ["Header1", "Header2"])
+        self.assertEqual(result["rows"], [["Val1", "Val2"]])
+
+    def test_empty_table(self):
+        from bs4 import BeautifulSoup
+
+        html = "<table></table>"
+        soup = BeautifulSoup(html, "html.parser")
+        table = soup.find("table")
+        result = _extract_table_from_html(table)
+        self.assertIsNone(result)
+
+    def test_table_with_colspan_rowspan(self):
+        from bs4 import BeautifulSoup
+
+        html = (
+            "<table><tr><th>H1</th><th colspan='2'>H2</th></tr>"
+            "<tr><td>A</td><td rowspan='2'>B</td><td>C</td></tr>"
+            "<tr><td>D</td><td>E</td></tr></table>"
+        )
+        soup = BeautifulSoup(html, "html.parser")
+        table = soup.find("table")
+        # Should not crash
+        result = _extract_table_from_html(table)
+        self.assertIsNotNone(result)
+
+
+# ============================================================================
+# Class 8: TestEpubImages
+# ============================================================================
+
+
+class TestEpubImages(unittest.TestCase):
+    """Test image extraction and handling."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_images_saved_to_assets(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data(include_images=True)
+        converter.extracted_data = data
+
+        converter.build_skill()
+
+        assets_dir = Path(self.temp_dir) / "test" / "assets"
+        self.assertTrue(assets_dir.exists())
+
+    def test_image_references_in_markdown(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data(include_images=True)
+        converter.extracted_data = data
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test" / "references"
+        found_img_ref = False
+        for f in refs_dir.glob("*.md"):
+            if f.name == "index.md":
+                continue
+            content = f.read_text()
+            if "![Image" in content and "../assets/" in content:
+                found_img_ref = True
+                break
+        self.assertTrue(found_img_ref)
+
+    def test_image_with_zero_bytes(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data()
+        # Add image with empty data
+        data["pages"][0]["images"] = [{"index": 0, "data": b"", "width": 0, "height": 0}]
+        converter.extracted_data = data
+
+        # Should not crash
+        converter.build_skill()
+
+    def test_svg_images_handled(self):
+        from bs4 import BeautifulSoup
+
+        html = '<img src="diagram.svg" width="200" height="100"/>'
+        soup = BeautifulSoup(f"<div>{html}</div>", "html.parser")
+        elements = list(soup.find("div").children)
+        section = _build_section(1, "Test", "h1", elements)
+        self.assertEqual(len(section["images"]), 1)
+
+    def test_image_filename_conflicts(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data()
+        # Multiple images with unique indexes
+        data["pages"][0]["images"] = [
+            {"index": 0, "data": b"\x89PNG\r\n\x1a\n", "width": 50, "height": 50},
+            {"index": 1, "data": b"\x89PNG\r\n\x1a\n", "width": 50, "height": 50},
+        ]
+        converter.extracted_data = data
+
+        converter.build_skill()
+
+        assets_dir = Path(self.temp_dir) / "test" / "assets"
+        png_files = list(assets_dir.glob("*.png"))
+        self.assertGreaterEqual(len(png_files), 2)
+
+    def test_cover_image_identified(self):
+        from bs4 import BeautifulSoup
+
+        html = '<img src="cover.jpg" width="600" height="900"/>'
+        soup = BeautifulSoup(f"<div>{html}</div>", "html.parser")
+        elements = list(soup.find("div").children)
+        section = _build_section(1, "Cover", "h1", elements)
+        self.assertEqual(len(section["images"]), 1)
+
+    def test_many_images(self):
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data()
+        data["pages"][0]["images"] = [
+            {"index": i, "data": b"\x89PNG\r\n\x1a\n", "width": 10, "height": 10}
+            for i in range(100)
+        ]
+        converter.extracted_data = data
+
+        # Should handle 100+ images without error
+        converter.build_skill()
+
+
+# ============================================================================
+# Class 9: TestEpubErrorHandling
+# ============================================================================
+
+
+class TestEpubErrorHandling(unittest.TestCase):
+    """Test error handling for various failure scenarios."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        if not EPUB_AVAILABLE:
+            self.skipTest("ebooklib not installed")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_missing_epub_file_raises_error(self):
+        config = {"name": "test", "epub_path": "/nonexistent/path/test.epub"}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(FileNotFoundError):
+            converter.extract_epub()
+
+    def test_not_a_file_raises_error(self):
+        config = {"name": "test", "epub_path": self.temp_dir}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises((ValueError, FileNotFoundError)):
+            converter.extract_epub()
+
+    def test_not_epub_extension_raises_error(self):
+        txt_file = os.path.join(self.temp_dir, "test.txt")
+        Path(txt_file).write_text("not an epub")
+        config = {"name": "test", "epub_path": txt_file}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(ValueError):
+            converter.extract_epub()
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_corrupted_epub_raises_error(self, mock_isfile, mock_exists, mock_epub):
+        mock_epub.read_epub.side_effect = Exception("Bad ZIP file")
+        config = {"name": "test", "epub_path": "corrupted.epub"}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(ValueError):
+            converter.extract_epub()
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_drm_protected_raises_error(self, mock_isfile, mock_exists, mock_epub):
+        book = MagicMock()
+        enc_item = MagicMock()
+        enc_item.file_name = "META-INF/encryption.xml"
+        enc_item.get_content.return_value = (
+            b'<encryption xmlns="http://ns.adobe.com/adept"><EncryptedData/></encryption>'
+        )
+        book.get_items.return_value = [enc_item]
+        book.get_metadata.return_value = []
+        mock_epub.read_epub.return_value = book
+
+        config = {"name": "test", "epub_path": "drm.epub"}
+        converter = EpubToSkillConverter(config)
+        with self.assertRaises(RuntimeError) as ctx:
+            converter.extract_epub()
+        self.assertIn("DRM", str(ctx.exception))
+
+    def test_ebooklib_not_installed_error(self):
+        from skill_seekers.cli.epub_scraper import _check_epub_deps
+
+        with patch("skill_seekers.cli.epub_scraper.EPUB_AVAILABLE", False):
+            with self.assertRaises(RuntimeError) as ctx:
+                _check_epub_deps()
+            self.assertIn("ebooklib", str(ctx.exception))
+            self.assertIn("pip install", str(ctx.exception))
+
+    @patch("skill_seekers.cli.epub_scraper.epub")
+    @patch("skill_seekers.cli.epub_scraper.os.path.exists", return_value=True)
+    @patch("skill_seekers.cli.epub_scraper.os.path.isfile", return_value=True)
+    def test_malformed_xhtml_handled_gracefully(self, mock_isfile, mock_exists, mock_epub):
+        """Malformed XHTML should not crash thanks to BeautifulSoup tolerant parsing."""
+        book = MagicMock()
+        item = MagicMock()
+        item.get_type.return_value = ebooklib.ITEM_DOCUMENT
+        item.get_content.return_value = b"<html><body><h1>Test<p>Unclosed tags <div>and more</body>"
+        book.spine = [("ch1", "yes")]
+        book.get_item_with_id = lambda _x: item
+        book.get_metadata.return_value = []
+        book.get_items_of_type = lambda _t: []
+        book.get_items = lambda: [item]
+        mock_epub.read_epub.return_value = book
+
+        config = {"name": "test", "epub_path": "malformed.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        # Should not crash
+        result = converter.extract_epub()
+        self.assertTrue(result)
+
+
+# ============================================================================
+# Class 10: TestEpubJSONWorkflow
+# ============================================================================
+
+
+class TestEpubJSONWorkflow(unittest.TestCase):
+    """Test JSON-based workflow (load/save extracted data)."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_load_extracted_json(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+
+        data = _make_sample_extracted_data()
+        json_path = os.path.join(self.temp_dir, "test_extracted.json")
+        with open(json_path, "w") as f:
+            json.dump(data, f)
+
+        result = converter.load_extracted_data(json_path)
+        self.assertTrue(result)
+        self.assertIsNotNone(converter.extracted_data)
+        self.assertEqual(converter.extracted_data["total_sections"], 2)
+
+    def test_build_from_json(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        data = _make_sample_extracted_data()
+        json_path = os.path.join(self.temp_dir, "test_extracted.json")
+        with open(json_path, "w") as f:
+            json.dump(data, f)
+
+        converter.load_extracted_data(json_path)
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+
+    def test_json_round_trip(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        original_data = _make_sample_extracted_data(include_code=True, include_tables=True)
+
+        # Save
+        json_path = os.path.join(self.temp_dir, "test_extracted.json")
+        with open(json_path, "w") as f:
+            json.dump(original_data, f, default=str)
+
+        # Load
+        converter.load_extracted_data(json_path)
+
+        self.assertEqual(
+            converter.extracted_data["total_sections"],
+            original_data["total_sections"],
+        )
+        self.assertEqual(
+            converter.extracted_data["total_code_blocks"],
+            original_data["total_code_blocks"],
+        )
+
+    def test_load_invalid_json(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+
+        bad_json = os.path.join(self.temp_dir, "bad.json")
+        Path(bad_json).write_text("{invalid json content")
+
+        with self.assertRaises(json.JSONDecodeError):
+            converter.load_extracted_data(bad_json)
+
+    def test_load_nonexistent_json(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+
+        with self.assertRaises(FileNotFoundError):
+            converter.load_extracted_data("/nonexistent/path/data.json")
+
+    def test_json_with_missing_fields(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+
+        # Minimal JSON — missing optional fields
+        minimal_data = {
+            "pages": [
+                {
+                    "section_number": 1,
+                    "heading": "Test",
+                    "heading_level": "h1",
+                    "text": "Content",
+                    "headings": [],
+                    "code_samples": [],
+                    "tables": [],
+                    "images": [],
+                }
+            ],
+            "metadata": {"title": "Test"},
+        }
+        json_path = os.path.join(self.temp_dir, "minimal.json")
+        with open(json_path, "w") as f:
+            json.dump(minimal_data, f)
+
+        converter.load_extracted_data(json_path)
+        # Should not crash when building
+        converter.build_skill()
+
+
+# ============================================================================
+# Class 11: TestEpubCLIArguments
+# ============================================================================
+
+
+class TestEpubCLIArguments(unittest.TestCase):
+    """Test CLI argument parsing."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+
+    def _parse_args(self, args_list):
+        import argparse
+
+        from skill_seekers.cli.arguments.epub import add_epub_arguments
+
+        parser = argparse.ArgumentParser()
+        add_epub_arguments(parser)
+        return parser.parse_args(args_list)
+
+    def test_epub_flag_accepted(self):
+        args = self._parse_args(["--epub", "book.epub"])
+        self.assertEqual(args.epub, "book.epub")
+
+    def test_from_json_flag_accepted(self):
+        args = self._parse_args(["--from-json", "data.json"])
+        self.assertEqual(args.from_json, "data.json")
+
+    def test_name_flag_accepted(self):
+        args = self._parse_args(["--epub", "book.epub", "--name", "mybook"])
+        self.assertEqual(args.name, "mybook")
+
+    def test_enhance_level_default_zero(self):
+        args = self._parse_args(["--epub", "book.epub"])
+        self.assertEqual(args.enhance_level, 0)
+
+    def test_dry_run_flag(self):
+        args = self._parse_args(["--epub", "book.epub", "--dry-run"])
+        self.assertTrue(args.dry_run)
+
+    def test_no_args_accepted(self):
+        # Parser itself doesn't enforce --epub or --from-json — main() does
+        args = self._parse_args([])
+        self.assertIsNone(getattr(args, "epub", None))
+
+    def test_verbose_flag(self):
+        args = self._parse_args(["--epub", "book.epub", "--verbose"])
+        self.assertTrue(args.verbose)
+
+    def test_quiet_flag(self):
+        args = self._parse_args(["--epub", "book.epub", "--quiet"])
+        self.assertTrue(args.quiet)
+
+
+# ============================================================================
+# Class 12: TestEpubHelperFunctions
+# ============================================================================
+
+
+class TestEpubHelperFunctions(unittest.TestCase):
+    """Test module-level helper functions."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+
+    def test_infer_description_from_metadata_description(self):
+        metadata = {"description": "A comprehensive guide to testing software"}
+        result = infer_description_from_epub(metadata)
+        self.assertTrue(result.startswith("Use when"))
+        self.assertIn("testing", result.lower())
+
+    def test_infer_description_from_metadata_title(self):
+        metadata = {"title": "Programming Rust, 2nd Edition"}
+        result = infer_description_from_epub(metadata)
+        self.assertIn("programming rust", result.lower())
+
+    def test_infer_description_fallback(self):
+        result = infer_description_from_epub(name="mybook")
+        self.assertIn("mybook", result)
+
+    def test_infer_description_empty_metadata(self):
+        result = infer_description_from_epub({})
+        self.assertEqual(result, "Use when referencing this documentation")
+
+    def test_score_code_quality_ranges(self):
+        self.assertEqual(_score_code_quality(""), 0.0)
+
+        score = _score_code_quality("x = 1")
+        self.assertGreaterEqual(score, 0.0)
+        self.assertLessEqual(score, 10.0)
+
+        # Long code with functions scores higher
+        long_code = "\n".join([f"def func_{i}():" for i in range(15)] + ["    return True"])
+        score_long = _score_code_quality(long_code)
+        self.assertGreater(score_long, score)
+
+    def test_sanitize_filename(self):
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        self.assertEqual(converter._sanitize_filename("Hello World!"), "hello_world")
+        self.assertEqual(converter._sanitize_filename("my-file_name"), "my_file_name")
+        self.assertEqual(
+            converter._sanitize_filename("Test: Special & Chars"), "test_special_chars"
+        )
+
+
+# ============================================================================
+# Class 13: TestEpubSourceDetection
+# ============================================================================
+
+
+class TestEpubSourceDetection(unittest.TestCase):
+    """Test source detection for EPUB files."""
+
+    def setUp(self):
+        try:
+            from skill_seekers.cli.source_detector import SourceDetector
+
+            self.SourceDetector = SourceDetector
+        except ImportError:
+            self.skipTest("source_detector not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_epub_detected_as_epub_type(self):
+        result = self.SourceDetector.detect("test.epub")
+        self.assertEqual(result.type, "epub")
+
+    def test_epub_suggested_name(self):
+        result = self.SourceDetector.detect("my-ebook.epub")
+        self.assertEqual(result.suggested_name, "my-ebook")
+
+    def test_epub_validation_missing_file(self):
+        result = self.SourceDetector.detect("/nonexistent/book.epub")
+        with self.assertRaises(ValueError):
+            self.SourceDetector.validate_source(result)
+
+    def test_epub_validation_not_a_file(self):
+        result = self.SourceDetector.detect(self.temp_dir + ".epub")
+        # Path doesn't end with .epub but let's test a directory that would be detected
+        dir_path = os.path.join(self.temp_dir, "test.epub")
+        os.makedirs(dir_path)  # Create a directory with .epub name
+        result = self.SourceDetector.detect(dir_path)
+        with self.assertRaises(ValueError):
+            self.SourceDetector.validate_source(result)
+
+    def test_epub_with_path(self):
+        result = self.SourceDetector.detect("./books/test.epub")
+        self.assertEqual(result.type, "epub")
+        self.assertEqual(result.parsed["file_path"], "./books/test.epub")
+
+    def test_pdf_still_detected(self):
+        """Regression test: .pdf files still detected as pdf type."""
+        result = self.SourceDetector.detect("document.pdf")
+        self.assertEqual(result.type, "pdf")
+
+
+# ============================================================================
+# Class 14: TestEpubEdgeCases
+# ============================================================================
+
+
+class TestEpubEdgeCases(unittest.TestCase):
+    """Test edge cases per W3C EPUB 3.3 spec."""
+
+    def setUp(self):
+        if not IMPORT_OK:
+            self.skipTest("epub_scraper not importable")
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_epub_no_toc(self):
+        """EPUB without TOC should still extract using spine order."""
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+
+    def test_epub_empty_chapters(self):
+        """Chapters with no text content handled gracefully."""
+        # Empty body — no elements to process
+        section = _build_section(1, "Empty", "h1", [])
+        self.assertEqual(section["text"], "")
+        self.assertEqual(section["code_samples"], [])
+
+    def test_epub_single_chapter(self):
+        """Single chapter produces valid output."""
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        converter.extracted_data = _make_sample_extracted_data(num_sections=1)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+        content = skill_md.read_text()
+        self.assertIn("Chapter 1", content)
+
+    def test_epub_unicode_content(self):
+        """CJK, Arabic, Cyrillic, emoji text preserved."""
+        from bs4 import BeautifulSoup
+
+        html = "<p>\u4f60\u597d\u4e16\u754c \u041f\u0440\u0438\u0432\u0435\u0442 \U0001f600</p>"
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Unicode", "h1", elements)
+        self.assertIn("\u4f60\u597d", section["text"])
+        self.assertIn("\U0001f600", section["text"])
+
+    def test_epub_large_section_count(self):
+        """100+ sections processed without error."""
+        config = {"name": "test", "epub_path": "test.epub"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        converter.extracted_data = _make_sample_extracted_data(num_sections=100)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+
+    def test_epub_nested_headings(self):
+        """h3/h4/h5/h6 become sub-headings within sections."""
+        from bs4 import BeautifulSoup
+
+        html = (
+            "<h3>Sub-section A</h3>"
+            "<p>Content A</p>"
+            "<h4>Sub-sub-section B</h4>"
+            "<p>Content B</p>"
+            "<h5>Deep heading</h5>"
+            "<h6>Deepest heading</h6>"
+        )
+        soup = BeautifulSoup(html, "html.parser")
+        elements = list(soup.children)
+        section = _build_section(1, "Main", "h1", elements)
+        self.assertEqual(len(section["headings"]), 4)
+        self.assertEqual(section["headings"][0]["level"], "h3")
+        self.assertEqual(section["headings"][0]["text"], "Sub-section A")
+        self.assertEqual(section["headings"][3]["level"], "h6")
+
+    def test_fixed_layout_detected(self):
+        """Fixed-layout EPUB — we extract whatever text exists."""
+        config = {"name": "test"}
+        converter = EpubToSkillConverter(config)
+        converter.skill_dir = os.path.join(self.temp_dir, "test")
+        converter.data_file = os.path.join(self.temp_dir, "test_extracted.json")
+        data = _make_sample_extracted_data(num_sections=1)
+        data["pages"][0]["text"] = "Some text from fixed-layout EPUB"
+        converter.extracted_data = data
+
+        converter.build_skill()
+        refs_dir = Path(self.temp_dir) / "test" / "references"
+        found = False
+        for f in refs_dir.glob("*.md"):
+            if "fixed-layout" in f.read_text():
+                found = True
+                break
+        self.assertTrue(found)
+
+    def test_epub2_vs_epub3(self):
+        """Both EPUB 2 and EPUB 3 use the same code path — verify section building works."""
+        from bs4 import BeautifulSoup
+
+        # EPUB 2 style (simpler XHTML)
+        html2 = "<p>EPUB 2 content</p>"
+        soup2 = BeautifulSoup(html2, "html.parser")
+        section2 = _build_section(1, "EPUB 2 Chapter", "h1", list(soup2.children))
+        self.assertIn("EPUB 2 content", section2["text"])
+
+        # EPUB 3 style (HTML5-ish XHTML)
+        html3 = "<section><p>EPUB 3 content</p></section>"
+        soup3 = BeautifulSoup(html3, "html.parser")
+        section3 = _build_section(1, "EPUB 3 Chapter", "h1", list(soup3.children))
+        self.assertIn("EPUB 3 content", section3["text"])
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+    unittest.main()
diff --git a/uv.lock b/uv.lock
index c0808ee..d1cd30d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1078,6 +1078,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" },
 ]
 
+[[package]]
+name = "ebooklib"
+version = "0.20"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "lxml" },
+    { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/77/85/322e8882a582d4b707220d1929cfb74c125f2ba513991edbce40dbc462de/ebooklib-0.20.tar.gz", hash = "sha256:35e2f9d7d39907be8d39ae2deb261b19848945903ae3dbb6577b187ead69e985", size = 127066, upload-time = "2025-10-26T20:56:20.968Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bf/ee/aa015c5de8b0dc42a8e507eae8c2de5d1c0e068c896858fec6d502402ed6/ebooklib-0.20-py3-none-any.whl", hash = "sha256:fff5322517a37e31c972d27be7d982cc3928c16b3dcc5fd7e8f7c0f5d7bcf42b", size = 40995, upload-time = "2025-10-26T20:56:19.104Z" },
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.3.1"
@@ -5609,6 +5622,7 @@ all = [
     { name = "azure-storage-blob" },
     { name = "boto3" },
     { name = "chromadb" },
+    { name = "ebooklib" },
     { name = "fastapi" },
     { name = "google-cloud-storage" },
     { name = "google-generativeai" },
@@ -5657,6 +5671,9 @@ embedding = [
     { name = "uvicorn" },
     { name = "voyageai" },
 ]
+epub = [
+    { name = "ebooklib" },
+]
 gcs = [
     { name = "google-cloud-storage" },
 ]
@@ -5737,6 +5754,8 @@ requires-dist = [
     { name = "chromadb", marker = "extra == 'chroma'", specifier = ">=0.4.0" },
     { name = "chromadb", marker = "extra == 'rag-upload'", specifier = ">=0.4.0" },
     { name = "click", specifier = ">=8.3.0" },
+    { name = "ebooklib", marker = "extra == 'all'", specifier = ">=0.18" },
+    { name = "ebooklib", marker = "extra == 'epub'", specifier = ">=0.18" },
     { name = "fastapi", marker = "extra == 'all'", specifier = ">=0.109.0" },
     { name = "fastapi", marker = "extra == 'embedding'", specifier = ">=0.109.0" },
     { name = "faster-whisper", marker = "extra == 'video-full'", specifier = ">=1.0.0" },
@@ -5808,7 +5827,7 @@ requires-dist = [
     { name = "yt-dlp", marker = "extra == 'video'", specifier = ">=2024.12.0" },
     { name = "yt-dlp", marker = "extra == 'video-full'", specifier = ">=2024.12.0" },
 ]
-provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "video", "video-full", "chroma", "weaviate", "sentence-transformers", "pinecone", "rag-upload", "all-cloud", "embedding", "all"]
+provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "epub", "video", "video-full", "chroma", "weaviate", "sentence-transformers", "pinecone", "rag-upload", "all-cloud", "embedding", "all"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -6165,6 +6184,13 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467, upload-time = "2026-02-10T21:44:48.711Z" },
     { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" },
     { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" },
+    { url = "https://files.pythonhosted.org/packages/16/ee/efbd56687be60ef9af0c9c0ebe106964c07400eade5b0af8902a1d8cd58c/torch-2.10.0-3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a1ff626b884f8c4e897c4c33782bdacdff842a165fee79817b1dd549fdda1321", size = 915510070, upload-time = "2026-03-11T14:16:39.386Z" },
+    { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691, upload-time = "2026-03-11T14:15:43.147Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time = "2026-03-11T14:17:44.422Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474, upload-time = "2026-03-11T14:15:13.666Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882, upload-time = "2026-03-11T14:14:46.311Z" },
     { url = "https://files.pythonhosted.org/packages/0c/1a/c61f36cfd446170ec27b3a4984f072fd06dab6b5d7ce27e11adb35d6c838/torch-2.10.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5276fa790a666ee8becaffff8acb711922252521b28fbce5db7db5cf9cb2026d", size = 145992962, upload-time = "2026-01-21T16:24:14.04Z" },
     { url = "https://files.pythonhosted.org/packages/b5/60/6662535354191e2d1555296045b63e4279e5a9dbad49acf55a5d38655a39/torch-2.10.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aaf663927bcd490ae971469a624c322202a2a1e68936eb952535ca4cd3b90444", size = 915599237, upload-time = "2026-01-21T16:23:25.497Z" },
     { url = "https://files.pythonhosted.org/packages/40/b8/66bbe96f0d79be2b5c697b2e0b187ed792a15c6c4b8904613454651db848/torch-2.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:a4be6a2a190b32ff5c8002a0977a25ea60e64f7ba46b1be37093c141d9c49aeb", size = 113720931, upload-time = "2026-01-21T16:24:23.743Z" },