feat: add 10 new skill source types (17 total) with full pipeline integration

Add Jupyter Notebook, Local HTML, OpenAPI/Swagger, AsciiDoc, PowerPoint,
RSS/Atom, Man Pages, Confluence, Notion, and Slack/Discord Chat as new
skill source types. Each type is fully integrated across:

- Standalone CLI commands (skill-seekers <type>)
- Auto-detection via 'skill-seekers create' (file extension + content sniffing)
- Unified multi-source configs (scraped_data, dispatch, config validation)
- Unified skill builder (generic merge + source-attributed synthesis)
- MCP server (scrape_generic tool with per-type flag mapping)
- pyproject.toml (entry points, optional deps, [all] group)

Also fixes: EPUB unified pipeline gap, missing word/video config validators,
OpenAPI yaml import guard, MCP flag mismatch for all 10 types, and stale
docstrings. Also adds 77 integration tests and a complex-merge workflow.

Author: yusyus
Date: 2026-03-15 15:30:15 +03:00
Commit: 53b911b697 (parent: 64403a3686)
50 changed files with 20193 additions and 856 deletions


@@ -0,0 +1,68 @@
"""AsciiDoc command argument definitions.
This module defines ALL arguments for the asciidoc command in ONE place.
Both asciidoc_scraper.py (standalone) and parsers/asciidoc_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# AsciiDoc-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
ASCIIDOC_ARGUMENTS: dict[str, dict[str, Any]] = {
"asciidoc_path": {
"flags": ("--asciidoc-path",),
"kwargs": {
"type": str,
"help": "Path to AsciiDoc file or directory containing .adoc files",
"metavar": "PATH",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_asciidoc_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all asciidoc command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds AsciiDoc-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for AsciiDoc.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for AsciiDoc
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for AsciiDoc), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# AsciiDoc-specific args
for arg_name, arg_def in ASCIIDOC_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)
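
The `parser._actions` loop above mutates argparse internals; a minimal standalone sketch of the same default-override pattern (note `_actions` is private argparse API, so this is deliberately fragile):

```python
import argparse

# Register a shared flag with one default, then lower it for a specific
# command by mutating the Action object before parsing.
parser = argparse.ArgumentParser()
parser.add_argument("--enhance-level", type=int, default=1, dest="enhance_level")

for action in parser._actions:
    if getattr(action, "dest", None) == "enhance_level":
        action.default = 0  # argparse reads action.default at parse time

args = parser.parse_args([])
print(args.enhance_level)  # 0
```

This works because argparse resolves defaults during `parse_args()`, not at registration time, so mutating the Action beforehand is sufficient.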


@@ -0,0 +1,102 @@
"""Chat command argument definitions.
This module defines ALL arguments for the chat command in ONE place.
Both chat_scraper.py (standalone) and parsers/chat_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# Chat-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
CHAT_ARGUMENTS: dict[str, dict[str, Any]] = {
"export_path": {
"flags": ("--export-path",),
"kwargs": {
"type": str,
"help": "Path to chat export directory or file",
"metavar": "PATH",
},
},
"platform": {
"flags": ("--platform",),
"kwargs": {
"type": str,
"choices": ["slack", "discord"],
"default": "slack",
"help": "Chat platform type (default: slack)",
},
},
"token": {
"flags": ("--token",),
"kwargs": {
"type": str,
"help": "API token for chat platform authentication",
"metavar": "TOKEN",
},
},
"channel": {
"flags": ("--channel",),
"kwargs": {
"type": str,
"help": "Channel name or ID to extract from",
"metavar": "CHANNEL",
},
},
"max_messages": {
"flags": ("--max-messages",),
"kwargs": {
"type": int,
"default": 10000,
"help": "Maximum number of messages to extract (default: 10000)",
"metavar": "N",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_chat_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all chat command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds Chat-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for Chat.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for Chat
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for Chat), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# Chat-specific args
for arg_name, arg_def in CHAT_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,109 @@
"""Confluence command argument definitions.
This module defines ALL arguments for the confluence command in ONE place.
Both confluence_scraper.py (standalone) and parsers/confluence_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# Confluence-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
CONFLUENCE_ARGUMENTS: dict[str, dict[str, Any]] = {
"base_url": {
"flags": ("--base-url",),
"kwargs": {
"type": str,
"help": "Confluence instance base URL",
"metavar": "URL",
},
},
"space_key": {
"flags": ("--space-key",),
"kwargs": {
"type": str,
"help": "Confluence space key to extract from",
"metavar": "KEY",
},
},
"export_path": {
"flags": ("--export-path",),
"kwargs": {
"type": str,
"help": "Path to Confluence HTML/XML export directory",
"metavar": "PATH",
},
},
"username": {
"flags": ("--username",),
"kwargs": {
"type": str,
"help": "Confluence username for API authentication",
"metavar": "USER",
},
},
"token": {
"flags": ("--token",),
"kwargs": {
"type": str,
"help": "Confluence API token for authentication",
"metavar": "TOKEN",
},
},
"max_pages": {
"flags": ("--max-pages",),
"kwargs": {
"type": int,
"default": 500,
"help": "Maximum number of pages to extract (default: 500)",
"metavar": "N",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_confluence_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all confluence command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds Confluence-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for Confluence.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for Confluence
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for Confluence), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# Confluence-specific args
for arg_name, arg_def in CONFLUENCE_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -549,6 +549,121 @@ CONFIG_ARGUMENTS: dict[str, dict[str, Any]] = {
# For unified config files, use `skill-seekers unified --fresh` directly.
}
# New source type arguments (v3.2.0+)
# These are minimal dicts since most flags are handled by each scraper's own argument module.
# The create command only needs the primary input flag for routing.
JUPYTER_ARGUMENTS: dict[str, dict[str, Any]] = {
"notebook": {
"flags": ("--notebook",),
"kwargs": {"type": str, "help": "Jupyter Notebook file path (.ipynb)", "metavar": "PATH"},
},
}
HTML_ARGUMENTS: dict[str, dict[str, Any]] = {
"html_path": {
"flags": ("--html-path",),
"kwargs": {"type": str, "help": "Local HTML file or directory path", "metavar": "PATH"},
},
}
OPENAPI_ARGUMENTS: dict[str, dict[str, Any]] = {
"spec": {
"flags": ("--spec",),
"kwargs": {"type": str, "help": "OpenAPI/Swagger spec file path", "metavar": "PATH"},
},
"spec_url": {
"flags": ("--spec-url",),
"kwargs": {"type": str, "help": "OpenAPI/Swagger spec URL", "metavar": "URL"},
},
}
ASCIIDOC_ARGUMENTS: dict[str, dict[str, Any]] = {
"asciidoc_path": {
"flags": ("--asciidoc-path",),
"kwargs": {"type": str, "help": "AsciiDoc file or directory path", "metavar": "PATH"},
},
}
PPTX_ARGUMENTS: dict[str, dict[str, Any]] = {
"pptx": {
"flags": ("--pptx",),
"kwargs": {"type": str, "help": "PowerPoint file path (.pptx)", "metavar": "PATH"},
},
}
RSS_ARGUMENTS: dict[str, dict[str, Any]] = {
"feed_url": {
"flags": ("--feed-url",),
"kwargs": {"type": str, "help": "RSS/Atom feed URL", "metavar": "URL"},
},
"feed_path": {
"flags": ("--feed-path",),
"kwargs": {"type": str, "help": "RSS/Atom feed file path", "metavar": "PATH"},
},
}
MANPAGE_ARGUMENTS: dict[str, dict[str, Any]] = {
"man_names": {
"flags": ("--man-names",),
"kwargs": {
"type": str,
"help": "Comma-separated man page names (e.g., 'git,curl')",
"metavar": "NAMES",
},
},
"man_path": {
"flags": ("--man-path",),
"kwargs": {"type": str, "help": "Directory of man page files", "metavar": "PATH"},
},
}
CONFLUENCE_ARGUMENTS: dict[str, dict[str, Any]] = {
"conf_base_url": {
"flags": ("--conf-base-url",),
"kwargs": {"type": str, "help": "Confluence base URL", "metavar": "URL"},
},
"space_key": {
"flags": ("--space-key",),
"kwargs": {"type": str, "help": "Confluence space key", "metavar": "KEY"},
},
"conf_export_path": {
"flags": ("--conf-export-path",),
"kwargs": {"type": str, "help": "Confluence export directory", "metavar": "PATH"},
},
}
NOTION_ARGUMENTS: dict[str, dict[str, Any]] = {
"database_id": {
"flags": ("--database-id",),
"kwargs": {"type": str, "help": "Notion database ID", "metavar": "ID"},
},
"page_id": {
"flags": ("--page-id",),
"kwargs": {"type": str, "help": "Notion page ID", "metavar": "ID"},
},
"notion_export_path": {
"flags": ("--notion-export-path",),
"kwargs": {"type": str, "help": "Notion export directory", "metavar": "PATH"},
},
}
CHAT_ARGUMENTS: dict[str, dict[str, Any]] = {
"chat_export_path": {
"flags": ("--chat-export-path",),
"kwargs": {"type": str, "help": "Slack/Discord export directory", "metavar": "PATH"},
},
"platform": {
"flags": ("--platform",),
"kwargs": {
"type": str,
"choices": ["slack", "discord"],
"default": "slack",
"help": "Chat platform (default: slack)",
},
},
}
# =============================================================================
# TIER 3: ADVANCED/RARE ARGUMENTS
# =============================================================================
@@ -613,6 +728,17 @@ def get_source_specific_arguments(source_type: str) -> dict[str, dict[str, Any]]
"epub": EPUB_ARGUMENTS,
"video": VIDEO_ARGUMENTS,
"config": CONFIG_ARGUMENTS,
# New source types (v3.2.0+)
"jupyter": JUPYTER_ARGUMENTS,
"html": HTML_ARGUMENTS,
"openapi": OPENAPI_ARGUMENTS,
"asciidoc": ASCIIDOC_ARGUMENTS,
"pptx": PPTX_ARGUMENTS,
"rss": RSS_ARGUMENTS,
"manpage": MANPAGE_ARGUMENTS,
"confluence": CONFLUENCE_ARGUMENTS,
"notion": NOTION_ARGUMENTS,
"chat": CHAT_ARGUMENTS,
}
return source_args.get(source_type, {})
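
The per-type dicts returned here are consumed by a generic registration loop; a self-contained sketch of that data-driven pattern, using an illustrative subset of the RSS table:

```python
import argparse

# A data-driven argument table expanded into parser.add_argument calls
# by one generic loop, instead of hand-written registration per flag.
RSS_ARGS = {
    "feed_url": {"flags": ("--feed-url",), "kwargs": {"type": str, "metavar": "URL"}},
    "max_articles": {"flags": ("--max-articles",), "kwargs": {"type": int, "default": 50}},
}

parser = argparse.ArgumentParser()
for arg_def in RSS_ARGS.values():
    parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])

args = parser.parse_args(["--feed-url", "https://example.com/feed.xml"])
print(args.max_articles)  # 50
```

Keeping the table as data is what lets both the standalone scraper and the unified CLI register identical flags from one definition.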
@@ -703,6 +829,24 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default")
for arg_name, arg_def in CONFIG_ARGUMENTS.items():
parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
# New source types (v3.2.0+)
_NEW_SOURCE_ARGS = {
"jupyter": JUPYTER_ARGUMENTS,
"html": HTML_ARGUMENTS,
"openapi": OPENAPI_ARGUMENTS,
"asciidoc": ASCIIDOC_ARGUMENTS,
"pptx": PPTX_ARGUMENTS,
"rss": RSS_ARGUMENTS,
"manpage": MANPAGE_ARGUMENTS,
"confluence": CONFLUENCE_ARGUMENTS,
"notion": NOTION_ARGUMENTS,
"chat": CHAT_ARGUMENTS,
}
for stype, sargs in _NEW_SOURCE_ARGS.items():
if mode in [stype, "all"]:
for arg_name, arg_def in sargs.items():
parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
# Add advanced arguments if requested
if mode in ["advanced", "all"]:
for arg_name, arg_def in ADVANCED_ARGUMENTS.items():


@@ -0,0 +1,68 @@
"""HTML command argument definitions.
This module defines ALL arguments for the html command in ONE place.
Both html_scraper.py (standalone) and parsers/html_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# HTML-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
HTML_ARGUMENTS: dict[str, dict[str, Any]] = {
"html_path": {
"flags": ("--html-path",),
"kwargs": {
"type": str,
"help": "Path to HTML file or directory containing HTML files",
"metavar": "PATH",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_html_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all html command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds HTML-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for HTML.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for HTML
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for HTML), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# HTML-specific args
for arg_name, arg_def in HTML_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,68 @@
"""Jupyter Notebook command argument definitions.
This module defines ALL arguments for the jupyter command in ONE place.
Both jupyter_scraper.py (standalone) and parsers/jupyter_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# Jupyter-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
JUPYTER_ARGUMENTS: dict[str, dict[str, Any]] = {
"notebook": {
"flags": ("--notebook",),
"kwargs": {
"type": str,
"help": "Path to .ipynb file or directory containing notebooks",
"metavar": "PATH",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_jupyter_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all jupyter command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds Jupyter-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for Jupyter.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for Jupyter
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for Jupyter), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# Jupyter-specific args
for arg_name, arg_def in JUPYTER_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,84 @@
"""Man page command argument definitions.
This module defines ALL arguments for the manpage command in ONE place.
Both manpage_scraper.py (standalone) and parsers/manpage_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# ManPage-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
MANPAGE_ARGUMENTS: dict[str, dict[str, Any]] = {
"man_names": {
"flags": ("--man-names",),
"kwargs": {
"type": str,
"help": "Comma-separated list of man page names (e.g., 'ls,grep,find')",
"metavar": "NAMES",
},
},
"man_path": {
"flags": ("--man-path",),
"kwargs": {
"type": str,
"help": "Path to directory containing man page files",
"metavar": "PATH",
},
},
"sections": {
"flags": ("--sections",),
"kwargs": {
"type": str,
"help": "Comma-separated section numbers to include (e.g., '1,3,8')",
"metavar": "SECTIONS",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_manpage_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all manpage command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds ManPage-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for ManPage.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for ManPage
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for ManPage), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# ManPage-specific args
for arg_name, arg_def in MANPAGE_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,101 @@
"""Notion command argument definitions.
This module defines ALL arguments for the notion command in ONE place.
Both notion_scraper.py (standalone) and parsers/notion_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# Notion-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
NOTION_ARGUMENTS: dict[str, dict[str, Any]] = {
"database_id": {
"flags": ("--database-id",),
"kwargs": {
"type": str,
"help": "Notion database ID to extract from",
"metavar": "ID",
},
},
"page_id": {
"flags": ("--page-id",),
"kwargs": {
"type": str,
"help": "Notion page ID to extract from",
"metavar": "ID",
},
},
"export_path": {
"flags": ("--export-path",),
"kwargs": {
"type": str,
"help": "Path to Notion export directory",
"metavar": "PATH",
},
},
"token": {
"flags": ("--token",),
"kwargs": {
"type": str,
"help": "Notion integration token for API authentication",
"metavar": "TOKEN",
},
},
"max_pages": {
"flags": ("--max-pages",),
"kwargs": {
"type": int,
"default": 500,
"help": "Maximum number of pages to extract (default: 500)",
"metavar": "N",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_notion_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all notion command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds Notion-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for Notion.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for Notion
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for Notion), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# Notion-specific args
for arg_name, arg_def in NOTION_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,76 @@
"""OpenAPI command argument definitions.
This module defines ALL arguments for the openapi command in ONE place.
Both openapi_scraper.py (standalone) and parsers/openapi_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# OpenAPI-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
OPENAPI_ARGUMENTS: dict[str, dict[str, Any]] = {
"spec": {
"flags": ("--spec",),
"kwargs": {
"type": str,
"help": "Path to OpenAPI/Swagger spec file",
"metavar": "PATH",
},
},
"spec_url": {
"flags": ("--spec-url",),
"kwargs": {
"type": str,
"help": "URL to OpenAPI/Swagger spec",
"metavar": "URL",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_openapi_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all openapi command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds OpenAPI-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for OpenAPI.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for OpenAPI
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for OpenAPI), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# OpenAPI-specific args
for arg_name, arg_def in OPENAPI_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,68 @@
"""PPTX command argument definitions.
This module defines ALL arguments for the pptx command in ONE place.
Both pptx_scraper.py (standalone) and parsers/pptx_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# PPTX-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
PPTX_ARGUMENTS: dict[str, dict[str, Any]] = {
"pptx": {
"flags": ("--pptx",),
"kwargs": {
"type": str,
"help": "Path to PowerPoint file (.pptx)",
"metavar": "PATH",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_pptx_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all pptx command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds PPTX-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for PPTX.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for PPTX
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for PPTX), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# PPTX-specific args
for arg_name, arg_def in PPTX_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)


@@ -0,0 +1,101 @@
"""RSS command argument definitions.
This module defines ALL arguments for the rss command in ONE place.
Both rss_scraper.py (standalone) and parsers/rss_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# RSS-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
RSS_ARGUMENTS: dict[str, dict[str, Any]] = {
"feed_url": {
"flags": ("--feed-url",),
"kwargs": {
"type": str,
"help": "URL of the RSS/Atom feed",
"metavar": "URL",
},
},
"feed_path": {
"flags": ("--feed-path",),
"kwargs": {
"type": str,
"help": "Path to local RSS/Atom feed file",
"metavar": "PATH",
},
},
"follow_links": {
"flags": ("--follow-links",),
"kwargs": {
"action": "store_true",
"default": True,
"help": "Follow article links and extract full content (default: True)",
},
},
"no_follow_links": {
"flags": ("--no-follow-links",),
"kwargs": {
"action": "store_false",
"dest": "follow_links",
"help": "Do not follow article links; use feed summary only",
},
},
"max_articles": {
"flags": ("--max-articles",),
"kwargs": {
"type": int,
"default": 50,
"help": "Maximum number of articles to extract (default: 50)",
"metavar": "N",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
"type": str,
"help": "Build skill from extracted JSON",
"metavar": "FILE",
},
},
}
def add_rss_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all rss command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds RSS-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for RSS.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for RSS
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for RSS), 1=SKILL.md only, "
"2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, "
"otherwise LOCAL (Claude Code)"
)
# RSS-specific args
for arg_name, arg_def in RSS_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
parser.add_argument(*flags, **kwargs)
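
The --follow-links / --no-follow-links pair above is the classic two-action boolean idiom; a standalone sketch of how the two entries cooperate:

```python
import argparse

# store_true and store_false share one dest, so whichever flag appears
# last on the command line wins; with neither, the default (True) holds.
parser = argparse.ArgumentParser()
parser.add_argument("--follow-links", action="store_true", default=True,
                    dest="follow_links")
parser.add_argument("--no-follow-links", action="store_false",
                    dest="follow_links")

print(parser.parse_args([]).follow_links)                     # True
print(parser.parse_args(["--no-follow-links"]).follow_links)  # False
```

On Python 3.9+, `action=argparse.BooleanOptionalAction` generates both flags from a single `--follow-links` definition, which would collapse these two entries into one.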

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -7,6 +7,19 @@ Validates unified config format that supports multiple sources:
- github (repository scraping)
- pdf (PDF document scraping)
- local (local codebase analysis)
- word (Word .docx document scraping)
- video (video transcript/visual extraction)
- epub (EPUB e-book extraction)
- jupyter (Jupyter Notebook extraction)
- html (local HTML file extraction)
- openapi (OpenAPI/Swagger spec extraction)
- asciidoc (AsciiDoc document extraction)
- pptx (PowerPoint presentation extraction)
- confluence (Confluence wiki extraction)
- notion (Notion page extraction)
- rss (RSS/Atom feed extraction)
- manpage (man page extraction)
- chat (Slack/Discord chat export extraction)
Legacy config format support removed in v2.11.0.
All configs must use unified format with 'sources' array.
@@ -27,7 +40,25 @@ class ConfigValidator:
"""
# Valid source types
VALID_SOURCE_TYPES = {"documentation", "github", "pdf", "local", "word", "video"}
VALID_SOURCE_TYPES = {
"documentation",
"github",
"pdf",
"local",
"word",
"video",
"epub",
"jupyter",
"html",
"openapi",
"asciidoc",
"pptx",
"confluence",
"notion",
"rss",
"manpage",
"chat",
}
# Valid merge modes
VALID_MERGE_MODES = {"rule-based", "claude-enhanced"}
@@ -159,6 +190,32 @@ class ConfigValidator:
self._validate_pdf_source(source, index)
elif source_type == "local":
self._validate_local_source(source, index)
elif source_type == "word":
self._validate_word_source(source, index)
elif source_type == "video":
self._validate_video_source(source, index)
elif source_type == "epub":
self._validate_epub_source(source, index)
elif source_type == "jupyter":
self._validate_jupyter_source(source, index)
elif source_type == "html":
self._validate_html_source(source, index)
elif source_type == "openapi":
self._validate_openapi_source(source, index)
elif source_type == "asciidoc":
self._validate_asciidoc_source(source, index)
elif source_type == "pptx":
self._validate_pptx_source(source, index)
elif source_type == "confluence":
self._validate_confluence_source(source, index)
elif source_type == "notion":
self._validate_notion_source(source, index)
elif source_type == "rss":
self._validate_rss_source(source, index)
elif source_type == "manpage":
self._validate_manpage_source(source, index)
elif source_type == "chat":
self._validate_chat_source(source, index)
def _validate_documentation_source(self, source: dict[str, Any], index: int):
"""Validate documentation source configuration."""
@@ -253,12 +310,126 @@ class ConfigValidator:
f"Source {index} (local): Invalid ai_mode '{ai_mode}'. Must be one of {self.VALID_AI_MODES}"
)
def _validate_word_source(self, source: dict[str, Any], index: int):
"""Validate Word document (.docx) source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (word): Missing required field 'path'")
word_path = source["path"]
if not Path(word_path).exists():
logger.warning(f"Source {index} (word): File not found: {word_path}")
def _validate_video_source(self, source: dict[str, Any], index: int):
"""Validate video source configuration."""
has_url = "url" in source
has_path = "path" in source
has_playlist = "playlist" in source
if not has_url and not has_path and not has_playlist:
raise ValueError(
f"Source {index} (video): Missing required field 'url', 'path', or 'playlist'"
)
def _validate_epub_source(self, source: dict[str, Any], index: int):
"""Validate EPUB source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (epub): Missing required field 'path'")
epub_path = source["path"]
if not Path(epub_path).exists():
logger.warning(f"Source {index} (epub): File not found: {epub_path}")
def _validate_jupyter_source(self, source: dict[str, Any], index: int):
"""Validate Jupyter Notebook source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (jupyter): Missing required field 'path'")
nb_path = source["path"]
if not Path(nb_path).exists():
logger.warning(f"Source {index} (jupyter): Path not found: {nb_path}")
def _validate_html_source(self, source: dict[str, Any], index: int):
"""Validate local HTML source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (html): Missing required field 'path'")
html_path = source["path"]
if not Path(html_path).exists():
logger.warning(f"Source {index} (html): Path not found: {html_path}")
def _validate_openapi_source(self, source: dict[str, Any], index: int):
"""Validate OpenAPI/Swagger source configuration."""
if "path" not in source and "url" not in source:
raise ValueError(f"Source {index} (openapi): Missing required field 'path' or 'url'")
if "path" in source and not Path(source["path"]).exists():
logger.warning(f"Source {index} (openapi): File not found: {source['path']}")
def _validate_asciidoc_source(self, source: dict[str, Any], index: int):
"""Validate AsciiDoc source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (asciidoc): Missing required field 'path'")
adoc_path = source["path"]
if not Path(adoc_path).exists():
logger.warning(f"Source {index} (asciidoc): Path not found: {adoc_path}")
def _validate_pptx_source(self, source: dict[str, Any], index: int):
"""Validate PowerPoint source configuration."""
if "path" not in source:
raise ValueError(f"Source {index} (pptx): Missing required field 'path'")
pptx_path = source["path"]
if not Path(pptx_path).exists():
logger.warning(f"Source {index} (pptx): File not found: {pptx_path}")
def _validate_confluence_source(self, source: dict[str, Any], index: int):
"""Validate Confluence source configuration."""
has_url = "url" in source or "base_url" in source
has_path = "path" in source
if not has_url and not has_path:
raise ValueError(
f"Source {index} (confluence): Missing required field 'url'/'base_url' "
f"(for API) or 'path' (for export)"
)
if has_url and "space_key" not in source and "path" not in source:
logger.warning(f"Source {index} (confluence): No 'space_key' specified for API mode")
def _validate_notion_source(self, source: dict[str, Any], index: int):
"""Validate Notion source configuration."""
has_url = "url" in source or "database_id" in source or "page_id" in source
has_path = "path" in source
if not has_url and not has_path:
raise ValueError(
f"Source {index} (notion): Missing required field 'url'/'database_id'/'page_id' "
f"(for API) or 'path' (for export)"
)
def _validate_rss_source(self, source: dict[str, Any], index: int):
"""Validate RSS/Atom feed source configuration."""
if "url" not in source and "path" not in source:
raise ValueError(f"Source {index} (rss): Missing required field 'url' or 'path'")
def _validate_manpage_source(self, source: dict[str, Any], index: int):
"""Validate man page source configuration."""
if "path" not in source and "names" not in source:
raise ValueError(f"Source {index} (manpage): Missing required field 'path' or 'names'")
if "path" in source and not Path(source["path"]).exists():
logger.warning(f"Source {index} (manpage): Path not found: {source['path']}")
def _validate_chat_source(self, source: dict[str, Any], index: int):
"""Validate Slack/Discord chat source configuration."""
has_path = "path" in source
has_api = "token" in source or "webhook_url" in source
has_channel = "channel" in source or "channel_id" in source
if not has_path and not has_api:
raise ValueError(
f"Source {index} (chat): Missing required field 'path' (for export) "
f"or 'token' (for API)"
)
if has_api and not has_channel:
logger.warning(
f"Source {index} (chat): No 'channel' or 'channel_id' specified for API mode"
)
def get_sources_by_type(self, source_type: str) -> list[dict[str, Any]]:
"""
Get all sources of a specific type.
Args:
source_type: 'documentation', 'github', 'pdf', or 'local'
source_type: Any valid source type string
Returns:
List of sources matching the type
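Per the docstring above, the lookup reduces to a filter over the config's `sources` array. A minimal standalone sketch (the real method lives on `ConfigValidator` and reads the parsed config from `self`):

```python
from typing import Any


def get_sources_by_type(
    sources: list[dict[str, Any]], source_type: str
) -> list[dict[str, Any]]:
    """Return all sources whose 'type' field matches source_type."""
    return [s for s in sources if s.get("type") == source_type]


sources = [
    {"type": "jupyter", "path": "a.ipynb"},
    {"type": "html", "path": "site/"},
    {"type": "jupyter", "path": "b.ipynb"},
]
print(len(get_sources_by_type(sources, "jupyter")))  # → 2
```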

File diff suppressed because it is too large Load Diff

View File

@@ -140,6 +140,26 @@ class CreateCommand:
return self._route_video()
elif self.source_info.type == "config":
return self._route_config()
elif self.source_info.type == "jupyter":
return self._route_generic("jupyter_scraper", "--notebook")
elif self.source_info.type == "html":
return self._route_generic("html_scraper", "--html-path")
elif self.source_info.type == "openapi":
return self._route_generic("openapi_scraper", "--spec")
elif self.source_info.type == "asciidoc":
return self._route_generic("asciidoc_scraper", "--asciidoc-path")
elif self.source_info.type == "pptx":
return self._route_generic("pptx_scraper", "--pptx")
elif self.source_info.type == "rss":
return self._route_generic("rss_scraper", "--feed-path")
elif self.source_info.type == "manpage":
return self._route_generic("man_scraper", "--man-path")
elif self.source_info.type == "confluence":
return self._route_generic("confluence_scraper", "--export-path")
elif self.source_info.type == "notion":
return self._route_generic("notion_scraper", "--export-path")
elif self.source_info.type == "chat":
return self._route_generic("chat_scraper", "--export-path")
else:
logger.error(f"Unknown source type: {self.source_info.type}")
return 1
@@ -485,6 +505,40 @@ class CreateCommand:
finally:
sys.argv = original_argv
def _route_generic(self, module_name: str, file_flag: str) -> int:
"""Generic routing for new source types.
Most new source types (jupyter, html, openapi, asciidoc, pptx, rss,
manpage, confluence, notion, chat) follow the same pattern:
import module, build argv with --flag <file_path>, add common args, call main().
Args:
module_name: Python module name under skill_seekers.cli (e.g., "jupyter_scraper")
file_flag: CLI flag for the source file (e.g., "--notebook")
Returns:
Exit code from scraper
"""
import importlib
module = importlib.import_module(f"skill_seekers.cli.{module_name}")
argv = [module_name]
file_path = self.source_info.parsed.get("file_path", "")
if file_path:
argv.extend([file_flag, file_path])
self._add_common_args(argv)
logger.debug(f"Calling {module_name} with argv: {argv}")
original_argv = sys.argv
try:
sys.argv = argv
return module.main()
finally:
sys.argv = original_argv
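The argv-swapping pattern in `_route_generic` can be exercised in isolation. The sketch below substitutes a dummy `main()` for a scraper module's entry point; the flag and path values are illustrative:

```python
import sys


def dummy_main() -> int:
    # Stands in for e.g. skill_seekers.cli.jupyter_scraper.main(),
    # which parses sys.argv itself.
    print(f"invoked with: {sys.argv}")
    return 0


def route(module_main, module_name: str, file_flag: str, file_path: str) -> int:
    """Mirror _route_generic: build argv, swap it in, always restore."""
    argv = [module_name]
    if file_path:
        argv.extend([file_flag, file_path])
    original_argv = sys.argv
    try:
        sys.argv = argv
        return module_main()
    finally:
        sys.argv = original_argv  # restore even if module_main() raises


code = route(dummy_main, "jupyter_scraper", "--notebook", "intro.ipynb")
print(code)  # → 0
```

The `try/finally` is the important part: without it, an exception inside the scraper would leave `sys.argv` pointing at the synthetic argv for the rest of the process.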
def _add_common_args(self, argv: list[str]) -> None:
"""Add truly universal arguments to argv list.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -15,7 +15,17 @@ Commands:
word Extract from Word (.docx) file
epub Extract from EPUB e-book (.epub)
video Extract from video (YouTube or local)
unified Multi-source scraping (docs + GitHub + PDF)
jupyter Extract from Jupyter Notebook (.ipynb)
html Extract from local HTML files
openapi Extract from OpenAPI/Swagger spec
asciidoc Extract from AsciiDoc documents (.adoc)
pptx Extract from PowerPoint (.pptx)
rss Extract from RSS/Atom feeds
manpage Extract from man pages
confluence Extract from Confluence wiki
notion Extract from Notion pages
chat Extract from Slack/Discord chat exports
unified Multi-source scraping (docs + GitHub + PDF + more)
analyze Analyze local codebase and extract code knowledge
enhance AI-powered enhancement (auto: API or LOCAL mode)
enhance-status Check enhancement status (for background/daemon modes)
@@ -70,6 +80,17 @@ COMMAND_MODULES = {
"quality": "skill_seekers.cli.quality_metrics",
"workflows": "skill_seekers.cli.workflows_command",
"sync-config": "skill_seekers.cli.sync_config",
# New source types (v3.2.0+)
"jupyter": "skill_seekers.cli.jupyter_scraper",
"html": "skill_seekers.cli.html_scraper",
"openapi": "skill_seekers.cli.openapi_scraper",
"asciidoc": "skill_seekers.cli.asciidoc_scraper",
"pptx": "skill_seekers.cli.pptx_scraper",
"rss": "skill_seekers.cli.rss_scraper",
"manpage": "skill_seekers.cli.man_scraper",
"confluence": "skill_seekers.cli.confluence_scraper",
"notion": "skill_seekers.cli.notion_scraper",
"chat": "skill_seekers.cli.chat_scraper",
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -33,6 +33,18 @@ from .quality_parser import QualityParser
from .workflows_parser import WorkflowsParser
from .sync_config_parser import SyncConfigParser
# New source type parsers (v3.2.0+)
from .jupyter_parser import JupyterParser
from .html_parser import HtmlParser
from .openapi_parser import OpenAPIParser
from .asciidoc_parser import AsciiDocParser
from .pptx_parser import PptxParser
from .rss_parser import RssParser
from .manpage_parser import ManPageParser
from .confluence_parser import ConfluenceParser
from .notion_parser import NotionParser
from .chat_parser import ChatParser
# Registry of all parsers (in order of usage frequency)
PARSERS = [
CreateParser(), # NEW: Unified create command (placed first for prominence)
@@ -60,6 +72,17 @@ PARSERS = [
QualityParser(),
WorkflowsParser(),
SyncConfigParser(),
# New source types (v3.2.0+)
JupyterParser(),
HtmlParser(),
OpenAPIParser(),
AsciiDocParser(),
PptxParser(),
RssParser(),
ManPageParser(),
ConfluenceParser(),
NotionParser(),
ChatParser(),
]

View File

@@ -0,0 +1,32 @@
"""AsciiDoc subcommand parser.
Uses shared argument definitions from arguments.asciidoc to ensure
consistency with the standalone asciidoc_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.asciidoc import add_asciidoc_arguments
class AsciiDocParser(SubcommandParser):
"""Parser for asciidoc subcommand."""
@property
def name(self) -> str:
return "asciidoc"
@property
def help(self) -> str:
return "Extract from AsciiDoc documents (.adoc)"
@property
def description(self) -> str:
return "Extract content from AsciiDoc documents (.adoc) and generate skill"
def add_arguments(self, parser):
"""Add asciidoc-specific arguments.
Uses shared argument definitions to ensure consistency
with asciidoc_scraper.py (standalone scraper).
"""
add_asciidoc_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""Chat subcommand parser.
Uses shared argument definitions from arguments.chat to ensure
consistency with the standalone chat_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.chat import add_chat_arguments
class ChatParser(SubcommandParser):
"""Parser for chat subcommand."""
@property
def name(self) -> str:
return "chat"
@property
def help(self) -> str:
return "Extract from Slack/Discord chat exports"
@property
def description(self) -> str:
return "Extract content from Slack/Discord chat exports and generate skill"
def add_arguments(self, parser):
"""Add chat-specific arguments.
Uses shared argument definitions to ensure consistency
with chat_scraper.py (standalone scraper).
"""
add_chat_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""Confluence subcommand parser.
Uses shared argument definitions from arguments.confluence to ensure
consistency with the standalone confluence_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.confluence import add_confluence_arguments
class ConfluenceParser(SubcommandParser):
"""Parser for confluence subcommand."""
@property
def name(self) -> str:
return "confluence"
@property
def help(self) -> str:
return "Extract from Confluence wiki"
@property
def description(self) -> str:
return "Extract content from Confluence wiki and generate skill"
def add_arguments(self, parser):
"""Add confluence-specific arguments.
Uses shared argument definitions to ensure consistency
with confluence_scraper.py (standalone scraper).
"""
add_confluence_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""HTML subcommand parser.
Uses shared argument definitions from arguments.html to ensure
consistency with the standalone html_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.html import add_html_arguments
class HtmlParser(SubcommandParser):
"""Parser for html subcommand."""
@property
def name(self) -> str:
return "html"
@property
def help(self) -> str:
return "Extract from local HTML files (.html/.htm)"
@property
def description(self) -> str:
return "Extract content from local HTML files (.html/.htm) and generate skill"
def add_arguments(self, parser):
"""Add html-specific arguments.
Uses shared argument definitions to ensure consistency
with html_scraper.py (standalone scraper).
"""
add_html_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""Jupyter Notebook subcommand parser.
Uses shared argument definitions from arguments.jupyter to ensure
consistency with the standalone jupyter_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.jupyter import add_jupyter_arguments
class JupyterParser(SubcommandParser):
"""Parser for jupyter subcommand."""
@property
def name(self) -> str:
return "jupyter"
@property
def help(self) -> str:
return "Extract from Jupyter Notebook (.ipynb)"
@property
def description(self) -> str:
return "Extract content from Jupyter Notebook (.ipynb) and generate skill"
def add_arguments(self, parser):
"""Add jupyter-specific arguments.
Uses shared argument definitions to ensure consistency
with jupyter_scraper.py (standalone scraper).
"""
add_jupyter_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""Man page subcommand parser.
Uses shared argument definitions from arguments.manpage to ensure
consistency with the standalone man_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.manpage import add_manpage_arguments
class ManPageParser(SubcommandParser):
"""Parser for manpage subcommand."""
@property
def name(self) -> str:
return "manpage"
@property
def help(self) -> str:
return "Extract from man pages"
@property
def description(self) -> str:
return "Extract content from man pages and generate skill"
def add_arguments(self, parser):
"""Add manpage-specific arguments.
Uses shared argument definitions to ensure consistency
with man_scraper.py (standalone scraper).
"""
add_manpage_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""Notion subcommand parser.
Uses shared argument definitions from arguments.notion to ensure
consistency with the standalone notion_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.notion import add_notion_arguments
class NotionParser(SubcommandParser):
"""Parser for notion subcommand."""
@property
def name(self) -> str:
return "notion"
@property
def help(self) -> str:
return "Extract from Notion pages"
@property
def description(self) -> str:
return "Extract content from Notion pages and generate skill"
def add_arguments(self, parser):
"""Add notion-specific arguments.
Uses shared argument definitions to ensure consistency
with notion_scraper.py (standalone scraper).
"""
add_notion_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""OpenAPI subcommand parser.
Uses shared argument definitions from arguments.openapi to ensure
consistency with the standalone openapi_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.openapi import add_openapi_arguments
class OpenAPIParser(SubcommandParser):
"""Parser for openapi subcommand."""
@property
def name(self) -> str:
return "openapi"
@property
def help(self) -> str:
return "Extract from OpenAPI/Swagger spec"
@property
def description(self) -> str:
return "Extract content from OpenAPI/Swagger spec and generate skill"
def add_arguments(self, parser):
"""Add openapi-specific arguments.
Uses shared argument definitions to ensure consistency
with openapi_scraper.py (standalone scraper).
"""
add_openapi_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""PPTX subcommand parser.
Uses shared argument definitions from arguments.pptx to ensure
consistency with the standalone pptx_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.pptx import add_pptx_arguments
class PptxParser(SubcommandParser):
"""Parser for pptx subcommand."""
@property
def name(self) -> str:
return "pptx"
@property
def help(self) -> str:
return "Extract from PowerPoint presentations (.pptx)"
@property
def description(self) -> str:
return "Extract content from PowerPoint presentations (.pptx) and generate skill"
def add_arguments(self, parser):
"""Add pptx-specific arguments.
Uses shared argument definitions to ensure consistency
with pptx_scraper.py (standalone scraper).
"""
add_pptx_arguments(parser)

View File

@@ -0,0 +1,32 @@
"""RSS subcommand parser.
Uses shared argument definitions from arguments.rss to ensure
consistency with the standalone rss_scraper module.
"""
from .base import SubcommandParser
from skill_seekers.cli.arguments.rss import add_rss_arguments
class RssParser(SubcommandParser):
"""Parser for rss subcommand."""
@property
def name(self) -> str:
return "rss"
@property
def help(self) -> str:
return "Extract from RSS/Atom feeds"
@property
def description(self) -> str:
return "Extract content from RSS/Atom feeds and generate skill"
def add_arguments(self, parser):
"""Add rss-specific arguments.
Uses shared argument definitions to ensure consistency
with rss_scraper.py (standalone scraper).
"""
add_rss_arguments(parser)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,12 @@
"""Source type detection for unified create command.
Auto-detects whether a source is a web URL, GitHub repository,
local directory, PDF file, or config file based on patterns.
Auto-detects source type from user input — supports web URLs, GitHub repos,
local directories, and a dozen file types (PDF, DOCX, EPUB, IPYNB, HTML, YAML/OpenAPI,
AsciiDoc, PPTX, RSS/Atom, man pages, video files, and config JSON).
Note: Confluence, Notion, and Slack/Discord chat sources are API/export-based
and cannot be auto-detected from a single argument. Use their dedicated
subcommands (``skill-seekers confluence``, ``notion``, ``chat``) instead.
"""
import os
@@ -66,11 +71,49 @@ class SourceDetector:
if source.endswith(".epub"):
return cls._detect_epub(source)
if source.endswith(".ipynb"):
return cls._detect_jupyter(source)
if source.lower().endswith((".html", ".htm")):
return cls._detect_html(source)
if source.endswith(".pptx"):
return cls._detect_pptx(source)
if source.lower().endswith((".adoc", ".asciidoc")):
return cls._detect_asciidoc(source)
# Man page file extensions (.1 through .8, .man)
# Only match if the basename looks like a man page (e.g., "git.1", not "access.log.1")
# Require the basename without the extension to be a plausible command name
if source.lower().endswith(".man"):
return cls._detect_manpage(source)
MAN_SECTION_EXTENSIONS = (".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8")
if source.lower().endswith(MAN_SECTION_EXTENSIONS):
# Heuristic: man pages have a simple basename (no dots before extension)
# e.g., "git.1" is a man page, "access.log.1" is not
basename_no_ext = os.path.splitext(os.path.basename(source))[0]
if "." not in basename_no_ext:
return cls._detect_manpage(source)
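The man-page heuristic above can be pulled out and checked directly. This sketch restates the same two rules (`.man` always matches; `.1`–`.8` only when the basename without its extension contains no further dots):

```python
import os

MAN_SECTION_EXTENSIONS = (".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8")


def looks_like_manpage(source: str) -> bool:
    """True if the path looks like a man page per the detection rules."""
    lower = source.lower()
    if lower.endswith(".man"):
        return True
    if lower.endswith(MAN_SECTION_EXTENSIONS):
        basename_no_ext = os.path.splitext(os.path.basename(source))[0]
        return "." not in basename_no_ext
    return False


print(looks_like_manpage("git.1"))         # → True
print(looks_like_manpage("access.log.1"))  # → False
```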
# Video file extensions
VIDEO_EXTENSIONS = (".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv")
if source.lower().endswith(VIDEO_EXTENSIONS):
return cls._detect_video_file(source)
# RSS/Atom feed file extensions (only .rss and .atom — .xml is too generic)
if source.lower().endswith((".rss", ".atom")):
return cls._detect_rss(source)
# OpenAPI/Swagger spec detection (YAML files with OpenAPI content)
# Sniff file content for 'openapi:' or 'swagger:' keys before committing
if (
source.lower().endswith((".yaml", ".yml"))
and os.path.isfile(source)
and cls._looks_like_openapi(source)
):
return cls._detect_openapi(source)
# 2. Video URL detection (before directory check)
video_url_info = cls._detect_video_url(source)
if video_url_info:
@@ -97,15 +140,22 @@ class SourceDetector:
raise ValueError(
f"Cannot determine source type for: {source}\n\n"
"Examples:\n"
" Web: skill-seekers create https://docs.react.dev/\n"
" GitHub: skill-seekers create facebook/react\n"
" Local: skill-seekers create ./my-project\n"
" PDF: skill-seekers create tutorial.pdf\n"
" DOCX: skill-seekers create document.docx\n"
" EPUB: skill-seekers create ebook.epub\n"
" Video: skill-seekers create https://youtube.com/watch?v=...\n"
" Video: skill-seekers create recording.mp4\n"
" Config: skill-seekers create configs/react.json"
" Web: skill-seekers create https://docs.react.dev/\n"
" GitHub: skill-seekers create facebook/react\n"
" Local: skill-seekers create ./my-project\n"
" PDF: skill-seekers create tutorial.pdf\n"
" DOCX: skill-seekers create document.docx\n"
" EPUB: skill-seekers create ebook.epub\n"
" Jupyter: skill-seekers create notebook.ipynb\n"
" HTML: skill-seekers create page.html\n"
" OpenAPI: skill-seekers create openapi.yaml\n"
" AsciiDoc: skill-seekers create document.adoc\n"
" PowerPoint: skill-seekers create presentation.pptx\n"
" RSS: skill-seekers create feed.rss\n"
" Man page: skill-seekers create command.1\n"
" Video: skill-seekers create https://youtube.com/watch?v=...\n"
" Video: skill-seekers create recording.mp4\n"
" Config: skill-seekers create configs/react.json"
)
@classmethod
@@ -140,6 +190,90 @@ class SourceDetector:
type="epub", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_jupyter(cls, source: str) -> SourceInfo:
"""Detect Jupyter Notebook file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="jupyter", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_html(cls, source: str) -> SourceInfo:
"""Detect local HTML file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="html", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_pptx(cls, source: str) -> SourceInfo:
"""Detect PowerPoint file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="pptx", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_asciidoc(cls, source: str) -> SourceInfo:
"""Detect AsciiDoc file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="asciidoc", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_manpage(cls, source: str) -> SourceInfo:
"""Detect man page file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="manpage", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_rss(cls, source: str) -> SourceInfo:
"""Detect RSS/Atom feed file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="rss", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _looks_like_openapi(cls, source: str) -> bool:
"""Check if a YAML/JSON file looks like an OpenAPI or Swagger spec.
Reads the first few lines to look for 'openapi:' or 'swagger:' keys.
Args:
source: Path to the file
Returns:
True if the file appears to be an OpenAPI/Swagger spec
"""
try:
with open(source, encoding="utf-8", errors="replace") as f:
# Read first 20 lines — the openapi/swagger top-level key typically sits near the top
for _ in range(20):
line = f.readline()
if not line:
break
stripped = line.strip().lower()
if stripped.startswith("openapi:") or stripped.startswith("swagger:"):
return True
if stripped.startswith('"openapi"') or stripped.startswith('"swagger"'):
return True
except OSError:
pass
return False
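The sniffing check can be verified against a temporary file. This re-sketches the same 20-line scan as a free function so it can run outside the class; the spec content is illustrative:

```python
import os
import tempfile


def looks_like_openapi(path: str) -> bool:
    """Scan the first 20 lines for an openapi/swagger top-level key."""
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            for _ in range(20):
                line = f.readline()
                if not line:
                    break
                stripped = line.strip().lower()
                if stripped.startswith(("openapi:", "swagger:", '"openapi"', '"swagger"')):
                    return True
    except OSError:
        pass
    return False


with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    f.write("openapi: 3.1.0\ninfo:\n  title: Demo API\n")
    spec = f.name
result = looks_like_openapi(spec)
print(result)  # → True
os.unlink(spec)
```

Sniffing content before committing to the `openapi` type is what keeps generic `.yaml` files (CI configs, Helm charts) from being misrouted.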
@classmethod
def _detect_openapi(cls, source: str) -> SourceInfo:
"""Detect OpenAPI/Swagger spec file source."""
name = os.path.splitext(os.path.basename(source))[0]
return SourceInfo(
type="openapi", parsed={"file_path": source}, suggested_name=name, raw_input=source
)
@classmethod
def _detect_video_file(cls, source: str) -> SourceInfo:
"""Detect local video file source."""
@@ -312,5 +446,19 @@ class SourceDetector:
if not os.path.isfile(config_path):
raise ValueError(f"Path is not a file: {config_path}")
# For web and github, validation happens during scraping
# (URL accessibility, repo existence)
elif source_info.type in ("jupyter", "html", "pptx", "asciidoc", "manpage", "openapi"):
file_path = source_info.parsed.get("file_path", "")
if file_path:
type_label = source_info.type.upper()
if not os.path.exists(file_path):
raise ValueError(f"{type_label} file does not exist: {file_path}")
if not os.path.isfile(file_path) and not os.path.isdir(file_path):
raise ValueError(f"Path is not a file or directory: {file_path}")
elif source_info.type == "rss":
file_path = source_info.parsed.get("file_path", "")
if file_path and not os.path.exists(file_path):
raise ValueError(f"RSS/Atom file does not exist: {file_path}")
# For web, github, confluence, notion, chat, rss (URL), validation happens
# during scraping (URL accessibility, API auth, etc.)

View File

@@ -76,6 +76,17 @@ class UnifiedScraper:
"word": [], # List of word sources
"video": [], # List of video sources
"local": [], # List of local sources (docs or code)
"epub": [], # List of epub sources
"jupyter": [], # List of Jupyter notebook sources
"html": [], # List of local HTML sources
"openapi": [], # List of OpenAPI/Swagger spec sources
"asciidoc": [], # List of AsciiDoc sources
"pptx": [], # List of PowerPoint sources
"confluence": [], # List of Confluence wiki sources
"notion": [], # List of Notion page sources
"rss": [], # List of RSS/Atom feed sources
"manpage": [], # List of man page sources
"chat": [], # List of Slack/Discord chat sources
}
# Track source index for unique naming (multi-source support)
@@ -86,6 +97,17 @@ class UnifiedScraper:
"word": 0,
"video": 0,
"local": 0,
"epub": 0,
"jupyter": 0,
"html": 0,
"openapi": 0,
"asciidoc": 0,
"pptx": 0,
"confluence": 0,
"notion": 0,
"rss": 0,
"manpage": 0,
"chat": 0,
}
# Output paths - cleaner organization
@@ -166,6 +188,28 @@ class UnifiedScraper:
self._scrape_video(source)
elif source_type == "local":
self._scrape_local(source)
elif source_type == "epub":
self._scrape_epub(source)
elif source_type == "jupyter":
self._scrape_jupyter(source)
elif source_type == "html":
self._scrape_html(source)
elif source_type == "openapi":
self._scrape_openapi(source)
elif source_type == "asciidoc":
self._scrape_asciidoc(source)
elif source_type == "pptx":
self._scrape_pptx(source)
elif source_type == "confluence":
self._scrape_confluence(source)
elif source_type == "notion":
self._scrape_notion(source)
elif source_type == "rss":
self._scrape_rss(source)
elif source_type == "manpage":
self._scrape_manpage(source)
elif source_type == "chat":
self._scrape_chat(source)
else:
logger.warning(f"Unknown source type: {source_type}")
except Exception as e:
@@ -571,6 +615,7 @@ class UnifiedScraper:
{
"docx_path": docx_path,
"docx_id": docx_id,
"word_id": docx_id, # Alias for generic reference generation
"idx": idx,
"data": word_data,
"data_file": cache_word_data,
@@ -788,6 +833,595 @@ class UnifiedScraper:
logger.debug(f"Traceback: {traceback.format_exc()}")
raise
# ------------------------------------------------------------------
# New source type handlers (v3.2.0+)
# ------------------------------------------------------------------
def _scrape_epub(self, source: dict[str, Any]):
"""Scrape EPUB e-book (.epub)."""
try:
from skill_seekers.cli.epub_scraper import EpubToSkillConverter
except ImportError:
logger.error(
"EPUB scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[epub]"
)
return
idx = self._source_counters["epub"]
self._source_counters["epub"] += 1
epub_path = source["path"]
epub_id = os.path.splitext(os.path.basename(epub_path))[0]
epub_config = {
"name": f"{self.name}_epub_{idx}_{epub_id}",
"epub_path": source["path"],
"description": source.get("description", f"{epub_id} e-book"),
}
logger.info(f"Scraping EPUB: {source['path']}")
converter = EpubToSkillConverter(epub_config)
converter.extract_epub()
epub_data_file = converter.data_file
with open(epub_data_file, encoding="utf-8") as f:
epub_data = json.load(f)
cache_epub_data = os.path.join(self.data_dir, f"epub_data_{idx}_{epub_id}.json")
shutil.copy(epub_data_file, cache_epub_data)
self.scraped_data["epub"].append(
{
"epub_path": epub_path,
"epub_id": epub_id,
"idx": idx,
"data": epub_data,
"data_file": cache_epub_data,
}
)
try:
converter.build_skill()
logger.info("✅ EPUB: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone EPUB SKILL.md: {e}")
logger.info(f"✅ EPUB: {len(epub_data.get('chapters', []))} chapters extracted")
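The per-type counter gives every source a unique, stable cache name even when one config lists several files of the same type. The naming scheme in isolation (the `counters` dict stands in for `self._source_counters`):

```python
import os


def source_name(skill_name: str, source_type: str,
                counters: dict[str, int], path: str) -> str:
    """Build '<skill>_<type>_<idx>_<file-stem>' and bump the counter."""
    idx = counters[source_type]
    counters[source_type] += 1
    source_id = os.path.splitext(os.path.basename(path))[0]
    return f"{skill_name}_{source_type}_{idx}_{source_id}"


counters = {"epub": 0}
print(source_name("mydocs", "epub", counters, "books/guide.epub"))  # → mydocs_epub_0_guide
print(source_name("mydocs", "epub", counters, "books/extra.epub"))  # → mydocs_epub_1_extra
```

The index prefix prevents collisions when two sources share a file stem (e.g., two different `guide.epub` files from different directories).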
def _scrape_jupyter(self, source: dict[str, Any]):
"""Scrape Jupyter Notebook (.ipynb)."""
try:
from skill_seekers.cli.jupyter_scraper import JupyterToSkillConverter
except ImportError:
logger.error(
"Jupyter scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[jupyter]"
)
return
idx = self._source_counters["jupyter"]
self._source_counters["jupyter"] += 1
nb_path = source["path"]
nb_id = os.path.splitext(os.path.basename(nb_path))[0]
nb_config = {
"name": f"{self.name}_jupyter_{idx}_{nb_id}",
"notebook_path": source["path"],
"description": source.get("description", f"{nb_id} notebook"),
}
logger.info(f"Scraping Jupyter Notebook: {source['path']}")
converter = JupyterToSkillConverter(nb_config)
converter.extract_notebook()
nb_data_file = converter.data_file
with open(nb_data_file, encoding="utf-8") as f:
nb_data = json.load(f)
cache_nb_data = os.path.join(self.data_dir, f"jupyter_data_{idx}_{nb_id}.json")
shutil.copy(nb_data_file, cache_nb_data)
self.scraped_data["jupyter"].append(
{
"notebook_path": nb_path,
"notebook_id": nb_id,
"idx": idx,
"data": nb_data,
"data_file": cache_nb_data,
}
)
try:
converter.build_skill()
logger.info("✅ Jupyter: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone Jupyter SKILL.md: {e}")
logger.info(f"✅ Jupyter: {len(nb_data.get('cells', []))} cells extracted")
def _scrape_html(self, source: dict[str, Any]):
"""Scrape local HTML file(s)."""
try:
from skill_seekers.cli.html_scraper import HtmlToSkillConverter
except ImportError:
logger.error("html_scraper.py not found")
return
idx = self._source_counters["html"]
self._source_counters["html"] += 1
html_path = source["path"]
html_id = os.path.splitext(os.path.basename(html_path.rstrip("/")))[0]
html_config = {
"name": f"{self.name}_html_{idx}_{html_id}",
"html_path": source["path"],
"description": source.get("description", f"{html_id} HTML content"),
}
logger.info(f"Scraping local HTML: {source['path']}")
converter = HtmlToSkillConverter(html_config)
converter.extract_html()
html_data_file = converter.data_file
with open(html_data_file, encoding="utf-8") as f:
html_data = json.load(f)
cache_html_data = os.path.join(self.data_dir, f"html_data_{idx}_{html_id}.json")
shutil.copy(html_data_file, cache_html_data)
self.scraped_data["html"].append(
{
"html_path": html_path,
"html_id": html_id,
"idx": idx,
"data": html_data,
"data_file": cache_html_data,
}
)
try:
converter.build_skill()
logger.info("✅ HTML: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone HTML SKILL.md: {e}")
logger.info(f"✅ HTML: {len(html_data.get('pages', []))} pages extracted")
def _scrape_openapi(self, source: dict[str, Any]):
"""Scrape OpenAPI/Swagger specification."""
try:
from skill_seekers.cli.openapi_scraper import OpenAPIToSkillConverter
except ImportError:
        logger.error("Failed to import skill_seekers.cli.openapi_scraper")
return
idx = self._source_counters["openapi"]
self._source_counters["openapi"] += 1
spec_path = source.get("path", source.get("url", ""))
spec_id = os.path.splitext(os.path.basename(spec_path))[0] if spec_path else f"spec_{idx}"
openapi_config = {
"name": f"{self.name}_openapi_{idx}_{spec_id}",
"spec_path": source.get("path"),
"spec_url": source.get("url"),
"description": source.get("description", f"{spec_id} API spec"),
}
logger.info(f"Scraping OpenAPI spec: {spec_path}")
converter = OpenAPIToSkillConverter(openapi_config)
converter.extract_spec()
api_data_file = converter.data_file
with open(api_data_file, encoding="utf-8") as f:
api_data = json.load(f)
cache_api_data = os.path.join(self.data_dir, f"openapi_data_{idx}_{spec_id}.json")
shutil.copy(api_data_file, cache_api_data)
self.scraped_data["openapi"].append(
{
"spec_path": spec_path,
"spec_id": spec_id,
"idx": idx,
"data": api_data,
"data_file": cache_api_data,
}
)
try:
converter.build_skill()
logger.info("✅ OpenAPI: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone OpenAPI SKILL.md: {e}")
logger.info(f"✅ OpenAPI: {len(api_data.get('endpoints', []))} endpoints extracted")
def _scrape_asciidoc(self, source: dict[str, Any]):
"""Scrape AsciiDoc document(s)."""
try:
from skill_seekers.cli.asciidoc_scraper import AsciiDocToSkillConverter
except ImportError:
logger.error(
"AsciiDoc scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[asciidoc]"
)
return
idx = self._source_counters["asciidoc"]
self._source_counters["asciidoc"] += 1
adoc_path = source["path"]
adoc_id = os.path.splitext(os.path.basename(adoc_path.rstrip("/")))[0]
adoc_config = {
"name": f"{self.name}_asciidoc_{idx}_{adoc_id}",
"asciidoc_path": source["path"],
"description": source.get("description", f"{adoc_id} AsciiDoc content"),
}
logger.info(f"Scraping AsciiDoc: {source['path']}")
converter = AsciiDocToSkillConverter(adoc_config)
converter.extract_asciidoc()
adoc_data_file = converter.data_file
with open(adoc_data_file, encoding="utf-8") as f:
adoc_data = json.load(f)
cache_adoc_data = os.path.join(self.data_dir, f"asciidoc_data_{idx}_{adoc_id}.json")
shutil.copy(adoc_data_file, cache_adoc_data)
self.scraped_data["asciidoc"].append(
{
"asciidoc_path": adoc_path,
"asciidoc_id": adoc_id,
"idx": idx,
"data": adoc_data,
"data_file": cache_adoc_data,
}
)
try:
converter.build_skill()
logger.info("✅ AsciiDoc: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone AsciiDoc SKILL.md: {e}")
logger.info(f"✅ AsciiDoc: {len(adoc_data.get('sections', []))} sections extracted")
def _scrape_pptx(self, source: dict[str, Any]):
"""Scrape PowerPoint presentation (.pptx)."""
try:
from skill_seekers.cli.pptx_scraper import PptxToSkillConverter
except ImportError:
logger.error(
"PowerPoint scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[pptx]"
)
return
idx = self._source_counters["pptx"]
self._source_counters["pptx"] += 1
pptx_path = source["path"]
pptx_id = os.path.splitext(os.path.basename(pptx_path))[0]
pptx_config = {
"name": f"{self.name}_pptx_{idx}_{pptx_id}",
"pptx_path": source["path"],
"description": source.get("description", f"{pptx_id} presentation"),
}
logger.info(f"Scraping PowerPoint: {source['path']}")
converter = PptxToSkillConverter(pptx_config)
converter.extract_pptx()
pptx_data_file = converter.data_file
with open(pptx_data_file, encoding="utf-8") as f:
pptx_data = json.load(f)
cache_pptx_data = os.path.join(self.data_dir, f"pptx_data_{idx}_{pptx_id}.json")
shutil.copy(pptx_data_file, cache_pptx_data)
self.scraped_data["pptx"].append(
{
"pptx_path": pptx_path,
"pptx_id": pptx_id,
"idx": idx,
"data": pptx_data,
"data_file": cache_pptx_data,
}
)
try:
converter.build_skill()
logger.info("✅ PowerPoint: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone PowerPoint SKILL.md: {e}")
logger.info(f"✅ PowerPoint: {len(pptx_data.get('slides', []))} slides extracted")
def _scrape_confluence(self, source: dict[str, Any]):
"""Scrape Confluence wiki (API or exported HTML/XML)."""
try:
from skill_seekers.cli.confluence_scraper import ConfluenceToSkillConverter
except ImportError:
logger.error(
"Confluence scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[confluence]"
)
return
idx = self._source_counters["confluence"]
self._source_counters["confluence"] += 1
source_id = source.get("space_key", source.get("path", f"confluence_{idx}"))
if isinstance(source_id, str) and "/" in source_id:
source_id = os.path.basename(source_id.rstrip("/"))
conf_config = {
"name": f"{self.name}_confluence_{idx}_{source_id}",
"base_url": source.get("base_url", source.get("url")),
"space_key": source.get("space_key"),
"export_path": source.get("path"),
"username": source.get("username"),
"token": source.get("token"),
"description": source.get("description", f"{source_id} Confluence content"),
"max_pages": source.get("max_pages", 500),
}
logger.info(f"Scraping Confluence: {source_id}")
converter = ConfluenceToSkillConverter(conf_config)
converter.extract_confluence()
conf_data_file = converter.data_file
with open(conf_data_file, encoding="utf-8") as f:
conf_data = json.load(f)
cache_conf_data = os.path.join(self.data_dir, f"confluence_data_{idx}_{source_id}.json")
shutil.copy(conf_data_file, cache_conf_data)
self.scraped_data["confluence"].append(
{
"source_id": source_id,
"idx": idx,
"data": conf_data,
"data_file": cache_conf_data,
}
)
try:
converter.build_skill()
logger.info("✅ Confluence: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone Confluence SKILL.md: {e}")
logger.info(f"✅ Confluence: {len(conf_data.get('pages', []))} pages extracted")
def _scrape_notion(self, source: dict[str, Any]):
"""Scrape Notion pages (API or exported Markdown)."""
try:
from skill_seekers.cli.notion_scraper import NotionToSkillConverter
except ImportError:
logger.error(
"Notion scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[notion]"
)
return
idx = self._source_counters["notion"]
self._source_counters["notion"] += 1
source_id = source.get(
"database_id", source.get("page_id", source.get("path", f"notion_{idx}"))
)
if isinstance(source_id, str) and "/" in source_id:
source_id = os.path.basename(source_id.rstrip("/"))
notion_config = {
"name": f"{self.name}_notion_{idx}_{source_id}",
"database_id": source.get("database_id"),
"page_id": source.get("page_id"),
"export_path": source.get("path"),
"token": source.get("token"),
"description": source.get("description", f"{source_id} Notion content"),
"max_pages": source.get("max_pages", 500),
}
logger.info(f"Scraping Notion: {source_id}")
converter = NotionToSkillConverter(notion_config)
converter.extract_notion()
notion_data_file = converter.data_file
with open(notion_data_file, encoding="utf-8") as f:
notion_data = json.load(f)
cache_notion_data = os.path.join(self.data_dir, f"notion_data_{idx}_{source_id}.json")
shutil.copy(notion_data_file, cache_notion_data)
self.scraped_data["notion"].append(
{
"source_id": source_id,
"idx": idx,
"data": notion_data,
"data_file": cache_notion_data,
}
)
try:
converter.build_skill()
logger.info("✅ Notion: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone Notion SKILL.md: {e}")
logger.info(f"✅ Notion: {len(notion_data.get('pages', []))} pages extracted")
def _scrape_rss(self, source: dict[str, Any]):
"""Scrape RSS/Atom feed (with optional full article scraping)."""
try:
from skill_seekers.cli.rss_scraper import RssToSkillConverter
except ImportError:
logger.error(
"RSS scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[rss]"
)
return
idx = self._source_counters["rss"]
self._source_counters["rss"] += 1
feed_url = source.get("url", source.get("path", ""))
        feed_id = feed_url.rstrip("/").split("/")[-1].split(".")[0] if feed_url else ""
        feed_id = feed_id or f"feed_{idx}"
rss_config = {
"name": f"{self.name}_rss_{idx}_{feed_id}",
"feed_url": source.get("url"),
"feed_path": source.get("path"),
"follow_links": source.get("follow_links", True),
"max_articles": source.get("max_articles", 50),
"description": source.get("description", f"{feed_id} RSS/Atom feed"),
}
logger.info(f"Scraping RSS/Atom feed: {feed_url}")
converter = RssToSkillConverter(rss_config)
converter.extract_feed()
rss_data_file = converter.data_file
with open(rss_data_file, encoding="utf-8") as f:
rss_data = json.load(f)
cache_rss_data = os.path.join(self.data_dir, f"rss_data_{idx}_{feed_id}.json")
shutil.copy(rss_data_file, cache_rss_data)
self.scraped_data["rss"].append(
{
"feed_url": feed_url,
"feed_id": feed_id,
"idx": idx,
"data": rss_data,
"data_file": cache_rss_data,
}
)
try:
converter.build_skill()
logger.info("✅ RSS: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone RSS SKILL.md: {e}")
logger.info(f"✅ RSS: {len(rss_data.get('articles', []))} articles extracted")
def _scrape_manpage(self, source: dict[str, Any]):
"""Scrape man page(s)."""
try:
from skill_seekers.cli.man_scraper import ManPageToSkillConverter
except ImportError:
        logger.error("Failed to import skill_seekers.cli.man_scraper")
return
idx = self._source_counters["manpage"]
self._source_counters["manpage"] += 1
man_names = source.get("names", [])
man_path = source.get("path", "")
        man_id = (man_names[0] if man_names else os.path.basename(man_path.rstrip("/"))) or f"manpage_{idx}"
man_config = {
"name": f"{self.name}_manpage_{idx}_{man_id}",
"man_names": man_names,
"man_path": man_path,
"sections": source.get("sections", []),
"description": source.get("description", f"{man_id} man pages"),
}
logger.info(f"Scraping man pages: {man_id}")
converter = ManPageToSkillConverter(man_config)
converter.extract_manpages()
man_data_file = converter.data_file
with open(man_data_file, encoding="utf-8") as f:
man_data = json.load(f)
cache_man_data = os.path.join(self.data_dir, f"manpage_data_{idx}_{man_id}.json")
shutil.copy(man_data_file, cache_man_data)
self.scraped_data["manpage"].append(
{
"man_id": man_id,
"idx": idx,
"data": man_data,
"data_file": cache_man_data,
}
)
try:
converter.build_skill()
logger.info("✅ Man pages: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone man page SKILL.md: {e}")
logger.info(f"✅ Man pages: {len(man_data.get('pages', []))} man pages extracted")
def _scrape_chat(self, source: dict[str, Any]):
"""Scrape Slack/Discord chat export or API."""
try:
from skill_seekers.cli.chat_scraper import ChatToSkillConverter
except ImportError:
logger.error(
"Chat scraper dependencies not installed.\n"
" Install with: pip install skill-seekers[chat]"
)
return
idx = self._source_counters["chat"]
self._source_counters["chat"] += 1
export_path = source.get("path", "")
channel = source.get("channel", source.get("channel_id", ""))
chat_id = channel or os.path.basename(export_path.rstrip("/")) or f"chat_{idx}"
chat_config = {
"name": f"{self.name}_chat_{idx}_{chat_id}",
"export_path": source.get("path"),
"platform": source.get("platform", "slack"),
"token": source.get("token"),
"channel": channel,
"max_messages": source.get("max_messages", 10000),
"description": source.get("description", f"{chat_id} chat export"),
}
logger.info(f"Scraping chat: {chat_id}")
converter = ChatToSkillConverter(chat_config)
converter.extract_chat()
chat_data_file = converter.data_file
with open(chat_data_file, encoding="utf-8") as f:
chat_data = json.load(f)
cache_chat_data = os.path.join(self.data_dir, f"chat_data_{idx}_{chat_id}.json")
shutil.copy(chat_data_file, cache_chat_data)
self.scraped_data["chat"].append(
{
"chat_id": chat_id,
"platform": source.get("platform", "slack"),
"idx": idx,
"data": chat_data,
"data_file": cache_chat_data,
}
)
try:
converter.build_skill()
logger.info("✅ Chat: Standalone SKILL.md created")
except Exception as e:
logger.warning(f"⚠️ Failed to build standalone chat SKILL.md: {e}")
logger.info(f"✅ Chat: {len(chat_data.get('messages', []))} messages extracted")
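Each `_scrape_<type>` method above follows the same five-step shape: bump the per-type counter, derive a source ID, build a converter config, extract, then cache the data file and record the result in `self.scraped_data`. A minimal sketch of that shared flow, using a stub converter rather than a real scraper class (all names below are illustrative, not the actual API):

```python
import json
import os
import shutil
import tempfile


class StubConverter:
    """Stand-in for a converter like ChatToSkillConverter (hypothetical, for illustration)."""

    def __init__(self, config):
        self.config = config
        self.data_file = None

    def extract(self):
        # Real converters write their extracted data to a JSON file and
        # expose its path as `data_file`.
        fd, self.data_file = tempfile.mkstemp(suffix=".json")
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump({"messages": [{"text": "hi"}]}, f)


counters = {"chat": 0}
scraped = {"chat": []}
data_dir = tempfile.mkdtemp()


def scrape_chat(source):
    # 1. bump per-type counter  2. derive an ID  3. build converter config
    # 4. extract               5. cache the data file and record the result
    idx = counters["chat"]
    counters["chat"] += 1
    chat_id = source.get("channel") or f"chat_{idx}"
    conv = StubConverter({"name": f"demo_chat_{idx}_{chat_id}"})
    conv.extract()
    with open(conv.data_file, encoding="utf-8") as f:
        data = json.load(f)
    cache = os.path.join(data_dir, f"chat_data_{idx}_{chat_id}.json")
    shutil.copy(conv.data_file, cache)
    scraped["chat"].append(
        {"chat_id": chat_id, "idx": idx, "data": data, "data_file": cache}
    )


scrape_chat({"channel": "general"})
```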
def _load_json(self, file_path: Path) -> dict:
"""
Load JSON file safely.
@@ -1297,14 +1931,33 @@ Examples:
if args.dry_run:
logger.info("🔍 DRY RUN MODE - Preview only, no scraping will occur")
logger.info(f"\nWould scrape {len(scraper.config.get('sources', []))} sources:")
# Source type display config: type -> (label, key for detail)
_SOURCE_DISPLAY = {
"documentation": ("Documentation", "base_url"),
"github": ("GitHub", "repo"),
"pdf": ("PDF", "path"),
"word": ("Word", "path"),
"epub": ("EPUB", "path"),
"video": ("Video", "url"),
"local": ("Local Codebase", "path"),
"jupyter": ("Jupyter Notebook", "path"),
"html": ("HTML", "path"),
"openapi": ("OpenAPI Spec", "path"),
"asciidoc": ("AsciiDoc", "path"),
"pptx": ("PowerPoint", "path"),
"confluence": ("Confluence", "base_url"),
"notion": ("Notion", "page_id"),
"rss": ("RSS/Atom Feed", "url"),
"manpage": ("Man Page", "names"),
"chat": ("Chat Export", "path"),
}
for idx, source in enumerate(scraper.config.get("sources", []), 1):
source_type = source.get("type", "unknown")
label, key = _SOURCE_DISPLAY.get(source_type, (source_type.title(), "path"))
detail = source.get(key, "N/A")
if isinstance(detail, list):
detail = ", ".join(str(d) for d in detail)
logger.info(f" {idx}. {label}: {detail}")
logger.info(f"\nOutput directory: {scraper.output_dir}")
logger.info(f"Merge mode: {scraper.merge_mode}")
return
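The table-driven dry-run preview can be exercised on its own; the mapping subset below is copied from `_SOURCE_DISPLAY`, while the sample sources (and the unknown `ftp` type) are hypothetical:

```python
# Minimal reproduction of the table-driven dry-run preview.
# The mapping subset mirrors _SOURCE_DISPLAY; sample sources are made up.
_SOURCE_DISPLAY = {
    "github": ("GitHub", "repo"),
    "manpage": ("Man Page", "names"),
}


def preview_line(idx, source):
    source_type = source.get("type", "unknown")
    # Unknown types fall back to a title-cased label and a "path" detail key.
    label, key = _SOURCE_DISPLAY.get(source_type, (source_type.title(), "path"))
    detail = source.get(key, "N/A")
    if isinstance(detail, list):
        detail = ", ".join(str(d) for d in detail)
    return f"  {idx}. {label}: {detail}"


lines = [
    preview_line(1, {"type": "github", "repo": "octo/hello"}),
    preview_line(2, {"type": "manpage", "names": ["grep", "sed"]}),
    preview_line(3, {"type": "ftp", "path": "/srv/files"}),  # unknown type
]
```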

View File

@@ -136,6 +136,44 @@ class UnifiedSkillBuilder:
skill_mds["pdf"] = "\n\n---\n\n".join(pdf_sources)
logger.debug(f"Combined {len(pdf_sources)} PDF SKILL.md files")
# Load additional source types using generic glob pattern
# Each source type uses: {name}_{type}_{idx}_*/ or {name}_{type}_*/
_extra_types = [
"word",
"epub",
"video",
"jupyter",
"html",
"openapi",
"asciidoc",
"pptx",
"confluence",
"notion",
"rss",
"manpage",
"chat",
]
for source_type in _extra_types:
type_sources = []
for type_dir in sources_dir.glob(f"{self.name}_{source_type}_*"):
type_skill_path = type_dir / "SKILL.md"
if type_skill_path.exists():
try:
content = type_skill_path.read_text(encoding="utf-8")
type_sources.append(content)
logger.debug(
f"Loaded {source_type} SKILL.md from {type_dir.name} "
f"({len(content)} chars)"
)
except OSError as e:
logger.warning(
f"Failed to read {source_type} SKILL.md from {type_dir.name}: {e}"
)
if type_sources:
skill_mds[source_type] = "\n\n---\n\n".join(type_sources)
logger.debug(f"Combined {len(type_sources)} {source_type} SKILL.md files")
logger.info(f"Loaded {len(skill_mds)} source SKILL.md files")
return skill_mds
@@ -477,6 +515,18 @@ This skill synthesizes knowledge from multiple sources:
logger.info("Using PDF SKILL.md as-is")
content = skill_mds["pdf"]
# Generic merge for additional source types not covered by pairwise methods
if not content and skill_mds:
# At least one source SKILL.md exists but not docs/github/pdf
logger.info(f"Generic merge for source types: {list(skill_mds.keys())}")
content = self._generic_merge(skill_mds)
elif content and len(skill_mds) > (int(has_docs) + int(has_github) + int(has_pdf)):
# Pairwise synthesis handled the core types; append additional sources
extra_types = set(skill_mds.keys()) - {"documentation", "github", "pdf"}
if extra_types:
logger.info(f"Appending additional sources: {extra_types}")
content = self._append_extra_sources(content, skill_mds, extra_types)
# Fallback: generate minimal SKILL.md (legacy behavior)
if not content:
logger.warning("No source SKILL.md files found, generating minimal SKILL.md (legacy)")
@@ -574,6 +624,165 @@ This skill synthesizes knowledge from multiple sources:
return "\n".join(lines)
# ------------------------------------------------------------------
# Generic merge system for any combination of source types (v3.2.0+)
# ------------------------------------------------------------------
# Human-readable labels for source types
_SOURCE_LABELS: dict[str, str] = {
"documentation": "Documentation",
"github": "GitHub Repository",
"pdf": "PDF Document",
"word": "Word Document",
"epub": "EPUB E-book",
"video": "Video",
"local": "Local Codebase",
"jupyter": "Jupyter Notebook",
"html": "HTML Document",
"openapi": "OpenAPI/Swagger Spec",
"asciidoc": "AsciiDoc Document",
"pptx": "PowerPoint Presentation",
"confluence": "Confluence Wiki",
"notion": "Notion Page",
"rss": "RSS/Atom Feed",
"manpage": "Man Page",
"chat": "Chat Export",
}
def _generic_merge(self, skill_mds: dict[str, str]) -> str:
"""Generic merge for any combination of source types.
Uses a priority-based section ordering approach:
1. Parse all source SKILL.md files into sections
2. Collect unique sections across all sources
3. Merge matching sections with source attribution
4. Produce a unified SKILL.md
This preserves the existing pairwise synthesis for docs+github, docs+pdf, etc.
and handles any other combination generically.
Args:
skill_mds: Dict mapping source type to SKILL.md content
Returns:
Merged SKILL.md content string
"""
skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]
desc = self.description[:1024] if len(self.description) > 1024 else self.description
# Parse all source SKILL.md files into sections
all_sections: dict[str, dict[str, str]] = {}
for source_type, content in skill_mds.items():
all_sections[source_type] = self._parse_skill_md_sections(content)
# Determine all unique section names in priority order
# Sections that appear earlier in sources have higher priority
seen_sections: list[str] = []
for _source_type, sections in all_sections.items():
for section_name in sections:
if section_name not in seen_sections:
seen_sections.append(section_name)
# Build merged content
source_labels = ", ".join(self._SOURCE_LABELS.get(t, t.title()) for t in skill_mds)
lines = [
"---",
f"name: {skill_name}",
f"description: {desc}",
"---",
"",
f"# {self.name.replace('_', ' ').title()}",
"",
f"{self.description}",
"",
f"*Merged from: {source_labels}*",
"",
]
# Emit each section, merging content from all sources that have it
for section_name in seen_sections:
contributing_sources = [
(stype, sections[section_name])
for stype, sections in all_sections.items()
if section_name in sections
]
if len(contributing_sources) == 1:
# Single source for this section — emit as-is
stype, content = contributing_sources[0]
label = self._SOURCE_LABELS.get(stype, stype.title())
lines.append(f"## {section_name}")
lines.append("")
lines.append(f"*From {label}*")
lines.append("")
lines.append(content)
lines.append("")
else:
# Multiple sources — merge with attribution
lines.append(f"## {section_name}")
lines.append("")
for stype, content in contributing_sources:
label = self._SOURCE_LABELS.get(stype, stype.title())
lines.append(f"### From {label}")
lines.append("")
lines.append(content)
lines.append("")
lines.append("---")
lines.append("")
lines.append("*Generated by Skill Seeker's unified multi-source scraper*")
return "\n".join(lines)
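The merge-with-attribution step can be sketched independently. `_parse_skill_md_sections` is not shown in this hunk, so `parse_sections` below is an assumed minimal equivalent that splits on `## ` headings; the sample SKILL.md fragments are hypothetical:

```python
def parse_sections(content):
    """Assumed minimal stand-in for _parse_skill_md_sections: split on '## ' headings."""
    sections, current, buf = {}, None, []
    for line in content.splitlines():
        if line.startswith("## "):
            if current:
                sections[current] = "\n".join(buf).strip()
            current, buf = line[3:].strip(), []
        elif current:
            buf.append(line)
    if current:
        sections[current] = "\n".join(buf).strip()
    return sections


docs = parse_sections("## Usage\nFrom docs.\n## Install\npip install x.")
pdf = parse_sections("## Usage\nFrom the PDF.")

merged = []
for name in docs:
    if name in pdf:
        # Both sources contribute -> attribute each under its own subheading.
        merged.append(
            f"## {name}\n### From Documentation\n{docs[name]}\n"
            f"### From PDF Document\n{pdf[name]}"
        )
    else:
        # Single contributor -> emit as-is with a source note.
        merged.append(f"## {name}\n*From Documentation*\n{docs[name]}")
```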
def _append_extra_sources(
self,
base_content: str,
skill_mds: dict[str, str],
extra_types: set[str],
) -> str:
"""Append additional source content to existing pairwise-synthesized SKILL.md.
Used when the core docs+github+pdf synthesis has run, but there are
additional source types (epub, jupyter, etc.) that need to be included.
Args:
base_content: Already-synthesized SKILL.md content
skill_mds: All source SKILL.md files
extra_types: Set of extra source type keys to append
Returns:
Extended SKILL.md content
"""
lines = base_content.split("\n")
# Find the final separator (---) or end of file
insertion_index = len(lines)
for i in range(len(lines) - 1, -1, -1):
if lines[i].strip() == "---":
insertion_index = i
break
# Build extra content
extra_lines = [""]
for source_type in sorted(extra_types):
if source_type not in skill_mds:
continue
label = self._SOURCE_LABELS.get(source_type, source_type.title())
sections = self._parse_skill_md_sections(skill_mds[source_type])
extra_lines.append(f"## {label} Content")
extra_lines.append("")
for section_name, content in sections.items():
extra_lines.append(f"### {section_name}")
extra_lines.append("")
extra_lines.append(content)
extra_lines.append("")
lines[insertion_index:insertion_index] = extra_lines
return "\n".join(lines)
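The splice performed by `_append_extra_sources` — walk backwards to the final `---` separator and insert the new sections just before it — can be checked in isolation (the sample content is hypothetical):

```python
base = "\n".join(
    [
        "# Skill",
        "Body text.",
        "---",
        "*Generated by Skill Seeker*",
    ]
)
lines = base.split("\n")

# Find the final "---" separator, defaulting to end-of-file.
insertion_index = len(lines)
for i in range(len(lines) - 1, -1, -1):
    if lines[i].strip() == "---":
        insertion_index = i
        break

extra = ["", "## EPUB E-book Content", "", "Chapter summaries here."]
lines[insertion_index:insertion_index] = extra  # splice before the separator
result = "\n".join(lines)
```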
def _generate_minimal_skill_md(self) -> str:
"""Generate minimal SKILL.md (legacy fallback behavior).
@@ -597,18 +806,42 @@ This skill combines knowledge from multiple sources:
"""
# Source type display keys: type -> (label, primary_key, extra_keys)
_source_detail_map = {
"documentation": ("Documentation", "base_url", [("Pages", "max_pages", "unlimited")]),
"github": (
"GitHub Repository",
"repo",
[("Code Analysis", "code_analysis_depth", "surface"), ("Issues", "max_issues", 0)],
),
"pdf": ("PDF Document", "path", []),
"word": ("Word Document", "path", []),
"epub": ("EPUB E-book", "path", []),
"video": ("Video", "url", []),
"local": ("Local Codebase", "path", [("Analysis Depth", "analysis_depth", "surface")]),
"jupyter": ("Jupyter Notebook", "path", []),
"html": ("HTML Document", "path", []),
"openapi": ("OpenAPI Spec", "path", []),
"asciidoc": ("AsciiDoc Document", "path", []),
"pptx": ("PowerPoint", "path", []),
"confluence": ("Confluence Wiki", "base_url", []),
"notion": ("Notion Page", "page_id", []),
"rss": ("RSS/Atom Feed", "url", []),
"manpage": ("Man Page", "names", []),
"chat": ("Chat Export", "path", []),
}
# List sources
for source in self.config.get("sources", []):
source_type = source["type"]
display = _source_detail_map.get(source_type, (source_type.title(), "path", []))
label, primary_key, extras = display
primary_val = source.get(primary_key, "N/A")
if isinstance(primary_val, list):
primary_val = ", ".join(str(v) for v in primary_val)
content += f"- ✅ **{label}**: {primary_val}\n"
for extra_label, extra_key, extra_default in extras:
content += f" - {extra_label}: {source.get(extra_key, extra_default)}\n"
# C3.x Architecture & Code Analysis section (if available)
github_data = self.scraped_data.get("github", {})
@@ -796,6 +1029,27 @@ This skill combines knowledge from multiple sources:
if pdf_list:
self._generate_pdf_references(pdf_list)
# Generate references for all additional source types
_extra_source_types = [
"word",
"epub",
"video",
"jupyter",
"html",
"openapi",
"asciidoc",
"pptx",
"confluence",
"notion",
"rss",
"manpage",
"chat",
]
for source_type in _extra_source_types:
source_list = self.scraped_data.get(source_type, [])
if source_list:
self._generate_generic_references(source_type, source_list)
# Generate merged API reference if available
if self.merged_data:
self._generate_merged_api_reference()
@@ -977,6 +1231,63 @@ This skill combines knowledge from multiple sources:
logger.info(f"Created PDF references ({len(pdf_list)} sources)")
def _generate_generic_references(self, source_type: str, source_list: list[dict]):
"""Generate references for any source type using a generic approach.
Creates a references/<source_type>/ directory with an index and
copies any data files from the source list.
Args:
source_type: The source type key (e.g., 'epub', 'jupyter')
source_list: List of scraped source dicts for this type
"""
if not source_list:
return
label = self._SOURCE_LABELS.get(source_type, source_type.title())
type_dir = os.path.join(self.skill_dir, "references", source_type)
os.makedirs(type_dir, exist_ok=True)
# Create index
index_path = os.path.join(type_dir, "index.md")
with open(index_path, "w", encoding="utf-8") as f:
f.write(f"# {label} References\n\n")
f.write(f"Reference from {len(source_list)} {label} source(s).\n\n")
for i, source_data in enumerate(source_list):
# Try common ID fields
source_id = (
source_data.get("source_id")
or source_data.get(f"{source_type}_id")
or source_data.get("notebook_id")
or source_data.get("spec_id")
or source_data.get("feed_id")
or source_data.get("man_id")
or source_data.get("chat_id")
or f"source_{i}"
)
f.write(f"## {source_id}\n\n")
# Write summary of extracted data
data = source_data.get("data", {})
if isinstance(data, dict):
for key in ["title", "description", "metadata"]:
if key in data:
val = data[key]
if isinstance(val, str) and val:
f.write(f"**{key.title()}:** {val}\n\n")
# Copy data file if available
data_file = source_data.get("data_file")
if data_file and os.path.isfile(data_file):
dest = os.path.join(type_dir, f"{source_id}_data.json")
                try:
                    shutil.copy(data_file, dest)
                except OSError:
                    pass
logger.info(f"Created {label} references ({len(source_list)} sources)")
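The index-writing loop above probes a series of per-type ID fields before falling back to a positional name. A self-contained sketch of that behavior (the sample source dicts are hypothetical):

```python
import os
import tempfile

source_list = [
    {"spec_id": "petstore", "data": {"title": "Petstore API"}},
    {"data": {}},  # no ID fields -> falls back to positional name
]

type_dir = os.path.join(tempfile.mkdtemp(), "references", "openapi")
os.makedirs(type_dir, exist_ok=True)
index_path = os.path.join(type_dir, "index.md")
with open(index_path, "w", encoding="utf-8") as f:
    f.write("# OpenAPI/Swagger Spec References\n\n")
    for i, source_data in enumerate(source_list):
        # Try a known ID field, then fall back to source_<i>.
        source_id = source_data.get("spec_id") or f"source_{i}"
        f.write(f"## {source_id}\n\n")
        title = source_data.get("data", {}).get("title")
        if isinstance(title, str) and title:
            f.write(f"**Title:** {title}\n\n")

with open(index_path, encoding="utf-8") as f:
    text = f.read()
```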
def _generate_merged_api_reference(self):
"""Generate merged API reference file."""
api_dir = os.path.join(self.skill_dir, "references", "api")

View File

@@ -3,16 +3,16 @@
Skill Seeker MCP Server (FastMCP Implementation)
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
Provides 34 tools for generating Claude AI skills from documentation.
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
All tool implementations are delegated to modular tool files in tools/ directory.
**Architecture:**
- FastMCP server with decorator-based tool registration
- 34 tools organized into 7 categories:
* Config tools (3): generate_config, list_configs, validate_config
* Scraping tools (11): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_video, scrape_codebase, detect_patterns, extract_test_examples, build_how_to_guides, extract_config_patterns, scrape_generic
* Packaging tools (4): package_skill, upload_skill, enhance_skill, install_skill
* Splitting tools (2): split_config, generate_router
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
@@ -97,6 +97,7 @@ try:
remove_config_source_impl,
scrape_codebase_impl,
scrape_docs_impl,
scrape_generic_impl,
scrape_github_impl,
scrape_pdf_impl,
scrape_video_impl,
@@ -141,6 +142,7 @@ except ImportError:
remove_config_source_impl,
scrape_codebase_impl,
scrape_docs_impl,
scrape_generic_impl,
scrape_github_impl,
scrape_pdf_impl,
scrape_video_impl,
@@ -301,7 +303,7 @@ async def sync_config(
# ============================================================================
# SCRAPING TOOLS (11 tools)
# ============================================================================
@@ -823,6 +825,50 @@ async def extract_config_patterns(
return str(result)
@safe_tool_decorator(
description="Scrape content from new source types: jupyter, html, openapi, asciidoc, pptx, confluence, notion, rss, manpage, chat. A generic entry point that delegates to the appropriate CLI scraper module."
)
async def scrape_generic(
source_type: str,
name: str,
path: str | None = None,
url: str | None = None,
) -> str:
"""
Scrape content from various source types and build a skill.
A generic scraper that supports 10 new source types. It delegates to the
corresponding CLI scraper module (e.g., skill_seekers.cli.jupyter_scraper).
File-based types (jupyter, html, openapi, asciidoc, pptx, manpage, chat)
typically use the 'path' parameter. URL-based types (confluence, notion, rss)
typically use the 'url' parameter.
Args:
source_type: Source type to scrape. One of: jupyter, html, openapi,
asciidoc, pptx, confluence, notion, rss, manpage, chat.
name: Skill name for the output
path: File or directory path (for file-based sources like jupyter, html, pptx)
url: URL (for URL-based sources like confluence, notion, rss)
Returns:
Scraping results with file paths and statistics.
"""
args = {
"source_type": source_type,
"name": name,
}
if path:
args["path"] = path
if url:
args["url"] = url
result = await scrape_generic_impl(args)
if isinstance(result, list) and result:
return result[0].text if hasattr(result[0], "text") else str(result[0])
return str(result)
# ============================================================================
# PACKAGING TOOLS (4 tools)
# ============================================================================

View File

@@ -63,6 +63,9 @@ from .scraping_tools import (
from .scraping_tools import (
scrape_pdf_tool as scrape_pdf_impl,
)
from .scraping_tools import (
scrape_generic_tool as scrape_generic_impl,
)
from .scraping_tools import (
scrape_video_tool as scrape_video_impl,
)
@@ -135,6 +138,7 @@ __all__ = [
"extract_test_examples_impl",
"build_how_to_guides_impl",
"extract_config_patterns_impl",
"scrape_generic_impl",
# Packaging tools
"package_skill_impl",
"upload_skill_impl",

View File

@@ -205,6 +205,18 @@ async def validate_config(args: dict) -> list[TextContent]:
)
elif source["type"] == "pdf":
result += f" Path: {source.get('path', 'N/A')}\n"
elif source["type"] in (
"jupyter",
"html",
"openapi",
"asciidoc",
"pptx",
"manpage",
"chat",
):
result += f" Path: {source.get('path', 'N/A')}\n"
elif source["type"] in ("confluence", "notion", "rss"):
result += f" URL: {source.get('url', 'N/A')}\n"
# Show merge settings if applicable
if validator.needs_api_merge():

View File

@@ -7,6 +7,8 @@ This module contains all scraping-related MCP tool implementations:
- scrape_github_tool: Scrape GitHub repositories
- scrape_pdf_tool: Scrape PDF documentation
- scrape_codebase_tool: Analyze local codebase and extract code knowledge
- scrape_generic_tool: Generic scraper for new source types (jupyter, html,
openapi, asciidoc, pptx, confluence, notion, rss, manpage, chat)
Extracted from server.py for better modularity and organization.
"""
@@ -1005,3 +1007,155 @@ async def extract_config_patterns_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=output_text)]
else:
return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]
# Valid source types for the generic scraper
GENERIC_SOURCE_TYPES = (
"jupyter",
"html",
"openapi",
"asciidoc",
"pptx",
"confluence",
"notion",
"rss",
"manpage",
"chat",
)
# Source types whose primary input is a URL rather than a local file/directory.
# The exact CLI flag for each type is resolved later via _URL_FLAGS / _PATH_FLAGS.
_URL_BASED_TYPES = {"confluence", "notion", "rss"}
# Friendly emoji labels per source type
_SOURCE_EMOJIS = {
"jupyter": "📓",
"html": "🌐",
"openapi": "📡",
"asciidoc": "📄",
"pptx": "📊",
"confluence": "🏢",
"notion": "📝",
"rss": "📰",
"manpage": "📖",
"chat": "💬",
}
async def scrape_generic_tool(args: dict) -> list[TextContent]:
"""
Generic scraper for new source types.
Handles all 10 new source types by building the appropriate subprocess
command and delegating to the corresponding CLI scraper module.
Supported source types: jupyter, html, openapi, asciidoc, pptx,
confluence, notion, rss, manpage, chat.
Args:
args: Dictionary containing:
- source_type (str): One of the supported source types
- path (str, optional): File or directory path (for file-based sources)
- url (str, optional): URL (for URL-based sources like confluence, notion, rss)
- name (str): Skill name for the output
Returns:
List[TextContent]: Tool execution results
"""
source_type = args.get("source_type", "")
path = args.get("path")
url = args.get("url")
name = args.get("name")
# Validate source_type
if source_type not in GENERIC_SOURCE_TYPES:
return [
TextContent(
type="text",
text=(
f"❌ Error: Unknown source_type '{source_type}'. "
f"Must be one of: {', '.join(GENERIC_SOURCE_TYPES)}"
),
)
]
# Validate that we have either path or url
if not path and not url:
return [
TextContent(
type="text",
text="❌ Error: Must specify either 'path' (file/directory) or 'url'",
)
]
if not name:
return [
TextContent(
type="text",
text="❌ Error: 'name' parameter is required",
)
]
# Build the subprocess command
# Map source type to module name (most are <type>_scraper, but some differ)
_MODULE_NAMES = {
"manpage": "man_scraper",
}
module_name = _MODULE_NAMES.get(source_type, f"{source_type}_scraper")
cmd = [sys.executable, "-m", f"skill_seekers.cli.{module_name}"]
# Map source type to the correct CLI flag for file/path input and URL input.
# Each scraper has its own flag name — using a generic --path or --url would fail.
_PATH_FLAGS: dict[str, str] = {
"jupyter": "--notebook",
"html": "--html-path",
"openapi": "--spec",
"asciidoc": "--asciidoc-path",
"pptx": "--pptx",
"manpage": "--man-path",
"confluence": "--export-path",
"notion": "--export-path",
"rss": "--feed-path",
"chat": "--export-path",
}
_URL_FLAGS: dict[str, str] = {
"confluence": "--base-url",
"notion": "--page-id",
"rss": "--feed-url",
"openapi": "--spec-url",
}
# Determine the input flag based on source type
if source_type in _URL_BASED_TYPES and url:
url_flag = _URL_FLAGS.get(source_type, "--url")
cmd.extend([url_flag, url])
elif path:
path_flag = _PATH_FLAGS.get(source_type, "--path")
cmd.extend([path_flag, path])
elif url and source_type in _URL_FLAGS:
# URL fallback only for file-based types with a known URL flag (e.g. openapi --spec-url)
cmd.extend([_URL_FLAGS[source_type], url])
else:
return [TextContent(type="text", text=f"❌ Error: source_type '{source_type}' requires 'path', not a URL")]
cmd.extend(["--name", name])
# Set a reasonable timeout
timeout = 600 # 10 minutes
emoji = _SOURCE_EMOJIS.get(source_type, "🔧")
progress_msg = f"{emoji} Scraping {source_type} source...\n"
if path:
progress_msg += f"📁 Path: {path}\n"
if url:
progress_msg += f"🔗 URL: {url}\n"
progress_msg += f"📛 Name: {name}\n"
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
output = progress_msg + stdout
if returncode == 0:
return [TextContent(type="text", text=output)]
else:
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
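The flag mapping above is pure data transformation, so it can be exercised without spawning a subprocess. A minimal sketch of the same dispatch logic (the `build_generic_cmd` helper name is hypothetical, not part of the module):

```python
import sys

_URL_BASED_TYPES = {"confluence", "notion", "rss"}
_PATH_FLAGS = {
    "jupyter": "--notebook", "html": "--html-path", "openapi": "--spec",
    "asciidoc": "--asciidoc-path", "pptx": "--pptx", "manpage": "--man-path",
    "confluence": "--export-path", "notion": "--export-path",
    "rss": "--feed-path", "chat": "--export-path",
}
_URL_FLAGS = {
    "confluence": "--base-url", "notion": "--page-id",
    "rss": "--feed-url", "openapi": "--spec-url",
}
_MODULE_NAMES = {"manpage": "man_scraper"}


def build_generic_cmd(source_type, name, path=None, url=None):
    """Mirror scrape_generic_tool's command construction (sketch only)."""
    module = _MODULE_NAMES.get(source_type, f"{source_type}_scraper")
    cmd = [sys.executable, "-m", f"skill_seekers.cli.{module}"]
    if source_type in _URL_BASED_TYPES and url:
        cmd += [_URL_FLAGS.get(source_type, "--url"), url]
    elif path:
        cmd += [_PATH_FLAGS.get(source_type, "--path"), path]
    elif url and source_type in _URL_FLAGS:
        cmd += [_URL_FLAGS[source_type], url]
    return cmd + ["--name", name]


# manpage resolves to the man_scraper module and its --man-path flag;
# rss is URL-based and resolves to --feed-url
cmd = build_generic_cmd("manpage", "git-docs", path="/usr/share/man/man1")
cmd2 = build_generic_cmd("rss", "blog", url="https://example.com/feed.xml")
```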

View File

@@ -106,7 +106,9 @@ async def split_config(args: dict) -> list[TextContent]:
Supports both documentation and unified (multi-source) configs:
- Documentation configs: Split by categories, size, or create router skills
- Unified configs: Split by source type (documentation, github, pdf)
- Unified configs: Split by source type (documentation, github, pdf,
jupyter, html, openapi, asciidoc, pptx, confluence, notion, rss,
manpage, chat)
For large documentation sites (10K+ pages), this tool splits the config into
multiple smaller configs. For unified configs with multiple sources, splits
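Splitting a unified config by source type amounts to grouping its sources list. A minimal sketch, assuming a config shape like `{"name": ..., "sources": [...]}` (the actual schema may carry more fields, which the dict spread preserves):

```python
from collections import defaultdict


def split_by_source_type(config: dict) -> dict:
    """Group a unified config's sources into one sub-config per type (sketch)."""
    groups = defaultdict(list)
    for source in config.get("sources", []):
        groups[source["type"]].append(source)
    # Each sub-config keeps the parent's settings but narrows the sources
    return {
        stype: {**config, "name": f"{config['name']}-{stype}", "sources": sources}
        for stype, sources in groups.items()
    }


cfg = {"name": "mylib", "sources": [
    {"type": "pdf", "path": "docs.pdf"},
    {"type": "rss", "url": "https://example.com/feed"},
    {"type": "pdf", "path": "guide.pdf"},
]}
parts = split_by_source_type(cfg)
# Two sub-configs: 'pdf' (2 sources) and 'rss' (1 source)
```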

View File

@@ -0,0 +1,222 @@
name: complex-merge
description: Intelligent multi-source merging with conflict resolution, priority rules, and gap analysis
version: "1.0"
author: Skill Seekers
tags:
- merge
- multi-source
- conflict-resolution
- synthesis
applies_to:
- doc_scraping
- codebase_analysis
- github_analysis
variables:
merge_strategy: priority
source_priority_order: "official_docs,code,community"
conflict_resolution: highest_priority
min_sources_for_consensus: 2
stages:
- name: source_inventory
type: custom
target: inventory
uses_history: false
enabled: true
prompt: >
Catalog every source that contributed content to this skill extraction.
For each source, classify its type, assess its characteristics, and determine:
1. Source type (official_docs, codebase, github_repo, pdf, video, community, blog)
2. Content scope — what topics or areas does this source cover?
3. Freshness — how recent is the content? Look for version numbers, dates, deprecation notices
4. Authority level — is this an official maintainer, core contributor, or third party?
5. Content density — roughly how much substantive information does this source provide?
6. Format characteristics — prose, code samples, API reference, tutorial, etc.
Output JSON with:
- "sources": array of {id, type, scope_summary, topics_covered, freshness_estimate, authority, density, format}
- "source_type_distribution": count of sources by type
- "total_topics_identified": number of unique topics across all sources
- "coverage_summary": brief overview of what the combined sources cover
- name: cross_reference
type: custom
target: cross_references
uses_history: true
enabled: true
prompt: >
Using the source inventory, identify overlapping topics across sources.
Find where multiple sources discuss the same concept, API, feature, or pattern.
For each overlapping topic:
1. List which sources cover it and how deeply
2. Note whether sources agree, complement each other, or diverge
3. Identify the richest source for that topic (most detail, best examples)
4. Flag any terminology differences across sources for the same concept
Output JSON with:
- "overlapping_topics": array of {topic, sources_covering, agreement_level, richest_source, terminology_variants}
- "high_overlap_topics": topics covered by 3+ sources
- "complementary_pairs": pairs of sources that cover different aspects of the same topic well
- "terminology_map": dictionary mapping variant terms to a canonical term
- name: conflict_detection
type: custom
target: conflicts
uses_history: true
enabled: true
prompt: >
Examine the cross-referenced topics and identify genuine contradictions
between sources. Distinguish between true conflicts and superficial differences.
Categories of conflict to detect:
1. Factual contradictions — sources state opposite things about the same feature
2. Version mismatches — sources describe different versions of an API or behavior
3. Best practice disagreements — sources recommend conflicting approaches
4. Deprecated vs current — one source shows deprecated usage, another shows current
5. Scope conflicts — sources disagree on what a feature can or cannot do
For each conflict:
- Identify the specific claim from each source
- Assess which source is more likely correct and why
- Recommend a resolution strategy
Output JSON with:
- "conflicts": array of {topic, type, source_a_claim, source_b_claim, likely_correct, resolution_rationale}
- "conflict_count_by_type": breakdown of conflicts by category
- "high_severity_conflicts": conflicts that would mislead users if unresolved
- "auto_resolvable": conflicts that can be resolved by version/date alone
- name: priority_merge
type: custom
target: merged_content
uses_history: true
enabled: true
prompt: >
Merge content from all sources using the following priority hierarchy:
1. Official documentation (highest authority)
2. Source code and inline comments (ground truth for behavior)
3. Community content — tutorials, blog posts, Stack Overflow (practical usage)
Merging rules:
- When sources agree, combine the best explanation with the best examples
- When sources conflict, prefer the higher-priority source but note the alternative
- When only a lower-priority source covers a topic, include it but flag the authority level
- Preserve code examples from any source, annotating their origin
- Deduplicate content — do not repeat the same information from multiple sources
- Normalize terminology using the canonical terms from cross-referencing
For each merged topic, produce:
1. Authoritative explanation (from highest-priority source)
2. Practical examples (best available from any source)
3. Source attribution (which sources contributed)
4. Confidence level (high if official docs confirm, medium if code-only, low if community-only)
Output JSON with:
- "merged_topics": array of {topic, explanation, examples, sources_used, confidence, notes}
- "merge_decisions": array of {topic, decision, rationale} for non-trivial merges
- "source_contribution_stats": how much each source contributed to the final output
- name: gap_analysis
type: custom
target: gaps
uses_history: true
enabled: true
prompt: >
Analyze the merged content to identify gaps — topics or areas that are
underrepresented or missing entirely.
Identify:
1. Single-source topics — covered by only one source, making them fragile
2. Missing fundamentals — core concepts that should be documented but are not
3. Missing examples — topics explained in prose but lacking code samples
4. Missing edge cases — common error scenarios or limitations not documented
5. Broken references — topics that reference other topics not present in any source
6. Audience gaps — content assumes knowledge that is never introduced
For each gap, assess:
- Severity (critical, important, nice-to-have)
- Whether the gap can be inferred from existing content
- Suggested source type that would best fill this gap
Output JSON with:
- "single_source_topics": array of {topic, sole_source, risk_level}
- "missing_fundamentals": topics that should exist but do not
- "example_gaps": topics needing code examples
- "edge_case_gaps": undocumented error scenarios
- "broken_references": internal references with no target
- "gap_severity_summary": counts by severity level
- name: synthesis
type: custom
target: skill_md
uses_history: true
enabled: true
prompt: >
Create a unified, coherent narrative from the merged content. The output
should read as if written by a single knowledgeable author, not as a
patchwork of multiple sources.
Synthesis guidelines:
1. Structure content logically — concepts build on each other
2. Lead with the most important information for each topic
3. Integrate code examples naturally within explanations
4. Use consistent voice, terminology, and formatting throughout
5. Add transition text between topics for narrative flow
6. Include a "Sources and Confidence" appendix noting where information came from
7. Mark any low-confidence or single-source claims with a caveat
8. Fill minor gaps by inference where safe to do so, clearly marking inferred content
Output JSON with:
- "synthesized_sections": array of {title, content, sources_used, confidence}
- "section_order": recommended reading order
- "inferred_content": content that was inferred rather than directly sourced
- "caveats": any warnings about content reliability
- name: quality_check
type: custom
target: quality
uses_history: true
enabled: true
prompt: >
Perform a final quality review of the synthesized output. Evaluate the
merge result against multiple quality dimensions.
Check for:
1. Completeness — does the output cover all topics from all sources?
2. Accuracy — are merged claims consistent and non-contradictory?
3. Coherence — does the document flow logically as a unified piece?
4. Attribution — are source contributions properly tracked?
5. Confidence calibration — are confidence levels appropriate?
6. Example quality — are code examples correct, runnable, and well-annotated?
7. Terminology consistency — is the canonical terminology used throughout?
8. Gap acknowledgment — are known gaps clearly communicated?
Scoring:
- Rate each dimension 1-10
- Provide specific issues found for any dimension scoring below 7
- Suggest concrete fixes for each issue
Output JSON with:
- "quality_scores": {completeness, accuracy, coherence, attribution, confidence_calibration, example_quality, terminology_consistency, gap_acknowledgment}
- "overall_score": weighted average (accuracy and completeness weighted 2x)
- "issues_found": array of {dimension, description, severity, suggested_fix}
- "merge_health": "excellent" | "good" | "needs_review" | "poor" based on overall score
- "recommendations": top 3 actions to improve merge quality
post_process:
reorder_sections:
- overview
- core_concepts
- api_reference
- examples
- advanced_topics
- troubleshooting
- sources_and_confidence
add_metadata:
enhanced: true
workflow: complex-merge
multi_source: true
conflict_resolution: priority
quality_checked: true
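The priority hierarchy described in the priority_merge stage can be sketched in plain Python — a hedged illustration of the resolution rule, not the actual merge implementation (the topic/claim structures are hypothetical):

```python
# Lower index = higher authority, mirroring the stage's merging rules
PRIORITY = ["official_docs", "code", "community"]
CONFIDENCE = {"official_docs": "high", "code": "medium", "community": "low"}


def resolve_topic(claims: list) -> dict:
    """Pick the highest-priority claim for a topic; keep alternatives as notes."""
    ranked = sorted(claims, key=lambda c: PRIORITY.index(c["source_type"]))
    winner, alternatives = ranked[0], ranked[1:]
    return {
        "explanation": winner["claim"],
        "sources_used": [c["source_type"] for c in ranked],
        "confidence": CONFIDENCE[winner["source_type"]],
        "notes": [f"{c['source_type']}: {c['claim']}" for c in alternatives],
    }


merged = resolve_topic([
    {"source_type": "community", "claim": "retry defaults to 3 attempts"},
    {"source_type": "official_docs", "claim": "retry defaults to 5 attempts"},
])
# The official_docs claim wins; the community claim survives as a note
```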