From b81d55fda06f1b45d5e01830b1e71ddd50ffbfac Mon Sep 17 00:00:00 2001
From: yusyus
Date: Wed, 25 Feb 2026 21:47:30 +0300
Subject: [PATCH] feat(B2): add Microsoft Word (.docx) support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements ROADMAP task B2 — full .docx scraping support via mammoth +
python-docx, producing SKILL.md + references/ output identical to other
source types.

New files:
- src/skill_seekers/cli/word_scraper.py — WordToSkillConverter class +
  main() entry point (~600 lines); mammoth → BeautifulSoup pipeline;
  handles headings, code detection (incl. monospace <p><br> blocks),
  tables, images, metadata extraction
- src/skill_seekers/cli/arguments/word.py — add_word_arguments() +
  WORD_ARGUMENTS dict
- src/skill_seekers/cli/parsers/word_parser.py — WordParser for unified
  CLI parser registry
- tests/test_word_scraper.py — comprehensive test suite (~300 lines)

Modified files:
- src/skill_seekers/cli/main.py — registered "word" command module
- src/skill_seekers/cli/source_detector.py — .docx auto-detection +
  _detect_word() classmethod
- src/skill_seekers/cli/create_command.py — _route_word() + --help-word
- src/skill_seekers/cli/arguments/create.py — WORD_ARGUMENTS + routing
- src/skill_seekers/cli/arguments/__init__.py — export word args
- src/skill_seekers/cli/parsers/__init__.py — register WordParser
- src/skill_seekers/cli/unified_scraper.py — _scrape_word() integration
- src/skill_seekers/cli/pdf_scraper.py — fix: real enhancement instead
  of stub; remove [:3] reference file limit; capture run_workflows return
- src/skill_seekers/cli/github_scraper.py — fix: remove arbitrary
  open_issues[:20] / closed_issues[:10] reference file limits
- pyproject.toml — skill-seekers-word entry point + docx optional dep
- tests/test_cli_parsers.py — update parser count 21→22

Bug fixes applied during real-world testing:
- Code detection: detect monospace <p><br> blocks as code (mammoth
  renders Courier paragraphs this way, not as <pre>/<code>)
- Language detector: fix wrong method name detect_from_text →
  detect_from_code
- Description inference: pass None from main() so extract_docx() can
  infer description from Word document subject/title metadata
- Bullet-point guard: exclude prose starting with •/-/* from code scoring
- Enhancement: implement real API/LOCAL enhancement (was stub)
- pip install message: add quotes around skill-seekers[docx]

Co-Authored-By: Claude Sonnet 4.6 
---
 AGENTS.md                                    |   72 +-
 pyproject.toml                               |    9 +
 src/skill_seekers/cli/arguments/__init__.py  |    3 +
 src/skill_seekers/cli/arguments/create.py    |   20 +-
 src/skill_seekers/cli/arguments/word.py      |   66 ++
 src/skill_seekers/cli/create_command.py      |   39 +
 src/skill_seekers/cli/github_scraper.py      |    4 +-
 src/skill_seekers/cli/main.py                |    1 +
 src/skill_seekers/cli/parsers/__init__.py    |    2 +
 src/skill_seekers/cli/parsers/word_parser.py |   32 +
 src/skill_seekers/cli/pdf_scraper.py         |   37 +-
 src/skill_seekers/cli/source_detector.py     |   19 +
 src/skill_seekers/cli/unified_scraper.py     |   64 +-
 src/skill_seekers/cli/word_scraper.py        | 1054 ++++++++++++++++++
 tests/test_cli_parsers.py                    |   10 +-
 tests/test_word_scraper.py                   |  677 +++++++++++
 uv.lock                                      |  172 ++-
 17 files changed, 2214 insertions(+), 67 deletions(-)
 create mode 100644 src/skill_seekers/cli/arguments/word.py
 create mode 100644 src/skill_seekers/cli/parsers/word_parser.py
 create mode 100644 src/skill_seekers/cli/word_scraper.py
 create mode 100644 tests/test_word_scraper.py

diff --git a/AGENTS.md b/AGENTS.md
index 6a59bfe..553288a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -12,10 +12,12 @@ This file provides essential guidance for AI coding agents working with the Skil
 
 | Attribute | Value |
 |-----------|-------|
-| **Current Version** | 3.0.0 |
+| **Current Version** | 3.1.3 |
 | **Python Version** | 3.10+ (tested on 3.10, 3.11, 3.12, 3.13) |
 | **License** | MIT |
 | **Package Name** | `skill-seekers` (PyPI) |
+| **Source Files** | 169 Python files |
+| **Test Files** | 101 test files |
 | **Website** | https://skillseekersweb.com/ |
 | **Repository** | https://github.com/yusufkaraaslan/Skill_Seekers |
 
@@ -55,7 +57,7 @@ This file provides essential guidance for AI coding agents working with the Skil
 ```
 /mnt/1ece809a-2821-4f10-aecb-fcdf34760c0b/Git/Skill_Seekers/
 ├── src/skill_seekers/              # Main source code (src/ layout)
-│   ├── cli/                        # CLI tools and commands (~42k lines)
+│   ├── cli/                        # CLI tools and commands (~70 modules)
 │   │   ├── adaptors/               # Platform adaptors (Strategy pattern)
 │   │   │   ├── base.py             # Abstract base class (SkillAdaptor)
 │   │   │   ├── claude.py           # Claude AI adaptor
@@ -70,12 +72,6 @@ This file provides essential guidance for AI coding agents working with the Skil
 │   │   │   ├── qdrant.py           # Qdrant vector DB adaptor
 │   │   │   ├── weaviate.py         # Weaviate vector DB adaptor
 │   │   │   └── streaming_adaptor.py # Streaming output adaptor
-│   │   ├── storage/                # Cloud storage backends
-│   │   │   ├── base_storage.py     # Storage interface
-│   │   │   ├── s3_storage.py       # AWS S3 support
-│   │   │   ├── gcs_storage.py      # Google Cloud Storage
-│   │   │   └── azure_storage.py    # Azure Blob Storage
-│   │   ├── parsers/                # CLI argument parsers
 │   │   ├── arguments/              # CLI argument definitions
 │   │   ├── presets/                # Preset configuration management
 │   │   ├── main.py                 # Unified CLI entry point
@@ -85,6 +81,7 @@ This file provides essential guidance for AI coding agents working with the Skil
 │   │   ├── pdf_scraper.py          # PDF extraction
 │   │   ├── unified_scraper.py      # Multi-source scraping
 │   │   ├── codebase_scraper.py     # Local codebase analysis
+│   │   ├── enhance_command.py      # AI enhancement command
 │   │   ├── enhance_skill_local.py  # AI enhancement (local mode)
 │   │   ├── package_skill.py        # Skill packager
 │   │   ├── upload_skill.py         # Upload to platforms
@@ -101,8 +98,8 @@ This file provides essential guidance for AI coding agents working with the Skil
 │   │   ├── source_manager.py       # Config source management
 │   │   └── tools/                  # MCP tool implementations
 │   │       ├── config_tools.py     # Configuration tools
-│   │       ├── scraping_tools.py   # Scraping tools
 │   │       ├── packaging_tools.py  # Packaging tools
+│   │       ├── scraping_tools.py   # Scraping tools
 │   │       ├── source_tools.py     # Source management tools
 │   │       ├── splitting_tools.py  # Config splitting tools
 │   │       ├── vector_db_tools.py  # Vector database tools
@@ -124,7 +121,7 @@ This file provides essential guidance for AI coding agents working with the Skil
 │   ├── workflows/                  # YAML workflow presets
 │   ├── _version.py                 # Version information (reads from pyproject.toml)
 │   └── __init__.py                 # Package init
-├── tests/                          # Test suite (98 test files)
+├── tests/                          # Test suite (101 test files)
 ├── configs/                        # Preset configuration files
 ├── docs/                           # Documentation (80+ markdown files)
 │   ├── integrations/               # Platform integration guides
@@ -134,17 +131,6 @@ This file provides essential guidance for AI coding agents working with the Skil
 │   ├── blog/                       # Blog posts
 │   └── roadmap/                    # Roadmap documents
 ├── examples/                       # Usage examples
-│   ├── langchain-rag-pipeline/     # LangChain example
-│   ├── llama-index-query-engine/   # LlamaIndex example
-│   ├── pinecone-upsert/            # Pinecone example
-│   ├── chroma-example/             # Chroma example
-│   ├── weaviate-example/           # Weaviate example
-│   ├── qdrant-example/             # Qdrant example
-│   ├── faiss-example/              # FAISS example
-│   ├── haystack-pipeline/          # Haystack example
-│   ├── cursor-react-skill/         # Cursor IDE example
-│   ├── windsurf-fastapi-context/   # Windsurf example
-│   └── continue-dev-universal/     # Continue.dev example
 ├── .github/workflows/              # CI/CD workflows
 ├── pyproject.toml                  # Main project configuration
 ├── requirements.txt                # Pinned dependencies
@@ -259,7 +245,7 @@ pytest tests/ -v -m "not slow and not integration"
 
 ### Test Architecture
 
-- **98 test files** covering all features
+- **101 test files** covering all features
 - **1880+ tests** passing
 - CI Matrix: Ubuntu + macOS, Python 3.10-3.12
 - Test markers defined in `pyproject.toml`:
@@ -316,22 +302,19 @@ mypy src/skill_seekers --show-error-codes --pretty
 - **Ignored rules:** E501, F541, ARG002, B007, I001, SIM114
 - **Import sorting:** isort style with `skill_seekers` as first-party
 
-### MyPy Configuration (from mypy.ini)
+### MyPy Configuration (from pyproject.toml)
 
-```ini
-[mypy]
-python_version = 3.10
-warn_return_any = False
-warn_unused_configs = True
-disallow_untyped_defs = False
-check_untyped_defs = True
-ignore_missing_imports = True
-no_implicit_optional = True
-show_error_codes = True
-
-# Gradual typing - be lenient for now
-disallow_incomplete_defs = False
-disallow_untyped_calls = False
+```toml
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+disallow_incomplete_defs = false
+check_untyped_defs = true
+ignore_missing_imports = true
+show_error_codes = true
+pretty = true
 ```
 
 ### Code Conventions
@@ -662,17 +645,6 @@ Preset configs are in `configs/` directory:
 - `astrovalley_unified.json` - Astrovalley
 - `configs/integrations/` - Integration-specific configs
 
-### Configuration Documentation
-
-Preset configs are in `configs/` directory:
-- `godot.json` - Godot Engine
-- `blender.json` / `blender-unified.json` - Blender Engine
-- `claude-code.json` - Claude Code
-- `httpx_comprehensive.json` - HTTPX library
-- `medusa-mercurjs.json` - Medusa/MercurJS
-- `astrovalley_unified.json` - Astrovalley
-- `configs/integrations/` - Integration-specific configs
-
 ---
 
 ## Key Dependencies
@@ -700,6 +672,8 @@ Preset configs are in `configs/` directory:
 | `python-dotenv` | >=1.1.1 | Environment variables |
 | `jsonschema` | >=4.25.1 | JSON validation |
 | `PyYAML` | >=6.0 | YAML parsing |
+| `langchain` | >=1.2.10 | LangChain integration |
+| `llama-index` | >=0.14.15 | LlamaIndex integration |
 
 ### Optional Dependencies
 
@@ -852,4 +826,4 @@ Skill Seekers uses JSON configuration files to define scraping targets. Example
 
 *This document is maintained for AI coding agents. For human contributors, see README.md and CONTRIBUTING.md.*
 
-*Last updated: 2026-02-16*
+*Last updated: 2026-02-24*
diff --git a/pyproject.toml b/pyproject.toml
index f6b9430..0c2a3ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,6 +109,12 @@ azure = [
     "azure-storage-blob>=12.19.0",
 ]
 
+# Word document (.docx) support
+docx = [
+    "mammoth>=1.6.0",
+    "python-docx>=1.1.0",
+]
+
 # RAG vector database upload support
 chroma = [
     "chromadb>=0.4.0",
@@ -146,6 +152,8 @@ embedding = [
 
 # All optional dependencies combined (dev dependencies now in [dependency-groups])
 all = [
+    "mammoth>=1.6.0",
+    "python-docx>=1.1.0",
     "mcp>=1.25,<2",
     "httpx>=0.28.1",
     "httpx-sse>=0.4.3",
@@ -186,6 +194,7 @@ skill-seekers-resume = "skill_seekers.cli.resume_command:main"
 skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
 skill-seekers-github = "skill_seekers.cli.github_scraper:main"
 skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
+skill-seekers-word = "skill_seekers.cli.word_scraper:main"
 skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
 skill-seekers-enhance = "skill_seekers.cli.enhance_command:main"
 skill-seekers-enhance-status = "skill_seekers.cli.enhance_status:main"
diff --git a/src/skill_seekers/cli/arguments/__init__.py b/src/skill_seekers/cli/arguments/__init__.py
index 929b36e..128e22c 100644
--- a/src/skill_seekers/cli/arguments/__init__.py
+++ b/src/skill_seekers/cli/arguments/__init__.py
@@ -21,6 +21,7 @@ from .common import add_common_arguments, COMMON_ARGUMENTS
 from .scrape import add_scrape_arguments, SCRAPE_ARGUMENTS
 from .github import add_github_arguments, GITHUB_ARGUMENTS
 from .pdf import add_pdf_arguments, PDF_ARGUMENTS
+from .word import add_word_arguments, WORD_ARGUMENTS
 from .analyze import add_analyze_arguments, ANALYZE_ARGUMENTS
 from .unified import add_unified_arguments, UNIFIED_ARGUMENTS
 from .package import add_package_arguments, PACKAGE_ARGUMENTS
@@ -38,11 +39,13 @@ __all__ = [
     "add_package_arguments",
     "add_upload_arguments",
     "add_enhance_arguments",
+    "add_word_arguments",
     # Data
     "COMMON_ARGUMENTS",
     "SCRAPE_ARGUMENTS",
     "GITHUB_ARGUMENTS",
     "PDF_ARGUMENTS",
+    "WORD_ARGUMENTS",
     "ANALYZE_ARGUMENTS",
     "UNIFIED_ARGUMENTS",
     "PACKAGE_ARGUMENTS",
diff --git a/src/skill_seekers/cli/arguments/create.py b/src/skill_seekers/cli/arguments/create.py
index c1aa7ad..03b30c7 100644
--- a/src/skill_seekers/cli/arguments/create.py
+++ b/src/skill_seekers/cli/arguments/create.py
@@ -389,6 +389,18 @@ PDF_ARGUMENTS: dict[str, dict[str, Any]] = {
     },
 }
 
+# Word document specific (from word.py)
+WORD_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "docx": {
+        "flags": ("--docx",),
+        "kwargs": {
+            "type": str,
+            "help": "DOCX file path",
+            "metavar": "PATH",
+        },
+    },
+}
+
 # Multi-source config specific (from unified_scraper.py)
 CONFIG_ARGUMENTS: dict[str, dict[str, Any]] = {
     "merge_mode": {
@@ -471,6 +483,7 @@ def get_source_specific_arguments(source_type: str) -> dict[str, dict[str, Any]]
         "github": GITHUB_ARGUMENTS,
         "local": LOCAL_ARGUMENTS,
         "pdf": PDF_ARGUMENTS,
+        "word": WORD_ARGUMENTS,
         "config": CONFIG_ARGUMENTS,
     }
     return source_args.get(source_type, {})
@@ -507,12 +520,13 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default")
     - 'github': Universal + github-specific
     - 'local': Universal + local-specific
     - 'pdf': Universal + pdf-specific
+    - 'word': Universal + word-specific
     - 'advanced': Advanced/rare arguments
     - 'all': All 120+ arguments
 
     Args:
         parser: ArgumentParser to add arguments to
-        mode: Help mode (default, web, github, local, pdf, advanced, all)
+        mode: Help mode (default, web, github, local, pdf, word, advanced, all)
     """
     # Positional argument for source
     parser.add_argument(
@@ -543,6 +557,10 @@ def add_create_arguments(parser: argparse.ArgumentParser, mode: str = "default")
         for arg_name, arg_def in PDF_ARGUMENTS.items():
             parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
 
+    if mode in ["word", "all"]:
+        for arg_name, arg_def in WORD_ARGUMENTS.items():
+            parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
+
     if mode in ["config", "all"]:
         for arg_name, arg_def in CONFIG_ARGUMENTS.items():
             parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
diff --git a/src/skill_seekers/cli/arguments/word.py b/src/skill_seekers/cli/arguments/word.py
new file mode 100644
index 0000000..0c254b2
--- /dev/null
+++ b/src/skill_seekers/cli/arguments/word.py
@@ -0,0 +1,66 @@
+"""Word document command argument definitions.
+
+This module defines ALL arguments for the word command in ONE place.
+Both word_scraper.py (standalone) and parsers/word_parser.py (unified CLI)
+import and use these definitions.
+
+Shared arguments (name, description, output, enhance-level, api-key,
+dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
+via ``add_all_standard_arguments()``.
+"""
+
+import argparse
+from typing import Any
+
+from .common import add_all_standard_arguments
+
+# Word-specific argument definitions as data structure
+# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
+#       verbose, quiet, workflow args) are registered by add_all_standard_arguments().
+WORD_ARGUMENTS: dict[str, dict[str, Any]] = {
+    "docx": {
+        "flags": ("--docx",),
+        "kwargs": {
+            "type": str,
+            "help": "Direct DOCX file path",
+            "metavar": "PATH",
+        },
+    },
+    "from_json": {
+        "flags": ("--from-json",),
+        "kwargs": {
+            "type": str,
+            "help": "Build skill from extracted JSON",
+            "metavar": "FILE",
+        },
+    },
+}
+
+
+def add_word_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register every argument of the ``word`` command on *parser*.
+
+    The shared arguments are added first through
+    ``add_all_standard_arguments()``; the entries of ``WORD_ARGUMENTS``
+    are added afterwards.
+
+    Any registered action whose dest is ``enhance_level`` gets default 0.
+    """
+    word_enhance_help = (
+        "AI enhancement level (auto-detects API vs LOCAL mode): "
+        "0=disabled (default for Word), 1=SKILL.md only, 2=+architecture/config, 3=full enhancement. "
+        "Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
+    )
+
+    # Universal arguments shared with the other commands
+    add_all_standard_arguments(parser)
+
+    # Word keeps AI enhancement off unless it is explicitly requested
+    for registered in parser._actions:
+        if getattr(registered, "dest", None) == "enhance_level":
+            registered.default = 0
+            registered.help = word_enhance_help
+
+    # Arguments that only the word command defines
+    for spec in WORD_ARGUMENTS.values():
+        parser.add_argument(*spec["flags"], **spec["kwargs"])
diff --git a/src/skill_seekers/cli/create_command.py b/src/skill_seekers/cli/create_command.py
index ac9badc..15e68a8 100644
--- a/src/skill_seekers/cli/create_command.py
+++ b/src/skill_seekers/cli/create_command.py
@@ -131,6 +131,8 @@ class CreateCommand:
             return self._route_local()
         elif self.source_info.type == "pdf":
             return self._route_pdf()
+        elif self.source_info.type == "word":
+            return self._route_word()
         elif self.source_info.type == "config":
             return self._route_config()
         else:
@@ -320,6 +322,29 @@ class CreateCommand:
         finally:
             sys.argv = original_argv
 
+    def _route_word(self) -> int:
+        """Dispatch the detected .docx source to ``word_scraper.main()``.
+
+        ``sys.argv`` is swapped for a rebuilt argument list during the call
+        and restored afterwards, even when ``main()`` raises.
+        """
+        from skill_seekers.cli import word_scraper
+
+        # Program name first, then the document path from the detected source
+        docx_file = self.source_info.parsed["file_path"]
+        scraper_argv = ["word_scraper", "--docx", docx_file]
+
+        # Universal arguments are added by the shared helper
+        self._add_common_args(scraper_argv)
+
+        logger.debug(f"Calling word_scraper with argv: {scraper_argv}")
+        saved_argv = sys.argv
+        try:
+            sys.argv = scraper_argv
+            return word_scraper.main()
+        finally:
+            sys.argv = saved_argv
+
     def _route_config(self) -> int:
         """Route to unified scraper for config files (unified_scraper.py)."""
         from skill_seekers.cli import unified_scraper
@@ -442,6 +467,7 @@ Examples:
   GitHub:   skill-seekers create facebook/react -p standard
   Local:    skill-seekers create ./my-project -p comprehensive
   PDF:      skill-seekers create tutorial.pdf --ocr
+  DOCX:     skill-seekers create document.docx
   Config:   skill-seekers create configs/react.json
 
 Source Auto-Detection:
@@ -449,6 +475,7 @@ Source Auto-Detection:
   • owner/repo → GitHub analysis
   • ./path → local codebase
   • file.pdf → PDF extraction
+  • file.docx → Word document extraction
   • file.json → multi-source config
 
 Progressive Help (13 → 120+ flags):
@@ -483,6 +510,9 @@ Common Workflows:
         "--help-local", action="store_true", help=argparse.SUPPRESS, dest="_help_local"
     )
     parser.add_argument("--help-pdf", action="store_true", help=argparse.SUPPRESS, dest="_help_pdf")
+    parser.add_argument(
+        "--help-word", action="store_true", help=argparse.SUPPRESS, dest="_help_word"
+    )
     parser.add_argument(
         "--help-config", action="store_true", help=argparse.SUPPRESS, dest="_help_config"
     )
@@ -532,6 +562,15 @@ Common Workflows:
         add_create_arguments(parser_pdf, mode="pdf")
         parser_pdf.print_help()
         return 0
+    elif args._help_word:
+        parser_word = argparse.ArgumentParser(
+            prog="skill-seekers create",
+            description="Create skill from Word document (.docx)",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+        )
+        add_create_arguments(parser_word, mode="word")
+        parser_word.print_help()
+        return 0
     elif args._help_config:
         parser_config = argparse.ArgumentParser(
             prog="skill-seekers create",
diff --git a/src/skill_seekers/cli/github_scraper.py b/src/skill_seekers/cli/github_scraper.py
index 3e1cc88..ebf10e2 100644
--- a/src/skill_seekers/cli/github_scraper.py
+++ b/src/skill_seekers/cli/github_scraper.py
@@ -1296,14 +1296,14 @@ Use this skill when you need to:
         closed_issues = [i for i in issues if i["state"] == "closed"]
 
         content += f"## Open Issues ({len(open_issues)})\n\n"
-        for issue in open_issues[:20]:
+        for issue in open_issues:
             labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
             content += f"### #{issue['number']}: {issue['title']}\n"
             content += f"**Labels:** {labels} | **Created:** {issue['created_at'][:10]}\n"
             content += f"[View on GitHub]({issue['url']})\n\n"
 
         content += f"\n## Recently Closed Issues ({len(closed_issues)})\n\n"
-        for issue in closed_issues[:10]:
+        for issue in closed_issues:
             labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
             content += f"### #{issue['number']}: {issue['title']}\n"
             content += f"**Labels:** {labels} | **Closed:** {issue['closed_at'][:10]}\n"
diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py
index 7c9bf80..fb0a478 100644
--- a/src/skill_seekers/cli/main.py
+++ b/src/skill_seekers/cli/main.py
@@ -47,6 +47,7 @@ COMMAND_MODULES = {
     "scrape": "skill_seekers.cli.doc_scraper",
     "github": "skill_seekers.cli.github_scraper",
     "pdf": "skill_seekers.cli.pdf_scraper",
+    "word": "skill_seekers.cli.word_scraper",
     "unified": "skill_seekers.cli.unified_scraper",
     "enhance": "skill_seekers.cli.enhance_command",
     "enhance-status": "skill_seekers.cli.enhance_status",
diff --git a/src/skill_seekers/cli/parsers/__init__.py b/src/skill_seekers/cli/parsers/__init__.py
index 93143f8..d12c2a6 100644
--- a/src/skill_seekers/cli/parsers/__init__.py
+++ b/src/skill_seekers/cli/parsers/__init__.py
@@ -12,6 +12,7 @@ from .config_parser import ConfigParser
 from .scrape_parser import ScrapeParser
 from .github_parser import GitHubParser
 from .pdf_parser import PDFParser
+from .word_parser import WordParser
 from .unified_parser import UnifiedParser
 from .enhance_parser import EnhanceParser
 from .enhance_status_parser import EnhanceStatusParser
@@ -41,6 +42,7 @@ PARSERS = [
     EnhanceParser(),
     EnhanceStatusParser(),
     PDFParser(),
+    WordParser(),
     UnifiedParser(),
     EstimateParser(),
     InstallParser(),
diff --git a/src/skill_seekers/cli/parsers/word_parser.py b/src/skill_seekers/cli/parsers/word_parser.py
new file mode 100644
index 0000000..2f174f2
--- /dev/null
+++ b/src/skill_seekers/cli/parsers/word_parser.py
@@ -0,0 +1,32 @@
+"""Word document subcommand parser.
+
+Uses shared argument definitions from arguments.word to ensure
+consistency with the standalone word_scraper module.
+"""
+
+from .base import SubcommandParser
+from skill_seekers.cli.arguments.word import add_word_arguments
+
+
+class WordParser(SubcommandParser):
+    """Subcommand parser that exposes the ``word`` command."""
+
+    @property
+    def name(self) -> str:
+        return "word"
+
+    @property
+    def help(self) -> str:
+        return "Extract from Word document (.docx)"
+
+    @property
+    def description(self) -> str:
+        return "Extract content from Word document (.docx) and generate skill"
+
+    def add_arguments(self, parser):
+        """Register the word command's arguments on *parser*.
+
+        Delegates to ``add_word_arguments`` so this subcommand and the
+        standalone word_scraper module use the same argument definitions.
+        """
+        add_word_arguments(parser)
diff --git a/src/skill_seekers/cli/pdf_scraper.py b/src/skill_seekers/cli/pdf_scraper.py
index 9ffd60f..7328ea4 100644
--- a/src/skill_seekers/cli/pdf_scraper.py
+++ b/src/skill_seekers/cli/pdf_scraper.py
@@ -319,7 +319,7 @@ class PDFToSkillConverter:
                 code_list = page.get("code_samples") or page.get("code_blocks")
                 if code_list:
                     f.write("### Code Examples\n\n")
-                    for code in code_list[:3]:  # Limit to top 3
+                    for code in code_list:
                         lang = code.get("language", "")
                         f.write(f"```{lang}\n{code['code']}\n```\n\n")
 
@@ -721,21 +721,44 @@ def main():
         # ═══════════════════════════════════════════════════════════════════════════
         # Traditional Enhancement (complements workflow system)
         # ═══════════════════════════════════════════════════════════════════════════
-        # Note: Runs independently of workflow system (they complement each other)
         if getattr(args, "enhance_level", 0) > 0:
-            # Traditional AI enhancement (API or LOCAL mode)
+            import os
+
+            api_key = getattr(args, "api_key", None) or os.environ.get("ANTHROPIC_API_KEY")
+            mode = "API" if api_key else "LOCAL"
+
             print("\n" + "=" * 80)
-            print("🤖 Traditional AI Enhancement")
+            print(f"🤖 Traditional AI Enhancement ({mode} mode, level {args.enhance_level})")
             print("=" * 80)
             if workflow_executed:
                 print(f"   Running after workflow: {workflow_name}")
                 print(
                     "   (Workflow provides specialized analysis, enhancement provides general improvements)"
                 )
-            print("   (Use --enhance-workflow for more control)")
             print("")
-            # Note: PDF scraper uses enhance_level instead of enhance/enhance_local
-            # This is consistent with the new unified enhancement system
+
+            skill_dir = converter.skill_dir
+            if api_key:
+                try:
+                    from skill_seekers.cli.enhance_skill import enhance_skill_md
+
+                    enhance_skill_md(skill_dir, api_key)
+                    print("✅ API enhancement complete!")
+                except ImportError:
+                    print("❌ API enhancement not available. Falling back to LOCAL mode...")
+                    from pathlib import Path
+                    from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer
+
+                    enhancer = LocalSkillEnhancer(Path(skill_dir))
+                    enhancer.run(headless=True)
+                    print("✅ Local enhancement complete!")
+            else:
+                from pathlib import Path
+                from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer
+
+                enhancer = LocalSkillEnhancer(Path(skill_dir))
+                enhancer.run(headless=True)
+                print("✅ Local enhancement complete!")
 
     except RuntimeError as e:
         print(f"\n❌ Error: {e}", file=sys.stderr)
diff --git a/src/skill_seekers/cli/source_detector.py b/src/skill_seekers/cli/source_detector.py
index 8f98408..7f2397c 100644
--- a/src/skill_seekers/cli/source_detector.py
+++ b/src/skill_seekers/cli/source_detector.py
@@ -60,6 +60,9 @@ class SourceDetector:
         if source.endswith(".pdf"):
             return cls._detect_pdf(source)
 
+        if source.endswith(".docx"):
+            return cls._detect_word(source)
+
         # 2. Directory detection
         if os.path.isdir(source):
             return cls._detect_local(source)
@@ -85,6 +88,7 @@ class SourceDetector:
             "  GitHub: skill-seekers create facebook/react\n"
             "  Local:  skill-seekers create ./my-project\n"
             "  PDF:    skill-seekers create tutorial.pdf\n"
+            "  DOCX:   skill-seekers create document.docx\n"
             "  Config: skill-seekers create configs/react.json"
         )
 
@@ -104,6 +108,14 @@ class SourceDetector:
             type="pdf", parsed={"file_path": source}, suggested_name=name, raw_input=source
         )
 
+    @classmethod
+    def _detect_word(cls, source: str) -> SourceInfo:
+        """Build the SourceInfo for a .docx path, named after its file stem."""
+        stem, _suffix = os.path.splitext(os.path.basename(source))
+        return SourceInfo(
+            type="word", raw_input=source, suggested_name=stem, parsed={"file_path": source}
+        )
+
     @classmethod
     def _detect_local(cls, source: str) -> SourceInfo:
         """Detect local directory source."""
@@ -190,6 +202,13 @@ class SourceDetector:
             if not os.path.isfile(file_path):
                 raise ValueError(f"Path is not a file: {file_path}")
 
+        elif source_info.type == "word":
+            file_path = source_info.parsed["file_path"]
+            if not os.path.exists(file_path):
+                raise ValueError(f"Word document does not exist: {file_path}")
+            if not os.path.isfile(file_path):
+                raise ValueError(f"Path is not a file: {file_path}")
+
         elif source_info.type == "config":
             config_path = source_info.parsed["config_path"]
             if not os.path.exists(config_path):
diff --git a/src/skill_seekers/cli/unified_scraper.py b/src/skill_seekers/cli/unified_scraper.py
index 7322a9f..092c218 100644
--- a/src/skill_seekers/cli/unified_scraper.py
+++ b/src/skill_seekers/cli/unified_scraper.py
@@ -73,11 +73,12 @@ class UnifiedScraper:
             "documentation": [],  # List of doc sources
             "github": [],  # List of github sources
             "pdf": [],  # List of pdf sources
+            "word": [],  # List of word sources
             "local": [],  # List of local sources (docs or code)
         }
 
         # Track source index for unique naming (multi-source support)
-        self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "local": 0}
+        self._source_counters = {"documentation": 0, "github": 0, "pdf": 0, "word": 0, "local": 0}
 
         # Output paths - cleaner organization
         self.name = self.config["name"]
@@ -151,6 +152,8 @@ class UnifiedScraper:
                     self._scrape_github(source)
                 elif source_type == "pdf":
                     self._scrape_pdf(source)
+                elif source_type == "word":
+                    self._scrape_word(source)
                 elif source_type == "local":
                     self._scrape_local(source)
                 else:
@@ -514,6 +517,65 @@ class UnifiedScraper:
 
         logger.info(f"✅ PDF: {len(pdf_data.get('pages', []))} pages extracted")
 
+    def _scrape_word(self, source: dict[str, Any]):
+        """Extract one Word (.docx) source and record it in ``self.scraped_data``.
+
+        Runs ``WordToSkillConverter`` on ``source["path"]``, copies the extracted
+        JSON into ``self.data_dir`` and then attempts a standalone skill build.
+        """
+        try:
+            from skill_seekers.cli.word_scraper import WordToSkillConverter
+        except ImportError:
+            logger.error("word_scraper.py not found")
+            return
+
+        # Reserve this source's position among the Word sources (multi-source naming).
+        position = self._source_counters["word"]
+        self._source_counters["word"] = position + 1
+
+        # File name without directory or extension identifies the document.
+        docx_path = source["path"]
+        stem, _ext = os.path.splitext(os.path.basename(docx_path))
+
+        logger.info(f"Scraping Word document: {source['path']}")
+        converter = WordToSkillConverter(
+            {
+                "name": f"{self.name}_word_{position}_{stem}",
+                "docx_path": source["path"],
+                "description": f"{source.get('name', stem)} documentation",
+            }
+        )
+
+        converter.extract_docx()
+
+        # The converter persists its result to disk; read it back from there.
+        extracted_path = converter.data_file
+        with open(extracted_path, encoding="utf-8") as fh:
+            word_data = json.load(fh)
+
+        # Keep a copy of the extraction alongside the other cached source data.
+        cached_copy = os.path.join(self.data_dir, f"word_data_{position}_{stem}.json")
+        shutil.copy(extracted_path, cached_copy)
+
+        record = {
+            "docx_path": docx_path,
+            "docx_id": stem,
+            "idx": position,
+            "data": word_data,
+            "data_file": cached_copy,
+        }
+        self.scraped_data["word"].append(record)
+
+        # A failed standalone build is reported but does not abort the scrape.
+        try:
+            converter.build_skill()
+            logger.info("✅ Word: Standalone SKILL.md created")
+        except Exception as e:
+            logger.warning(f"⚠️  Failed to build standalone Word SKILL.md: {e}")
+
+        section_total = len(word_data.get("pages", []))
+        logger.info(f"✅ Word: {section_total} sections extracted")
+
     def _scrape_local(self, source: dict[str, Any]):
         """
         Scrape local directory (documentation files or source code).
diff --git a/src/skill_seekers/cli/word_scraper.py b/src/skill_seekers/cli/word_scraper.py
new file mode 100644
index 0000000..76d068f
--- /dev/null
+++ b/src/skill_seekers/cli/word_scraper.py
@@ -0,0 +1,1054 @@
+#!/usr/bin/env python3
+"""
+Word Document (.docx) to Claude Skill Converter (Task B2)
+
+Converts Word documents into Claude AI skills.
+Uses mammoth for HTML conversion and python-docx for metadata/tables.
+
+Usage:
+    python3 word_scraper.py --docx document.docx --name myskill
+    python3 word_scraper.py --from-json document_extracted.json
+"""
+
+import argparse
+import json
+import logging
+import os
+import re
+import sys
+from pathlib import Path
+
+# Optional dependency guard
+try:
+    import mammoth
+    import docx as python_docx
+
+    WORD_AVAILABLE = True
+except ImportError:
+    WORD_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+def _check_word_deps():
+    """Fail fast with install instructions when the Word extras are missing."""
+    if WORD_AVAILABLE:
+        return
+    # WORD_AVAILABLE is False when importing mammoth or python-docx failed.
+    required = "mammoth and python-docx are required for Word document support.\n"
+    install = 'Install with: pip install "skill-seekers[docx]"\nOr: pip install mammoth python-docx'
+    raise RuntimeError(required + install)
+
+
+def infer_description_from_word(metadata: dict | None = None, name: str = "") -> str:
+    """Infer skill description from Word document metadata or name.
+
+    Args:
+        metadata: Document metadata dict (``subject`` and ``title`` are consulted).
+        name: Skill name for fallback
+
+    Returns:
+        Description string in "Use when..." format
+    """
+    if metadata:
+        # Prefer the subject when it is long enough (> 20 chars) to be descriptive
+        if metadata.get("subject"):
+            desc = str(metadata["subject"]).strip()
+            if len(desc) > 20:
+                if len(desc) > 150:
+                    desc = desc[:147] + "..."  # cap at 150 chars incl. ellipsis
+                return f"Use when {desc.lower()}"
+
+        # Fall back to the title unless it is short or just the file name
+        if metadata.get("title"):
+            title = str(metadata["title"]).strip()
+            if len(title) > 10 and not title.lower().endswith(".docx"):
+                return f"Use when working with {title.lower()}"
+
+    return (
+        f"Use when referencing {name} documentation"
+        if name
+        else "Use when referencing this documentation"
+    )
+
+
+class WordToSkillConverter:
+    """Convert Word document (.docx) to Claude skill."""
+
+    def __init__(self, config):
+        """Store the converter settings and derive output locations from the name."""
+        self.config = config
+        self.name = config["name"]
+        self.docx_path = config.get("docx_path", "")
+
+        # An empty or missing description falls back to a name-based default.
+        fallback = f"Use when referencing {self.name} documentation"
+        self.description = config.get("description") or fallback
+
+        # Outputs live under ./output: the skill directory and the extraction JSON.
+        self.skill_dir = f"output/{self.name}"
+        self.data_file = f"{self.skill_dir}_extracted.json"
+        self.categories = config.get("categories", {})  # optional category config
+        self.extracted_data = None  # set by extract_docx() / load_extracted_data()
+
+    def extract_docx(self):
+        """Extract content from Word document using mammoth + python-docx.
+
+        - mammoth converts the document body to HTML
+        - python-docx provides the core-properties metadata
+        - BeautifulSoup parses the HTML; top-level h1/h2 tags start a new section
+        - LanguageDetector labels code samples that have no language yet
+        """
+        _check_word_deps()  # raises RuntimeError when mammoth/python-docx are missing
+
+        from bs4 import BeautifulSoup
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        print(f"\n🔍 Extracting from Word document: {self.docx_path}")
+
+        if not os.path.exists(self.docx_path):
+            raise FileNotFoundError(f"Word document not found: {self.docx_path}")
+
+        # --- Extract metadata via python-docx ---
+        doc = python_docx.Document(self.docx_path)
+        core_props = doc.core_properties
+        metadata = {
+            "title": core_props.title or "",
+            "author": core_props.author or "",
+            "created": str(core_props.created) if core_props.created else "",
+            "modified": str(core_props.modified) if core_props.modified else "",
+            "subject": core_props.subject or "",
+        }
+
+        # Update description from metadata if the config did not set one explicitly
+        if not self.config.get("description"):
+            self.description = infer_description_from_word(metadata, self.name)
+
+        # --- Convert body to HTML with mammoth ---
+        with open(self.docx_path, "rb") as f:
+            result = mammoth.convert_to_html(f)
+
+        html_content = result.value
+
+        # --- Parse HTML with BeautifulSoup ---
+        soup = BeautifulSoup(html_content, "html.parser")
+
+        # --- Split by h1/h2 heading boundaries into sections ---
+        sections = []
+        current_heading = None
+        current_heading_level = None
+        current_elements = []
+        section_number = 0
+
+        def _flush_section():  # reads the current_* variables as they are at call time
+            nonlocal section_number
+            if current_heading is not None or current_elements:
+                section_number += 1
+                section = _build_section(
+                    section_number,
+                    current_heading,
+                    current_heading_level,
+                    current_elements,
+                    doc,
+                )
+                sections.append(section)
+
+        for elem in soup.children:
+            if not hasattr(elem, "name") or elem.name is None:  # skip nodes without a tag name
+                continue
+
+            if elem.name in ("h1", "h2"):
+                # Flush previous section, then start collecting under the new heading
+                _flush_section()
+                current_heading = elem.get_text(strip=True)
+                current_heading_level = elem.name
+                current_elements = []
+            else:
+                current_elements.append(elem)
+
+        # Flush last section
+        _flush_section()
+
+        # If nothing was flushed (no tag elements at all), create one default section
+        if not sections:
+            section_number = 1
+            all_elements = [e for e in soup.children if hasattr(e, "name") and e.name]
+            section = _build_section(
+                1,
+                Path(self.docx_path).stem,
+                "h1",
+                all_elements,
+                doc,
+            )
+            sections = [section]
+
+        # --- Collect language statistics ---
+        detector = LanguageDetector(min_confidence=0.15)
+        languages_detected: dict[str, int] = {}
+        total_code_blocks = 0
+
+        for section in sections:
+            for code_sample in section.get("code_samples", []):
+                lang = code_sample.get("language", "")
+                if lang:
+                    languages_detected[lang] = languages_detected.get(lang, 0) + 1
+                total_code_blocks += 1
+
+        # Detect languages for samples without language (accepted at confidence >= 0.3)
+        for section in sections:
+            for code_sample in section.get("code_samples", []):
+                if not code_sample.get("language"):
+                    code = code_sample.get("code", "")
+                    if code:
+                        lang, confidence = detector.detect_from_code(code)
+                        if lang and confidence >= 0.3:
+                            code_sample["language"] = lang
+                            languages_detected[lang] = languages_detected.get(lang, 0) + 1
+
+        result_data = {
+            "source_file": self.docx_path,
+            "metadata": metadata,
+            "total_sections": len(sections),
+            "total_code_blocks": total_code_blocks,
+            "total_images": sum(len(s.get("images", [])) for s in sections),
+            "languages_detected": languages_detected,
+            "pages": sections,  # "pages" key for pipeline compatibility
+        }
+
+        # Save extracted data (default=str stringifies values json cannot encode)
+        os.makedirs(os.path.dirname(self.data_file), exist_ok=True)
+        with open(self.data_file, "w", encoding="utf-8") as f:
+            json.dump(result_data, f, indent=2, ensure_ascii=False, default=str)
+
+        print(f"\n💾 Saved extracted data to: {self.data_file}")
+        self.extracted_data = result_data
+        print(
+            f"✅ Extracted {len(sections)} sections, "
+            f"{total_code_blocks} code blocks, "
+            f"{result_data['total_images']} images"
+        )
+        return True  # always True; failures surface as exceptions
+
+    def load_extracted_data(self, json_path):
+        """Read an earlier extraction result from ``json_path`` into ``self.extracted_data``."""
+        print(f"\n📂 Loading extracted data from: {json_path}")
+        with open(json_path, encoding="utf-8") as fh:
+            loaded = self.extracted_data = json.load(fh)
+        page_count = len(loaded.get("pages", []))  # fallback when no total was stored
+        print(f"✅ Loaded {loaded.get('total_sections', page_count)} sections")
+        return True
+
+    def categorize_content(self):
+        """Group the extracted sections into named categories.
+
+        Three modes, checked in order: a single category named after the
+        document when ``self.docx_path`` is set; the categories from
+        ``self.categories`` (pre-grouped pages or keyword lists); otherwise a
+        single "content" category.
+
+        Returns:
+            Dict mapping category key to ``{"title": ..., "pages": [...]}``.
+        """
+        print("\n📋 Categorizing content...")
+
+        pages = self.extracted_data.get("pages", [])
+
+        # A converter bound to a document puts every section in one category.
+        if self.docx_path:
+            stem = Path(self.docx_path).stem
+            print("✅ Created 1 category (single Word source)")
+            print(f"   - {stem}: {len(pages)} sections")
+            return {self._sanitize_filename(stem): {"title": stem, "pages": pages}}
+
+        def _title(key):
+            return key.replace("_", " ").title()
+
+        result = {}
+        if not self.categories:
+            # Nothing configured: everything goes into a single bucket.
+            result["content"] = {"title": "Content", "pages": pages}
+        else:
+            sample = next(iter(self.categories.values()), None)
+            if isinstance(sample, list) and sample and isinstance(sample[0], dict):
+                # Values are already lists of section dicts; wrap them as-is.
+                for key, members in self.categories.items():
+                    result[key] = {"title": _title(key), "pages": members}
+            else:
+                # Values are keyword lists; start every category empty.
+                for key in self.categories:
+                    result[key] = {"title": _title(key), "pages": []}
+
+                for page in pages:
+                    body = page.get("text", "").lower()
+                    heading = page.get("heading", "").lower()
+
+                    # Pick the category with the most keyword hits; first one wins ties.
+                    winner, top_hits = None, 0
+                    for key, keywords in self.categories.items():
+                        if not isinstance(keywords, list):
+                            continue
+                        hits = 0
+                        for kw in keywords:
+                            if not isinstance(kw, str):
+                                continue
+                            needle = kw.lower()
+                            if needle in body or needle in heading:
+                                hits += 1
+                        if hits > top_hits:
+                            winner, top_hits = key, hits
+
+                    if winner is None:
+                        # No keyword matched anywhere: collect under "other".
+                        bucket = result.setdefault("other", {"title": "Other", "pages": []})
+                        bucket["pages"].append(page)
+                    else:
+                        result[winner]["pages"].append(page)
+
+        print(f"✅ Created {len(result)} categories")
+        for entry in result.values():
+            print(f"   - {entry['title']}: {len(entry['pages'])} sections")
+
+        return result
+
+    def build_skill(self):
+        """Write the skill directory: reference files, their index and SKILL.md.
+
+        Expects ``self.extracted_data`` to be populated (see ``extract_docx`` and
+        ``load_extracted_data``).
+        """
+        print(f"\n🏗️  Building skill: {self.name}")
+
+        # Directory skeleton of a skill
+        for subdir in ("references", "scripts", "assets"):
+            os.makedirs(f"{self.skill_dir}/{subdir}", exist_ok=True)
+
+        categorized = self.categorize_content()
+
+        # One reference file per category, numbered from 1
+        print("\n📝 Generating reference files...")
+        category_count = len(categorized)
+        for position, (cat_key, cat_data) in enumerate(categorized.items(), start=1):
+            self._generate_reference_file(cat_key, cat_data, position, category_count)
+
+        # Index of the reference files
+        self._generate_index(categorized)
+
+        # Top-level SKILL.md
+        self._generate_skill_md(categorized)
+
+        print(f"\n✅ Skill built successfully: {self.skill_dir}/")
+        print(f"\n📦 Next step: Package with: skill-seekers package {self.skill_dir}/")
+
+    def _generate_reference_file(self, _cat_key, cat_data, section_num, total_sections):
+        """Write one category's sections to a markdown file in references/ (images go to assets/)."""
+        sections = cat_data["pages"]
+
+        # The file name is derived from the docx basename; _cat_key is not used
+        docx_basename = ""
+        if self.docx_path:
+            docx_basename = Path(self.docx_path).stem
+
+        if sections:
+            section_nums = [s.get("section_number", i + 1) for i, s in enumerate(sections)]
+
+            if total_sections == 1:  # total_sections is the category count passed by build_skill
+                filename = (
+                    f"{self.skill_dir}/references/{docx_basename}.md"
+                    if docx_basename
+                    else f"{self.skill_dir}/references/main.md"
+                )
+            else:
+                sec_range = f"s{min(section_nums)}-s{max(section_nums)}"
+                base_name = docx_basename if docx_basename else "section"
+                filename = f"{self.skill_dir}/references/{base_name}_{sec_range}.md"
+        else:
+            filename = f"{self.skill_dir}/references/section_{section_num:02d}.md"
+
+        with open(filename, "w", encoding="utf-8") as f:
+            f.write(f"# {cat_data['title']}\n\n")
+
+            for section in sections:
+                sec_num = section.get("section_number", "?")
+                heading = section.get("heading", "")
+                heading_level = section.get("heading_level", "h1")
+
+                f.write(f"---\n\n**📄 Source: Section {sec_num}**\n\n")
+
+                # Add heading one markdown level below its HTML level ("h1" -> "##")
+                if heading:
+                    md_level = "#" * (int(heading_level[1]) + 1) if heading_level else "##"
+                    f.write(f"{md_level} {heading}\n\n")
+
+                # Add sub-headings (h3+) found within the section
+                for sub_heading in section.get("headings", []):
+                    sub_level = sub_heading.get("level", "h3")
+                    sub_text = sub_heading.get("text", "")
+                    if sub_text:
+                        sub_md = "#" * (int(sub_level[1]) + 1) if sub_level else "###"
+                        f.write(f"{sub_md} {sub_text}\n\n")
+
+                # Add text content
+                if section.get("text"):
+                    f.write(f"{section['text']}\n\n")
+
+                # Add code samples as fenced blocks tagged with their language
+                code_list = section.get("code_samples", [])
+                if code_list:
+                    f.write("### Code Examples\n\n")
+                    for code in code_list:
+                        lang = code.get("language", "")
+                        f.write(f"```{lang}\n{code['code']}\n```\n\n")
+
+                # Add tables as markdown (the header row is optional, data rows always print)
+                tables = section.get("tables", [])
+                if tables:
+                    f.write("### Tables\n\n")
+                    for table in tables:
+                        headers = table.get("headers", [])
+                        rows = table.get("rows", [])
+                        if headers:
+                            f.write("| " + " | ".join(str(h) for h in headers) + " |\n")
+                            f.write("| " + " | ".join("---" for _ in headers) + " |\n")
+                        for row in rows:
+                            f.write("| " + " | ".join(str(c) for c in row) + " |\n")
+                        f.write("\n")
+
+                # Add images: bytes are saved under assets/ and linked from the markdown
+                images = section.get("images", [])
+                if images:
+                    assets_dir = os.path.join(self.skill_dir, "assets")
+                    os.makedirs(assets_dir, exist_ok=True)
+
+                    f.write("### Images\n\n")
+                    for img in images:
+                        img_index = img.get("index", 0)
+                        img_data = img.get("data", b"")
+                        img_filename = f"section_{sec_num}_img_{img_index}.png"
+                        img_path = os.path.join(assets_dir, img_filename)
+
+                        if isinstance(img_data, (bytes, bytearray)):  # other values are skipped
+                            with open(img_path, "wb") as img_file:
+                                img_file.write(img_data)
+                            f.write(f"![Image {img_index}](../assets/{img_filename})\n\n")
+
+                f.write("---\n\n")
+
+        print(f"   Generated: {filename}")
+
+    def _generate_index(self, categorized):
+        """Write ``references/index.md``: one link per category plus statistics.
+
+        The link targets follow the file naming used by ``_generate_reference_file``.
+
+        Args:
+            categorized: Mapping of category key to ``{"title": ..., "pages": [...]}``.
+        """
+        index_path = f"{self.skill_dir}/references/index.md"
+        stem = Path(self.docx_path).stem if self.docx_path else ""
+        single_category = len(categorized) == 1
+
+        with open(index_path, "w", encoding="utf-8") as out:
+            out.write(f"# {self.name.title()} Documentation Reference\n\n")
+            out.write("## Categories\n\n")
+
+            # Positions are 1-based, matching the numbering in build_skill.
+            for position, entry in enumerate(categorized.values(), start=1):
+                pages = entry["pages"]
+
+                if not pages:
+                    # Empty categories are named after their position.
+                    target = f"section_{position:02d}.md"
+                    span = "N/A"
+                else:
+                    numbers = [p.get("section_number", i + 1) for i, p in enumerate(pages)]
+                    low, high = min(numbers), max(numbers)
+                    span = f"Sections {low}-{high}"
+
+                    # Several categories: the section range is part of the file name.
+                    if not single_category:
+                        target = f"{stem if stem else 'section'}_s{low}-s{high}.md"
+                    elif stem:
+                        target = f"{stem}.md"
+                    else:
+                        target = "main.md"
+
+                out.write(f"- [{entry['title']}]({target}) ({len(pages)} sections, {span})\n")
+
+            out.write("\n## Statistics\n\n")
+            stats = self.extracted_data
+            out.write(f"- Total sections: {stats.get('total_sections', 0)}\n")
+            out.write(f"- Code blocks: {stats.get('total_code_blocks', 0)}\n")
+            out.write(f"- Images: {stats.get('total_images', 0)}\n")
+
+            # Author and creation date are listed only when the metadata has them.
+            metadata = stats.get("metadata", {})
+            for label, field in (("Author", "author"), ("Created", "created")):
+                if metadata.get(field):
+                    out.write(f"- {label}: {metadata[field]}\n")
+
+        print(f"   Generated: {index_path}")
+
+    def _generate_skill_md(self, categorized):
+        """Generate main SKILL.md file.
+
+        Args:
+            categorized: Mapping of category key to ``{"title": ..., "pages": [...]}``.
+        """
+        filename = f"{self.skill_dir}/SKILL.md"
+
+        skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]
+        desc = self.description[:1024]  # slicing leaves shorter descriptions untouched
+        pages = self.extracted_data.get("pages", [])
+
+        with open(filename, "w", encoding="utf-8") as f:
+            # YAML frontmatter
+            f.write("---\n")
+            f.write(f"name: {skill_name}\n")
+            f.write(f"description: {desc}\n")
+            f.write("---\n\n")
+
+            f.write(f"# {self.name.title()} Documentation Skill\n\n")
+            f.write(f"{self.description}\n\n")
+
+            # Document metadata (only the fields that are filled in)
+            metadata = self.extracted_data.get("metadata", {})
+            if any(metadata.values()):
+                f.write("## 📋 Document Information\n\n")
+                for label in ("Title", "Author", "Created", "Modified"):
+                    if metadata.get(label.lower()):
+                        f.write(f"**{label}:** {metadata[label.lower()]}\n\n")
+
+            # When to Use
+            f.write("## 💡 When to Use This Skill\n\n")
+            f.write("Use this skill when you need to:\n")
+            f.write(f"- Understand {self.name} concepts and fundamentals\n")
+            f.write("- Look up API references and technical specifications\n")
+            f.write("- Find code examples and implementation patterns\n")
+            f.write("- Review tutorials, guides, and best practices\n")
+            f.write("- Explore the complete documentation structure\n\n")
+
+            # Section Overview
+            total_sections = self.extracted_data.get("total_sections", 0)
+            f.write("## 📖 Section Overview\n\n")
+            f.write(f"**Total Sections:** {total_sections}\n\n")
+            f.write("**Content Breakdown:**\n\n")
+            for cat_data in categorized.values():
+                f.write(f"- **{cat_data['title']}**: {len(cat_data['pages'])} sections\n")
+            f.write("\n")
+
+            # Key Concepts from headings
+            f.write(self._format_key_concepts())
+
+            # Quick Reference patterns
+            f.write("## ⚡ Quick Reference\n\n")
+            f.write(self._format_patterns_from_content())
+
+            # Code examples (top 15 by quality score, grouped by language)
+            all_code = [code for section in pages for code in section.get("code_samples", [])]
+            all_code.sort(key=lambda x: x.get("quality_score", 0), reverse=True)
+            top_code = all_code[:15]
+
+            if top_code:
+                f.write("## 📝 Code Examples\n\n")
+                f.write("*High-quality examples extracted from documentation*\n\n")
+
+                by_lang: dict[str, list] = {}
+                for code in top_code:
+                    # An empty language string counts as unknown, like a missing key
+                    by_lang.setdefault(code.get("language") or "unknown", []).append(code)
+
+                for lang in sorted(by_lang.keys()):
+                    examples = by_lang[lang]
+                    f.write(f"### {lang.title()} Examples ({len(examples)})\n\n")
+                    for i, code in enumerate(examples[:5], 1):
+                        quality = code.get("quality_score", 0)
+                        code_text = code.get("code", "")
+                        f.write(f"**Example {i}** (Quality: {quality:.1f}/10):\n\n")
+                        f.write(f"```{lang}\n")
+                        if len(code_text) <= 500:
+                            f.write(code_text)
+                        else:
+                            f.write(code_text[:500] + "\n...")
+                        f.write("\n```\n\n")
+
+            # Table Summary (first 5 tables, first 5 rows each)
+            all_tables = [(s.get("heading", ""), t) for s in pages for t in s.get("tables", [])]
+
+            if all_tables:
+                f.write("## 📊 Table Summary\n\n")
+                f.write(f"*{len(all_tables)} table(s) found in document*\n\n")
+                for section_heading, table in all_tables[:5]:
+                    if section_heading:
+                        f.write(f"**From section: {section_heading}**\n\n")
+                    headers = table.get("headers", [])
+                    rows = table.get("rows", [])
+                    if headers:
+                        f.write("| " + " | ".join(str(h) for h in headers) + " |\n")
+                        f.write("| " + " | ".join("---" for _ in headers) + " |\n")
+                        for row in rows[:5]:
+                            f.write("| " + " | ".join(str(c) for c in row) + " |\n")
+                        f.write("\n")
+
+            # Statistics
+            f.write("## 📊 Documentation Statistics\n\n")
+            f.write(f"- **Total Sections**: {total_sections}\n")
+            f.write(f"- **Code Blocks**: {self.extracted_data.get('total_code_blocks', 0)}\n")
+            f.write(f"- **Images/Diagrams**: {self.extracted_data.get('total_images', 0)}\n")
+            f.write(f"- **Tables**: {len(all_tables)}\n")
+
+            langs = self.extracted_data.get("languages_detected", {})
+            if langs:
+                f.write(f"- **Programming Languages**: {len(langs)}\n\n")
+                f.write("**Language Breakdown:**\n\n")
+                for lang, count in sorted(langs.items(), key=lambda x: x[1], reverse=True):
+                    f.write(f"- {lang}: {count} examples\n")
+                f.write("\n")
+
+            # Navigation: use the same file names that _generate_reference_file writes
+            f.write("## 🗺️ Navigation\n\n")
+            f.write("**Reference Files:**\n\n")
+            stem = Path(self.docx_path).stem if self.docx_path else ""
+            for position, cat_data in enumerate(categorized.values(), start=1):
+                nums = [s.get("section_number", i + 1) for i, s in enumerate(cat_data["pages"])]
+                if not nums:
+                    ref_file = f"section_{position:02d}.md"
+                elif len(categorized) == 1:
+                    ref_file = f"{stem}.md" if stem else "main.md"
+                else:
+                    ref_file = f"{stem or 'section'}_s{min(nums)}-s{max(nums)}.md"
+                f.write(f"- `references/{ref_file}` - {cat_data['title']}\n")
+            f.write("\n")
+            f.write("See `references/index.md` for complete documentation structure.\n\n")
+
+            # Footer
+            f.write("---\n\n")
+            f.write("**Generated by Skill Seeker** | Word Document Scraper\n")
+
+        with open(filename, encoding="utf-8") as f:
+            line_count = len(f.read().split("\n"))
+        print(f"   Generated: {filename} ({line_count} lines)")
+
+    def _format_key_concepts(self) -> str:
+        """Build the "Key Concepts" markdown block from section headings.
+
+        Returns:
+            The markdown text, or an empty string when no heading longer than
+            three characters exists.
+        """
+        collected = []
+        for page in self.extracted_data.get("pages", []):
+            # The section's own heading first, then the sub-headings recorded in it
+            candidates = [(page.get("heading_level", "h1"), page.get("heading", ""))]
+            candidates += [
+                (sub.get("level", "h3"), sub.get("text", ""))
+                for sub in page.get("headings", [])
+            ]
+            for level, raw in candidates:
+                label = raw.strip()
+                if len(label) > 3:
+                    collected.append((level, label))
+
+        if not collected:
+            return ""
+
+        parts = [
+            "## 🔑 Key Concepts\n\n",
+            "*Main topics covered in this documentation*\n\n",
+        ]
+
+        # Only h1 and h2 entries are listed: up to 10 and 15 of them respectively
+        groups = (("Major Topics", "h1", 10), ("Subtopics", "h2", 15))
+        for title, level, limit in groups:
+            matching = [label for lvl, label in collected if lvl == level]
+            if matching:
+                parts.append(f"**{title}:**\n\n")
+                parts.extend(f"- {label}\n" for label in matching[:limit])
+                parts.append("\n")
+
+        return "".join(parts)
+
+    def _format_patterns_from_content(self) -> str:
+        """Summarise sections whose heading names a common documentation topic.
+
+        Each section is filed under the first keyword (in list order) contained in
+        its lower-cased heading; sections without a match are left out.
+
+        Returns:
+            Markdown text, or a pointer to the reference files when nothing matched.
+        """
+        keywords = (
+            "getting started",
+            "installation",
+            "configuration",
+            "usage",
+            "api",
+            "examples",
+            "tutorial",
+            "guide",
+            "best practices",
+            "troubleshooting",
+            "faq",
+        )
+
+        # Topic label -> matching sections, in document order
+        grouped: dict[str, list] = {}
+        for page in self.extracted_data.get("pages", []):
+            lowered = page.get("heading", "").lower()
+            hit = next((kw for kw in keywords if kw in lowered), None)
+            if hit is None:
+                continue
+            entry = {
+                "type": hit.title(),
+                "heading": page.get("heading", ""),
+                "section": page.get("section_number", 0),
+            }
+            grouped.setdefault(entry["type"], []).append(entry)
+
+        if not grouped:
+            return "*See reference files for detailed content*\n\n"
+
+        lines = ["*Common documentation patterns found:*\n\n"]
+        for kind, entries in sorted(grouped.items()):
+            lines.append(f"**{kind}** ({len(entries)} sections):\n")
+            # At most three headings are shown per topic
+            for entry in entries[:3]:
+                lines.append(f"- {entry['heading']} (section {entry['section']})\n")
+            lines.append("\n")
+
+        return "".join(lines)
+
+    def _sanitize_filename(self, name):
+        """Return a lowercase, underscore-separated form of ``name`` for use in filenames.
+
+        Characters other than word characters, whitespace and hyphens are
+        removed; each run of whitespace and/or hyphens then becomes one underscore.
+        """
+        lowered = name.lower()
+        kept = re.sub(r"[^\w\s-]", "", lowered)
+        return re.sub(r"[-\s]+", "_", kept)
+
+
+# ---------------------------------------------------------------------------
+# HTML-to-sections helper (module-level for clarity)
+# ---------------------------------------------------------------------------
+
+def _build_section(
+    section_number: int,
+    heading: str | None,
+    heading_level: str | None,
+    elements: list,
+    doc,
+) -> dict:
+    """Build a section dict from a list of BeautifulSoup elements.
+
+    Elements are classified by tag name in this order: sub-headings (h3-h6),
+    code blocks (<pre>, or <code> outside a <pre>), tables, images embedded as
+    data URIs, <p> elements that look like multi-line code, and finally plain
+    text. Items without a tag name (e.g. bare strings) are ignored.
+
+    Args:
+        section_number: 1-based section index
+        heading: Heading text (or None for preamble)
+        heading_level: 'h1', 'h2', etc. (None is stored as 'h1')
+        elements: List of BeautifulSoup Tag objects belonging to this section
+        doc: python-docx Document (used for table cross-reference, not currently used)
+
+    Returns:
+        Section dict compatible with the intermediate JSON format
+    """
+    import base64
+
+    text_parts = []
+    code_samples = []
+    tables = []
+    sub_headings = []
+    images = []
+
+    for elem in elements:
+        if getattr(elem, "name", None) is None:
+            continue
+
+        tag = elem.name
+
+        # Sub-headings (h3, h4, h5, h6) within the section
+        if tag in ("h3", "h4", "h5", "h6"):
+            sub_text = elem.get_text(strip=True)
+            if sub_text:
+                sub_headings.append({"level": tag, "text": sub_text})
+            continue
+
+        # Code blocks
+        if tag == "pre" or (tag == "code" and elem.find_parent("pre") is None):
+            # Prefer the inner <code> of a <pre>; fall back to the element itself
+            code_elem = elem.find("code") if tag == "pre" else elem
+            source_elem = code_elem or elem
+
+            code_text = source_elem.get_text().strip()
+            if code_text:
+                # Try to detect language from class attribute
+                lang = ""
+                for cls in source_elem.get("class", []):
+                    if cls.startswith(("language-", "lang-")):
+                        lang = cls.split("-", 1)[1]
+                        break
+
+                code_samples.append(
+                    {
+                        "code": code_text,
+                        "language": lang,
+                        "quality_score": _score_code_quality(code_text),
+                    }
+                )
+            continue
+
+        # Tables
+        if tag == "table":
+            table_data = _extract_table_from_html(elem)
+            if table_data:
+                tables.append(table_data)
+            continue
+
+        # Images
+        if tag == "img":
+            # mammoth embeds images as data URIs; extract if present
+            src = elem.get("src", "")
+            if src.startswith("data:"):
+                try:
+                    _, b64data = src.split(",", 1)
+                    img_bytes = base64.b64decode(b64data)
+                    images.append(
+                        {
+                            "index": len(images),
+                            "data": img_bytes,
+                            "width": int(elem.get("width", 0) or 0),
+                            "height": int(elem.get("height", 0) or 0),
+                        }
+                    )
+                except (ValueError, TypeError):
+                    # Best effort: a data URI without a comma, undecodable base64
+                    # (binascii.Error is a ValueError) or a non-numeric dimension
+                    # means this image is skipped; anything else is a real bug
+                    # and is allowed to propagate.
+                    pass
+            continue
+
+        # Detect code in <p> elements that contain <br> tags (multi-line content)
+        # Mammoth renders monospace/Courier paragraphs as <p> with <br> — not <pre>
+        if tag == "p" and elem.find("br"):
+            raw_text = elem.get_text(separator="\n").strip()
+            # Exclude bullet-point / prose lists (•, *, -)
+            if raw_text and not re.search(r"^[•\-\*]\s", raw_text, re.MULTILINE):
+                # Score once and reuse the value for both the threshold and the record
+                quality_score = _score_code_quality(raw_text)
+                if quality_score >= 5.5:
+                    code_samples.append(
+                        {"code": raw_text, "language": "", "quality_score": quality_score}
+                    )
+                    continue
+
+        # Regular text/paragraph content
+        text = elem.get_text(separator=" ", strip=True)
+        if text:
+            text_parts.append(text)
+
+    return {
+        "section_number": section_number,
+        "heading": heading or "",
+        "heading_level": heading_level or "h1",
+        "text": "\n\n".join(text_parts),
+        "headings": sub_headings,
+        "code_samples": code_samples,
+        "tables": tables,
+        "images": images,
+    }
+
+
+def _extract_table_from_html(table_elem) -> dict | None:
+    """Extract headers and rows from a BeautifulSoup <table> element.
+
+    Headers come from the first <tr> inside <thead> when there is one;
+    otherwise the first non-empty row is promoted to the header. Rows are read
+    from the first <tbody>, or from the whole table when it has no <tbody>.
+    Rows without any cells are skipped.
+
+    Args:
+        table_elem: BeautifulSoup Tag for the <table>
+
+    Returns:
+        ``{"headers": [...], "rows": [[...], ...]}``, or None when the table
+        yields neither headers nor rows
+    """
+    headers = []
+    rows = []
+    header_row = None
+
+    # Try <thead> first for headers
+    thead = table_elem.find("thead")
+    if thead:
+        header_row = thead.find("tr")
+        if header_row:
+            headers = [th.get_text(strip=True) for th in header_row.find_all(["th", "td"])]
+
+    # Body rows
+    tbody = table_elem.find("tbody") or table_elem
+    for row in tbody.find_all("tr"):
+        # Skip the header row we already captured. Compare by identity rather
+        # than by cell text so a data row that repeats the header values is kept.
+        if row is header_row:
+            continue
+        cells = [td.get_text(strip=True) for td in row.find_all(["td", "th"])]
+        if cells:
+            rows.append(cells)
+
+    # If no explicit thead, use first row as header
+    if not headers and rows:
+        headers = rows.pop(0)
+
+    if not headers and not rows:
+        return None
+
+    return {"headers": headers, "rows": rows}
+
+
+def _score_code_quality(code: str) -> float:
+    """Simple quality heuristic for code blocks (0-10 scale).
+
+    Starts from 5.0 and adds bonuses for length (5+ / 10+ lines), function or
+    class definitions, import-like statements, 4-space indentation and common
+    code punctuation; snippets shorter than 30 characters are penalised.
+
+    Args:
+        code: Candidate code text
+
+    Returns:
+        Score clamped to the range 0.0-10.0 (0.0 for empty input)
+    """
+    if not code:
+        return 0.0
+
+    score = 5.0
+    lines = code.strip().split("\n")
+    line_count = len(lines)
+
+    # More lines = more substantial
+    if line_count >= 10:
+        score += 2.0
+    elif line_count >= 5:
+        score += 1.0
+
+    # Has function/class definitions
+    if re.search(r"\b(def |class |function |func |fn )", code):
+        score += 1.5
+
+    # Has imports/require
+    # "#include" sits outside the \b group: "#" is not a word character, so a
+    # leading \b can never match in front of it at the start of a line.
+    if re.search(r"\b(import |from .+ import|require\(|using )|#include", code):
+        score += 0.5
+
+    # Has indentation (common in Python, JS, etc.)
+    if re.search(r"^    ", code, re.MULTILINE):
+        score += 0.5
+
+    # Has assignment, operators, or common code syntax
+    if re.search(r"[=:{}()\[\]]", code):
+        score += 0.3
+
+    # Very short snippets get penalized
+    if len(code) < 30:
+        score -= 2.0
+
+    return min(10.0, max(0.0, score))
+
+
+def main():
+    """Command-line entry point: convert a .docx (or previously extracted JSON) to a skill.
+
+    Flow: parse arguments, set the root logging level, then either print a
+    dry-run summary, rebuild a skill from ``--from-json``, or extract the
+    document given by ``--docx``, build the skill and run the optional workflow
+    and AI enhancement steps.
+
+    Returns:
+        0 on success. Failures are reported on stderr and end the process via
+        ``sys.exit(1)``.
+    """
+    # Imported once, up front. A ``from pathlib import Path`` placed further
+    # down would make ``Path`` a local name for the whole function, so the
+    # earlier ``Path(...)`` calls would raise UnboundLocalError.
+    import os
+    from pathlib import Path
+
+    from .arguments.word import add_word_arguments
+
+    def _enhance_locally(target_dir):
+        """Run the LOCAL enhancer headlessly on ``target_dir``."""
+        from skill_seekers.cli.enhance_skill_local import LocalSkillEnhancer
+
+        enhancer = LocalSkillEnhancer(Path(target_dir))
+        enhancer.run(headless=True)
+        print("✅ Local enhancement complete!")
+
+    parser = argparse.ArgumentParser(
+        description="Convert Word document (.docx) to Claude skill",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    add_word_arguments(parser)
+
+    args = parser.parse_args()
+
+    # Set logging level
+    if getattr(args, "quiet", False):
+        logging.getLogger().setLevel(logging.WARNING)
+    elif getattr(args, "verbose", False):
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Handle --dry-run
+    if getattr(args, "dry_run", False):
+        source = getattr(args, "docx", None) or getattr(args, "from_json", None) or "(none)"
+        print(f"\n{'=' * 60}")
+        print("DRY RUN: Word Document Extraction")
+        print(f"{'=' * 60}")
+        print(f"Source:         {source}")
+        print(f"Name:           {getattr(args, 'name', None) or '(auto-detect)'}")
+        print(f"Enhance level:  {getattr(args, 'enhance_level', 0)}")
+        print("\n✅ Dry run complete")
+        return 0
+
+    # Validate inputs
+    if not (getattr(args, "docx", None) or getattr(args, "from_json", None)):
+        parser.error("Must specify --docx or --from-json")
+
+    # Build from JSON workflow
+    if getattr(args, "from_json", None):
+        name = Path(args.from_json).stem.replace("_extracted", "")
+        config = {
+            "name": getattr(args, "name", None) or name,
+            "description": getattr(args, "description", None)
+            or f"Use when referencing {name} documentation",
+        }
+        try:
+            converter = WordToSkillConverter(config)
+            converter.load_extracted_data(args.from_json)
+            converter.build_skill()
+        except Exception as e:
+            print(f"\n❌ Error: {e}", file=sys.stderr)
+            sys.exit(1)
+        return 0
+
+    # Direct DOCX mode
+    if not getattr(args, "name", None):
+        # Auto-detect name from filename
+        args.name = Path(args.docx).stem
+
+    config = {
+        "name": args.name,
+        "docx_path": args.docx,
+        # Pass None so extract_docx() can infer from document metadata (subject/title)
+        "description": getattr(args, "description", None),
+    }
+    if getattr(args, "categories", None):
+        config["categories"] = args.categories
+
+    try:
+        converter = WordToSkillConverter(config)
+
+        # Extract
+        if not converter.extract_docx():
+            print("\n❌ Word extraction failed - see error above", file=sys.stderr)
+            sys.exit(1)
+
+        # Build skill
+        converter.build_skill()
+
+        # Enhancement Workflow Integration
+        from skill_seekers.cli.workflow_runner import run_workflows
+
+        workflow_executed, workflow_names = run_workflows(args)
+        workflow_name = ", ".join(workflow_names) if workflow_names else None
+
+        # Traditional enhancement (complements workflow system)
+        # "or 0" keeps the comparison valid when the attribute is present but None
+        enhance_level = getattr(args, "enhance_level", 0) or 0
+        if enhance_level > 0:
+            api_key = getattr(args, "api_key", None) or os.environ.get("ANTHROPIC_API_KEY")
+            mode = "API" if api_key else "LOCAL"
+
+            print("\n" + "=" * 80)
+            print(f"🤖 Traditional AI Enhancement ({mode} mode, level {enhance_level})")
+            print("=" * 80)
+            if workflow_executed:
+                print(f"   Running after workflow: {workflow_name}")
+                print(
+                    "   (Workflow provides specialized analysis, enhancement provides general improvements)"
+                )
+            print("")
+
+            skill_dir = converter.skill_dir
+            if api_key:
+                try:
+                    from skill_seekers.cli.enhance_skill import enhance_skill_md
+
+                    enhance_skill_md(skill_dir, api_key)
+                    print("✅ API enhancement complete!")
+                except ImportError:
+                    print("❌ API enhancement not available. Falling back to LOCAL mode...")
+                    _enhance_locally(skill_dir)
+            else:
+                _enhance_locally(skill_dir)
+
+    except RuntimeError as e:
+        print(f"\n❌ Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ Unexpected error during Word processing: {e}", file=sys.stderr)
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_cli_parsers.py b/tests/test_cli_parsers.py
index 8266010..57fafef 100644
--- a/tests/test_cli_parsers.py
+++ b/tests/test_cli_parsers.py
@@ -24,12 +24,12 @@ class TestParserRegistry:
 
     def test_all_parsers_registered(self):
         """Test that all parsers are registered."""
-        assert len(PARSERS) == 21, f"Expected 21 parsers, got {len(PARSERS)}"
+        assert len(PARSERS) == 22, f"Expected 22 parsers, got {len(PARSERS)}"
 
     def test_get_parser_names(self):
         """Test getting list of parser names."""
         names = get_parser_names()
-        assert len(names) == 21
+        assert len(names) == 22
         assert "scrape" in names
         assert "github" in names
         assert "package" in names
@@ -242,9 +242,9 @@ class TestBackwardCompatibility:
             assert cmd in names, f"Command '{cmd}' not found in parser registry!"
 
     def test_command_count_matches(self):
-        """Test that we have exactly 21 commands (includes new create and workflows commands)."""
-        assert len(PARSERS) == 21
-        assert len(get_parser_names()) == 21
+        """Test that we have exactly 22 commands (includes new create, workflows, and word commands)."""
+        assert len(PARSERS) == 22
+        assert len(get_parser_names()) == 22
 
 
 if __name__ == "__main__":
diff --git a/tests/test_word_scraper.py b/tests/test_word_scraper.py
new file mode 100644
index 0000000..72dc8c3
--- /dev/null
+++ b/tests/test_word_scraper.py
@@ -0,0 +1,677 @@
+#!/usr/bin/env python3
+"""
+Tests for Word Document Scraper (cli/word_scraper.py)
+
+Tests cover:
+- Config-based initialization
+- Direct DOCX path conversion
+- JSON-based workflow
+- Skill structure generation
+- Categorization
+- Code blocks handling
+- Tables handling
+- Image handling
+- Error handling
+- CLI argument parsing
+"""
+
+import json
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+
+try:
+    import mammoth  # noqa: F401
+    import docx as python_docx  # noqa: F401
+
+    WORD_AVAILABLE = True
+except ImportError:
+    WORD_AVAILABLE = False
+
+
+def _make_sample_extracted_data(num_sections=2, include_code=False, include_tables=False,
+                                include_images=False):
+    """Return a minimal extracted_data dict for tests.
+
+    Produces ``num_sections`` h1 sections numbered from 1. Depending on the
+    flags, each section also carries one Python code sample, one two-column
+    table with two rows, and/or one small PNG image; the top-level totals are
+    derived from ``num_sections`` and the same flags.
+    """
+    png_bytes = (
+        b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
+        b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01"
+        b"\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82"
+    )
+
+    def _section(number):
+        code_samples = []
+        if include_code:
+            code_samples = [
+                {"code": f"def hello_{number}():\n    return 'world'", "language": "python",
+                 "quality_score": 7.5}
+            ]
+        tables = []
+        if include_tables:
+            tables = [
+                {"headers": ["Col A", "Col B"], "rows": [["val1", "val2"], ["val3", "val4"]]}
+            ]
+        images = []
+        if include_images:
+            images = [{"index": 0, "data": png_bytes, "width": 100, "height": 80}]
+        return {
+            "section_number": number,
+            "heading": f"Section {number}",
+            "heading_level": "h1",
+            "text": f"Content for section {number}.",
+            "headings": [],
+            "code_samples": code_samples,
+            "tables": tables,
+            "images": images,
+        }
+
+    sections = [_section(number) for number in range(1, num_sections + 1)]
+    code_total = num_sections if include_code else 0
+    image_total = num_sections if include_images else 0
+    languages = {"python": num_sections} if include_code else {}
+
+    return {
+        "source_file": "test.docx",
+        "metadata": {"title": "Test Doc", "author": "Test Author", "created": "", "modified": "",
+                     "subject": ""},
+        "total_sections": num_sections,
+        "total_code_blocks": code_total,
+        "total_images": image_total,
+        "languages_detected": languages,
+        "pages": sections,
+    }
+
+
+class TestWordToSkillConverterInit(unittest.TestCase):
+    """Test WordToSkillConverter initialization and basic functionality."""
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        # unittest only calls tearDown after setUp succeeded, so temp_dir always exists here
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_init_with_name_and_docx_path(self):
+        """Test initialization with name and docx path."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        self.assertEqual(converter.name, "test_skill")
+        self.assertEqual(converter.docx_path, "test.docx")
+
+    def test_init_with_full_config(self):
+        """Test initialization with full config."""
+        config = {
+            "name": "my_skill",
+            "docx_path": "docs/api.docx",
+            "description": "API documentation skill",
+        }
+        converter = self.WordToSkillConverter(config)
+        self.assertEqual(converter.name, "my_skill")
+        self.assertEqual(converter.description, "API documentation skill")
+
+    def test_init_requires_name(self):
+        """Test that missing 'name' field raises an error."""
+        with self.assertRaises((KeyError, TypeError)):
+            self.WordToSkillConverter({})
+
+    def test_default_description_uses_name(self):
+        """Test that default description is generated from name."""
+        config = {"name": "my_api", "docx_path": "api.docx"}
+        converter = self.WordToSkillConverter(config)
+        self.assertIn("my_api", converter.description)
+
+    def test_skill_dir_uses_name(self):
+        """Test that skill_dir is derived from name."""
+        config = {"name": "my_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        self.assertIn("my_skill", converter.skill_dir)
+
+    def test_name_auto_detected_from_filename(self):
+        """With empty metadata, infer_description_from_word returns a description containing the name."""
+        from skill_seekers.cli.word_scraper import infer_description_from_word
+        desc = infer_description_from_word({}, name="my_doc")
+        self.assertIn("my_doc", desc)
+
+
+class TestWordCategorization(unittest.TestCase):
+    """Test content categorization functionality."""
+
+    # Every test assigns converter.extracted_data directly, so extract_docx()
+    # is never called and no .docx file is read.
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_single_docx_creates_single_category(self):
+        """With docx_path set, categorize_content creates a single category."""
+        config = {"name": "test", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.extracted_data = _make_sample_extracted_data(num_sections=3)
+
+        categories = converter.categorize_content()
+
+        self.assertEqual(len(categories), 1)
+        # Category key is sanitized docx basename
+        self.assertIn("test", categories)
+        self.assertEqual(len(categories["test"]["pages"]), 3)
+
+    def test_keyword_based_categorization(self):
+        """Test keyword-based categorization without docx_path."""
+        config = {
+            "name": "test",
+            "docx_path": "",
+            "categories": {
+                "api": ["api", "reference"],
+                "guide": ["getting started", "tutorial"],
+            },
+        }
+        converter = self.WordToSkillConverter(config)
+        # Force an empty docx_path on the instance regardless of what __init__ stored
+        converter.docx_path = ""
+        converter.extracted_data = {
+            "pages": [
+                {"section_number": 1, "heading": "API Reference", "text": "api reference docs",
+                 "code_samples": [], "tables": [], "images": []},
+                {"section_number": 2, "heading": "Getting Started", "text": "getting started guide",
+                 "code_samples": [], "tables": [], "images": []},
+            ]
+        }
+
+        categories = converter.categorize_content()
+        # Only the result's type and non-emptiness are checked, not the category names
+        self.assertIsInstance(categories, dict)
+        self.assertGreater(len(categories), 0)
+
+    def test_fallback_to_content_category(self):
+        """Without docx_path and no categories config, uses 'content' category."""
+        config = {"name": "test", "docx_path": ""}
+        converter = self.WordToSkillConverter(config)
+        # Force an empty docx_path on the instance regardless of what __init__ stored
+        converter.docx_path = ""
+        converter.extracted_data = _make_sample_extracted_data(num_sections=1)
+
+        categories = converter.categorize_content()
+        # NOTE(review): the 'content' key named in the docstring is not asserted here
+        self.assertIsInstance(categories, dict)
+        self.assertGreater(len(categories), 0)
+
+
+class TestWordSkillBuilding(unittest.TestCase):
+    """Test skill structure generation."""
+
+    # Each test redirects converter.skill_dir into a per-test temp directory and
+    # feeds extracted_data directly, so build_skill() runs without a .docx file.
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_build_skill_creates_directory_structure(self):
+        """build_skill creates required directory structure."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_dir = Path(self.temp_dir) / "test_skill"
+        self.assertTrue(skill_dir.exists())
+        self.assertTrue((skill_dir / "references").exists())
+        self.assertTrue((skill_dir / "scripts").exists())
+        self.assertTrue((skill_dir / "assets").exists())
+
+    def test_build_skill_creates_skill_md(self):
+        """build_skill creates SKILL.md with correct content."""
+        config = {
+            "name": "test_skill",
+            "docx_path": "test.docx",
+            "description": "Test description for docs",
+        }
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+
+        # Explicit encoding: do not depend on the platform's locale default
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertIn("test_skill", content)
+        self.assertIn("Test description for docs", content)
+
+    def test_build_skill_creates_reference_files(self):
+        """build_skill creates reference markdown files."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(num_sections=2)
+
+        converter.build_skill()
+
+        refs_dir = Path(self.temp_dir) / "test_skill" / "references"
+        # Single-source: named after docx basename
+        self.assertTrue((refs_dir / "test.md").exists())
+        self.assertTrue((refs_dir / "index.md").exists())
+
+    def test_skill_md_has_yaml_frontmatter(self):
+        """SKILL.md starts with valid YAML frontmatter."""
+        config = {"name": "myskill", "docx_path": "doc.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "myskill")
+        converter.extracted_data = _make_sample_extracted_data()
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "myskill" / "SKILL.md"
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertTrue(content.startswith("---\n"))
+        self.assertIn("name:", content)
+        self.assertIn("description:", content)
+
+    def test_skill_md_includes_section_overview(self):
+        """SKILL.md includes a Section Overview."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(num_sections=3)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertIn("Section Overview", content)
+        self.assertIn("Total Sections", content)
+
+
+class TestWordCodeBlocks(unittest.TestCase):
+    """Test code block extraction and inclusion."""
+
+    # Code samples come from _make_sample_extracted_data(include_code=True);
+    # no .docx file is parsed in these tests.
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_code_blocks_included_in_references(self):
+        """Code blocks are included in reference files."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_code=True)
+
+        converter.build_skill()
+
+        ref_file = Path(self.temp_dir) / "test_skill" / "references" / "test.md"
+        # Explicit encoding: do not depend on the platform's locale default
+        content = ref_file.read_text(encoding="utf-8")
+        self.assertIn("```python", content)
+        self.assertIn("def hello_", content)
+
+    def test_code_examples_in_skill_md(self):
+        """SKILL.md includes code examples section when code is present."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_code=True)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertIn("Code Examples", content)
+
+    def test_language_detected_in_statistics(self):
+        """Language statistics are included in SKILL.md."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_code=True)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertIn("python", content)
+
+
+class TestWordTables(unittest.TestCase):
+    """Test table extraction and rendering."""
+
+    # Tables come from _make_sample_extracted_data(include_tables=True);
+    # no .docx file is parsed in these tests.
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_tables_rendered_in_references(self):
+        """Tables are rendered as markdown tables in reference files."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_tables=True)
+
+        converter.build_skill()
+
+        ref_file = Path(self.temp_dir) / "test_skill" / "references" / "test.md"
+        # Explicit encoding: do not depend on the platform's locale default
+        content = ref_file.read_text(encoding="utf-8")
+        # Markdown table syntax
+        self.assertIn("| Col A |", content)
+        self.assertIn("| --- |", content)
+
+    def test_table_summary_in_skill_md(self):
+        """Table summary section appears in SKILL.md when tables exist."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_tables=True)
+
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        content = skill_md.read_text(encoding="utf-8")
+        self.assertIn("Table Summary", content)
+
+
+class TestWordImages(unittest.TestCase):
+    """Test image extraction and handling."""
+
+    # Image bytes come from _make_sample_extracted_data(include_images=True);
+    # no .docx file is parsed in these tests.
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_images_saved_to_assets(self):
+        """Images are saved to the assets/ directory."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_images=True)
+
+        converter.build_skill()
+
+        assets_dir = Path(self.temp_dir) / "test_skill" / "assets"
+        png_files = list(assets_dir.glob("*.png"))
+        self.assertGreater(len(png_files), 0)
+
+    def test_image_references_in_markdown(self):
+        """Images are referenced with markdown syntax in reference files."""
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")
+        converter.extracted_data = _make_sample_extracted_data(include_images=True)
+
+        converter.build_skill()
+
+        ref_file = Path(self.temp_dir) / "test_skill" / "references" / "test.md"
+        # Explicit encoding: do not depend on the platform's locale default
+        content = ref_file.read_text(encoding="utf-8")
+        self.assertIn("![", content)
+        self.assertIn("../assets/", content)
+
+
+class TestWordErrorHandling(unittest.TestCase):
+    """Test error handling for invalid inputs."""
+
+    def setUp(self):
+        # Skip (rather than fail) when the optional Word dependencies are missing
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_missing_docx_file_raises_error(self):
+        """extract_docx raises FileNotFoundError or RuntimeError for a missing file."""
+        config = {"name": "test", "docx_path": "/nonexistent/path/test.docx"}
+        converter = self.WordToSkillConverter(config)
+        # Either exception type is accepted; the test does not pin which one is raised
+        with self.assertRaises((FileNotFoundError, RuntimeError)):
+            converter.extract_docx()
+
+    def test_invalid_config_raises_error(self):
+        """Non-dict config raises TypeError or AttributeError."""
+        with self.assertRaises((TypeError, AttributeError)):
+            self.WordToSkillConverter("invalid string")
+
+    def test_missing_name_raises_key_error(self):
+        """Config without 'name' raises KeyError or TypeError."""
+        with self.assertRaises((KeyError, TypeError)):
+            self.WordToSkillConverter({"docx_path": "test.docx"})
+
+
+class TestWordJSONWorkflow(unittest.TestCase):
+    """Test loading previously extracted JSON and building a skill from it."""
+
+    def setUp(self):
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+        from skill_seekers.cli.word_scraper import WordToSkillConverter
+        self.WordToSkillConverter = WordToSkillConverter
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)  # best-effort cleanup
+
+    def test_load_from_json(self):
+        """load_extracted_data reads total_sections and pages back from a JSON file."""
+        extracted_data = _make_sample_extracted_data(num_sections=3)
+        json_path = Path(self.temp_dir) / "extracted.json"
+        json_path.write_text(json.dumps(extracted_data, indent=2))
+
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.load_extracted_data(str(json_path))
+
+        self.assertEqual(converter.extracted_data["total_sections"], 3)
+        self.assertEqual(len(converter.extracted_data["pages"]), 3)
+
+    def test_build_from_json_without_extraction(self):
+        """Loading from JSON populates extracted_data without ever calling extract_docx()."""
+        extracted_data = _make_sample_extracted_data(num_sections=2)
+        json_path = Path(self.temp_dir) / "extracted.json"
+        json_path.write_text(json.dumps(extracted_data))
+
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.load_extracted_data(str(json_path))
+
+        self.assertIsNotNone(converter.extracted_data)
+        self.assertEqual(len(converter.extracted_data["pages"]), 2)
+
+    def test_skill_built_from_json_has_skill_md(self):
+        """build_skill() after load_extracted_data() creates SKILL.md under skill_dir."""
+        extracted_data = _make_sample_extracted_data(num_sections=2)
+        json_path = Path(self.temp_dir) / "extracted.json"
+        json_path.write_text(json.dumps(extracted_data))
+
+        config = {"name": "test_skill", "docx_path": "test.docx"}
+        converter = self.WordToSkillConverter(config)
+        converter.skill_dir = str(Path(self.temp_dir) / "test_skill")  # keep output in temp dir
+        converter.load_extracted_data(str(json_path))
+        converter.build_skill()
+
+        skill_md = Path(self.temp_dir) / "test_skill" / "SKILL.md"
+        self.assertTrue(skill_md.exists())
+
+
+class TestWordCLIArguments(unittest.TestCase):
+    """Test word subcommand CLI argument parsing via the main CLI."""
+
+    def setUp(self):
+        """Put the repo's src/ directory on sys.path once, then build the main CLI parser."""
+        import sys
+        src_dir = str(Path(__file__).parent.parent / "src")
+        if src_dir not in sys.path:  # setUp runs per test; do not stack duplicate entries
+            sys.path.insert(0, src_dir)
+        from skill_seekers.cli.main import create_parser
+        self.parser = create_parser()
+
+    def test_docx_argument_accepted(self):
+        """--docx flag is accepted for the word subcommand."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx"])
+        self.assertEqual(args.docx, "test.docx")
+
+    def test_api_key_accepted(self):
+        """--api-key is accepted for the word subcommand."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx", "--api-key", "sk-ant-test"])
+        self.assertEqual(args.api_key, "sk-ant-test")
+
+    def test_enhance_level_accepted(self):
+        """--enhance-level is accepted and parsed as an int."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx", "--enhance-level", "1"])
+        self.assertEqual(args.enhance_level, 1)
+
+    def test_enhance_workflow_accepted(self):
+        """--enhance-workflow is accepted and stores a list."""
+        args = self.parser.parse_args(
+            ["word", "--docx", "test.docx", "--enhance-workflow", "minimal"]
+        )
+        self.assertEqual(args.enhance_workflow, ["minimal"])
+
+    def test_workflow_dry_run_accepted(self):
+        """--workflow-dry-run is accepted and yields a truthy flag."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx", "--workflow-dry-run"])
+        self.assertTrue(args.workflow_dry_run)
+
+    def test_dry_run_accepted(self):
+        """--dry-run is accepted for the word subcommand and yields a truthy flag."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx", "--dry-run"])
+        self.assertTrue(args.dry_run)
+
+    def test_from_json_accepted(self):
+        """--from-json is accepted on its own, without --docx."""
+        args = self.parser.parse_args(["word", "--from-json", "data.json"])
+        self.assertEqual(args.from_json, "data.json")
+
+    def test_name_accepted(self):
+        """--name is accepted alongside --docx."""
+        args = self.parser.parse_args(["word", "--docx", "test.docx", "--name", "myskill"])
+        self.assertEqual(args.name, "myskill")
+
+
+class TestWordHelperFunctions(unittest.TestCase):
+    """Test module-level helper functions."""
+
+    def setUp(self):
+        if not WORD_AVAILABLE:
+            self.skipTest("mammoth and python-docx not installed")
+
+    def test_build_section_basic(self):
+        """_build_section returns a well-formed dict."""
+        from skill_seekers.cli.word_scraper import _build_section
+        from bs4 import BeautifulSoup
+
+        html = "

Hello world.

Second paragraph.

" + soup = BeautifulSoup(html, "html.parser") + elements = list(soup.children) + + section = _build_section(1, "Intro", "h1", elements, None) + + self.assertEqual(section["section_number"], 1) + self.assertEqual(section["heading"], "Intro") + self.assertEqual(section["heading_level"], "h1") + self.assertIn("Hello world", section["text"]) + + def test_extract_table_from_html(self): + """_extract_table_from_html extracts headers and rows.""" + from skill_seekers.cli.word_scraper import _extract_table_from_html + from bs4 import BeautifulSoup + + html = """ +
+ + + + + +
NameValue
foo1
bar2
""" + soup = BeautifulSoup(html, "html.parser") + table_elem = soup.find("table") + + result = _extract_table_from_html(table_elem) + + self.assertIsNotNone(result) + self.assertEqual(result["headers"], ["Name", "Value"]) + self.assertEqual(len(result["rows"]), 2) + self.assertIn(["foo", "1"], result["rows"]) + + def test_score_code_quality_basic(self): + """_score_code_quality returns a score in [0, 10].""" + from skill_seekers.cli.word_scraper import _score_code_quality + + score = _score_code_quality("def foo():\n return 'bar'\n") + self.assertGreaterEqual(score, 0.0) + self.assertLessEqual(score, 10.0) + + def test_score_code_quality_empty(self): + """_score_code_quality returns 0.0 for empty code.""" + from skill_seekers.cli.word_scraper import _score_code_quality + + self.assertEqual(_score_code_quality(""), 0.0) + + def test_infer_description_from_word_subject(self): + """infer_description_from_word uses subject field when available.""" + from skill_seekers.cli.word_scraper import infer_description_from_word + + metadata = {"title": "Some Doc", "subject": "Writing API documentation for REST services"} + desc = infer_description_from_word(metadata, "api_docs") + self.assertIn("writing api documentation", desc.lower()) + + def test_infer_description_from_word_fallback(self): + """infer_description_from_word falls back to name.""" + from skill_seekers.cli.word_scraper import infer_description_from_word + + desc = infer_description_from_word({}, name="myskill") + self.assertIn("myskill", desc) + + +class TestWordSourceDetection(unittest.TestCase): + """Test .docx source detection in SourceDetector.""" + + def test_docx_detected_as_word_type(self): + """SourceDetector.detect() returns type='word' for .docx files.""" + from skill_seekers.cli.source_detector import SourceDetector + + # Use a path that ends in .docx (doesn't need to exist for detection) + source_info = SourceDetector.detect("/tmp/test_document.docx") + self.assertEqual(source_info.type, "word") + 
self.assertEqual(source_info.parsed["file_path"], "/tmp/test_document.docx") + self.assertEqual(source_info.suggested_name, "test_document") + + def test_docx_validation_missing_file(self): + """validate_source raises ValueError for missing .docx file.""" + from skill_seekers.cli.source_detector import SourceDetector + + source_info = SourceDetector.detect("/tmp/nonexistent_12345.docx") + with self.assertRaises(ValueError) as ctx: + SourceDetector.validate_source(source_info) + self.assertIn("does not exist", str(ctx.exception)) + + def test_pdf_still_detected(self): + """Existing PDF detection is unaffected by Word support.""" + from skill_seekers.cli.source_detector import SourceDetector + + source_info = SourceDetector.detect("/tmp/test.pdf") + self.assertEqual(source_info.type, "pdf") + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index ca708f7..6d7bf71 100644 --- a/uv.lock +++ b/uv.lock @@ -684,6 +684,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, ] +[[package]] +name = "cobble" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805, upload-time = "2024-06-01T18:11:09.528Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984, upload-time = "2024-06-01T18:11:07.911Z" }, +] + [[package]] name = "colorama" version = "0.4.6" 
@@ -2391,6 +2400,142 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/50/c5ccd2a50daa0a10c7f3f7d4e6992392454198cd8a7d99fcb96cb60d0686/llama_parse-0.6.54-py3-none-any.whl", hash = "sha256:c66c8d51cf6f29a44eaa8595a595de5d2598afc86e5a33a4cebe5fe228036920", size = 4879, upload-time = "2025-08-01T20:09:22.651Z" }, ] +[[package]] +name = "lxml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/8a/f8192a08237ef2fb1b19733f709db88a4c43bc8ab8357f01cb41a27e7f6a/lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388", size = 8590589, upload-time = "2025-09-22T04:00:10.51Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/27bcd07ae17ff5e5536e8d88f4c7d581b48963817a13de11f3ac3329bfa2/lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153", size = 4629671, upload-time = "2025-09-22T04:00:15.411Z" }, + { url = "https://files.pythonhosted.org/packages/02/5a/a7d53b3291c324e0b6e48f3c797be63836cc52156ddf8f33cd72aac78866/lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31", size = 4999961, upload-time = "2025-09-22T04:00:17.619Z" }, + { url = "https://files.pythonhosted.org/packages/f5/55/d465e9b89df1761674d8672bb3e4ae2c47033b01ec243964b6e334c6743f/lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9", size = 5157087, upload-time = 
"2025-09-22T04:00:19.868Z" }, + { url = "https://files.pythonhosted.org/packages/62/38/3073cd7e3e8dfc3ba3c3a139e33bee3a82de2bfb0925714351ad3d255c13/lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8", size = 5067620, upload-time = "2025-09-22T04:00:21.877Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d3/1e001588c5e2205637b08985597827d3827dbaaece16348c8822bfe61c29/lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba", size = 5406664, upload-time = "2025-09-22T04:00:23.714Z" }, + { url = "https://files.pythonhosted.org/packages/20/cf/cab09478699b003857ed6ebfe95e9fb9fa3d3c25f1353b905c9b73cfb624/lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c", size = 5289397, upload-time = "2025-09-22T04:00:25.544Z" }, + { url = "https://files.pythonhosted.org/packages/a3/84/02a2d0c38ac9a8b9f9e5e1bbd3f24b3f426044ad618b552e9549ee91bd63/lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c", size = 4772178, upload-time = "2025-09-22T04:00:27.602Z" }, + { url = "https://files.pythonhosted.org/packages/56/87/e1ceadcc031ec4aa605fe95476892d0b0ba3b7f8c7dcdf88fdeff59a9c86/lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321", size = 5358148, upload-time = "2025-09-22T04:00:29.323Z" }, + { url = "https://files.pythonhosted.org/packages/fe/13/5bb6cf42bb228353fd4ac5f162c6a84fd68a4d6f67c1031c8cf97e131fc6/lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1", size = 5112035, upload-time = "2025-09-22T04:00:31.061Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/e2/ea0498552102e59834e297c5c6dff8d8ded3db72ed5e8aad77871476f073/lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34", size = 4799111, upload-time = "2025-09-22T04:00:33.11Z" }, + { url = "https://files.pythonhosted.org/packages/6a/9e/8de42b52a73abb8af86c66c969b3b4c2a96567b6ac74637c037d2e3baa60/lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a", size = 5351662, upload-time = "2025-09-22T04:00:35.237Z" }, + { url = "https://files.pythonhosted.org/packages/28/a2/de776a573dfb15114509a37351937c367530865edb10a90189d0b4b9b70a/lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c", size = 5314973, upload-time = "2025-09-22T04:00:37.086Z" }, + { url = "https://files.pythonhosted.org/packages/50/a0/3ae1b1f8964c271b5eec91db2043cf8c6c0bce101ebb2a633b51b044db6c/lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b", size = 3611953, upload-time = "2025-09-22T04:00:39.224Z" }, + { url = "https://files.pythonhosted.org/packages/d1/70/bd42491f0634aad41bdfc1e46f5cff98825fb6185688dc82baa35d509f1a/lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0", size = 4032695, upload-time = "2025-09-22T04:00:41.402Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d0/05c6a72299f54c2c561a6c6cbb2f512e047fca20ea97a05e57931f194ac4/lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5", size = 3680051, upload-time = "2025-09-22T04:00:43.525Z" }, + { url = "https://files.pythonhosted.org/packages/77/d5/becbe1e2569b474a23f0c672ead8a29ac50b2dc1d5b9de184831bda8d14c/lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607", size = 8634365, upload-time = "2025-09-22T04:00:45.672Z" }, + { url = "https://files.pythonhosted.org/packages/28/66/1ced58f12e804644426b85d0bb8a4478ca77bc1761455da310505f1a3526/lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938", size = 4650793, upload-time = "2025-09-22T04:00:47.783Z" }, + { url = "https://files.pythonhosted.org/packages/11/84/549098ffea39dfd167e3f174b4ce983d0eed61f9d8d25b7bf2a57c3247fc/lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d", size = 4944362, upload-time = "2025-09-22T04:00:49.845Z" }, + { url = "https://files.pythonhosted.org/packages/ac/bd/f207f16abf9749d2037453d56b643a7471d8fde855a231a12d1e095c4f01/lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438", size = 5083152, upload-time = "2025-09-22T04:00:51.709Z" }, + { url = "https://files.pythonhosted.org/packages/15/ae/bd813e87d8941d52ad5b65071b1affb48da01c4ed3c9c99e40abb266fbff/lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964", size = 5023539, upload-time = "2025-09-22T04:00:53.593Z" }, + { url = "https://files.pythonhosted.org/packages/02/cd/9bfef16bd1d874fbe0cb51afb00329540f30a3283beb9f0780adbb7eec03/lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d", size = 5344853, upload-time = "2025-09-22T04:00:55.524Z" }, + { url = "https://files.pythonhosted.org/packages/b8/89/ea8f91594bc5dbb879734d35a6f2b0ad50605d7fb419de2b63d4211765cc/lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7", size = 5225133, upload-time = "2025-09-22T04:00:57.269Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/9c735274f5dbec726b2db99b98a43950395ba3d4a1043083dba2ad814170/lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178", size = 4677944, upload-time = "2025-09-22T04:00:59.052Z" }, + { url = "https://files.pythonhosted.org/packages/20/28/7dfe1ba3475d8bfca3878365075abe002e05d40dfaaeb7ec01b4c587d533/lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553", size = 5284535, upload-time = "2025-09-22T04:01:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/e7/cf/5f14bc0de763498fc29510e3532bf2b4b3a1c1d5d0dff2e900c16ba021ef/lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb", size = 5067343, upload-time = "2025-09-22T04:01:03.13Z" }, + { url = "https://files.pythonhosted.org/packages/1c/b0/bb8275ab5472f32b28cfbbcc6db7c9d092482d3439ca279d8d6fa02f7025/lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a", size = 4725419, upload-time = "2025-09-22T04:01:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/25/4c/7c222753bc72edca3b99dbadba1b064209bc8ed4ad448af990e60dcce462/lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c", size = 5275008, upload-time = "2025-09-22T04:01:07.327Z" }, + { url = "https://files.pythonhosted.org/packages/6c/8c/478a0dc6b6ed661451379447cdbec77c05741a75736d97e5b2b729687828/lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7", size = 5248906, upload-time = 
"2025-09-22T04:01:09.452Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d9/5be3a6ab2784cdf9accb0703b65e1b64fcdd9311c9f007630c7db0cfcce1/lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46", size = 3610357, upload-time = "2025-09-22T04:01:11.102Z" }, + { url = "https://files.pythonhosted.org/packages/e2/7d/ca6fb13349b473d5732fb0ee3eec8f6c80fc0688e76b7d79c1008481bf1f/lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078", size = 4036583, upload-time = "2025-09-22T04:01:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a2/51363b5ecd3eab46563645f3a2c3836a2fc67d01a1b87c5017040f39f567/lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285", size = 3680591, upload-time = "2025-09-22T04:01:14.874Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" }, + { url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" }, + { url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" }, + { url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" }, + { url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" }, + { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" }, + { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" }, + { url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" }, + { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" }, + { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" }, + { url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" }, + { url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" }, + { url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" }, + { url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" }, + { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" }, + { url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" }, + { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" }, + { url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" }, + { url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, + { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9c/780c9a8fce3f04690b374f72f41306866b0400b9d0fdf3e17aaa37887eed/lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6", size = 3939264, upload-time = "2025-09-22T04:04:32.892Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5a/1ab260c00adf645d8bf7dec7f920f744b032f69130c681302821d5debea6/lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba", size = 4216435, upload-time = "2025-09-22T04:04:34.907Z" }, + { url = "https://files.pythonhosted.org/packages/f2/37/565f3b3d7ffede22874b6d86be1a1763d00f4ea9fc5b9b6ccb11e4ec8612/lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5", size = 4325913, upload-time = "2025-09-22T04:04:37.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/ec/f3a1b169b2fb9d03467e2e3c0c752ea30e993be440a068b125fc7dd248b0/lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4", size = 4269357, upload-time = "2025-09-22T04:04:39.322Z" }, + { url = "https://files.pythonhosted.org/packages/77/a2/585a28fe3e67daa1cf2f06f34490d556d121c25d500b10082a7db96e3bcd/lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d", size = 4412295, upload-time = "2025-09-22T04:04:41.647Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d9/a57dd8bcebd7c69386c20263830d4fa72d27e6b72a229ef7a48e88952d9a/lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d", size = 3516913, upload-time = "2025-09-22T04:04:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/0b/11/29d08bc103a62c0eba8016e7ed5aeebbf1e4312e83b0b1648dd203b0e87d/lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700", size = 3949829, upload-time = "2025-09-22T04:04:45.608Z" }, + { url = "https://files.pythonhosted.org/packages/12/b3/52ab9a3b31e5ab8238da241baa19eec44d2ab426532441ee607165aebb52/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee", size = 4226277, upload-time = "2025-09-22T04:04:47.754Z" }, + { url = "https://files.pythonhosted.org/packages/a0/33/1eaf780c1baad88224611df13b1c2a9dfa460b526cacfe769103ff50d845/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f", size = 4330433, upload-time = "2025-09-22T04:04:49.907Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/c1/27428a2ff348e994ab4f8777d3a0ad510b6b92d37718e5887d2da99952a2/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9", size = 4272119, upload-time = "2025-09-22T04:04:51.801Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/3020fa12bcec4ab62f97aab026d57c2f0cfd480a558758d9ca233bb6a79d/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a", size = 4417314, upload-time = "2025-09-22T04:04:55.024Z" }, + { url = "https://files.pythonhosted.org/packages/6c/77/d7f491cbc05303ac6801651aabeb262d43f319288c1ea96c66b1d2692ff3/lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e", size = 3518768, upload-time = "2025-09-22T04:04:57.097Z" }, +] + +[[package]] +name = "mammoth" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cobble" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/3c/a58418d2af00f2da60d4a51e18cd0311307b72d48d2fffec36a97b4a5e44/mammoth-1.11.0.tar.gz", hash = "sha256:a0f59e442f34d5b6447f4b0999306cbf3e67aaabfa8cb516f878fb1456744637", size = 53142, upload-time = "2025-09-19T10:35:20.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/54/2e39566a131b13f6d8d193f974cb6a34e81bb7cc2fa6f7e03de067b36588/mammoth-1.11.0-py2.py3-none-any.whl", hash = "sha256:c077ab0d450bd7c0c6ecd529a23bf7e0fa8190c929e28998308ff4eada3f063b", size = 54752, upload-time = "2025-09-19T10:35:18.699Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -4406,6 +4551,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = 
"sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-docx" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -5204,7 +5362,7 @@ wheels = [ [[package]] name = "skill-seekers" -version = "3.1.2" +version = "3.1.3" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, @@ -5242,10 +5400,12 @@ all = [ { name = "google-generativeai" }, { name = "httpx" }, { name = "httpx-sse" }, + { name = "mammoth" }, { name = "mcp" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "openai" }, + { name = "python-docx" }, { name = "sentence-transformers" }, { name = "sse-starlette" }, { name = "starlette" }, @@ -5268,6 +5428,10 @@ azure = [ chroma = [ { name = "chromadb" }, ] +docx = [ + { name = "mammoth" }, + { name = "python-docx" }, +] embedding = [ { name = "fastapi" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -5357,6 +5521,8 @@ requires-dist = [ { name = "jsonschema", specifier = ">=4.25.1" }, { name = "langchain", specifier = ">=1.2.10" }, { name = "llama-index", specifier = ">=0.14.15" }, + { name = "mammoth", marker = "extra == 'all'", specifier = ">=1.6.0" }, + { name = "mammoth", marker = "extra == 'docx'", specifier = ">=1.6.0" }, { name = "mcp", marker = "extra == 'all'", specifier = ">=1.25,<2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.25,<2" }, { name = "networkx", specifier = ">=3.0" }, @@ -5373,6 +5539,8 @@ requires-dist = [ { name = "pygments", specifier = ">=2.19.2" }, { name = "pymupdf", specifier = ">=1.24.14" }, { name = "pytesseract", specifier = ">=0.3.13" }, + { name = "python-docx", marker = "extra == 'all'", specifier = ">=1.1.0" }, + { name = "python-docx", marker = "extra == 'docx'", specifier = ">=1.1.0" }, { name = "python-dotenv", specifier = ">=1.1.1" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "requests", specifier = ">=2.32.5" }, @@ -5395,7 +5563,7 @@ requires-dist = [ { name = "weaviate-client", marker = "extra == 'rag-upload'", 
specifier = ">=3.25.0" }, { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = ">=3.25.0" }, ] -provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "chroma", "weaviate", "sentence-transformers", "rag-upload", "all-cloud", "embedding", "all"] +provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "chroma", "weaviate", "sentence-transformers", "rag-upload", "all-cloud", "embedding", "all"] [package.metadata.requires-dev] dev = [