Implements ROADMAP task B2 — full .docx scraping support via mammoth + python-docx, producing SKILL.md + references/ output identical to other source types.

New files:
- src/skill_seekers/cli/word_scraper.py — WordToSkillConverter class + main() entry point (~600 lines); mammoth → BeautifulSoup pipeline; handles headings, code detection (incl. monospace <p><br> blocks), tables, images, metadata extraction
- src/skill_seekers/cli/arguments/word.py — add_word_arguments() + WORD_ARGUMENTS dict
- src/skill_seekers/cli/parsers/word_parser.py — WordParser for unified CLI parser registry
- tests/test_word_scraper.py — comprehensive test suite (~300 lines)

Modified files:
- src/skill_seekers/cli/main.py — registered "word" command module
- src/skill_seekers/cli/source_detector.py — .docx auto-detection + _detect_word() classmethod
- src/skill_seekers/cli/create_command.py — _route_word() + --help-word
- src/skill_seekers/cli/arguments/create.py — WORD_ARGUMENTS + routing
- src/skill_seekers/cli/arguments/__init__.py — export word args
- src/skill_seekers/cli/parsers/__init__.py — register WordParser
- src/skill_seekers/cli/unified_scraper.py — _scrape_word() integration
- src/skill_seekers/cli/pdf_scraper.py — fix: real enhancement instead of stub; remove [:3] reference file limit; capture run_workflows return
- src/skill_seekers/cli/github_scraper.py — fix: remove arbitrary open_issues[:20] / closed_issues[:10] reference file limits
- pyproject.toml — skill-seekers-word entry point + docx optional dep
- tests/test_cli_parsers.py — update parser count 21→22

Bug fixes applied during real-world testing:
- Code detection: detect monospace <p><br> blocks as code (mammoth renders Courier paragraphs this way, not as <pre>/<code>)
- Language detector: fix wrong method name detect_from_text → detect_from_code
- Description inference: pass None from main() so extract_docx() can infer description from Word document subject/title metadata
- Bullet-point guard: exclude prose starting with •/-/* from code scoring
- Enhancement: implement real API/LOCAL enhancement (was stub)
- pip install message: add quotes around skill-seekers[docx]

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
"""Shared CLI argument definitions.

This module provides a single source of truth for all CLI argument definitions.
Both standalone modules and unified CLI parsers import from here.

Usage:
    import argparse

    from skill_seekers.cli.arguments.scrape import add_scrape_arguments
    from skill_seekers.cli.arguments.github import add_github_arguments
    from skill_seekers.cli.arguments.pdf import add_pdf_arguments
    from skill_seekers.cli.arguments.word import add_word_arguments
    from skill_seekers.cli.arguments.analyze import add_analyze_arguments
    from skill_seekers.cli.arguments.unified import add_unified_arguments
    from skill_seekers.cli.arguments.package import add_package_arguments
    from skill_seekers.cli.arguments.upload import add_upload_arguments
    from skill_seekers.cli.arguments.enhance import add_enhance_arguments

    parser = argparse.ArgumentParser()
    add_scrape_arguments(parser)
"""
|
|
|
|
# Re-export each argument module's ``add_*_arguments`` helper together with its
# ``*_ARGUMENTS`` table so callers can import everything from this package.
from .common import add_common_arguments, COMMON_ARGUMENTS
from .scrape import add_scrape_arguments, SCRAPE_ARGUMENTS
from .github import add_github_arguments, GITHUB_ARGUMENTS
from .pdf import add_pdf_arguments, PDF_ARGUMENTS
from .word import add_word_arguments, WORD_ARGUMENTS
from .analyze import add_analyze_arguments, ANALYZE_ARGUMENTS
from .unified import add_unified_arguments, UNIFIED_ARGUMENTS
from .package import add_package_arguments, PACKAGE_ARGUMENTS
from .upload import add_upload_arguments, UPLOAD_ARGUMENTS
from .enhance import add_enhance_arguments, ENHANCE_ARGUMENTS

# Public API of the package. Both groups list their entries in the same order
# as the imports above, so a missing or extra export is easy to spot.
__all__ = [
    # Functions
    "add_common_arguments",
    "add_scrape_arguments",
    "add_github_arguments",
    "add_pdf_arguments",
    "add_word_arguments",
    "add_analyze_arguments",
    "add_unified_arguments",
    "add_package_arguments",
    "add_upload_arguments",
    "add_enhance_arguments",
    # Data
    "COMMON_ARGUMENTS",
    "SCRAPE_ARGUMENTS",
    "GITHUB_ARGUMENTS",
    "PDF_ARGUMENTS",
    "WORD_ARGUMENTS",
    "ANALYZE_ARGUMENTS",
    "UNIFIED_ARGUMENTS",
    "PACKAGE_ARGUMENTS",
    "UPLOAD_ARGUMENTS",
    "ENHANCE_ARGUMENTS",
]