Sync with latest development changes including ruff formatting, bug fixes, and pinecone adaptor additions. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
714 lines · 26 KiB · Python
"""Unified create command - single entry point for skill creation.
|
|
|
|
Auto-detects source type (web, GitHub, local, PDF, config) and routes
|
|
to appropriate scraper while maintaining full backward compatibility.
|
|
"""
|
|
|
|
import sys
|
|
import logging
|
|
import argparse
|
|
|
|
from skill_seekers.cli.source_detector import SourceDetector, SourceInfo
|
|
from skill_seekers.cli.arguments.create import (
|
|
get_compatible_arguments,
|
|
get_universal_argument_names,
|
|
)
|
|
from skill_seekers.cli.arguments.common import DEFAULT_CHUNK_TOKENS, DEFAULT_CHUNK_OVERLAP_TOKENS
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CreateCommand:
    """Unified create command implementation.

    Detects the type of ``args.source`` (web, GitHub, local, PDF, Word,
    video, or config), validates it, warns about arguments that do not
    apply to that source type, then forwards control to the matching
    scraper CLI by temporarily rewriting ``sys.argv``.
    """

    def __init__(self, args: argparse.Namespace):
        """Initialize create command.

        Args:
            args: Parsed command-line arguments
        """
        self.args = args
        self.source_info: SourceInfo | None = None

    def execute(self) -> int:
        """Execute the create command.

        Returns:
            Exit code (0 for success, non-zero for error)
        """
        # 1. Detect source type
        try:
            self.source_info = SourceDetector.detect(self.args.source)
            logger.info(f"Detected source type: {self.source_info.type}")
            logger.debug(f"Parsed info: {self.source_info.parsed}")
        except ValueError as e:
            logger.error(str(e))
            return 1

        # 2. Validate source accessibility
        try:
            SourceDetector.validate_source(self.source_info)
        except ValueError as e:
            logger.error(f"Source validation failed: {e}")
            return 1

        # 3. Validate and warn about incompatible arguments
        self._validate_arguments()

        # 4. Route to appropriate scraper
        logger.info(f"Routing to {self.source_info.type} scraper...")
        return self._route_to_scraper()

    def _validate_arguments(self) -> None:
        """Validate arguments and warn about incompatible ones."""
        # Get compatible arguments for this source type
        compatible = set(get_compatible_arguments(self.source_info.type))
        universal = get_universal_argument_names()

        # Check all provided arguments
        for arg_name, arg_value in vars(self.args).items():
            # Skip if not explicitly set (has default value)
            if not self._is_explicitly_set(arg_name, arg_value):
                continue

            # Skip if compatible
            if arg_name in compatible:
                continue

            # Skip internal arguments
            if arg_name in ["source", "func", "subcommand"]:
                continue

            # Warn about incompatible argument
            if arg_name not in universal:
                logger.warning(
                    f"--{arg_name.replace('_', '-')} is not applicable for "
                    f"{self.source_info.type} sources and will be ignored"
                )

    def _is_explicitly_set(self, arg_name: str, arg_value: object) -> bool:
        """Check if an argument was explicitly set by the user.

        Args:
            arg_name: Argument name
            arg_value: Argument value

        Returns:
            True if user explicitly set this argument
        """
        # Boolean flags - True means it was set
        if isinstance(arg_value, bool):
            return arg_value

        # None means not set
        if arg_value is None:
            return False

        # Check against common defaults: a value equal to its known default
        # is treated as "not explicitly set" (argparse cannot tell us).
        defaults = {
            "max_issues": 100,
            "chunk_tokens": DEFAULT_CHUNK_TOKENS,
            "chunk_overlap_tokens": DEFAULT_CHUNK_OVERLAP_TOKENS,
            "output": None,
        }

        if arg_name in defaults:
            return arg_value != defaults[arg_name]

        # Any other non-None value means it was set
        return True

    def _route_to_scraper(self) -> int:
        """Route to appropriate scraper based on source type.

        Returns:
            Exit code from scraper
        """
        routes = {
            "web": self._route_web,
            "github": self._route_github,
            "local": self._route_local,
            "pdf": self._route_pdf,
            "word": self._route_word,
            "video": self._route_video,
            "config": self._route_config,
        }
        route = routes.get(self.source_info.type)
        if route is None:
            logger.error(f"Unknown source type: {self.source_info.type}")
            return 1
        return route()

    def _invoke_with_argv(self, scraper_main, argv: list[str]) -> int:
        """Run a scraper's ``main()`` under a temporarily swapped ``sys.argv``.

        Args:
            scraper_main: The scraper module's ``main()`` callable
            argv: Complete argv (program name first) for the scraper

        Returns:
            Exit code returned by the scraper
        """
        # argv[0] is the scraper program name, so the log line matches the
        # per-scraper messages previously emitted inline.
        logger.debug(f"Calling {argv[0]} with argv: {argv}")
        original_argv = sys.argv
        try:
            sys.argv = argv
            return scraper_main()
        finally:
            # Always restore the real argv, even if the scraper raises
            sys.argv = original_argv

    def _route_web(self) -> int:
        """Route to web documentation scraper (doc_scraper.py)."""
        from skill_seekers.cli import doc_scraper

        # Reconstruct argv for doc_scraper
        argv = ["doc_scraper"]

        # Add URL
        argv.append(self.source_info.parsed["url"])

        # Add universal arguments
        self._add_common_args(argv)

        # Config file (web-specific — loads selectors, categories, etc.)
        if self.args.config:
            argv.extend(["--config", self.args.config])

        # RAG arguments (web scraper only)
        if getattr(self.args, "chunk_for_rag", False):
            argv.append("--chunk-for-rag")
        if (
            getattr(self.args, "chunk_tokens", None)
            and self.args.chunk_tokens != DEFAULT_CHUNK_TOKENS
        ):
            argv.extend(["--chunk-tokens", str(self.args.chunk_tokens)])
        if (
            getattr(self.args, "chunk_overlap_tokens", None)
            and self.args.chunk_overlap_tokens != DEFAULT_CHUNK_OVERLAP_TOKENS
        ):
            argv.extend(["--chunk-overlap-tokens", str(self.args.chunk_overlap_tokens)])

        # Advanced web-specific arguments
        if getattr(self.args, "no_preserve_code_blocks", False):
            argv.append("--no-preserve-code-blocks")
        if getattr(self.args, "no_preserve_paragraphs", False):
            argv.append("--no-preserve-paragraphs")
        if getattr(self.args, "interactive_enhancement", False):
            argv.append("--interactive-enhancement")

        # Web-specific arguments
        if getattr(self.args, "max_pages", None):
            argv.extend(["--max-pages", str(self.args.max_pages)])
        if getattr(self.args, "skip_scrape", False):
            argv.append("--skip-scrape")
        if getattr(self.args, "resume", False):
            argv.append("--resume")
        if getattr(self.args, "fresh", False):
            argv.append("--fresh")
        if getattr(self.args, "rate_limit", None):
            argv.extend(["--rate-limit", str(self.args.rate_limit)])
        if getattr(self.args, "workers", None):
            argv.extend(["--workers", str(self.args.workers)])
        if getattr(self.args, "async_mode", False):
            argv.append("--async")
        if getattr(self.args, "no_rate_limit", False):
            argv.append("--no-rate-limit")

        return self._invoke_with_argv(doc_scraper.main, argv)

    def _route_github(self) -> int:
        """Route to GitHub repository scraper (github_scraper.py)."""
        from skill_seekers.cli import github_scraper

        # Reconstruct argv for github_scraper
        argv = ["github_scraper"]

        # Add repo
        argv.extend(["--repo", self.source_info.parsed["repo"]])

        # Add universal arguments
        self._add_common_args(argv)

        # Config file (github-specific)
        if self.args.config:
            argv.extend(["--config", self.args.config])

        # Add GitHub-specific arguments
        if getattr(self.args, "token", None):
            argv.extend(["--token", self.args.token])
        if getattr(self.args, "profile", None):
            argv.extend(["--profile", self.args.profile])
        if getattr(self.args, "non_interactive", False):
            argv.append("--non-interactive")
        if getattr(self.args, "no_issues", False):
            argv.append("--no-issues")
        if getattr(self.args, "no_changelog", False):
            argv.append("--no-changelog")
        if getattr(self.args, "no_releases", False):
            argv.append("--no-releases")
        if getattr(self.args, "max_issues", None) and self.args.max_issues != 100:
            argv.extend(["--max-issues", str(self.args.max_issues)])
        if getattr(self.args, "scrape_only", False):
            argv.append("--scrape-only")
        if getattr(self.args, "local_repo_path", None):
            argv.extend(["--local-repo-path", self.args.local_repo_path])

        return self._invoke_with_argv(github_scraper.main, argv)

    def _route_local(self) -> int:
        """Route to local codebase analyzer (codebase_scraper.py)."""
        from skill_seekers.cli import codebase_scraper

        # Reconstruct argv for codebase_scraper
        argv = ["codebase_scraper"]

        # Add directory
        argv.extend(["--directory", self.source_info.parsed["directory"]])

        # Add universal arguments
        self._add_common_args(argv)

        # Preset (local codebase scraper has preset support)
        if getattr(self.args, "preset", None):
            argv.extend(["--preset", self.args.preset])

        # Add local-specific arguments
        if getattr(self.args, "languages", None):
            argv.extend(["--languages", self.args.languages])
        if getattr(self.args, "file_patterns", None):
            argv.extend(["--file-patterns", self.args.file_patterns])
        if getattr(self.args, "skip_patterns", False):
            argv.append("--skip-patterns")
        if getattr(self.args, "skip_test_examples", False):
            argv.append("--skip-test-examples")
        if getattr(self.args, "skip_how_to_guides", False):
            argv.append("--skip-how-to-guides")
        if getattr(self.args, "skip_config", False):
            argv.append("--skip-config")
        if getattr(self.args, "skip_docs", False):
            argv.append("--skip-docs")

        return self._invoke_with_argv(codebase_scraper.main, argv)

    def _route_pdf(self) -> int:
        """Route to PDF scraper (pdf_scraper.py)."""
        from skill_seekers.cli import pdf_scraper

        # Reconstruct argv for pdf_scraper
        argv = ["pdf_scraper"]

        # Add PDF file
        argv.extend(["--pdf", self.source_info.parsed["file_path"]])

        # Add universal arguments
        self._add_common_args(argv)

        # Add PDF-specific arguments
        if getattr(self.args, "ocr", False):
            argv.append("--ocr")
        if getattr(self.args, "pages", None):
            argv.extend(["--pages", self.args.pages])

        return self._invoke_with_argv(pdf_scraper.main, argv)

    def _route_word(self) -> int:
        """Route to Word document scraper (word_scraper.py)."""
        from skill_seekers.cli import word_scraper

        # Reconstruct argv for word_scraper
        argv = ["word_scraper"]

        # Add DOCX file
        argv.extend(["--docx", self.source_info.parsed["file_path"]])

        # Add universal arguments
        self._add_common_args(argv)

        return self._invoke_with_argv(word_scraper.main, argv)

    def _route_video(self) -> int:
        """Route to video scraper (video_scraper.py)."""
        from skill_seekers.cli import video_scraper

        # Reconstruct argv for video_scraper
        argv = ["video_scraper"]

        # Add video source (URL or file)
        parsed = self.source_info.parsed
        if parsed.get("source_kind") == "file":
            argv.extend(["--video-file", parsed["file_path"]])
        elif parsed.get("url"):
            url = parsed["url"]
            # Detect playlist vs single video
            if "playlist" in url.lower():
                argv.extend(["--playlist", url])
            else:
                argv.extend(["--url", url])

        # Add universal arguments
        self._add_common_args(argv)

        # Add video-specific arguments. --video-languages takes precedence
        # over the generic --languages flag when both are given.
        video_langs = getattr(self.args, "video_languages", None) or getattr(
            self.args, "languages", None
        )
        if video_langs:
            argv.extend(["--languages", video_langs])
        if getattr(self.args, "visual", False):
            argv.append("--visual")
        if getattr(self.args, "whisper_model", None) and self.args.whisper_model != "base":
            argv.extend(["--whisper-model", self.args.whisper_model])
        # Numeric visual tuning flags are only forwarded when they differ
        # from the scraper's defaults (0.7 / 0.5 / 3.0).
        vi = getattr(self.args, "visual_interval", None)
        if vi is not None and vi != 0.7:
            argv.extend(["--visual-interval", str(vi)])
        vmg = getattr(self.args, "visual_min_gap", None)
        if vmg is not None and vmg != 0.5:
            argv.extend(["--visual-min-gap", str(vmg)])
        vs = getattr(self.args, "visual_similarity", None)
        if vs is not None and vs != 3.0:
            argv.extend(["--visual-similarity", str(vs)])

        return self._invoke_with_argv(video_scraper.main, argv)

    def _route_config(self) -> int:
        """Route to unified scraper for config files (unified_scraper.py)."""
        from skill_seekers.cli import unified_scraper

        # Reconstruct argv for unified_scraper
        argv = ["unified_scraper"]

        # Add config file
        argv.extend(["--config", self.source_info.parsed["config_path"]])

        # Behavioral flags supported by unified_scraper
        # Note: name/output/enhance-level come from the JSON config file, not CLI
        if self.args.dry_run:
            argv.append("--dry-run")
        if getattr(self.args, "fresh", False):
            argv.append("--fresh")

        # Config-specific flags (--merge-mode, --skip-codebase-analysis)
        if getattr(self.args, "merge_mode", None):
            argv.extend(["--merge-mode", self.args.merge_mode])
        if getattr(self.args, "skip_codebase_analysis", False):
            argv.append("--skip-codebase-analysis")

        # Enhancement workflow flags (unified_scraper now supports these)
        if getattr(self.args, "enhance_workflow", None):
            for wf in self.args.enhance_workflow:
                argv.extend(["--enhance-workflow", wf])
        if getattr(self.args, "enhance_stage", None):
            for stage in self.args.enhance_stage:
                argv.extend(["--enhance-stage", stage])
        if getattr(self.args, "var", None):
            for var in self.args.var:
                argv.extend(["--var", var])
        if getattr(self.args, "workflow_dry_run", False):
            argv.append("--workflow-dry-run")

        return self._invoke_with_argv(unified_scraper.main, argv)

    def _add_common_args(self, argv: list[str]) -> None:
        """Add truly universal arguments to argv list.

        These flags are accepted by ALL scrapers (doc, github, codebase, pdf)
        because each scraper calls ``add_all_standard_arguments(parser)``
        which registers: name, description, output, enhance-level, api-key,
        dry-run, verbose, quiet, and workflow args.

        Route-specific flags (preset, config, RAG, preserve, etc.) are
        forwarded only by the _route_*() method that needs them.
        """
        # Identity arguments
        if self.args.name:
            argv.extend(["--name", self.args.name])
        elif self.source_info:
            # Use suggested name from source detection
            argv.extend(["--name", self.source_info.suggested_name])

        if self.args.description:
            argv.extend(["--description", self.args.description])
        if self.args.output:
            argv.extend(["--output", self.args.output])

        # Enhancement arguments (consolidated to --enhance-level only)
        if self.args.enhance_level > 0:
            argv.extend(["--enhance-level", str(self.args.enhance_level)])
        if self.args.api_key:
            argv.extend(["--api-key", self.args.api_key])

        # Behavior arguments
        if self.args.dry_run:
            argv.append("--dry-run")
        if self.args.verbose:
            argv.append("--verbose")
        if self.args.quiet:
            argv.append("--quiet")

        # Documentation version metadata
        if getattr(self.args, "doc_version", ""):
            argv.extend(["--doc-version", self.args.doc_version])

        # Enhancement Workflow arguments
        if getattr(self.args, "enhance_workflow", None):
            for wf in self.args.enhance_workflow:
                argv.extend(["--enhance-workflow", wf])
        if getattr(self.args, "enhance_stage", None):
            for stage in self.args.enhance_stage:
                argv.extend(["--enhance-stage", stage])
        if getattr(self.args, "var", None):
            for var in self.args.var:
                argv.extend(["--var", var])
        if getattr(self.args, "workflow_dry_run", False):
            argv.append("--workflow-dry-run")
|
|
|
|
def main() -> int:
    """Entry point for create command.

    Builds the argument parser (universal flags only), handles the hidden
    ``--help-<mode>`` flags that print expanded per-source help, then runs
    :class:`CreateCommand` against the detected source.

    Returns:
        Exit code (0 for success, non-zero for error)
    """
    import textwrap

    from skill_seekers.cli.arguments.create import add_create_arguments

    # (mode, description) pairs drive both the hidden --help-<mode> flag
    # registration and the expanded help printed when one of them is given.
    help_modes = [
        ("web", "Create skill from web documentation"),
        ("github", "Create skill from GitHub repository"),
        ("local", "Create skill from local codebase"),
        ("pdf", "Create skill from PDF file"),
        ("word", "Create skill from Word document (.docx)"),
        ("video", "Create skill from video (YouTube, Vimeo, local files)"),
        ("config", "Create skill from multi-source config file (unified scraper)"),
        ("advanced", "Create skill - advanced options"),
        ("all", "Create skill - all options"),
    ]

    # Custom formatter to prevent line wrapping in epilog
    class NoWrapFormatter(argparse.RawDescriptionHelpFormatter):
        def _split_lines(self, text, width):
            return text.splitlines()

    parser = argparse.ArgumentParser(
        prog="skill-seekers create",
        description="Create skill from any source (auto-detects type)",
        formatter_class=NoWrapFormatter,
        epilog=textwrap.dedent("""\
            Examples:
              Web:    skill-seekers create https://docs.react.dev/
              GitHub: skill-seekers create facebook/react -p standard
              Local:  skill-seekers create ./my-project -p comprehensive
              PDF:    skill-seekers create tutorial.pdf --ocr
              DOCX:   skill-seekers create document.docx
              Video:  skill-seekers create https://youtube.com/watch?v=...
              Video:  skill-seekers create recording.mp4
              Config: skill-seekers create configs/react.json

            Source Auto-Detection:
              • URLs/domains → web scraping
              • owner/repo → GitHub analysis
              • ./path → local codebase
              • file.pdf → PDF extraction
              • file.docx → Word document extraction
              • youtube.com/... → Video transcript extraction
              • file.mp4 → Video file extraction
              • file.json → multi-source config

            Progressive Help (13 → 120+ flags):
              --help-web       Web scraping options
              --help-github    GitHub repository options
              --help-local     Local codebase analysis
              --help-pdf       PDF extraction options
              --help-video     Video extraction options
              --help-advanced  Rare/advanced options
              --help-all       All options + compatibility

            Presets (NEW: Use -p shortcut):
              -p quick          Fast (1-2 min, basic features)
              -p standard       Balanced (5-10 min, recommended)
              -p comprehensive  Full (20-60 min, all features)

            Common Workflows:
              skill-seekers create <source> -p quick
              skill-seekers create <source> -p standard --enhance-level 2
              skill-seekers create <source> --chunk-for-rag
        """),
    )

    # Add arguments in default mode (universal only)
    add_create_arguments(parser, mode="default")

    # Add hidden help mode flags (use underscore prefix to match CreateParser)
    for mode, _ in help_modes:
        parser.add_argument(
            f"--help-{mode}",
            action="store_true",
            help=argparse.SUPPRESS,
            dest=f"_help_{mode}",
        )

    # Parse arguments
    args = parser.parse_args()

    # Handle source-specific help modes: print the expanded per-mode help
    # (same prog, mode-specific arguments) and exit successfully.
    for mode, description in help_modes:
        if getattr(args, f"_help_{mode}"):
            mode_parser = argparse.ArgumentParser(
                prog="skill-seekers create",
                description=description,
                formatter_class=argparse.RawDescriptionHelpFormatter,
            )
            add_create_arguments(mode_parser, mode=mode)
            mode_parser.print_help()
            return 0

    # Setup logging
    log_level = logging.DEBUG if args.verbose else (logging.WARNING if args.quiet else logging.INFO)
    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")

    # Validate source provided (config file can serve as source)
    if not args.source and not args.config:
        parser.error("source is required (or use --config to specify a config file)")

    # If config is provided but no source, peek at the JSON to route correctly
    if not args.source and args.config:
        import json

        try:
            with open(args.config) as f:
                config_peek = json.load(f)
            if "sources" in config_peek:
                # Unified format → route to unified_scraper via config type detection
                args.source = args.config
            elif "base_url" in config_peek:
                # Simple web config → route to doc_scraper by using the base_url;
                # source will be detected as web URL and --config is already set
                args.source = config_peek["base_url"]
            else:
                parser.error("Config file must contain 'sources' (unified) or 'base_url' (web)")
        except json.JSONDecodeError as e:
            parser.error(f"Cannot parse config file as JSON: {e}")
        except FileNotFoundError:
            parser.error(f"Config file not found: {args.config}")

    # Execute create command
    command = CreateCommand(args)
    return command.execute()
|
|
|
|
|
|
# Script entry point: propagate main()'s exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|