- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash) - Move pytesseract from core deps to [video-full] optional group - Add 30-min timeout + user feedback to video enhancement subprocess - Add scrape_video_impl to MCP server fallback import block - Detect auto-generated YouTube captions via is_generated property - Forward --vision-ocr and --video-playlist through create command - Fix filename collision for non-ASCII video titles (fallback to video_id) - Make _vision_used a proper dataclass field on FrameSubSection - Expose 6 visual params in MCP scrape_video tool - Add install instructions on missing video deps in unified scraper - Update MCP docstring tool counts (25→33, 7 categories) - Add video and word commands to main.py docstring - Document video-full exclusion from [all] deps in pyproject.toml - Update parser registry test count (22→23 for video parser) All 2437 tests passing, 0 failures. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
405 lines
14 KiB
Python
405 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Skill Seekers - Unified CLI Entry Point
|
|
|
|
Provides a git-style unified command-line interface for all Skill Seekers tools.
|
|
|
|
Usage:
|
|
skill-seekers <command> [options]
|
|
|
|
Commands:
|
|
config Configure GitHub tokens, API keys, and settings
|
|
scrape Scrape documentation website
|
|
github Scrape GitHub repository
|
|
pdf Extract from PDF file
|
|
word Extract from Word (.docx) file
|
|
video Extract from video (YouTube or local)
|
|
unified Multi-source scraping (docs + GitHub + PDF)
|
|
analyze Analyze local codebase and extract code knowledge
|
|
enhance AI-powered enhancement (auto: API or LOCAL mode)
|
|
enhance-status Check enhancement status (for background/daemon modes)
|
|
package Package skill into .zip file
|
|
upload Upload skill to Claude
|
|
estimate Estimate page count before scraping
|
|
extract-test-examples Extract usage examples from test files
|
|
install-agent Install skill to AI agent directories
|
|
resume Resume interrupted scraping job
|
|
|
|
Examples:
|
|
skill-seekers scrape --config configs/react.json
|
|
skill-seekers github --repo microsoft/TypeScript
|
|
skill-seekers unified --config configs/react_unified.json
|
|
skill-seekers extract-test-examples tests/ --language python
|
|
skill-seekers package output/react/
|
|
skill-seekers install-agent output/react/ --agent cursor
|
|
"""
|
|
|
|
import argparse
|
|
import importlib
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from skill_seekers.cli import __version__
|
|
|
|
|
|
# Command module mapping (command name -> dotted module path).
#
# main() looks the parsed sub-command up in this table, imports the module
# lazily with importlib.import_module(), and calls that module's main() with
# sys.argv rewritten by _reconstruct_argv().  A command that is missing from
# this table is reported as "Unknown command".
#
# "analyze" is listed here so the lookup succeeds, but main() routes it to
# _handle_analyze_command() instead of the generic delegation path.
COMMAND_MODULES = {
    "create": "skill_seekers.cli.create_command",  # unified create command
    "config": "skill_seekers.cli.config_command",
    "scrape": "skill_seekers.cli.doc_scraper",
    "github": "skill_seekers.cli.github_scraper",
    "pdf": "skill_seekers.cli.pdf_scraper",
    "word": "skill_seekers.cli.word_scraper",
    "video": "skill_seekers.cli.video_scraper",
    "unified": "skill_seekers.cli.unified_scraper",
    "enhance": "skill_seekers.cli.enhance_command",
    "enhance-status": "skill_seekers.cli.enhance_status",
    "package": "skill_seekers.cli.package_skill",
    "upload": "skill_seekers.cli.upload_skill",
    "estimate": "skill_seekers.cli.estimate_pages",
    "extract-test-examples": "skill_seekers.cli.test_example_extractor",
    "install-agent": "skill_seekers.cli.install_agent",
    "analyze": "skill_seekers.cli.codebase_scraper",
    "install": "skill_seekers.cli.install_skill",
    "resume": "skill_seekers.cli.resume_command",
    "stream": "skill_seekers.cli.streaming_ingest",
    "update": "skill_seekers.cli.incremental_updater",
    "multilang": "skill_seekers.cli.multilang_support",
    "quality": "skill_seekers.cli.quality_metrics",
    "workflows": "skill_seekers.cli.workflows_command",
}
|
|
|
|
|
|
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``skill-seekers`` parser with all subcommands attached.

    Returns:
        The configured parser. Subcommand parsers are added by
        ``register_parsers`` from ``skill_seekers.cli.parsers``; the chosen
        subcommand name is stored on the namespace as ``command``.
    """
    # Imported lazily so that importing this module stays cheap.
    from skill_seekers.cli.parsers import register_parsers

    # Shown verbatim after the option list (RawDescriptionHelpFormatter).
    # NOTE(review): leading whitespace inside this literal could not be
    # recovered from the flattened source view - confirm against the file.
    usage_examples = """
Examples:
# Scrape documentation
skill-seekers scrape --config configs/react.json

# Scrape GitHub repository
skill-seekers github --repo microsoft/TypeScript --name typescript

# Multi-source scraping (unified)
skill-seekers unified --config configs/react_unified.json

# AI-powered enhancement
skill-seekers enhance output/react/

# Package and upload
skill-seekers package output/react/
skill-seekers upload output/react.zip

For more information: https://github.com/yusufkaraaslan/Skill_Seekers
"""

    top_level = argparse.ArgumentParser(
        prog="skill-seekers",
        description="Convert documentation, GitHub repos, and PDFs into Claude AI skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    top_level.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    # One sub-parser per command; registration lives with the parser classes.
    command_parsers = top_level.add_subparsers(
        dest="command",
        title="commands",
        description="Available Skill Seekers commands",
        help="Command to run",
    )
    register_parsers(command_parsers)

    return top_level
|
|
|
|
|
|
def _reconstruct_argv(command: str, args: argparse.Namespace) -> list[str]:
    """Rebuild a ``sys.argv``-style list from a parsed namespace.

    The command modules parse ``sys.argv`` themselves, so the namespace
    produced by the unified parser is serialised back into arguments.

    Args:
        command: Command name; used to form the fake program name
            ``"<command>_command.py"`` placed at index 0.
        args: Parsed arguments namespace.

    Returns:
        List of command-line arguments for the command module, in the
        namespace's attribute order.
    """
    # Attributes that were parsed as positionals and are emitted without a flag.
    positional_names = (
        "source",  # create command
        "directory",
        "file",
        "job_id",
        "skill_directory",
        "zip_file",
        "input_file",
    )

    rebuilt = [f"{command}_command.py"]

    for name, val in vars(args).items():
        # The sub-command selector itself is not an argument of the module.
        if name == "command":
            continue

        # Progressive-help switches of the create command:
        # _help_web -> --help-web (only emitted when set).
        if name.startswith("_help_"):
            if val:
                rebuilt.append("--" + name.replace("_help_", "help-"))
            continue

        # Positionals: bare value, skipped when absent or empty.
        if name in positional_names:
            if val is not None and val != "":
                rebuilt.append(str(val))
            continue

        option = "--" + name.replace("_", "-")

        if isinstance(val, bool):
            # store_true style switch: present only when True.
            if val:
                rebuilt.append(option)
        elif isinstance(val, list):
            # Repeatable option: one "--opt item" pair per element.
            for element in val:
                rebuilt += [option, str(element)]
        elif val is not None:
            rebuilt += [option, str(val)]

    return rebuilt
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Run the unified CLI: parse arguments and delegate to a command module.

    Args:
        argv: Command-line arguments without the program name
            (defaults to ``sys.argv[1:]``).

    Returns:
        Exit code: the delegated module's result (``None`` is treated as 0),
        1 for usage errors or an exception raised by the command, and 130
        when interrupted with Ctrl-C.
    """
    cli_args = sys.argv[1:] if argv is None else argv

    # `analyze --preset-list` needs no directory, so it bypasses the parser
    # and is handed straight to the codebase scraper.
    if len(cli_args) >= 2 and cli_args[0] == "analyze" and "--preset-list" in cli_args:
        from skill_seekers.cli.codebase_scraper import main as analyze_main

        saved_argv = sys.argv.copy()
        sys.argv = ["codebase_scraper.py", "--preset-list"]
        try:
            return analyze_main() or 0
        finally:
            sys.argv = saved_argv

    parser = create_parser()
    args = parser.parse_args(cli_args)

    # No sub-command given: show usage and signal an error.
    if not args.command:
        parser.print_help()
        return 1

    module_name = COMMAND_MODULES.get(args.command)
    if not module_name:
        print(f"Error: Unknown command '{args.command}'", file=sys.stderr)
        parser.print_help()
        return 1

    # 'analyze' has post-processing of its own and is not delegated generically.
    if args.command == "analyze":
        return _handle_analyze_command(args)

    # Generic path: import the module, swap sys.argv, call its main().
    try:
        module = importlib.import_module(module_name)

        saved_argv = sys.argv.copy()
        sys.argv = _reconstruct_argv(args.command, args)
        try:
            exit_code = module.main()
            return 0 if exit_code is None else exit_code
        finally:
            # Always restore the caller's argv, even if the command raised.
            sys.argv = saved_argv

    except KeyboardInterrupt:
        print("\n\nInterrupted by user", file=sys.stderr)
        return 130
    except Exception as e:
        # Some exceptions stringify to "", so fall back to the class name.
        error_msg = str(e) or f"{type(e).__name__} occurred"
        print(f"Error: {error_msg}", file=sys.stderr)

        # The traceback is only shown when the command was run verbosely.
        if getattr(args, "verbose", False):
            import traceback

            traceback.print_exc()

        return 1
|
|
|
|
|
|
def _build_analyze_argv(args: argparse.Namespace, enhance_level: int) -> list[str]:
    """Translate the parsed ``analyze`` namespace into argv for codebase_scraper.

    Args:
        args: Parsed arguments of the ``analyze`` sub-command.
        enhance_level: Effective enhancement level to forward.

    Returns:
        The argument list, starting with the fake program name.
    """
    argv = ["codebase_scraper.py", "--directory", args.directory]

    if args.output:
        argv.extend(["--output", args.output])

    # Preset flags: --quick takes precedence over --comprehensive, which
    # takes precedence over an explicit --depth.
    if args.quick:
        argv.extend(
            [
                "--depth",
                "surface",
                "--skip-patterns",
                "--skip-test-examples",
                "--skip-how-to-guides",
                "--skip-config-patterns",
            ]
        )
    elif args.comprehensive:
        argv.extend(["--depth", "full"])
    elif args.depth:
        argv.extend(["--depth", args.depth])

    argv.extend(["--enhance-level", str(enhance_level)])

    # The attributes in the next two loops are read with plain attribute
    # access: a namespace lacking one of them raises AttributeError.
    for attr in ("languages", "file_patterns"):
        value = getattr(args, attr)
        if value:
            argv.extend([f"--{attr.replace('_', '-')}", value])

    for attr in (
        "skip_api_reference",
        "skip_dependency_graph",
        "skip_patterns",
        "skip_test_examples",
        "skip_how_to_guides",
        "skip_config_patterns",
        "skip_docs",
        "no_comments",
        "verbose",
    ):
        if getattr(args, attr):
            argv.append(f"--{attr.replace('_', '-')}")

    # Everything below is optional on the namespace and skipped when absent.
    for attr in ("quiet", "dry_run"):
        if getattr(args, attr, False):
            argv.append(f"--{attr.replace('_', '-')}")

    for attr in ("preset", "name", "description", "api_key"):
        value = getattr(args, attr, None)
        if value:
            argv.extend([f"--{attr.replace('_', '-')}", value])

    # Enhancement workflow arguments: repeatable options, one pair per item.
    for attr in ("enhance_workflow", "enhance_stage", "var"):
        option = f"--{attr.replace('_', '-')}"
        for item in getattr(args, attr, None) or ():
            argv.extend([option, item])

    if getattr(args, "workflow_dry_run", False):
        argv.append("--workflow-dry-run")

    return argv


def _enhance_analyzed_skill_md(skill_dir: Path, enhance_level: int) -> None:
    """Best-effort AI enhancement of ``<skill_dir>/SKILL.md`` after an analysis.

    Failures are reported on stdout and never raised: the analysis result
    has already been produced and must not be turned into an error here.

    Args:
        skill_dir: Output directory of the analysis.
        enhance_level: Level shown in the banner.
    """
    skill_md = skill_dir / "SKILL.md"

    if not skill_md.exists():
        print(f"\n⚠️ SKILL.md not found at {skill_md}, skipping enhancement")
        return

    print("\n" + "=" * 60)
    print(f"ENHANCING SKILL.MD WITH AI (Level {enhance_level})")
    print("=" * 60 + "\n")

    try:
        from skill_seekers.cli.enhance_command import (
            _is_root,
            _pick_mode,
            _run_api_mode,
            _run_local_mode,
        )

        # Namespace handed to the enhance helpers in place of a parsed one.
        # NOTE(review): api_key is None here even when `analyze --api-key`
        # was given - confirm whether it should be forwarded.
        enhance_args = argparse.Namespace(
            skill_directory=str(skill_dir),
            target=None,
            api_key=None,
            dry_run=False,
            agent=None,
            agent_cmd=None,
            interactive_enhancement=False,
            background=False,
            daemon=False,
            no_force=False,
            timeout=600,
        )
        mode, target = _pick_mode(enhance_args)

        if mode == "api":
            print(f"\n🤖 Enhancement mode: API ({target})")
            success = _run_api_mode(enhance_args, target) == 0
        elif _is_root():
            print("\n⚠️ Skipping SKILL.md enhancement: running as root")
            print(" Set ANTHROPIC_API_KEY / GOOGLE_API_KEY to enable API mode")
            success = False
        else:
            print("\n🤖 Enhancement mode: LOCAL (Claude Code CLI)")
            success = _run_local_mode(enhance_args) == 0

        if success:
            print("\n✅ SKILL.md enhancement complete!")
            # Explicit UTF-8 with replacement: the line count is cosmetic, so
            # a decoding problem under the platform default encoding must not
            # make a successful enhancement be reported as failed.
            with open(skill_md, encoding="utf-8", errors="replace") as f:
                lines = sum(1 for _ in f)
            print(f" Enhanced SKILL.md: {lines} lines")
        else:
            print("\n⚠️ SKILL.md enhancement did not complete")
            print(" You can retry with: skill-seekers enhance " + str(skill_dir))
    except Exception as e:
        # Deliberately broad: enhancement is optional post-processing.
        print(f"\n⚠️ SKILL.md enhancement failed: {e}")
        print(" You can retry with: skill-seekers enhance " + str(skill_dir))


def _handle_analyze_command(args: argparse.Namespace) -> int:
    """Handle the analyze command, including its enhancement post-processing.

    Runs the codebase scraper with an argv rebuilt from ``args`` and, when it
    succeeds and the effective enhance level is at least 1, tries to enhance
    the generated SKILL.md.

    Args:
        args: Parsed arguments of the ``analyze`` sub-command.

    Returns:
        Exit code of the codebase scraper (a falsy result is treated as 0).
        An enhancement failure does not change the exit code.
    """
    from skill_seekers.cli.codebase_scraper import main as analyze_main

    # Determine enhance_level: the parsed value (2 if absent), but quick
    # mode disables enhancement altogether.
    enhance_level = getattr(args, "enhance_level", 2)
    if getattr(args, "quick", False):
        enhance_level = 0

    # Build the argv first, so a failure while reading `args` cannot leave
    # sys.argv replaced.
    analyze_argv = _build_analyze_argv(args, enhance_level)

    original_argv = sys.argv.copy()
    sys.argv = analyze_argv
    try:
        result = analyze_main() or 0

        # Enhance SKILL.md if enhance_level >= 1
        if result == 0 and enhance_level >= 1:
            _enhance_analyzed_skill_md(Path(args.output), enhance_level)

        return result
    finally:
        sys.argv = original_argv
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|