Files
skill-seekers-reference/src/skill_seekers/cli/main.py
yusyus 12bc29ab36 fix: resolve 15 bugs and gaps in video scraper pipeline
- Fix extract_visual_data returning 2-tuple instead of 3 (ValueError crash)
- Move pytesseract from core deps to [video-full] optional group
- Add 30-min timeout + user feedback to video enhancement subprocess
- Add scrape_video_impl to MCP server fallback import block
- Detect auto-generated YouTube captions via is_generated property
- Forward --vision-ocr and --video-playlist through create command
- Fix filename collision for non-ASCII video titles (fallback to video_id)
- Make _vision_used a proper dataclass field on FrameSubSection
- Expose 6 visual params in MCP scrape_video tool
- Add install instructions on missing video deps in unified scraper
- Update MCP docstring tool counts (25→33, 7 categories)
- Add video and word commands to main.py docstring
- Document video-full exclusion from [all] deps in pyproject.toml
- Update parser registry test count (22→23 for video parser)

All 2437 tests passing, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 12:39:21 +03:00

405 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Skill Seekers - Unified CLI Entry Point
Provides a git-style unified command-line interface for all Skill Seekers tools.
Usage:
skill-seekers <command> [options]
Commands:
config Configure GitHub tokens, API keys, and settings
scrape Scrape documentation website
github Scrape GitHub repository
pdf Extract from PDF file
word Extract from Word (.docx) file
video Extract from video (YouTube or local)
unified Multi-source scraping (docs + GitHub + PDF)
analyze Analyze local codebase and extract code knowledge
enhance AI-powered enhancement (auto: API or LOCAL mode)
enhance-status Check enhancement status (for background/daemon modes)
package Package skill into .zip file
upload Upload skill to Claude
estimate Estimate page count before scraping
extract-test-examples Extract usage examples from test files
install-agent Install skill to AI agent directories
resume Resume interrupted scraping job
Examples:
skill-seekers scrape --config configs/react.json
skill-seekers github --repo microsoft/TypeScript
skill-seekers unified --config configs/react_unified.json
skill-seekers extract-test-examples tests/ --language python
skill-seekers package output/react/
skill-seekers install-agent output/react/ --agent cursor
"""
import argparse
import importlib
import sys
from pathlib import Path
from skill_seekers.cli import __version__
# Dispatch table for the unified CLI: sub-command name -> dotted path of the
# module whose ``main()`` implements it. Every entry lives in the
# ``skill_seekers.cli`` package, so only the final path component is listed.
COMMAND_MODULES = {
    command_name: f"skill_seekers.cli.{module_stem}"
    for command_name, module_stem in (
        ("create", "create_command"),  # NEW: Unified create command
        ("config", "config_command"),
        ("scrape", "doc_scraper"),
        ("github", "github_scraper"),
        ("pdf", "pdf_scraper"),
        ("word", "word_scraper"),
        ("video", "video_scraper"),
        ("unified", "unified_scraper"),
        ("enhance", "enhance_command"),
        ("enhance-status", "enhance_status"),
        ("package", "package_skill"),
        ("upload", "upload_skill"),
        ("estimate", "estimate_pages"),
        ("extract-test-examples", "test_example_extractor"),
        ("install-agent", "install_agent"),
        ("analyze", "codebase_scraper"),
        ("install", "install_skill"),
        ("resume", "resume_command"),
        ("stream", "streaming_ingest"),
        ("update", "incremental_updater"),
        ("multilang", "multilang_support"),
        ("quality", "quality_metrics"),
        ("workflows", "workflows_command"),
    )
}
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``skill-seekers`` parser with all subcommands attached.

    Returns:
        The configured parser. The selected subcommand name is stored in the
        ``command`` attribute of the namespace it produces.
    """
    # Function-scope import: the parser registry is only loaded when a parser
    # is actually being built.
    from skill_seekers.cli.parsers import register_parsers

    # Shown verbatim below the generated help (RawDescriptionHelpFormatter
    # keeps the line breaks as written).
    usage_examples = """
Examples:
# Scrape documentation
skill-seekers scrape --config configs/react.json
# Scrape GitHub repository
skill-seekers github --repo microsoft/TypeScript --name typescript
# Multi-source scraping (unified)
skill-seekers unified --config configs/react_unified.json
# AI-powered enhancement
skill-seekers enhance output/react/
# Package and upload
skill-seekers package output/react/
skill-seekers upload output/react.zip
For more information: https://github.com/yusufkaraaslan/Skill_Seekers
"""

    top_level = argparse.ArgumentParser(
        prog="skill-seekers",
        description="Convert documentation, GitHub repos, and PDFs into Claude AI skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    top_level.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    # One sub-parser per command; registration is delegated to the parsers package.
    command_group = top_level.add_subparsers(
        dest="command",
        title="commands",
        description="Available Skill Seekers commands",
        help="Command to run",
    )
    register_parsers(command_group)

    return top_level
def _reconstruct_argv(command: str, args: argparse.Namespace) -> list[str]:
    """Rebuild a ``sys.argv``-style list from a parsed namespace.

    The command modules parse ``sys.argv`` themselves, so the namespace
    produced by the unified parser is turned back into command-line tokens.

    Args:
        command: Command name; used to form the fake program name
            ``"<command>_command.py"`` in position 0.
        args: Parsed arguments namespace.

    Returns:
        List of command-line arguments for the command module. Entries follow
        the namespace's attribute order; ``None`` values and false booleans
        are omitted.
    """
    # Namespace attributes that are positional arguments (emitted without a
    # ``--`` prefix). Built once, outside the loop.
    positional_keys = frozenset(
        {
            "source",  # create command
            "directory",
            "file",
            "job_id",
            "skill_directory",
            "zip_file",
            "input_file",
        }
    )

    argv = [f"{command}_command.py"]

    for key, value in vars(args).items():
        if key == "command":
            continue

        # Internal progressive-help flags of the create command:
        # _help_web -> --help-web. Underscores in the topic are hyphenated the
        # same way as for every other option name.
        if key.startswith("_help_"):
            if value:
                topic = key.removeprefix("_help_").replace("_", "-")
                argv.append(f"--help-{topic}")
            continue

        if key in positional_keys:
            if isinstance(value, (list, tuple)):
                # Multi-value positional: one token per item, never the
                # Python repr of the whole container.
                argv.extend(str(item) for item in value)
            elif value is not None and value != "":
                argv.append(str(value))
            continue

        # Flags and options
        arg_name = f"--{key.replace('_', '-')}"
        if isinstance(value, bool):
            # Boolean switches are present-or-absent; False emits nothing.
            if value:
                argv.append(arg_name)
        elif isinstance(value, (list, tuple)):
            # Repeatable option: the flag is repeated once per item.
            for item in value:
                argv.extend([arg_name, str(item)])
        elif value is not None:
            argv.extend([arg_name, str(value)])

    return argv
def main(argv: list[str] | None = None) -> int:
    """Dispatch a ``skill-seekers`` invocation to the matching command module.

    Args:
        argv: Command-line arguments without the program name; falls back to
            ``sys.argv[1:]`` when omitted.

    Returns:
        Exit code: the command's own result (``None`` counts as 0), 1 when no
        command or an unknown command is given or the command raises, and 130
        when interrupted from the keyboard.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Special case: ``analyze --preset-list`` needs no directory, so it is
    # served before the regular parser runs.
    wants_preset_list = len(argv) >= 2 and argv[0] == "analyze" and "--preset-list" in argv
    if wants_preset_list:
        from skill_seekers.cli.codebase_scraper import main as analyze_main

        saved_argv = sys.argv.copy()
        sys.argv = ["codebase_scraper.py", "--preset-list"]
        try:
            return analyze_main() or 0
        finally:
            sys.argv = saved_argv

    parser = create_parser()
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 1

    module_name = COMMAND_MODULES.get(args.command)
    if not module_name:
        print(f"Error: Unknown command '{args.command}'", file=sys.stderr)
        parser.print_help()
        return 1

    # 'analyze' has post-processing of its own and is handled separately.
    if args.command == "analyze":
        return _handle_analyze_command(args)

    # Every other command: import its module and run its main() against a
    # rebuilt sys.argv, restoring the real one afterwards.
    try:
        module = importlib.import_module(module_name)

        saved_argv = sys.argv.copy()
        sys.argv = _reconstruct_argv(args.command, args)
        try:
            exit_code = module.main()
        finally:
            sys.argv = saved_argv

        return 0 if exit_code is None else exit_code
    except KeyboardInterrupt:
        print("\n\nInterrupted by user", file=sys.stderr)
        return 130
    except Exception as exc:
        import traceback

        # Exceptions with an empty message are reported by their type name.
        message = str(exc) or f"{type(exc).__name__} occurred"
        print(f"Error: {message}", file=sys.stderr)

        # The full traceback is shown only in verbose mode.
        if getattr(args, "verbose", False):
            traceback.print_exc()
        return 1
def _handle_analyze_command(args: argparse.Namespace) -> int:
    """Run the ``analyze`` command, then optionally AI-enhance its SKILL.md.

    The codebase scraper reads ``sys.argv`` itself, so the parsed namespace is
    converted back into an argument list and swapped in for the duration of
    the call. If the scraper succeeds and the enhance level is at least 1, the
    generated ``SKILL.md`` in the output directory is enhanced on a
    best-effort basis; enhancement problems never change the exit code.

    Args:
        args: Parsed arguments of the ``analyze`` subcommand.

    Returns:
        Exit code of the codebase scraper (``None`` is treated as 0).
    """
    from skill_seekers.cli.codebase_scraper import main as analyze_main

    # A missing attribute and an explicit None both mean "use the default" (2).
    enhance_level = getattr(args, "enhance_level", None)
    if enhance_level is None:
        enhance_level = 2
    if getattr(args, "quick", False):
        enhance_level = 0  # Quick mode disables enhancement

    # Build the complete argument list BEFORE touching sys.argv, so a failure
    # while reading the namespace cannot leave sys.argv half-replaced.
    analyze_argv = _build_analyze_argv(args, enhance_level)

    original_argv = sys.argv.copy()
    sys.argv = analyze_argv
    try:
        result = analyze_main() or 0

        # Enhance SKILL.md if enhance_level >= 1
        if result == 0 and enhance_level >= 1:
            _enhance_analyzed_skill(args.output, enhance_level)

        return result
    finally:
        sys.argv = original_argv


def _build_analyze_argv(args: argparse.Namespace, enhance_level: int) -> list[str]:
    """Translate the parsed ``analyze`` namespace into argv for codebase_scraper."""
    argv = ["codebase_scraper.py", "--directory", args.directory]
    if args.output:
        argv.extend(["--output", args.output])

    # Switches already emitted, so a preset and an explicit flag do not
    # produce the same switch twice.
    emitted_switches: set[str] = set()

    def add_switch(switch: str) -> None:
        if switch not in emitted_switches:
            emitted_switches.add(switch)
            argv.append(switch)

    # Preset flags (depth and features): --quick takes precedence over
    # --comprehensive, which takes precedence over an explicit --depth.
    if args.quick:
        argv.extend(["--depth", "surface"])
        for switch in (
            "--skip-patterns",
            "--skip-test-examples",
            "--skip-how-to-guides",
            "--skip-config-patterns",
        ):
            add_switch(switch)
    elif args.comprehensive:
        argv.extend(["--depth", "full"])
    elif args.depth:
        argv.extend(["--depth", args.depth])

    argv.extend(["--enhance-level", str(enhance_level)])

    # Pass through remaining arguments. These attributes are read directly:
    # the analyze parser is expected to define all of them.
    if args.languages:
        argv.extend(["--languages", args.languages])
    if args.file_patterns:
        argv.extend(["--file-patterns", args.file_patterns])
    for attr in (
        "skip_api_reference",
        "skip_dependency_graph",
        "skip_patterns",
        "skip_test_examples",
        "skip_how_to_guides",
        "skip_config_patterns",
        "skip_docs",
        "no_comments",
        "verbose",
    ):
        if getattr(args, attr):
            add_switch("--" + attr.replace("_", "-"))

    # Attributes that may be absent from the namespace.
    for attr in ("quiet", "dry_run"):
        if getattr(args, attr, False):
            add_switch("--" + attr.replace("_", "-"))
    for attr in ("preset", "name", "description", "api_key"):
        value = getattr(args, attr, None)
        if value:
            argv.extend(["--" + attr.replace("_", "-"), value])

    # Enhancement Workflow arguments (repeatable: one flag per item)
    for attr in ("enhance_workflow", "enhance_stage", "var"):
        for item in getattr(args, attr, None) or ():
            argv.extend(["--" + attr.replace("_", "-"), item])
    if getattr(args, "workflow_dry_run", False):
        add_switch("--workflow-dry-run")

    return argv


def _enhance_analyzed_skill(output_dir: str | None, enhance_level: int) -> None:
    """Best-effort AI enhancement of ``<output_dir>/SKILL.md``; never raises on failure."""
    if not output_dir:
        # Without a known output directory there is no SKILL.md to locate.
        print("\n⚠️ No output directory given, skipping SKILL.md enhancement")
        return

    skill_dir = Path(output_dir)
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        print(f"\n⚠️ SKILL.md not found at {skill_md}, skipping enhancement")
        return

    print("\n" + "=" * 60)
    print(f"ENHANCING SKILL.MD WITH AI (Level {enhance_level})")
    print("=" * 60 + "\n")

    retry_hint = " You can retry with: skill-seekers enhance " + str(skill_dir)
    try:
        from skill_seekers.cli.enhance_command import (
            _is_root,
            _pick_mode,
            _run_api_mode,
            _run_local_mode,
        )

        # Synthetic namespace standing in for the ``enhance`` command's
        # parsed arguments.
        # NOTE(review): api_key is None here even when --api-key was passed to
        # analyze; confirm whether the user's key should be forwarded.
        enhance_args = argparse.Namespace(
            skill_directory=str(skill_dir),
            target=None,
            api_key=None,
            dry_run=False,
            agent=None,
            agent_cmd=None,
            interactive_enhancement=False,
            background=False,
            daemon=False,
            no_force=False,
            timeout=600,
        )
        mode, target = _pick_mode(enhance_args)

        if mode == "api":
            print(f"\n🤖 Enhancement mode: API ({target})")
            success = _run_api_mode(enhance_args, target) == 0
        elif _is_root():
            print("\n⚠️ Skipping SKILL.md enhancement: running as root")
            print(" Set ANTHROPIC_API_KEY / GOOGLE_API_KEY to enable API mode")
            success = False
        else:
            print("\n🤖 Enhancement mode: LOCAL (Claude Code CLI)")
            success = _run_local_mode(enhance_args) == 0

        if success:
            print("\n✅ SKILL.md enhancement complete!")
            # Explicit UTF-8 with replacement: the file is only read to count
            # lines, and a locale-dependent decode error must not turn a
            # successful enhancement into a reported failure.
            with open(skill_md, encoding="utf-8", errors="replace") as f:
                lines = sum(1 for _ in f)
            print(f" Enhanced SKILL.md: {lines} lines")
        else:
            print("\n⚠️ SKILL.md enhancement did not complete")
            print(retry_hint)
    except Exception as e:
        # Deliberate best-effort boundary: analysis already succeeded, so an
        # enhancement error is reported and swallowed.
        print(f"\n⚠️ SKILL.md enhancement failed: {e}")
        print(retry_hint)
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())