fix: unify scraper argument interface and fix create command forwarding

All scrapers (scrape, github, analyze, pdf) now share a common argument
contract via add_all_standard_arguments() in arguments/common.py.
Universal flags (--dry-run, --verbose, --quiet, --name, --description,
workflow args) work consistently across all source types.

Previously, `create <url> --dry-run`, `create owner/repo --dry-run`,
and `create ./path --dry-run` would crash because sub-scrapers didn't
accept those flags. Also fixes _handle_analyze_command() in main.py,
which did not forward --dry-run, --preset, --quiet, --name, or
--description to codebase_scraper.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
YusufKaraaslanSpyke
2026-02-23 20:56:13 +03:00
parent 022b8a440c
commit 3adc5a8c1d
13 changed files with 431 additions and 505 deletions

View File

@@ -5,13 +5,21 @@ Both codebase_scraper.py (standalone) and parsers/analyze_parser.py (unified CLI
import and use these definitions.
Includes preset system support for #268.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .workflow import WORKFLOW_ARGUMENTS
from .common import add_all_standard_arguments
# Analyze-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
# The default enhance_level for analyze is 0 (overridden after registration).
ANALYZE_ARGUMENTS: dict[str, dict[str, Any]] = {
# Core options
"directory": {
@@ -23,15 +31,6 @@ ANALYZE_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "DIR",
},
},
"output": {
"flags": ("--output",),
"kwargs": {
"type": str,
"default": "output/codebase/",
"help": "Output directory (default: output/codebase/)",
"metavar": "DIR",
},
},
# Preset system (Issue #268)
"preset": {
"flags": ("--preset",),
@@ -91,21 +90,6 @@ ANALYZE_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "PATTERNS",
},
},
# Enhancement options
"enhance_level": {
"flags": ("--enhance-level",),
"kwargs": {
"type": int,
"choices": [0, 1, 2, 3],
"default": 2,
"help": (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled, 1=SKILL.md only, 2=+architecture/config (default), 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
),
"metavar": "LEVEL",
},
},
# Feature skip options
"skip_api_reference": {
"flags": ("--skip-api-reference",),
@@ -163,38 +147,32 @@ ANALYZE_ARGUMENTS: dict[str, dict[str, Any]] = {
"help": "Skip comment extraction",
},
},
# Output options
"verbose": {
"flags": ("--verbose",),
"kwargs": {
"action": "store_true",
"help": "Enable verbose logging",
},
},
# Dry-run and API key (parity with scrape/github/pdf)
"dry_run": {
"flags": ("--dry-run",),
"kwargs": {
"action": "store_true",
"help": "Preview what will be analyzed without creating output",
},
},
"api_key": {
"flags": ("--api-key",),
"kwargs": {
"type": str,
"help": "Anthropic API key (or set ANTHROPIC_API_KEY env var)",
"metavar": "KEY",
},
},
}
# Add workflow arguments (enhance_workflow, enhance_stage, var, workflow_dry_run, workflow_history)
ANALYZE_ARGUMENTS.update(WORKFLOW_ARGUMENTS)
def add_analyze_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all analyze command arguments to a parser."""
"""Add all analyze command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds analyze-specific args on top.
The default for --enhance-level is overridden to 0 (off) for analyze,
and --output default is set to 'output/codebase/'.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override defaults that differ for the analyze command
# enhance-level defaults to 0 (off) for codebase analysis
for action in parser._actions:
if hasattr(action, "dest"):
if action.dest == "enhance_level":
action.default = 0
elif action.dest == "output":
action.default = "output/codebase/"
# Analyze-specific args
for arg_name, arg_def in ANALYZE_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
@@ -203,4 +181,6 @@ def add_analyze_arguments(parser: argparse.ArgumentParser) -> None:
def get_analyze_argument_names() -> set:
"""Get the set of analyze argument destination names."""
return set(ANALYZE_ARGUMENTS.keys())
from .common import get_all_standard_argument_names
return get_all_standard_argument_names() | set(ANALYZE_ARGUMENTS.keys())

View File

@@ -2,6 +2,14 @@
These arguments are used by most commands (scrape, github, pdf, analyze, etc.)
and provide consistent behavior for configuration, output control, and help.
Hierarchy:
COMMON_ARGUMENTS - Identity + enhancement (name, description, output, enhance-level, api-key)
BEHAVIOR_ARGUMENTS - Runtime behavior (dry-run, verbose, quiet)
WORKFLOW_ARGUMENTS - Enhancement workflows (from workflow.py)
add_all_standard_arguments(parser) - Registers all three groups at once.
Every scraper should call this so the `create` command can forward flags safely.
"""
import argparse
@@ -10,14 +18,6 @@ from typing import Any
# Common argument definitions as data structure
# These are arguments that appear in MULTIPLE commands
COMMON_ARGUMENTS: dict[str, dict[str, Any]] = {
"config": {
"flags": ("--config", "-c"),
"kwargs": {
"type": str,
"help": "Load configuration from JSON file (e.g., configs/react.json)",
"metavar": "FILE",
},
},
"name": {
"flags": ("--name",),
"kwargs": {
@@ -66,6 +66,31 @@ COMMON_ARGUMENTS: dict[str, dict[str, Any]] = {
},
}
# Behavior arguments — runtime flags shared by every scraper
# Keys are the argparse dest names; each value provides the flag strings and
# the kwargs passed straight through to parser.add_argument().
BEHAVIOR_ARGUMENTS: dict[str, dict[str, Any]] = {
    # Preview mode: report what would happen without executing.
    "dry_run": {
        "flags": ("--dry-run",),
        "kwargs": {
            "action": "store_true",
            "help": "Preview what will happen without actually executing",
        },
    },
    # DEBUG-level logging.
    "verbose": {
        "flags": ("--verbose", "-v"),
        "kwargs": {
            "action": "store_true",
            "help": "Enable verbose output (DEBUG level logging)",
        },
    },
    # Cap logging at WARNING level.
    "quiet": {
        "flags": ("--quiet", "-q"),
        "kwargs": {
            "action": "store_true",
            "help": "Minimize output (WARNING level logging only)",
        },
    },
}
# RAG (Retrieval-Augmented Generation) arguments
# These are shared across commands that support RAG chunking
RAG_ARGUMENTS: dict[str, dict[str, Any]] = {
@@ -108,7 +133,7 @@ def add_common_arguments(parser: argparse.ArgumentParser) -> None:
Example:
>>> parser = argparse.ArgumentParser()
>>> add_common_arguments(parser)
>>> # Now parser has --config, --name, --description, etc.
>>> # Now parser has --name, --description, etc.
"""
for arg_name, arg_def in COMMON_ARGUMENTS.items():
flags = arg_def["flags"]
@@ -116,11 +141,33 @@ def add_common_arguments(parser: argparse.ArgumentParser) -> None:
parser.add_argument(*flags, **kwargs)
def add_behavior_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the shared runtime flags (--dry-run, --verbose, --quiet).

    Args:
        parser: ArgumentParser that receives every entry in BEHAVIOR_ARGUMENTS.
    """
    for definition in BEHAVIOR_ARGUMENTS.values():
        parser.add_argument(*definition["flags"], **definition["kwargs"])
def add_all_standard_arguments(parser: argparse.ArgumentParser) -> None:
    """Register every universal flag group on ``parser``.

    The single call each scraper makes so the ``create`` command can forward
    any standard flag safely: common identity/enhancement args first, then
    the behavior flags, then the enhancement-workflow args.

    Args:
        parser: ArgumentParser to populate.
    """
    add_common_arguments(parser)
    add_behavior_arguments(parser)

    # Imported lazily: workflow.py depends on this module, so importing it at
    # module scope would create a circular import.
    from .workflow import add_workflow_arguments

    add_workflow_arguments(parser)
def get_common_argument_names() -> set:
"""Get the set of common argument destination names.
Returns:
Set of argument dest names (e.g., {'config', 'name', 'description', ...})
Set of argument dest names (e.g., {'name', 'description', ...})
"""
return set(COMMON_ARGUMENTS.keys())
@@ -153,16 +200,34 @@ def get_rag_argument_names() -> set:
return set(RAG_ARGUMENTS.keys())
def get_behavior_argument_names() -> set:
    """Return the argparse dest names of the behavior flags.

    Returns:
        Set of dest names, e.g. {'dry_run', 'verbose', 'quiet'}.
    """
    # Iterating the dict directly yields its keys; no .keys() call needed.
    return set(BEHAVIOR_ARGUMENTS)
def get_all_standard_argument_names() -> set:
    """Return the union of common, behavior, and workflow dest names.

    Used by the per-command ``get_*_argument_names()`` helpers to report the
    full flag set a parser accepts after ``add_all_standard_arguments()``.
    """
    # Imported lazily to avoid a circular import with workflow.py.
    from .workflow import WORKFLOW_ARGUMENTS

    names: set = set(COMMON_ARGUMENTS)
    names.update(BEHAVIOR_ARGUMENTS)
    names.update(WORKFLOW_ARGUMENTS)
    return names
def get_argument_help(arg_name: str) -> str:
"""Get the help text for a common argument.
"""Get the help text for a common or behavior argument.
Args:
arg_name: Name of the argument (e.g., 'config')
arg_name: Name of the argument (e.g., 'name', 'dry_run')
Returns:
Help text string
Raises:
KeyError: If argument doesn't exist
KeyError: If argument doesn't exist in either dict
"""
return COMMON_ARGUMENTS[arg_name]["kwargs"]["help"]
if arg_name in COMMON_ARGUMENTS:
return COMMON_ARGUMENTS[arg_name]["kwargs"]["help"]
return BEHAVIOR_ARGUMENTS[arg_name]["kwargs"]["help"]

View File

@@ -5,12 +5,20 @@ Both github_scraper.py (standalone) and parsers/github_parser.py (unified CLI)
import and use these definitions.
This ensures the parsers NEVER drift out of sync.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# GitHub-specific argument definitions as data structure
# NOTE: Shared args (name, description, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
GITHUB_ARGUMENTS: dict[str, dict[str, Any]] = {
# Core GitHub options
"repo": {
@@ -37,22 +45,6 @@ GITHUB_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "TOKEN",
},
},
"name": {
"flags": ("--name",),
"kwargs": {
"type": str,
"help": "Skill name (default: repo name)",
"metavar": "NAME",
},
},
"description": {
"flags": ("--description",),
"kwargs": {
"type": str,
"help": "Skill description",
"metavar": "TEXT",
},
},
# Content options
"no_issues": {
"flags": ("--no-issues",),
@@ -92,61 +84,6 @@ GITHUB_ARGUMENTS: dict[str, dict[str, Any]] = {
"help": "Only scrape, don't build skill",
},
},
# Enhancement options
"enhance_level": {
"flags": ("--enhance-level",),
"kwargs": {
"type": int,
"choices": [0, 1, 2, 3],
"default": 2,
"help": (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled, 1=SKILL.md only, 2=+architecture/config (default), 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
),
"metavar": "LEVEL",
},
},
"api_key": {
"flags": ("--api-key",),
"kwargs": {
"type": str,
"help": "Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)",
"metavar": "KEY",
},
},
# Enhancement Workflow arguments (NEW - Phase 2)
"enhance_workflow": {
"flags": ("--enhance-workflow",),
"kwargs": {
"action": "append",
"help": "Apply enhancement workflow (file path or preset: security-focus, minimal, api-documentation, architecture-comprehensive). Can use multiple times to chain workflows.",
"metavar": "WORKFLOW",
},
},
"enhance_stage": {
"flags": ("--enhance-stage",),
"kwargs": {
"action": "append",
"help": "Add inline enhancement stage ('name:prompt'). Can use multiple times.",
"metavar": "STAGE",
},
},
"var": {
"flags": ("--var",),
"kwargs": {
"action": "append",
"help": "Override workflow variable ('key=value'). Can use multiple times.",
"metavar": "VAR",
},
},
"workflow_dry_run": {
"flags": ("--workflow-dry-run",),
"kwargs": {
"action": "store_true",
"help": "Preview workflow without executing (requires --enhance-workflow)",
},
},
# Mode options
"non_interactive": {
"flags": ("--non-interactive",),
@@ -182,6 +119,10 @@ def add_github_arguments(parser: argparse.ArgumentParser) -> None:
- github_scraper.py (standalone scraper)
- parsers/github_parser.py (unified CLI)
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds GitHub-specific args on top.
Args:
parser: The ArgumentParser to add arguments to
@@ -189,6 +130,10 @@ def add_github_arguments(parser: argparse.ArgumentParser) -> None:
>>> parser = argparse.ArgumentParser()
>>> add_github_arguments(parser) # Adds all github args
"""
# Shared universal args first
add_all_standard_arguments(parser)
# GitHub-specific args
for arg_name, arg_def in GITHUB_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
@@ -199,9 +144,11 @@ def get_github_argument_names() -> set:
"""Get the set of github argument destination names.
Returns:
Set of argument dest names
Set of argument dest names (includes shared + github-specific)
"""
return set(GITHUB_ARGUMENTS.keys())
from .common import get_all_standard_argument_names
return get_all_standard_argument_names() | set(GITHUB_ARGUMENTS.keys())
def get_github_argument_count() -> int:
@@ -210,4 +157,12 @@ def get_github_argument_count() -> int:
Returns:
Number of arguments
"""
return len(GITHUB_ARGUMENTS)
from .common import COMMON_ARGUMENTS, BEHAVIOR_ARGUMENTS
from .workflow import WORKFLOW_ARGUMENTS
return (
len(GITHUB_ARGUMENTS)
+ len(COMMON_ARGUMENTS)
+ len(BEHAVIOR_ARGUMENTS)
+ len(WORKFLOW_ARGUMENTS)
)

View File

@@ -3,11 +3,20 @@
This module defines ALL arguments for the pdf command in ONE place.
Both pdf_scraper.py (standalone) and parsers/pdf_parser.py (unified CLI)
import and use these definitions.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from .common import add_all_standard_arguments
# PDF-specific argument definitions as data structure
# NOTE: Shared args (name, description, output, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
PDF_ARGUMENTS: dict[str, dict[str, Any]] = {
"config": {
"flags": ("--config",),
@@ -25,22 +34,6 @@ PDF_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "PATH",
},
},
"name": {
"flags": ("--name",),
"kwargs": {
"type": str,
"help": "Skill name (used with --pdf)",
"metavar": "NAME",
},
},
"description": {
"flags": ("--description",),
"kwargs": {
"type": str,
"help": "Skill description",
"metavar": "TEXT",
},
},
"from_json": {
"flags": ("--from-json",),
"kwargs": {
@@ -49,67 +42,32 @@ PDF_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "FILE",
},
},
# Enhancement Workflow arguments (NEW - Phase 2)
"enhance_workflow": {
"flags": ("--enhance-workflow",),
"kwargs": {
"action": "append",
"help": "Apply enhancement workflow (file path or preset: security-focus, minimal, api-documentation, architecture-comprehensive). Can use multiple times to chain workflows.",
"metavar": "WORKFLOW",
},
},
"enhance_stage": {
"flags": ("--enhance-stage",),
"kwargs": {
"action": "append",
"help": "Add inline enhancement stage ('name:prompt'). Can use multiple times.",
"metavar": "STAGE",
},
},
"var": {
"flags": ("--var",),
"kwargs": {
"action": "append",
"help": "Override workflow variable ('key=value'). Can use multiple times.",
"metavar": "VAR",
},
},
"workflow_dry_run": {
"flags": ("--workflow-dry-run",),
"kwargs": {
"action": "store_true",
"help": "Preview workflow without executing (requires --enhance-workflow)",
},
},
# API key (parity with scrape/github/analyze)
"api_key": {
"flags": ("--api-key",),
"kwargs": {
"type": str,
"help": "Anthropic API key (or set ANTHROPIC_API_KEY env var)",
"metavar": "KEY",
},
},
# Enhancement level
"enhance_level": {
"flags": ("--enhance-level",),
"kwargs": {
"type": int,
"choices": [0, 1, 2, 3],
"default": 0,
"help": (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for PDF), 1=SKILL.md only, 2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
),
"metavar": "LEVEL",
},
},
}
def add_pdf_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all pdf command arguments to a parser."""
"""Add all pdf command arguments to a parser.
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds PDF-specific args on top.
The default for --enhance-level is overridden to 0 (disabled) for PDF.
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Override enhance-level default to 0 for PDF
for action in parser._actions:
if hasattr(action, "dest") and action.dest == "enhance_level":
action.default = 0
action.help = (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled (default for PDF), 1=SKILL.md only, 2=+architecture/config, 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
)
# PDF-specific args
for arg_name, arg_def in PDF_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]

View File

@@ -5,16 +5,21 @@ Both doc_scraper.py (standalone) and parsers/scrape_parser.py (unified CLI)
import and use these definitions.
This ensures the parsers NEVER drift out of sync.
Shared arguments (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) come from common.py / workflow.py
via ``add_all_standard_arguments()``.
"""
import argparse
from typing import Any
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
from .common import RAG_ARGUMENTS
from .common import add_all_standard_arguments, RAG_ARGUMENTS
# Scrape-specific argument definitions as data structure
# This enables introspection for UI generation and testing
# NOTE: Shared args (name, description, enhance_level, api_key, dry_run,
# verbose, quiet, workflow args) are registered by add_all_standard_arguments().
SCRAPE_ARGUMENTS: dict[str, dict[str, Any]] = {
# Positional argument
"url_positional": {
@@ -25,7 +30,7 @@ SCRAPE_ARGUMENTS: dict[str, dict[str, Any]] = {
"help": "Base documentation URL (alternative to --url)",
},
},
# Common arguments (also defined in common.py for other commands)
# Config file (scrape-specific — loads selectors, categories, etc.)
"config": {
"flags": ("--config", "-c"),
"kwargs": {
@@ -34,77 +39,6 @@ SCRAPE_ARGUMENTS: dict[str, dict[str, Any]] = {
"metavar": "FILE",
},
},
"name": {
"flags": ("--name",),
"kwargs": {
"type": str,
"help": "Skill name (used for output directory and filenames)",
"metavar": "NAME",
},
},
"description": {
"flags": ("--description", "-d"),
"kwargs": {
"type": str,
"help": "Skill description (used in SKILL.md)",
"metavar": "TEXT",
},
},
# Enhancement arguments
"enhance_level": {
"flags": ("--enhance-level",),
"kwargs": {
"type": int,
"choices": [0, 1, 2, 3],
"default": 2,
"help": (
"AI enhancement level (auto-detects API vs LOCAL mode): "
"0=disabled, 1=SKILL.md only, 2=+architecture/config (default), 3=full enhancement. "
"Mode selection: uses API if ANTHROPIC_API_KEY is set, otherwise LOCAL (Claude Code)"
),
"metavar": "LEVEL",
},
},
"api_key": {
"flags": ("--api-key",),
"kwargs": {
"type": str,
"help": "Anthropic API key for --enhance (or set ANTHROPIC_API_KEY env var)",
"metavar": "KEY",
},
},
# Enhancement Workflow arguments (NEW - Phase 2)
"enhance_workflow": {
"flags": ("--enhance-workflow",),
"kwargs": {
"action": "append",
"help": "Apply enhancement workflow (file path or preset: security-focus, minimal, api-documentation, architecture-comprehensive). Can use multiple times to chain workflows.",
"metavar": "WORKFLOW",
},
},
"enhance_stage": {
"flags": ("--enhance-stage",),
"kwargs": {
"action": "append",
"help": "Add inline enhancement stage ('name:prompt'). Can use multiple times.",
"metavar": "STAGE",
},
},
"var": {
"flags": ("--var",),
"kwargs": {
"action": "append",
"help": "Override workflow variable ('key=value'). Can use multiple times.",
"metavar": "VAR",
},
},
"workflow_dry_run": {
"flags": ("--workflow-dry-run",),
"kwargs": {
"action": "store_true",
"help": "Preview workflow without executing (requires --enhance-workflow)",
},
},
# Scrape-specific options
"interactive": {
"flags": ("--interactive", "-i"),
@@ -136,13 +70,6 @@ SCRAPE_ARGUMENTS: dict[str, dict[str, Any]] = {
"help": "Skip scraping, use existing data",
},
},
"dry_run": {
"flags": ("--dry-run",),
"kwargs": {
"action": "store_true",
"help": "Preview what will be scraped without actually scraping",
},
},
"resume": {
"flags": ("--resume",),
"kwargs": {
@@ -195,20 +122,6 @@ SCRAPE_ARGUMENTS: dict[str, dict[str, Any]] = {
"help": "Open terminal window for enhancement (use with --enhance-local)",
},
},
"verbose": {
"flags": ("--verbose", "-v"),
"kwargs": {
"action": "store_true",
"help": "Enable verbose output (DEBUG level logging)",
},
},
"quiet": {
"flags": ("--quiet", "-q"),
"kwargs": {
"action": "store_true",
"help": "Minimize output (WARNING level logging only)",
},
},
# RAG chunking options (imported from common.py - see RAG_ARGUMENTS)
# Note: RAG arguments will be merged at runtime
"no_preserve_code_blocks": {
@@ -239,13 +152,21 @@ def add_scrape_arguments(parser: argparse.ArgumentParser) -> None:
- doc_scraper.py (standalone scraper)
- parsers/scrape_parser.py (unified CLI)
Registers shared args (name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, workflow args) via add_all_standard_arguments(),
then adds scrape-specific args on top.
Args:
parser: The ArgumentParser to add arguments to
Example:
>>> parser = argparse.ArgumentParser()
>>> add_scrape_arguments(parser) # Adds all 26 scrape args
>>> add_scrape_arguments(parser)
"""
# Shared universal args first
add_all_standard_arguments(parser)
# Scrape-specific args
for arg_name, arg_def in SCRAPE_ARGUMENTS.items():
flags = arg_def["flags"]
kwargs = arg_def["kwargs"]
@@ -256,9 +177,11 @@ def get_scrape_argument_names() -> set:
"""Get the set of scrape argument destination names.
Returns:
Set of argument dest names
Set of argument dest names (includes shared + scrape-specific)
"""
return set(SCRAPE_ARGUMENTS.keys())
from .common import get_all_standard_argument_names
return get_all_standard_argument_names() | set(SCRAPE_ARGUMENTS.keys())
def get_scrape_argument_count() -> int:
@@ -267,4 +190,12 @@ def get_scrape_argument_count() -> int:
Returns:
Number of arguments
"""
return len(SCRAPE_ARGUMENTS)
from .common import COMMON_ARGUMENTS, BEHAVIOR_ARGUMENTS
from .workflow import WORKFLOW_ARGUMENTS
return (
len(SCRAPE_ARGUMENTS)
+ len(COMMON_ARGUMENTS)
+ len(BEHAVIOR_ARGUMENTS)
+ len(WORKFLOW_ARGUMENTS)
)

View File

@@ -1056,6 +1056,8 @@ def analyze_codebase(
extract_config_patterns: bool = True,
extract_docs: bool = True,
enhance_level: int = 0,
skill_name: str | None = None,
skill_description: str | None = None,
) -> dict[str, Any]:
"""
Analyze local codebase and extract code knowledge.
@@ -1075,6 +1077,8 @@ def analyze_codebase(
extract_config_patterns: Extract configuration patterns from config files (C3.4)
extract_docs: Extract and process markdown documentation files (default: True)
enhance_level: AI enhancement level (0=off, 1=SKILL.md only, 2=+config+arch+docs, 3=full)
skill_name: Optional override for skill name (default: directory name)
skill_description: Optional override for skill description
Returns:
Analysis results dictionary
@@ -1598,6 +1602,8 @@ def analyze_codebase(
extract_config_patterns=extract_config_patterns,
extract_docs=extract_docs,
docs_data=docs_data,
skill_name=skill_name,
skill_description=skill_description,
)
return results
@@ -1615,6 +1621,8 @@ def _generate_skill_md(
extract_config_patterns: bool,
extract_docs: bool = True,
docs_data: dict[str, Any] | None = None,
skill_name: str | None = None,
skill_description: str | None = None,
):
"""
Generate rich SKILL.md from codebase analysis results.
@@ -1633,10 +1641,14 @@ def _generate_skill_md(
repo_name = directory.name
# Generate skill name (lowercase, hyphens only, max 64 chars)
skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]
# Use CLI override if provided, otherwise derive from directory name
if skill_name:
skill_name = skill_name.lower().replace("_", "-").replace(" ", "-")[:64]
else:
skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]
# Generate description
description = f"Local codebase analysis for {repo_name}"
# Generate description (use CLI override if provided)
description = skill_description or f"Local codebase analysis for {repo_name}"
# Count files by language
language_stats = _get_language_stats(results.get("files", []))
@@ -2257,6 +2269,8 @@ def _check_deprecated_flags(args):
def main():
"""Command-line interface for codebase analysis."""
from skill_seekers.cli.arguments.analyze import add_analyze_arguments
parser = argparse.ArgumentParser(
description="Analyze local codebases and extract code knowledge",
formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -2285,92 +2299,10 @@ Examples:
""",
)
parser.add_argument("--directory", required=True, help="Directory to analyze")
parser.add_argument(
"--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
)
# Register all args from the shared definitions module
add_analyze_arguments(parser)
# Preset selection (NEW - recommended way)
parser.add_argument(
"--preset",
choices=["quick", "standard", "comprehensive"],
help="Analysis preset: quick (1-2 min), standard (5-10 min, DEFAULT), comprehensive (20-60 min)",
)
parser.add_argument(
"--preset-list", action="store_true", help="Show available presets and exit"
)
# Legacy preset flags (kept for backward compatibility)
parser.add_argument(
"--quick",
action="store_true",
help="[DEPRECATED] Quick analysis - use '--preset quick' instead",
)
parser.add_argument(
"--comprehensive",
action="store_true",
help="[DEPRECATED] Comprehensive analysis - use '--preset comprehensive' instead",
)
parser.add_argument(
"--depth",
choices=["surface", "deep", "full"],
default=None, # Don't set default here - let preset system handle it
help=(
"[DEPRECATED] Analysis depth - use --preset instead. "
"surface (basic code structure, ~1-2 min), "
"deep (code + patterns + tests, ~5-10 min, DEFAULT), "
"full (everything + AI enhancement, ~20-60 min)"
),
)
parser.add_argument(
"--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
)
parser.add_argument(
"--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
)
parser.add_argument(
"--skip-api-reference",
action="store_true",
default=False,
help="Skip API reference markdown documentation generation (default: enabled)",
)
parser.add_argument(
"--skip-dependency-graph",
action="store_true",
default=False,
help="Skip dependency graph and circular dependency detection (default: enabled)",
)
parser.add_argument(
"--skip-patterns",
action="store_true",
default=False,
help="Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)",
)
parser.add_argument(
"--skip-test-examples",
action="store_true",
default=False,
help="Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)",
)
parser.add_argument(
"--skip-how-to-guides",
action="store_true",
default=False,
help="Skip how-to guide generation from workflow examples (default: enabled)",
)
parser.add_argument(
"--skip-config-patterns",
action="store_true",
default=False,
help="Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)",
)
parser.add_argument(
"--skip-docs",
action="store_true",
default=False,
help="Skip project documentation extraction from markdown files (README, docs/, etc.) (default: enabled)",
)
# Extra legacy arg only used by standalone CLI (not in arguments/analyze.py)
parser.add_argument(
"--ai-mode",
choices=["auto", "api", "local", "none"],
@@ -2384,61 +2316,6 @@ Examples:
"💡 TIP: Use --enhance flag instead for simpler UX!"
),
)
parser.add_argument("--no-comments", action="store_true", help="Skip comment extraction")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument(
"--enhance-level",
type=int,
choices=[0, 1, 2, 3],
default=0,
help=(
"AI enhancement level: "
"0=off (default), "
"1=SKILL.md only, "
"2=SKILL.md+Architecture+Config, "
"3=full (patterns, tests, config, architecture, SKILL.md)"
),
)
# Workflow enhancement arguments
parser.add_argument(
"--enhance-workflow",
action="append",
help=(
"Enhancement workflow to use (name or path to YAML file). "
"Can be used multiple times to chain workflows. "
"Examples: 'security-focus', 'architecture-comprehensive', "
"'.skill-seekers/my-workflow.yaml'. "
"Overrides --enhance-level when provided."
),
metavar="WORKFLOW",
)
parser.add_argument(
"--enhance-stage",
type=str,
action="append",
help=(
"Add inline enhancement stage. Format: 'name:prompt'. "
"Can be used multiple times. Example: "
"--enhance-stage 'security:Analyze for security issues'"
),
metavar="NAME:PROMPT",
)
parser.add_argument(
"--var",
type=str,
action="append",
help=(
"Override workflow variable. Format: 'key=value'. "
"Can be used multiple times. Example: --var focus_area=performance"
),
metavar="KEY=VALUE",
)
parser.add_argument(
"--workflow-dry-run",
action="store_true",
help="Show workflow stages without executing (dry run mode)",
)
# Check for deprecated flags
deprecated_flags = {
@@ -2506,9 +2383,40 @@ Examples:
args.depth = "deep" # Default depth
# Set logging level
if args.verbose:
if getattr(args, "quiet", False):
logging.getLogger().setLevel(logging.WARNING)
elif args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
# Handle --dry-run
if getattr(args, "dry_run", False):
directory = Path(args.directory)
print(f"\n{'=' * 60}")
print(f"DRY RUN: Codebase Analysis")
print(f"{'=' * 60}")
print(f"Directory: {directory.resolve()}")
print(f"Output: {args.output}")
print(f"Preset: {preset_name}")
print(f"Depth: {args.depth or 'deep (default)'}")
print(f"Name: {getattr(args, 'name', None) or directory.name}")
print(f"Enhance: level {args.enhance_level}")
print(f"Skip flags: ", end="")
skips = []
for flag in [
"skip_api_reference",
"skip_dependency_graph",
"skip_patterns",
"skip_test_examples",
"skip_how_to_guides",
"skip_config_patterns",
"skip_docs",
]:
if getattr(args, flag, False):
skips.append(f"--{flag.replace('_', '-')}")
print(", ".join(skips) if skips else "(none)")
print(f"\n✅ Dry run complete")
return 0
# Validate directory
directory = Path(args.directory)
if not directory.exists():
@@ -2546,6 +2454,8 @@ Examples:
extract_config_patterns=not args.skip_config_patterns,
extract_docs=not args.skip_docs,
enhance_level=args.enhance_level, # AI enhancement level (0-3)
skill_name=getattr(args, "name", None),
skill_description=getattr(args, "description", None),
)
# ============================================================

View File

@@ -151,7 +151,27 @@ class CreateCommand:
# Add universal arguments
self._add_common_args(argv)
# Add web-specific arguments
# Config file (web-specific — loads selectors, categories, etc.)
if self.args.config:
argv.extend(["--config", self.args.config])
# RAG arguments (web scraper only)
if getattr(self.args, "chunk_for_rag", False):
argv.append("--chunk-for-rag")
if getattr(self.args, "chunk_size", None) and self.args.chunk_size != 512:
argv.extend(["--chunk-size", str(self.args.chunk_size)])
if getattr(self.args, "chunk_overlap", None) and self.args.chunk_overlap != 50:
argv.extend(["--chunk-overlap", str(self.args.chunk_overlap)])
# Advanced web-specific arguments
if getattr(self.args, "no_preserve_code_blocks", False):
argv.append("--no-preserve-code-blocks")
if getattr(self.args, "no_preserve_paragraphs", False):
argv.append("--no-preserve-paragraphs")
if getattr(self.args, "interactive_enhancement", False):
argv.append("--interactive-enhancement")
# Web-specific arguments
if getattr(self.args, "max_pages", None):
argv.extend(["--max-pages", str(self.args.max_pages)])
if getattr(self.args, "skip_scrape", False):
@@ -192,6 +212,10 @@ class CreateCommand:
# Add universal arguments
self._add_common_args(argv)
# Config file (github-specific)
if self.args.config:
argv.extend(["--config", self.args.config])
# Add GitHub-specific arguments
if getattr(self.args, "token", None):
argv.extend(["--token", self.args.token])
@@ -235,6 +259,10 @@ class CreateCommand:
# Add universal arguments
self._add_common_args(argv)
# Preset (local codebase scraper has preset support)
if getattr(self.args, "preset", None):
argv.extend(["--preset", self.args.preset])
# Add local-specific arguments
if getattr(self.args, "languages", None):
argv.extend(["--languages", self.args.languages])
@@ -336,10 +364,15 @@ class CreateCommand:
sys.argv = original_argv
def _add_common_args(self, argv: list[str]) -> None:
"""Add common/universal arguments to argv list.
"""Add truly universal arguments to argv list.
Args:
argv: Argument list to append to
These flags are accepted by ALL scrapers (doc, github, codebase, pdf)
because each scraper calls ``add_all_standard_arguments(parser)``
which registers: name, description, output, enhance-level, api-key,
dry-run, verbose, quiet, and workflow args.
Route-specific flags (preset, config, RAG, preserve, etc.) are
forwarded only by the _route_*() method that needs them.
"""
# Identity arguments
if self.args.name:
@@ -367,31 +400,7 @@ class CreateCommand:
if self.args.quiet:
argv.append("--quiet")
# RAG arguments (NEW - universal!)
if getattr(self.args, "chunk_for_rag", False):
argv.append("--chunk-for-rag")
if getattr(self.args, "chunk_size", None) and self.args.chunk_size != 512:
argv.extend(["--chunk-size", str(self.args.chunk_size)])
if getattr(self.args, "chunk_overlap", None) and self.args.chunk_overlap != 50:
argv.extend(["--chunk-overlap", str(self.args.chunk_overlap)])
# Preset argument
if getattr(self.args, "preset", None):
argv.extend(["--preset", self.args.preset])
# Config file
if self.args.config:
argv.extend(["--config", self.args.config])
# Advanced arguments
if getattr(self.args, "no_preserve_code_blocks", False):
argv.append("--no-preserve-code-blocks")
if getattr(self.args, "no_preserve_paragraphs", False):
argv.append("--no-preserve-paragraphs")
if getattr(self.args, "interactive_enhancement", False):
argv.append("--interactive-enhancement")
# Enhancement Workflow arguments (NEW - Phase 2)
# Enhancement Workflow arguments
if getattr(self.args, "enhance_workflow", None):
for wf in self.args.enhance_workflow:
argv.extend(["--enhance-workflow", wf])

View File

@@ -1391,6 +1391,29 @@ def main():
parser = setup_argument_parser()
args = parser.parse_args()
# Set logging level from behavior args
if getattr(args, "quiet", False):
logging.getLogger().setLevel(logging.WARNING)
elif getattr(args, "verbose", False):
logging.getLogger().setLevel(logging.DEBUG)
# Handle --dry-run
if getattr(args, "dry_run", False):
repo = args.repo or (args.config and "(from config)")
print(f"\n{'=' * 60}")
print(f"DRY RUN: GitHub Repository Analysis")
print(f"{'=' * 60}")
print(f"Repository: {repo}")
print(f"Name: {getattr(args, 'name', None) or '(auto-detect)'}")
print(f"Include issues: {not getattr(args, 'no_issues', False)}")
print(f"Include releases: {not getattr(args, 'no_releases', False)}")
print(f"Include changelog: {not getattr(args, 'no_changelog', False)}")
print(f"Max issues: {getattr(args, 'max_issues', 100)}")
print(f"Enhance level: {getattr(args, 'enhance_level', 0)}")
print(f"Profile: {getattr(args, 'profile', None) or '(default)'}")
print(f"\n✅ Dry run complete")
return 0
# Build config from args or file
if args.config:
with open(args.config, encoding="utf-8") as f:

View File

@@ -305,6 +305,30 @@ def _handle_analyze_command(args: argparse.Namespace) -> int:
sys.argv.append("--no-comments")
if args.verbose:
sys.argv.append("--verbose")
if getattr(args, "quiet", False):
sys.argv.append("--quiet")
if getattr(args, "dry_run", False):
sys.argv.append("--dry-run")
if getattr(args, "preset", None):
sys.argv.extend(["--preset", args.preset])
if getattr(args, "name", None):
sys.argv.extend(["--name", args.name])
if getattr(args, "description", None):
sys.argv.extend(["--description", args.description])
if getattr(args, "api_key", None):
sys.argv.extend(["--api-key", args.api_key])
# Enhancement Workflow arguments
if getattr(args, "enhance_workflow", None):
for wf in args.enhance_workflow:
sys.argv.extend(["--enhance-workflow", wf])
if getattr(args, "enhance_stage", None):
for stage in args.enhance_stage:
sys.argv.extend(["--enhance-stage", stage])
if getattr(args, "workflow_var", None):
for var in args.workflow_var:
sys.argv.extend(["--var", var])
if getattr(args, "workflow_dry_run", False):
sys.argv.append("--workflow-dry-run")
try:
result = analyze_main() or 0

View File

@@ -13,6 +13,7 @@ Usage:
import argparse
import json
import logging
import os
import re
import sys
@@ -644,6 +645,24 @@ def main():
args = parser.parse_args()
# Set logging level from behavior args
if getattr(args, "quiet", False):
logging.getLogger().setLevel(logging.WARNING)
elif getattr(args, "verbose", False):
logging.getLogger().setLevel(logging.DEBUG)
# Handle --dry-run
if getattr(args, "dry_run", False):
source = args.pdf or args.config or args.from_json or "(none)"
print(f"\n{'=' * 60}")
print(f"DRY RUN: PDF Extraction")
print(f"{'=' * 60}")
print(f"Source: {source}")
print(f"Name: {getattr(args, 'name', None) or '(auto-detect)'}")
print(f"Enhance level: {getattr(args, 'enhance_level', 0)}")
print(f"\n✅ Dry run complete")
return
# Validate inputs
if not (args.config or args.pdf or args.from_json):
parser.error("Must specify --config, --pdf, or --from-json")