feat(cli): Phase 2 - Organize RAG arguments into common.py (DRY principle)

Changes:
- Added RAG_ARGUMENTS dict to common.py with 3 flags:
  - --chunk-for-rag (enable semantic chunking)
  - --chunk-size (default: 512 tokens)
  - --chunk-overlap (default: 50 tokens)
- Removed duplicate RAG arguments from create.py and scrape.py
- Used .update() pattern to merge RAG_ARGUMENTS into UNIVERSAL_ARGUMENTS and SCRAPE_ARGUMENTS
- Added helper functions: add_rag_arguments(), get_rag_argument_names()
- Updated tests to reflect new argument count (15 → 13 universal arguments)
- Fixed test expectations for boolean_args (removed 'enhance', 'enhance_local')

Result:
- Single source of truth for RAG arguments in common.py
- DRY principle maintained across all commands
- All 88 key tests passing

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-02-15 14:41:04 +03:00
parent ba1670a220
commit 13838cb5a9
4 changed files with 81 additions and 62 deletions

View File

@@ -68,6 +68,37 @@ COMMON_ARGUMENTS: Dict[str, Dict[str, Any]] = {
}
# RAG (Retrieval-Augmented Generation) arguments
# These are shared across commands that support RAG chunking.
#
# Each entry maps an argument name to a definition with two parts:
#   "flags"  - the option strings, unpacked positionally into add_argument()
#   "kwargs" - the keyword arguments, unpacked into add_argument()
#
# NOTE: modules that merge this table into their own with dict.update() receive
# references to these nested definition dicts, not copies, so an in-place change
# to an entry is visible everywhere. Treat the entries as read-only.
RAG_ARGUMENTS: Dict[str, Dict[str, Any]] = {
    # Boolean switch: with "store_true" the parsed value is False unless the
    # flag is present on the command line.
    "chunk_for_rag": {
        "flags": ("--chunk-for-rag",),
        "kwargs": {
            "action": "store_true",
            "help": "Enable semantic chunking for RAG pipelines",
        },
    },
    # Integer option; the default is repeated in the help text, so keep the
    # two in sync when changing it.
    "chunk_size": {
        "flags": ("--chunk-size",),
        "kwargs": {
            "type": int,
            "default": 512,
            "metavar": "TOKENS",
            "help": "Chunk size in tokens for RAG (default: 512)",
        },
    },
    # Integer option; the default is repeated in the help text, so keep the
    # two in sync when changing it.
    "chunk_overlap": {
        "flags": ("--chunk-overlap",),
        "kwargs": {
            "type": int,
            "default": 50,
            "metavar": "TOKENS",
            "help": "Overlap between chunks in tokens (default: 50)",
        },
    },
}
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
"""Add common arguments to a parser.
@@ -89,13 +120,41 @@ def add_common_arguments(parser: argparse.ArgumentParser) -> None:
def get_common_argument_names() -> set:
    """Return the destination names of all common arguments.

    Returns:
        A new set holding the keys of COMMON_ARGUMENTS
        (e.g., {'config', 'name', 'description', ...})
    """
    # Iterating a dict yields its keys, so no explicit .keys() call is needed
    return set(COMMON_ARGUMENTS)
def add_rag_arguments(parser: argparse.ArgumentParser) -> None:
    """Add RAG (Retrieval-Augmented Generation) arguments to a parser.

    Registers every entry of RAG_ARGUMENTS on ``parser``: the entry's
    "flags" are passed positionally and its "kwargs" as keyword arguments
    to ``parser.add_argument()``.

    Args:
        parser: The ArgumentParser to add arguments to

    Example:
        >>> parser = argparse.ArgumentParser()
        >>> add_rag_arguments(parser)
        >>> # Now parser has --chunk-for-rag, --chunk-size, --chunk-overlap
    """
    # Only the definitions are needed here; the dict keys are never used,
    # so iterate over the values instead of unpacking unused names.
    for arg_def in RAG_ARGUMENTS.values():
        parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])
def get_rag_argument_names() -> set:
    """Return the destination names of all RAG arguments.

    Returns:
        A new set holding the keys of RAG_ARGUMENTS
        (e.g., {'chunk_for_rag', 'chunk_size', 'chunk_overlap'})
    """
    # Iterating a dict yields its keys, so no explicit .keys() call is needed
    return set(RAG_ARGUMENTS)
def get_argument_help(arg_name: str) -> str:
"""Get the help text for a common argument.

View File

@@ -13,6 +13,7 @@ import argparse
from typing import Dict, Any, Set, List
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
from .common import RAG_ARGUMENTS
# =============================================================================
@@ -91,32 +92,8 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
"help": "Minimize output (WARNING level only)",
},
},
# RAG features (NEW - universal for all sources!)
"chunk_for_rag": {
"flags": ("--chunk-for-rag",),
"kwargs": {
"action": "store_true",
"help": "Enable semantic chunking for RAG pipelines (all sources)",
},
},
"chunk_size": {
"flags": ("--chunk-size",),
"kwargs": {
"type": int,
"default": 512,
"metavar": "TOKENS",
"help": "Chunk size in tokens for RAG (default: 512)",
},
},
"chunk_overlap": {
"flags": ("--chunk-overlap",),
"kwargs": {
"type": int,
"default": 50,
"metavar": "TOKENS",
"help": "Overlap between chunks in tokens (default: 50)",
},
},
# RAG features (imported from common.py - see RAG_ARGUMENTS)
# Note: RAG arguments are merged into UNIVERSAL_ARGUMENTS at import time, by the .update(RAG_ARGUMENTS) call that follows this dict
# Preset system
"preset": {
"flags": ("--preset",),
@@ -138,6 +115,9 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
},
}
# Merge RAG arguments from common.py into universal arguments.
# dict.update() overwrites any key that already exists and appends new keys
# after the existing ones. The nested definition dicts are shared with
# RAG_ARGUMENTS by reference (not copied), so they must not be mutated in place.
UNIVERSAL_ARGUMENTS.update(RAG_ARGUMENTS)
# =============================================================================
# TIER 2: SOURCE-SPECIFIC ARGUMENTS

View File

@@ -11,6 +11,7 @@ import argparse
from typing import Dict, Any
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
from .common import RAG_ARGUMENTS
# Scrape-specific argument definitions as data structure
@@ -177,32 +178,8 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
"help": "Minimize output (WARNING level logging only)",
},
},
# RAG chunking options (v2.10.0)
"chunk_for_rag": {
"flags": ("--chunk-for-rag",),
"kwargs": {
"action": "store_true",
"help": "Enable semantic chunking for RAG pipelines (generates rag_chunks.json)",
},
},
"chunk_size": {
"flags": ("--chunk-size",),
"kwargs": {
"type": int,
"default": 512,
"metavar": "TOKENS",
"help": "Target chunk size in tokens for RAG (default: 512)",
},
},
"chunk_overlap": {
"flags": ("--chunk-overlap",),
"kwargs": {
"type": int,
"default": 50,
"metavar": "TOKENS",
"help": "Overlap size between chunks in tokens (default: 50)",
},
},
# RAG chunking options (imported from common.py - see RAG_ARGUMENTS)
# Note: RAG arguments are merged into SCRAPE_ARGUMENTS at import time, by the .update(RAG_ARGUMENTS) call that follows this dict
"no_preserve_code_blocks": {
"flags": ("--no-preserve-code-blocks",),
"kwargs": {
@@ -219,6 +196,9 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
},
}
# Merge RAG arguments from common.py.
# dict.update() overwrites any key that already exists and appends new keys
# after the existing ones. The nested definition dicts are shared with
# RAG_ARGUMENTS by reference (not copied), so they must not be mutated in place.
SCRAPE_ARGUMENTS.update(RAG_ARGUMENTS)
def add_scrape_arguments(parser: argparse.ArgumentParser) -> None:
"""Add all scrape command arguments to a parser.