feat(cli): Phase 2 - Organize RAG arguments into common.py (DRY principle)
Changes:
- Added RAG_ARGUMENTS dict to common.py with 3 flags:
  - --chunk-for-rag (enable semantic chunking)
  - --chunk-size (default: 512 tokens)
  - --chunk-overlap (default: 50 tokens)
- Removed duplicate RAG arguments from create.py and scrape.py
- Used .update() pattern to merge RAG_ARGUMENTS into UNIVERSAL_ARGUMENTS and SCRAPE_ARGUMENTS
- Added helper functions: add_rag_arguments(), get_rag_argument_names()
- Updated tests to reflect new argument count (15 → 13 universal arguments)
- Fixed test expectations for boolean_args (removed 'enhance', 'enhance_local')

Result:
- Single source of truth for RAG arguments in common.py
- DRY principle maintained across all commands
- All 88 key tests passing

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -68,6 +68,37 @@ COMMON_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# RAG (Retrieval-Augmented Generation) arguments
|
||||||
|
# These are shared across commands that support RAG chunking
|
||||||
|
RAG_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
||||||
|
"chunk_for_rag": {
|
||||||
|
"flags": ("--chunk-for-rag",),
|
||||||
|
"kwargs": {
|
||||||
|
"action": "store_true",
|
||||||
|
"help": "Enable semantic chunking for RAG pipelines",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"chunk_size": {
|
||||||
|
"flags": ("--chunk-size",),
|
||||||
|
"kwargs": {
|
||||||
|
"type": int,
|
||||||
|
"default": 512,
|
||||||
|
"metavar": "TOKENS",
|
||||||
|
"help": "Chunk size in tokens for RAG (default: 512)",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"chunk_overlap": {
|
||||||
|
"flags": ("--chunk-overlap",),
|
||||||
|
"kwargs": {
|
||||||
|
"type": int,
|
||||||
|
"default": 50,
|
||||||
|
"metavar": "TOKENS",
|
||||||
|
"help": "Overlap between chunks in tokens (default: 50)",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
|
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
|
||||||
"""Add common arguments to a parser.
|
"""Add common arguments to a parser.
|
||||||
|
|
||||||
@@ -89,13 +120,41 @@ def add_common_arguments(parser: argparse.ArgumentParser) -> None:
|
|||||||
|
|
||||||
def get_common_argument_names() -> set:
|
def get_common_argument_names() -> set:
|
||||||
"""Get the set of common argument destination names.
|
"""Get the set of common argument destination names.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Set of argument dest names (e.g., {'config', 'name', 'description', ...})
|
Set of argument dest names (e.g., {'config', 'name', 'description', ...})
|
||||||
"""
|
"""
|
||||||
return set(COMMON_ARGUMENTS.keys())
|
return set(COMMON_ARGUMENTS.keys())
|
||||||
|
|
||||||
|
|
||||||
|
def add_rag_arguments(parser: argparse.ArgumentParser) -> None:
|
||||||
|
"""Add RAG (Retrieval-Augmented Generation) arguments to a parser.
|
||||||
|
|
||||||
|
These arguments enable semantic chunking for RAG pipelines.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parser: The ArgumentParser to add arguments to
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> parser = argparse.ArgumentParser()
|
||||||
|
>>> add_rag_arguments(parser)
|
||||||
|
>>> # Now parser has --chunk-for-rag, --chunk-size, --chunk-overlap
|
||||||
|
"""
|
||||||
|
for arg_name, arg_def in RAG_ARGUMENTS.items():
|
||||||
|
flags = arg_def["flags"]
|
||||||
|
kwargs = arg_def["kwargs"]
|
||||||
|
parser.add_argument(*flags, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_rag_argument_names() -> set:
|
||||||
|
"""Get the set of RAG argument destination names.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of argument dest names (e.g., {'chunk_for_rag', 'chunk_size', 'chunk_overlap'})
|
||||||
|
"""
|
||||||
|
return set(RAG_ARGUMENTS.keys())
|
||||||
|
|
||||||
|
|
||||||
def get_argument_help(arg_name: str) -> str:
|
def get_argument_help(arg_name: str) -> str:
|
||||||
"""Get the help text for a common argument.
|
"""Get the help text for a common argument.
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import argparse
|
|||||||
from typing import Dict, Any, Set, List
|
from typing import Dict, Any, Set, List
|
||||||
|
|
||||||
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
|
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
|
||||||
|
from .common import RAG_ARGUMENTS
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -91,32 +92,8 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
|||||||
"help": "Minimize output (WARNING level only)",
|
"help": "Minimize output (WARNING level only)",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# RAG features (NEW - universal for all sources!)
|
# RAG features (imported from common.py - see RAG_ARGUMENTS)
|
||||||
"chunk_for_rag": {
|
# Note: RAG arguments are merged into UNIVERSAL_ARGUMENTS via .update() right after this dict literal (at module import time)
|
||||||
"flags": ("--chunk-for-rag",),
|
|
||||||
"kwargs": {
|
|
||||||
"action": "store_true",
|
|
||||||
"help": "Enable semantic chunking for RAG pipelines (all sources)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"chunk_size": {
|
|
||||||
"flags": ("--chunk-size",),
|
|
||||||
"kwargs": {
|
|
||||||
"type": int,
|
|
||||||
"default": 512,
|
|
||||||
"metavar": "TOKENS",
|
|
||||||
"help": "Chunk size in tokens for RAG (default: 512)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"chunk_overlap": {
|
|
||||||
"flags": ("--chunk-overlap",),
|
|
||||||
"kwargs": {
|
|
||||||
"type": int,
|
|
||||||
"default": 50,
|
|
||||||
"metavar": "TOKENS",
|
|
||||||
"help": "Overlap between chunks in tokens (default: 50)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Preset system
|
# Preset system
|
||||||
"preset": {
|
"preset": {
|
||||||
"flags": ("--preset",),
|
"flags": ("--preset",),
|
||||||
@@ -138,6 +115,9 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Merge RAG arguments from common.py into universal arguments
|
||||||
|
UNIVERSAL_ARGUMENTS.update(RAG_ARGUMENTS)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# TIER 2: SOURCE-SPECIFIC ARGUMENTS
|
# TIER 2: SOURCE-SPECIFIC ARGUMENTS
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import argparse
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
|
from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
|
||||||
|
from .common import RAG_ARGUMENTS
|
||||||
|
|
||||||
|
|
||||||
# Scrape-specific argument definitions as data structure
|
# Scrape-specific argument definitions as data structure
|
||||||
@@ -177,32 +178,8 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
|||||||
"help": "Minimize output (WARNING level logging only)",
|
"help": "Minimize output (WARNING level logging only)",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# RAG chunking options (v2.10.0)
|
# RAG chunking options (imported from common.py - see RAG_ARGUMENTS)
|
||||||
"chunk_for_rag": {
|
# Note: RAG arguments are merged into SCRAPE_ARGUMENTS via .update() right after this dict literal (at module import time)
|
||||||
"flags": ("--chunk-for-rag",),
|
|
||||||
"kwargs": {
|
|
||||||
"action": "store_true",
|
|
||||||
"help": "Enable semantic chunking for RAG pipelines (generates rag_chunks.json)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"chunk_size": {
|
|
||||||
"flags": ("--chunk-size",),
|
|
||||||
"kwargs": {
|
|
||||||
"type": int,
|
|
||||||
"default": 512,
|
|
||||||
"metavar": "TOKENS",
|
|
||||||
"help": "Target chunk size in tokens for RAG (default: 512)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"chunk_overlap": {
|
|
||||||
"flags": ("--chunk-overlap",),
|
|
||||||
"kwargs": {
|
|
||||||
"type": int,
|
|
||||||
"default": 50,
|
|
||||||
"metavar": "TOKENS",
|
|
||||||
"help": "Overlap size between chunks in tokens (default: 50)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"no_preserve_code_blocks": {
|
"no_preserve_code_blocks": {
|
||||||
"flags": ("--no-preserve-code-blocks",),
|
"flags": ("--no-preserve-code-blocks",),
|
||||||
"kwargs": {
|
"kwargs": {
|
||||||
@@ -219,6 +196,9 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Merge RAG arguments from common.py
|
||||||
|
SCRAPE_ARGUMENTS.update(RAG_ARGUMENTS)
|
||||||
|
|
||||||
|
|
||||||
def add_scrape_arguments(parser: argparse.ArgumentParser) -> None:
|
def add_scrape_arguments(parser: argparse.ArgumentParser) -> None:
|
||||||
"""Add all scrape command arguments to a parser.
|
"""Add all scrape command arguments to a parser.
|
||||||
|
|||||||
@@ -25,16 +25,16 @@ class TestUniversalArguments:
|
|||||||
"""Test universal argument definitions."""
|
"""Test universal argument definitions."""
|
||||||
|
|
||||||
def test_universal_count(self):
|
def test_universal_count(self):
|
||||||
"""Should have exactly 15 universal arguments."""
|
"""Should have exactly 13 universal arguments (after Phase 1 consolidation)."""
|
||||||
assert len(UNIVERSAL_ARGUMENTS) == 15
|
assert len(UNIVERSAL_ARGUMENTS) == 13
|
||||||
|
|
||||||
def test_universal_argument_names(self):
|
def test_universal_argument_names(self):
|
||||||
"""Universal arguments should have expected names."""
|
"""Universal arguments should have expected names."""
|
||||||
expected_names = {
|
expected_names = {
|
||||||
'name', 'description', 'output',
|
'name', 'description', 'output',
|
||||||
'enhance', 'enhance_local', 'enhance_level', 'api_key',
|
'enhance_level', 'api_key', # Phase 1: consolidated from enhance + enhance_local
|
||||||
'dry_run', 'verbose', 'quiet',
|
'dry_run', 'verbose', 'quiet',
|
||||||
'chunk_for_rag', 'chunk_size', 'chunk_overlap',
|
'chunk_for_rag', 'chunk_size', 'chunk_overlap', # Phase 2: RAG args from common.py
|
||||||
'preset', 'config'
|
'preset', 'config'
|
||||||
}
|
}
|
||||||
assert set(UNIVERSAL_ARGUMENTS.keys()) == expected_names
|
assert set(UNIVERSAL_ARGUMENTS.keys()) == expected_names
|
||||||
@@ -114,9 +114,9 @@ class TestArgumentHelpers:
|
|||||||
"""Should return set of universal argument names."""
|
"""Should return set of universal argument names."""
|
||||||
names = get_universal_argument_names()
|
names = get_universal_argument_names()
|
||||||
assert isinstance(names, set)
|
assert isinstance(names, set)
|
||||||
assert len(names) == 15
|
assert len(names) == 13
|
||||||
assert 'name' in names
|
assert 'name' in names
|
||||||
assert 'enhance' in names
|
assert 'enhance_level' in names # Phase 1: consolidated flag
|
||||||
|
|
||||||
def test_get_source_specific_web(self):
|
def test_get_source_specific_web(self):
|
||||||
"""Should return web-specific arguments."""
|
"""Should return web-specific arguments."""
|
||||||
@@ -158,7 +158,7 @@ class TestCompatibleArguments:
|
|||||||
|
|
||||||
# Should include universal arguments
|
# Should include universal arguments
|
||||||
assert 'name' in compatible
|
assert 'name' in compatible
|
||||||
assert 'enhance' in compatible
|
assert 'enhance_level' in compatible # Phase 1: consolidated flag
|
||||||
|
|
||||||
# Should include web-specific arguments
|
# Should include web-specific arguments
|
||||||
assert 'max_pages' in compatible
|
assert 'max_pages' in compatible
|
||||||
@@ -232,7 +232,7 @@ class TestAddCreateArguments:
|
|||||||
|
|
||||||
# Should have universal arguments
|
# Should have universal arguments
|
||||||
assert 'name' in args
|
assert 'name' in args
|
||||||
assert 'enhance' in args
|
assert 'enhance_level' in args
|
||||||
assert 'chunk_for_rag' in args
|
assert 'chunk_for_rag' in args
|
||||||
|
|
||||||
# Should not have source-specific arguments (they're not added in default mode)
|
# Should not have source-specific arguments (they're not added in default mode)
|
||||||
@@ -351,7 +351,7 @@ class TestArgumentQuality:
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean_args = [
|
boolean_args = [
|
||||||
'enhance', 'enhance_local', 'dry_run', 'verbose', 'quiet',
|
'dry_run', 'verbose', 'quiet',
|
||||||
'chunk_for_rag', 'skip_scrape', 'resume', 'fresh', 'async_mode',
|
'chunk_for_rag', 'skip_scrape', 'resume', 'fresh', 'async_mode',
|
||||||
'no_issues', 'no_changelog', 'no_releases', 'scrape_only',
|
'no_issues', 'no_changelog', 'no_releases', 'scrape_only',
|
||||||
'skip_patterns', 'skip_test_examples', 'ocr', 'no_rate_limit'
|
'skip_patterns', 'skip_test_examples', 'ocr', 'no_rate_limit'
|
||||||
|
|||||||
Reference in New Issue
Block a user