feat(cli): Phase 2 - Organize RAG arguments into common.py (DRY principle)

Changes:
- Added RAG_ARGUMENTS dict to common.py with 3 flags:
  - --chunk-for-rag (enable semantic chunking)
  - --chunk-size (default: 512 tokens)
  - --chunk-overlap (default: 50 tokens)
- Removed duplicate RAG arguments from create.py and scrape.py
- Used .update() pattern to merge RAG_ARGUMENTS into UNIVERSAL_ARGUMENTS and SCRAPE_ARGUMENTS
- Added helper functions: add_rag_arguments(), get_rag_argument_names()
- Updated tests to reflect new argument count (15 → 13 universal arguments)
- Fixed test expectations for boolean_args (removed 'enhance', 'enhance_local')

Result:
- Single source of truth for RAG arguments in common.py
- DRY principle maintained across all commands
- All 88 key tests passing

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 14:41:04 +03:00
parent ba1670a220
commit 13838cb5a9
4 changed files with 81 additions and 62 deletions
--- a/src/skill_seekers/cli/arguments/common.py
+++ b/src/skill_seekers/cli/arguments/common.py
@@ -68,6 +68,37 @@ COMMON_ARGUMENTS: Dict[str, Dict[str, Any]] = {
 }


+# RAG (Retrieval-Augmented Generation) arguments
+# These are shared across commands that support RAG chunking
+RAG_ARGUMENTS: Dict[str, Dict[str, Any]] = {
+    "chunk_for_rag": {
+        "flags": ("--chunk-for-rag",),
+        "kwargs": {
+            "action": "store_true",
+            "help": "Enable semantic chunking for RAG pipelines",
+        },
+    },
+    "chunk_size": {
+        "flags": ("--chunk-size",),
+        "kwargs": {
+            "type": int,
+            "default": 512,
+            "metavar": "TOKENS",
+            "help": "Chunk size in tokens for RAG (default: 512)",
+        },
+    },
+    "chunk_overlap": {
+        "flags": ("--chunk-overlap",),
+        "kwargs": {
+            "type": int,
+            "default": 50,
+            "metavar": "TOKENS",
+            "help": "Overlap between chunks in tokens (default: 50)",
+        },
+    },
+}
+
+
 def add_common_arguments(parser: argparse.ArgumentParser) -> None:
    """Add common arguments to a parser.
    
@@ -89,13 +120,41 @@ def add_common_arguments(parser: argparse.ArgumentParser) -> None:

 def get_common_argument_names() -> set:
    """Get the set of common argument destination names.
-    
+
    Returns:
        Set of argument dest names (e.g., {'config', 'name', 'description', ...})
    """
    return set(COMMON_ARGUMENTS.keys())


+def add_rag_arguments(parser: argparse.ArgumentParser) -> None:
+    """Add RAG (Retrieval-Augmented Generation) arguments to a parser.
+
+    These arguments enable semantic chunking for RAG pipelines.
+
+    Args:
+        parser: The ArgumentParser to add arguments to
+
+    Example:
+        >>> parser = argparse.ArgumentParser()
+        >>> add_rag_arguments(parser)
+        >>> # Now parser has --chunk-for-rag, --chunk-size, --chunk-overlap
+    """
+    for arg_name, arg_def in RAG_ARGUMENTS.items():
+        flags = arg_def["flags"]
+        kwargs = arg_def["kwargs"]
+        parser.add_argument(*flags, **kwargs)
+
+
+def get_rag_argument_names() -> set:
+    """Get the set of RAG argument destination names.
+
+    Returns:
+        Set of argument dest names (e.g., {'chunk_for_rag', 'chunk_size', 'chunk_overlap'})
+    """
+    return set(RAG_ARGUMENTS.keys())
+
+
 def get_argument_help(arg_name: str) -> str:
    """Get the help text for a common argument.
    
--- a/src/skill_seekers/cli/arguments/create.py
+++ b/src/skill_seekers/cli/arguments/create.py
@@ -13,6 +13,7 @@ import argparse
 from typing import Dict, Any, Set, List

 from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
+from .common import RAG_ARGUMENTS


 # =============================================================================
@@ -91,32 +92,8 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
            "help": "Minimize output (WARNING level only)",
        },
    },
-    # RAG features (NEW - universal for all sources!)
-    "chunk_for_rag": {
-        "flags": ("--chunk-for-rag",),
-        "kwargs": {
-            "action": "store_true",
-            "help": "Enable semantic chunking for RAG pipelines (all sources)",
-        },
-    },
-    "chunk_size": {
-        "flags": ("--chunk-size",),
-        "kwargs": {
-            "type": int,
-            "default": 512,
-            "metavar": "TOKENS",
-            "help": "Chunk size in tokens for RAG (default: 512)",
-        },
-    },
-    "chunk_overlap": {
-        "flags": ("--chunk-overlap",),
-        "kwargs": {
-            "type": int,
-            "default": 50,
-            "metavar": "TOKENS",
-            "help": "Overlap between chunks in tokens (default: 50)",
-        },
-    },
+    # RAG features (imported from common.py - see RAG_ARGUMENTS)
+    # Note: RAG arguments are merged into UNIVERSAL_ARGUMENTS at runtime
    # Preset system
    "preset": {
        "flags": ("--preset",),
@@ -138,6 +115,9 @@ UNIVERSAL_ARGUMENTS: Dict[str, Dict[str, Any]] = {
    },
 }

+# Merge RAG arguments from common.py into universal arguments
+UNIVERSAL_ARGUMENTS.update(RAG_ARGUMENTS)
+

 # =============================================================================
 # TIER 2: SOURCE-SPECIFIC ARGUMENTS
--- a/src/skill_seekers/cli/arguments/scrape.py
+++ b/src/skill_seekers/cli/arguments/scrape.py
@@ -11,6 +11,7 @@ import argparse
 from typing import Dict, Any

 from skill_seekers.cli.constants import DEFAULT_RATE_LIMIT
+from .common import RAG_ARGUMENTS


 # Scrape-specific argument definitions as data structure
@@ -177,32 +178,8 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
            "help": "Minimize output (WARNING level logging only)",
        },
    },
-    # RAG chunking options (v2.10.0)
-    "chunk_for_rag": {
-        "flags": ("--chunk-for-rag",),
-        "kwargs": {
-            "action": "store_true",
-            "help": "Enable semantic chunking for RAG pipelines (generates rag_chunks.json)",
-        },
-    },
-    "chunk_size": {
-        "flags": ("--chunk-size",),
-        "kwargs": {
-            "type": int,
-            "default": 512,
-            "metavar": "TOKENS",
-            "help": "Target chunk size in tokens for RAG (default: 512)",
-        },
-    },
-    "chunk_overlap": {
-        "flags": ("--chunk-overlap",),
-        "kwargs": {
-            "type": int,
-            "default": 50,
-            "metavar": "TOKENS",
-            "help": "Overlap size between chunks in tokens (default: 50)",
-        },
-    },
+    # RAG chunking options (imported from common.py - see RAG_ARGUMENTS)
+    # Note: RAG arguments will be merged at runtime
    "no_preserve_code_blocks": {
        "flags": ("--no-preserve-code-blocks",),
        "kwargs": {
@@ -219,6 +196,9 @@ SCRAPE_ARGUMENTS: Dict[str, Dict[str, Any]] = {
    },
 }

+# Merge RAG arguments from common.py
+SCRAPE_ARGUMENTS.update(RAG_ARGUMENTS)
+

 def add_scrape_arguments(parser: argparse.ArgumentParser) -> None:
    """Add all scrape command arguments to a parser.
--- a/tests/test_create_arguments.py
+++ b/tests/test_create_arguments.py
@@ -25,16 +25,16 @@ class TestUniversalArguments:
    """Test universal argument definitions."""

    def test_universal_count(self):
-        """Should have exactly 15 universal arguments."""
-        assert len(UNIVERSAL_ARGUMENTS) == 15
+        """Should have exactly 13 universal arguments (after Phase 1 consolidation)."""
+        assert len(UNIVERSAL_ARGUMENTS) == 13

    def test_universal_argument_names(self):
        """Universal arguments should have expected names."""
        expected_names = {
            'name', 'description', 'output',
-            'enhance', 'enhance_local', 'enhance_level', 'api_key',
+            'enhance_level', 'api_key',  # Phase 1: consolidated from enhance + enhance_local
            'dry_run', 'verbose', 'quiet',
-            'chunk_for_rag', 'chunk_size', 'chunk_overlap',
+            'chunk_for_rag', 'chunk_size', 'chunk_overlap',  # Phase 2: RAG args from common.py
            'preset', 'config'
        }
        assert set(UNIVERSAL_ARGUMENTS.keys()) == expected_names
@@ -114,9 +114,9 @@ class TestArgumentHelpers:
        """Should return set of universal argument names."""
        names = get_universal_argument_names()
        assert isinstance(names, set)
-        assert len(names) == 15
+        assert len(names) == 13
        assert 'name' in names
-        assert 'enhance' in names
+        assert 'enhance_level' in names  # Phase 1: consolidated flag

    def test_get_source_specific_web(self):
        """Should return web-specific arguments."""
@@ -158,7 +158,7 @@ class TestCompatibleArguments:

        # Should include universal arguments
        assert 'name' in compatible
-        assert 'enhance' in compatible
+        assert 'enhance_level' in compatible  # Phase 1: consolidated flag

        # Should include web-specific arguments
        assert 'max_pages' in compatible
@@ -232,7 +232,7 @@ class TestAddCreateArguments:

        # Should have universal arguments
        assert 'name' in args
-        assert 'enhance' in args
+        assert 'enhance_level' in args
        assert 'chunk_for_rag' in args

        # Should not have source-specific arguments (they're not added in default mode)
@@ -351,7 +351,7 @@ class TestArgumentQuality:
        }

        boolean_args = [
-            'enhance', 'enhance_local', 'dry_run', 'verbose', 'quiet',
+            'dry_run', 'verbose', 'quiet',
            'chunk_for_rag', 'skip_scrape', 'resume', 'fresh', 'async_mode',
            'no_issues', 'no_changelog', 'no_releases', 'scrape_only',
            'skip_patterns', 'skip_test_examples', 'ocr', 'no_rate_limit'