refactor: rename all chunk flags to include explicit units

Replace the ambiguous --chunk-size / --chunk-overlap flag names, which meant
different things in different contexts (tokens, characters, or PDF pages),
with fully explicit names:

- --chunk-size (RAG tokens)     → --chunk-tokens
- --chunk-overlap (RAG tokens)  → --chunk-overlap-tokens
- --chunk (enable RAG chunking) → --chunk-for-rag
- --streaming-chunk-size (chars) → --streaming-chunk-chars
- --streaming-overlap (chars)    → --streaming-overlap-chars
- --chunk-size (PDF pages)       → --pdf-pages-per-chunk (poc file)

Also aligns the help text in stream_parser.py with the standalone parser in streaming_ingest.py.
All 2167 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-02-24 22:07:56 +03:00
parent b636a0a292
commit 7a2ffb286c
12 changed files with 40 additions and 40 deletions

View File

@@ -291,7 +291,7 @@ class TestChunkingCLIIntegration:
"""Test chunking via CLI arguments."""
def test_chunk_flag(self, tmp_path):
"""Test --chunk flag enables chunking."""
"""Test --chunk-for-rag flag enables chunking."""
from skill_seekers.cli.package_skill import package_skill
skill_dir = create_test_skill(tmp_path, large_doc=True)
@@ -301,7 +301,7 @@ class TestChunkingCLIIntegration:
open_folder_after=False,
skip_quality_check=True,
target="langchain",
enable_chunking=True, # --chunk flag
enable_chunking=True, # --chunk-for-rag flag
chunk_max_tokens=512,
preserve_code_blocks=True,
)

View File

@@ -32,8 +32,8 @@ class TestParserSync:
["skill-seekers", "scrape", "--help"], capture_output=True, text=True
)
assert "--chunk-for-rag" in result.stdout, "Help should show --chunk-for-rag flag"
assert "--chunk-size" in result.stdout, "Help should show --chunk-size flag"
assert "--chunk-overlap" in result.stdout, "Help should show --chunk-overlap flag"
assert "--chunk-tokens" in result.stdout, "Help should show --chunk-tokens flag"
assert "--chunk-overlap-tokens" in result.stdout, "Help should show --chunk-overlap-tokens flag"
def test_scrape_verbose_flag_works(self):
"""Test that --verbose flag (previously missing) now works."""

View File

@@ -40,8 +40,8 @@ class TestUniversalArguments:
"verbose",
"quiet",
"chunk_for_rag",
"chunk_size",
"chunk_overlap", # Phase 2: RAG args from common.py
"chunk_tokens",
"chunk_overlap_tokens", # Phase 2: RAG args from common.py
"preset",
"config",
# Phase 2: Workflow arguments (universal workflow support)