refactor: rename all chunk flags to include explicit units
Replace the ambiguous --chunk-size / --chunk-overlap names, which meant different things in different contexts (tokens vs. characters), with fully explicit names:

- --chunk-size (RAG tokens) → --chunk-tokens
- --chunk-overlap (RAG tokens) → --chunk-overlap-tokens
- --chunk (enable RAG chunking) → --chunk-for-rag
- --streaming-chunk-size (chars) → --streaming-chunk-chars
- --streaming-overlap (chars) → --streaming-overlap-chars
- --chunk-size (PDF pages) → --pdf-pages-per-chunk (poc file)

Also aligns the stream_parser.py help text with the streaming_ingest.py standalone parser.

All 2167 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -291,7 +291,7 @@ class TestChunkingCLIIntegration:
|
||||
"""Test chunking via CLI arguments."""
|
||||
|
||||
def test_chunk_flag(self, tmp_path):
|
||||
"""Test --chunk flag enables chunking."""
|
||||
"""Test --chunk-for-rag flag enables chunking."""
|
||||
from skill_seekers.cli.package_skill import package_skill
|
||||
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=True)
|
||||
@@ -301,7 +301,7 @@ class TestChunkingCLIIntegration:
|
||||
open_folder_after=False,
|
||||
skip_quality_check=True,
|
||||
target="langchain",
|
||||
enable_chunking=True, # --chunk flag
|
||||
enable_chunking=True, # --chunk-for-rag flag
|
||||
chunk_max_tokens=512,
|
||||
preserve_code_blocks=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user