refactor: rename all chunk flags to include explicit units

Replace ambiguous --chunk-size / --chunk-overlap names that meant different things in different contexts (tokens vs characters) with fully explicit names:

- --chunk-size (RAG tokens) → --chunk-tokens
- --chunk-overlap (RAG tokens) → --chunk-overlap-tokens
- --chunk (enable RAG chunking) → --chunk-for-rag
- --streaming-chunk-size (chars) → --streaming-chunk-chars
- --streaming-overlap (chars) → --streaming-overlap-chars
- --chunk-size (PDF pages) → --pdf-pages-per-chunk (poc file)

Also aligns stream_parser.py help with streaming_ingest.py standalone parser.

All 2167 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 22:07:56 +03:00
parent b636a0a292
commit 7a2ffb286c
12 changed files with 40 additions and 40 deletions
--- a/src/skill_seekers/cli/rag_chunker.py
+++ b/src/skill_seekers/cli/rag_chunker.py
@@ -383,8 +383,8 @@ def main():
    )
    parser.add_argument("skill_dir", type=Path, help="Path to skill directory")
    parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
-    parser.add_argument("--chunk-size", type=int, default=512, help="Target chunk size in tokens")
-    parser.add_argument("--chunk-overlap", type=int, default=50, help="Overlap size in tokens")
+    parser.add_argument("--chunk-tokens", type=int, default=512, help="Target chunk size in tokens")
+    parser.add_argument("--chunk-overlap-tokens", type=int, default=50, help="Overlap size in tokens")
    parser.add_argument("--no-code-blocks", action="store_true", help="Don't preserve code blocks")
    parser.add_argument("--no-paragraphs", action="store_true", help="Don't preserve paragraphs")

@@ -392,8 +392,8 @@ def main():

    # Create chunker
    chunker = RAGChunker(
-        chunk_size=args.chunk_size,
-        chunk_overlap=args.chunk_overlap,
+        chunk_size=args.chunk_tokens,
+        chunk_overlap=args.chunk_overlap_tokens,
        preserve_code_blocks=not args.no_code_blocks,
        preserve_paragraphs=not args.no_paragraphs,
    )