Bug fixes: - Fix --var flag silently dropped in create routing (args.workflow_var → args.var) - Fix double _score_code_quality() call in word scraper - Add .docx file extension validation in WordToSkillConverter - Fix weaviate ImportError masked by generic Exception handler - Fix RAG chunking crash using non-existent converter.output_dir Chunking pipeline improvements: - Wire --chunk-overlap-tokens through entire package pipeline (package_skill → adaptor.package → format_skill_md → _maybe_chunk_content → RAGChunker) - Add auto-scaling overlap: max(50, chunk_tokens//10) when chunk size is non-default - Rename --no-preserve-code to --no-preserve-code-blocks (backward-compat alias kept) - Replace hardcoded 512/50 chunk defaults with DEFAULT_CHUNK_TOKENS/DEFAULT_CHUNK_OVERLAP_TOKENS constants across all 12 concrete adaptors, rag_chunker, base, and package_skill Code quality: - Extract shared _generate_openai_embeddings() and _generate_st_embeddings() to SkillAdaptor base class, removing ~150 lines of duplication from chroma/weaviate/pinecone - Add Pinecone adaptor with full upload support (pinecone_adaptor.py) Tests (14 new): - chunk_overlap_tokens parameter wiring, auto-scaling overlap, preserve_code_blocks flag - .docx/.doc/no-extension file validation, --var flag routing E2E - Embedding method inheritance verification, backward-compatible flag aliases Docs: - Update CHANGELOG, CLI_REFERENCE, API_REFERENCE, packaging guide (EN+ZH) - Update README test count badge (1880+ → 2283+) All 2283 tests passing, 8 skipped, 0 failures. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
153 lines
4.5 KiB
Python
153 lines
4.5 KiB
Python
"""Package command argument definitions.
|
|
|
|
This module defines ALL arguments for the package command in ONE place.
|
|
Both package_skill.py (standalone) and parsers/package_parser.py (unified CLI)
|
|
import and use these definitions.
|
|
"""
|
|
|
|
import argparse
|
|
from typing import Any
|
|
|
|
from .common import DEFAULT_CHUNK_TOKENS, DEFAULT_CHUNK_OVERLAP_TOKENS
|
|
|
|
# Single source of truth for the package command's CLI surface.
# Every entry maps an argument name to:
#   "flags"  - tuple of positional args for ``parser.add_argument``
#   "kwargs" - keyword args for ``parser.add_argument``
# add_package_arguments() walks this dict in insertion order, so the order of
# entries below is the order in which arguments are registered.
PACKAGE_ARGUMENTS: dict[str, dict[str, Any]] = {
    # ---- Positional ------------------------------------------------------
    "skill_directory": {
        "flags": ("skill_directory",),
        "kwargs": {"type": str, "help": "Skill directory path (e.g., output/react/)"},
    },
    # ---- Control flags ---------------------------------------------------
    "no_open": {
        "flags": ("--no-open",),
        "kwargs": {"action": "store_true", "help": "Don't open output folder after packaging"},
    },
    "skip_quality_check": {
        "flags": ("--skip-quality-check",),
        "kwargs": {"action": "store_true", "help": "Skip quality checks before packaging"},
    },
    # ---- Target platform and upload --------------------------------------
    "target": {
        "flags": ("--target",),
        "kwargs": {
            "type": str,
            "choices": [
                "claude", "gemini", "openai", "markdown",
                "langchain", "llama-index", "haystack",
                "weaviate", "chroma", "faiss", "qdrant", "pinecone",
            ],
            "default": "claude",
            "help": "Target LLM platform (default: claude)",
            "metavar": "PLATFORM",
        },
    },
    "upload": {
        "flags": ("--upload",),
        "kwargs": {
            "action": "store_true",
            "help": "Automatically upload after packaging (requires platform API key)",
        },
    },
    # ---- Streaming ingestion (sizes measured in characters) --------------
    "streaming": {
        "flags": ("--streaming",),
        "kwargs": {
            "action": "store_true",
            "help": "Use streaming ingestion for large docs (memory-efficient)",
        },
    },
    "streaming_chunk_chars": {
        "flags": ("--streaming-chunk-chars",),
        "kwargs": {
            "type": int,
            "default": 4000,
            "help": "Maximum characters per chunk (streaming mode, default: 4000)",
            "metavar": "N",
        },
    },
    "streaming_overlap_chars": {
        "flags": ("--streaming-overlap-chars",),
        "kwargs": {
            "type": int,
            "default": 200,
            "help": "Character overlap between chunks (streaming mode, default: 200)",
            "metavar": "N",
        },
    },
    "batch_size": {
        "flags": ("--batch-size",),
        "kwargs": {
            "type": int,
            "default": 100,
            "help": "Number of chunks per batch (streaming mode, default: 100)",
            "metavar": "N",
        },
    },
    # ---- RAG chunking (sizes measured in tokens) -------------------------
    "chunk_for_rag": {
        "flags": ("--chunk-for-rag",),
        "kwargs": {
            "action": "store_true",
            "help": "Enable intelligent chunking for RAG platforms (auto-enabled for RAG adaptors)",
        },
    },
    "chunk_tokens": {
        "flags": ("--chunk-tokens",),
        "kwargs": {
            "type": int,
            # Default and help text both come from the shared constant.
            "default": DEFAULT_CHUNK_TOKENS,
            "help": f"Maximum tokens per chunk (default: {DEFAULT_CHUNK_TOKENS})",
            "metavar": "N",
        },
    },
    "chunk_overlap_tokens": {
        "flags": ("--chunk-overlap-tokens",),
        "kwargs": {
            "type": int,
            # Default and help text both come from the shared constant.
            "default": DEFAULT_CHUNK_OVERLAP_TOKENS,
            "help": f"Overlap between chunks in tokens (default: {DEFAULT_CHUNK_OVERLAP_TOKENS})",
            "metavar": "N",
        },
    },
    "no_preserve_code_blocks": {
        "flags": ("--no-preserve-code-blocks",),
        "kwargs": {
            "action": "store_true",
            "help": "Allow code block splitting (default: code blocks preserved)",
        },
    },
}
|
|
|
|
|
|
def add_package_arguments(parser: argparse.ArgumentParser) -> None:
    """Register every package command argument on ``parser``.

    Each definition in ``PACKAGE_ARGUMENTS`` is forwarded to
    ``parser.add_argument(*flags, **kwargs)`` in the dict's insertion order.
    A hidden ``--no-preserve-code`` alias is then added; it stores into the
    same ``no_preserve_code_blocks`` destination as
    ``--no-preserve-code-blocks``.

    Args:
        parser: The argument parser to extend. It is modified in place.
    """
    # Only the definitions are needed here; the dict keys are not used.
    for arg_def in PACKAGE_ARGUMENTS.values():
        parser.add_argument(*arg_def["flags"], **arg_def["kwargs"])

    # Deprecated alias kept for backward compatibility (to be removed in v4.0.0).
    # help=argparse.SUPPRESS hides it from the generated --help output.
    parser.add_argument(
        "--no-preserve-code",
        dest="no_preserve_code_blocks",
        action="store_true",
        help=argparse.SUPPRESS,
    )
|