Add large documentation handling (40K+ pages support)
Implement comprehensive system for handling very large documentation sites with intelligent splitting strategies and router/hub architecture. **New CLI Tools:** - cli/split_config.py: Split large configs into focused sub-skills * Strategies: auto, category, router, size * Configurable target pages per skill (default: 5000) * Dry-run mode for preview - cli/generate_router.py: Create intelligent router/hub skills * Auto-generates routing logic based on keywords * Creates SKILL.md with topic-to-skill mapping * Infers router name from sub-skills - cli/package_multi.py: Batch package multiple skills * Package router + all sub-skills in one command * Progress tracking for each skill **MCP Integration:** - Added split_config tool (8 total MCP tools now) - Added generate_router tool - Supports 40K+ page documentation via MCP **Configuration:** - New split_strategy parameter in configs - split_config section for fine-tuned control - checkpoint section for resume capability (ready for Phase 4) - Example: configs/godot-large-example.json **Documentation:** - docs/LARGE_DOCUMENTATION.md (500+ lines) * Complete guide for 10K+ page documentation * All splitting strategies explained * Detailed workflows with examples * Best practices and troubleshooting * Real-world examples (AWS, Microsoft, Godot) **Features:** ✅ Handle 40K+ page documentation efficiently ✅ Parallel scraping support (5x-10x faster) ✅ Router + sub-skills architecture ✅ Intelligent keyword-based routing ✅ Multiple splitting strategies ✅ Full MCP integration 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
81
cli/package_multi.py
Normal file
81
cli/package_multi.py
Normal file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-Skill Packager
|
||||
|
||||
Package multiple skills at once. Useful for packaging router + sub-skills together.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
||||
|
||||
def package_skill(skill_dir: Path) -> bool:
|
||||
"""Package a single skill"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(Path(__file__).parent / "package_skill.py"), str(skill_dir)],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
return result.returncode == 0
|
||||
except Exception as e:
|
||||
print(f"❌ Error packaging {skill_dir}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Package multiple skills at once",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Package all godot skills
|
||||
python3 package_multi.py output/godot*/
|
||||
|
||||
# Package specific skills
|
||||
python3 package_multi.py output/godot-2d/ output/godot-3d/ output/godot-scripting/
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'skill_dirs',
|
||||
nargs='+',
|
||||
help='Skill directories to package'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"MULTI-SKILL PACKAGER")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
skill_dirs = [Path(d) for d in args.skill_dirs]
|
||||
success_count = 0
|
||||
total_count = len(skill_dirs)
|
||||
|
||||
for skill_dir in skill_dirs:
|
||||
if not skill_dir.exists():
|
||||
print(f"⚠️ Skipping (not found): {skill_dir}")
|
||||
continue
|
||||
|
||||
if not (skill_dir / "SKILL.md").exists():
|
||||
print(f"⚠️ Skipping (no SKILL.md): {skill_dir}")
|
||||
continue
|
||||
|
||||
print(f"📦 Packaging: {skill_dir.name}")
|
||||
if package_skill(skill_dir):
|
||||
success_count += 1
|
||||
print(f" ✅ Success")
|
||||
else:
|
||||
print(f" ❌ Failed")
|
||||
print("")
|
||||
|
||||
print(f"{'='*60}")
|
||||
print(f"SUMMARY: {success_count}/{total_count} skills packaged")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user