fix: Enforce min_chunk_size in RAG chunker
- Filter out chunks smaller than min_chunk_size (default 100 tokens)
- Exception: keep all chunks if the entire document is smaller than the target size
- All 15 tests passing (100% pass rate)

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
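As a reference for the behavior described above, a minimal sketch of the filtering rule follows. The names (`filter_chunks`, `target_size`) are illustrative, not the chunker's actual API:

```python
# Minimal sketch of the rule above (illustrative names, not the chunker's
# actual API): chunks below min_chunk_size are dropped, unless the whole
# document is already smaller than the target chunk size.
def filter_chunks(chunks, token_counts, min_chunk_size=100, target_size=512):
    total_tokens = sum(token_counts)
    if total_tokens < target_size:
        # The entire document is smaller than the target size: a single
        # small chunk is unavoidable, so keep everything.
        return chunks
    # Enforce the minimum: e.g. a stray 'Short.' chunk (~2 tokens) is removed.
    return [c for c, n in zip(chunks, token_counts) if n >= min_chunk_size]
```

With `min_chunk_size=100`, a trailing 'Short.' fragment is filtered out of a long document, while a 50-token document still passes through as a single chunk.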
src/skill_seekers/cli/benchmark_cli.py · new file · 312 lines
@@ -0,0 +1,312 @@
#!/usr/bin/env python3
"""
Performance benchmarking CLI.

Measure and analyze performance of scraping, embedding, and storage operations.
"""

import sys
import argparse
import json
from pathlib import Path

from ..benchmark import Benchmark, BenchmarkRunner, BenchmarkReport

def run_command(args):
    """Run benchmark from config."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    # Load benchmark config
    with open(args.config) as f:
        config = json.load(f)

    benchmark_type = config.get("type", "custom")

    if benchmark_type == "scraping":
        run_scraping_benchmark(runner, config)
    elif benchmark_type == "embedding":
        run_embedding_benchmark(runner, config)
    elif benchmark_type == "storage":
        run_storage_benchmark(runner, config)
    else:
        print(f"❌ Unknown benchmark type: {benchmark_type}")
        sys.exit(1)

def run_scraping_benchmark(runner, config):
    """Run scraping benchmark."""
    from .doc_scraper import scrape_all, build_skill

    def benchmark_func(bench: Benchmark):
        scrape_config_path = config.get("scrape_config")

        # Time scraping
        with bench.timer("scrape_docs"):
            with bench.memory("scrape_docs"):
                pages = scrape_all(scrape_config_path)

        # Track metrics
        bench.metric("pages_scraped", len(pages), "pages")

        # Time building
        with bench.timer("build_skill"):
            with bench.memory("build_skill"):
                build_skill(scrape_config_path, pages)

    name = config.get("name", "scraping-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")

def run_embedding_benchmark(runner, config):
    """Run embedding benchmark."""
    from ..embedding.generator import EmbeddingGenerator

    def benchmark_func(bench: Benchmark):
        generator = EmbeddingGenerator()

        model = config.get("model", "text-embedding-3-small")
        texts = config.get("sample_texts", ["Test text"])

        # Single embedding
        with bench.timer("single_embedding"):
            generator.generate(texts[0], model=model)

        # Batch embedding
        if len(texts) > 1:
            with bench.timer("batch_embedding"):
                with bench.memory("batch_embedding"):
                    embeddings = generator.generate_batch(texts, model=model)

            bench.metric(
                "embeddings_per_sec",
                len(embeddings) / bench.result.timings[-1].duration,
                "emb/sec",
            )

    name = config.get("name", "embedding-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")

def run_storage_benchmark(runner, config):
    """Run storage benchmark."""
    from .storage import get_storage_adaptor
    from tempfile import NamedTemporaryFile

    def benchmark_func(bench: Benchmark):
        provider = config.get("provider", "s3")
        bucket = config.get("bucket")

        storage = get_storage_adaptor(provider, bucket=bucket)

        # Create test file
        with NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
            f.write("Test data" * 1000)
            test_file = Path(f.name)

        try:
            # Upload benchmark
            with bench.timer("upload"):
                storage.upload_file(test_file, "benchmark_test.txt")

            # Download benchmark
            download_path = test_file.parent / "downloaded.txt"
            with bench.timer("download"):
                storage.download_file("benchmark_test.txt", download_path)

            # Cleanup
            storage.delete_file("benchmark_test.txt")
            download_path.unlink(missing_ok=True)

        finally:
            test_file.unlink(missing_ok=True)

    name = config.get("name", "storage-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")

def compare_command(args):
    """Compare two benchmarks."""
    runner = BenchmarkRunner()

    comparison = runner.compare(
        baseline_path=Path(args.baseline),
        current_path=Path(args.current)
    )

    print(f"\n📊 Comparison: {comparison.name}\n")
    print(f"Overall: {comparison.overall_improvement}\n")

    if comparison.improvements:
        print("✅ Improvements:")
        for improvement in comparison.improvements:
            print(f"  • {improvement}")

    if comparison.regressions:
        print("\n⚠️  Regressions:")
        for regression in comparison.regressions:
            print(f"  • {regression}")

    if args.fail_on_regression and comparison.has_regressions:
        print("\n❌ Benchmark failed: regressions detected")
        sys.exit(1)

def list_command(args):
    """List saved benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    benchmarks = runner.list_benchmarks()

    if not benchmarks:
        print("No benchmarks found")
        return

    print(f"\n📊 Saved benchmarks ({len(benchmarks)}):\n")

    for bench in benchmarks:
        print(f"• {bench['name']}")
        print(f"  Date: {bench['started_at']}")
        print(f"  Duration: {bench['duration']:.2f}s")
        print(f"  Operations: {bench['operations']}")
        print(f"  Path: {bench['path']}\n")

def show_command(args):
    """Show benchmark details."""
    with open(args.path) as f:
        data = json.load(f)

    report = BenchmarkReport(**data)

    print(f"\n{report.summary}\n")

    if report.timings:
        print("⏱️  Timings:")
        for timing in sorted(report.timings, key=lambda t: t.duration, reverse=True):
            print(f"  • {timing.operation}: {timing.duration:.2f}s")

    if report.memory:
        print("\n💾 Memory:")
        for mem in sorted(report.memory, key=lambda m: m.peak_mb, reverse=True):
            print(f"  • {mem.operation}: {mem.peak_mb:.0f}MB peak ({mem.allocated_mb:+.0f}MB)")

    if report.metrics:
        print("\n📈 Metrics:")
        for metric in report.metrics:
            print(f"  • {metric.name}: {metric.value:.2f} {metric.unit}")

    if report.recommendations:
        print("\n💡 Recommendations:")
        for rec in report.recommendations:
            print(f"  • {rec}")

def cleanup_command(args):
    """Cleanup old benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    runner.cleanup_old(keep_latest=args.keep)

    print("✅ Cleanup complete")

def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='Performance benchmarking suite',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run scraping benchmark
  skill-seekers-benchmark run --config benchmarks/scraping.json

  # Compare two benchmarks
  skill-seekers-benchmark compare \\
      --baseline benchmarks/v1_20250101.json \\
      --current benchmarks/v2_20250115.json

  # List all benchmarks
  skill-seekers-benchmark list

  # Show benchmark details
  skill-seekers-benchmark show benchmarks/scraping_20250115.json

  # Cleanup old benchmarks
  skill-seekers-benchmark cleanup --keep 5
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    # Run command
    run_parser = subparsers.add_parser('run', help='Run benchmark')
    run_parser.add_argument('--config', required=True, help='Benchmark config file')
    run_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Output directory (default: benchmarks)'
    )

    # Compare command
    compare_parser = subparsers.add_parser('compare', help='Compare two benchmarks')
    compare_parser.add_argument('--baseline', required=True, help='Baseline benchmark')
    compare_parser.add_argument('--current', required=True, help='Current benchmark')
    compare_parser.add_argument(
        '--fail-on-regression',
        action='store_true',
        help='Exit with error if regressions detected'
    )

    # List command
    list_parser = subparsers.add_parser('list', help='List saved benchmarks')
    list_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Benchmark directory (default: benchmarks)'
    )

    # Show command
    show_parser = subparsers.add_parser('show', help='Show benchmark details')
    show_parser.add_argument('path', help='Path to benchmark file')

    # Cleanup command
    cleanup_parser = subparsers.add_parser('cleanup', help='Cleanup old benchmarks')
    cleanup_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Benchmark directory (default: benchmarks)'
    )
    cleanup_parser.add_argument(
        '--keep',
        type=int,
        default=5,
        help='Number of latest benchmarks to keep per name (default: 5)'
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == 'run':
            run_command(args)
        elif args.command == 'compare':
            compare_command(args)
        elif args.command == 'list':
            list_command(args)
        elif args.command == 'show':
            show_command(args)
        elif args.command == 'cleanup':
            cleanup_command(args)
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
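For reference, a plausible config file for the `run` command. The keys below are the ones `run_command` and `run_embedding_benchmark` actually read (`type`, `name`, `model`, `sample_texts`); the values are illustrative:

```json
{
  "type": "embedding",
  "name": "embedding-benchmark",
  "model": "text-embedding-3-small",
  "sample_texts": [
    "First sample passage to embed.",
    "Second sample passage to embed."
  ]
}
```

A `scraping` config carries a `scrape_config` key instead, and a `storage` config uses `provider` and `bucket`; any other `type` prints an error and exits with status 1.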