#!/usr/bin/env python3
"""
Performance benchmarking CLI.

Measure and analyze performance of scraping, embedding, and storage operations.
"""

import sys
import argparse
import json
from pathlib import Path

from ..benchmark import Benchmark, BenchmarkRunner, BenchmarkReport


def run_command(args):
    """Run benchmark from config."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    # Load benchmark config
    with open(args.config) as f:
        config = json.load(f)

    benchmark_type = config.get("type", "custom")

    if benchmark_type == "scraping":
        run_scraping_benchmark(runner, config)
    elif benchmark_type == "embedding":
        run_embedding_benchmark(runner, config)
    elif benchmark_type == "storage":
        run_storage_benchmark(runner, config)
    else:
        print(f"āŒ Unknown benchmark type: {benchmark_type}")
        sys.exit(1)


def run_scraping_benchmark(runner, config):
    """Run scraping benchmark."""
    from .doc_scraper import scrape_all, build_skill

    def benchmark_func(bench: Benchmark):
        scrape_config_path = config.get("scrape_config")

        # Time scraping
        with bench.timer("scrape_docs"), bench.memory("scrape_docs"):
            pages = scrape_all(scrape_config_path)

        # Track metrics
        bench.metric("pages_scraped", len(pages), "pages")

        # Time building
        with bench.timer("build_skill"), bench.memory("build_skill"):
            build_skill(scrape_config_path, pages)

    name = config.get("name", "scraping-benchmark")
    report = runner.run(name, benchmark_func)
    print(f"\n{report.summary}")


def run_embedding_benchmark(runner, config):
    """Run embedding benchmark."""
    from ..embedding.generator import EmbeddingGenerator

    def benchmark_func(bench: Benchmark):
        generator = EmbeddingGenerator()
        model = config.get("model", "text-embedding-3-small")
        texts = config.get("sample_texts", ["Test text"])

        # Single embedding
        with bench.timer("single_embedding"):
            generator.generate(texts[0], model=model)

        # Batch embedding
        if len(texts) > 1:
            with bench.timer("batch_embedding"), bench.memory("batch_embedding"):
                embeddings = generator.generate_batch(texts, model=model)

            bench.metric(
                "embeddings_per_sec",
                len(embeddings) / bench.result.timings[-1].duration,
                "emb/sec",
            )

    name = config.get("name", "embedding-benchmark")
    report = runner.run(name, benchmark_func)
    print(f"\n{report.summary}")


def run_storage_benchmark(runner, config):
    """Run storage benchmark."""
    from .storage import get_storage_adaptor
    from tempfile import NamedTemporaryFile

    def benchmark_func(bench: Benchmark):
        provider = config.get("provider", "s3")
        bucket = config.get("bucket")
        storage = get_storage_adaptor(provider, bucket=bucket)

        # Create test file
        with NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
            f.write("Test data" * 1000)
            test_file = Path(f.name)

        try:
            # Upload benchmark
            with bench.timer("upload"):
                storage.upload_file(test_file, "benchmark_test.txt")

            # Download benchmark
            download_path = test_file.parent / "downloaded.txt"
            with bench.timer("download"):
                storage.download_file("benchmark_test.txt", download_path)

            # Cleanup
            storage.delete_file("benchmark_test.txt")
            download_path.unlink(missing_ok=True)
        finally:
            test_file.unlink(missing_ok=True)

    name = config.get("name", "storage-benchmark")
    report = runner.run(name, benchmark_func)
    print(f"\n{report.summary}")


def compare_command(args):
    """Compare two benchmarks."""
    runner = BenchmarkRunner()

    comparison = runner.compare(
        baseline_path=Path(args.baseline), current_path=Path(args.current)
    )

    print(f"\nšŸ“Š Comparison: {comparison.name}\n")
    print(f"Overall: {comparison.overall_improvement}\n")

    if comparison.improvements:
        print("āœ… Improvements:")
        for improvement in comparison.improvements:
            print(f"  • {improvement}")

    if comparison.regressions:
        print("\nāš ļø Regressions:")
        for regression in comparison.regressions:
            print(f"  • {regression}")

    if args.fail_on_regression and comparison.has_regressions:
        print("\nāŒ Benchmark failed: regressions detected")
        sys.exit(1)


def list_command(args):
    """List saved benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))
    benchmarks = runner.list_benchmarks()

    if not benchmarks:
        print("No benchmarks found")
        return

    print(f"\nšŸ“Š Saved benchmarks ({len(benchmarks)}):\n")
    for bench in benchmarks:
        print(f"• {bench['name']}")
        print(f"  Date: {bench['started_at']}")
        print(f"  Duration: {bench['duration']:.2f}s")
        print(f"  Operations: {bench['operations']}")
        print(f"  Path: {bench['path']}\n")


def show_command(args):
    """Show benchmark details."""
    with open(args.path) as f:
        data = json.load(f)

    report = BenchmarkReport(**data)

    print(f"\n{report.summary}\n")

    if report.timings:
        print("ā±ļø Timings:")
        for timing in sorted(report.timings, key=lambda t: t.duration, reverse=True):
            print(f"  • {timing.operation}: {timing.duration:.2f}s")

    if report.memory:
        print("\nšŸ’¾ Memory:")
        for mem in sorted(report.memory, key=lambda m: m.peak_mb, reverse=True):
            print(f"  • {mem.operation}: {mem.peak_mb:.0f}MB peak ({mem.allocated_mb:+.0f}MB)")

    if report.metrics:
        print("\nšŸ“ˆ Metrics:")
        for metric in report.metrics:
            print(f"  • {metric.name}: {metric.value:.2f} {metric.unit}")

    if report.recommendations:
        print("\nšŸ’” Recommendations:")
        for rec in report.recommendations:
            print(f"  • {rec}")


def cleanup_command(args):
    """Cleanup old benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))
    runner.cleanup_old(keep_latest=args.keep)
    print("āœ… Cleanup complete")


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Performance benchmarking suite",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run scraping benchmark
  skill-seekers-benchmark run --config benchmarks/scraping.json

  # Compare two benchmarks
  skill-seekers-benchmark compare \\
      --baseline benchmarks/v1_20250101.json \\
      --current benchmarks/v2_20250115.json

  # List all benchmarks
  skill-seekers-benchmark list

  # Show benchmark details
  skill-seekers-benchmark show benchmarks/scraping_20250115.json

  # Cleanup old benchmarks
  skill-seekers-benchmark cleanup --keep 5
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Run command
    run_parser = subparsers.add_parser("run", help="Run benchmark")
    run_parser.add_argument("--config", required=True, help="Benchmark config file")
    run_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Output directory (default: benchmarks)"
    )

    # Compare command
    compare_parser = subparsers.add_parser("compare", help="Compare two benchmarks")
    compare_parser.add_argument("--baseline", required=True, help="Baseline benchmark")
    compare_parser.add_argument("--current", required=True, help="Current benchmark")
    compare_parser.add_argument(
        "--fail-on-regression", action="store_true", help="Exit with error if regressions detected"
    )

    # List command
    list_parser = subparsers.add_parser("list", help="List saved benchmarks")
    list_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )

    # Show command
    show_parser = subparsers.add_parser("show", help="Show benchmark details")
    show_parser.add_argument("path", help="Path to benchmark file")

    # Cleanup command
    cleanup_parser = subparsers.add_parser("cleanup", help="Cleanup old benchmarks")
    cleanup_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )
    cleanup_parser.add_argument(
        "--keep",
        type=int,
        default=5,
        help="Number of latest benchmarks to keep per name (default: 5)",
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == "run":
            run_command(args)
        elif args.command == "compare":
            compare_command(args)
        elif args.command == "list":
            list_command(args)
        elif args.command == "show":
            show_command(args)
        elif args.command == "cleanup":
            cleanup_command(args)
    except Exception as e:
        print(f"\nāŒ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()