- Formatted 103 files to comply with ruff format requirements
- No code logic changes, only formatting/whitespace
- Fixes CI formatting check failures
301 lines · 9.1 KiB · Python
#!/usr/bin/env python3
"""
Performance benchmarking CLI.

Measure and analyze performance of scraping, embedding, and storage operations.
"""

import sys
import argparse
import json
from pathlib import Path

from ..benchmark import Benchmark, BenchmarkRunner, BenchmarkReport


def run_command(args):
    """Run benchmark from config."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    # Load benchmark config
    with open(args.config) as f:
        config = json.load(f)

    benchmark_type = config.get("type", "custom")

    if benchmark_type == "scraping":
        run_scraping_benchmark(runner, config)
    elif benchmark_type == "embedding":
        run_embedding_benchmark(runner, config)
    elif benchmark_type == "storage":
        run_storage_benchmark(runner, config)
    else:
        print(f"❌ Unknown benchmark type: {benchmark_type}")
        sys.exit(1)


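# Illustrative "scraping" benchmark config for the runner below. A sketch only:
# the keys mirror what run_command and run_scraping_benchmark actually read
# ("type", "name", "scrape_config"); the values and path are hypothetical.
#
#   {
#       "type": "scraping",
#       "name": "docs-scraping",
#       "scrape_config": "configs/docs_scrape.json"
#   }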
def run_scraping_benchmark(runner, config):
    """Run scraping benchmark."""
    from .doc_scraper import scrape_all, build_skill

    def benchmark_func(bench: Benchmark):
        scrape_config_path = config.get("scrape_config")

        # Time scraping
        with bench.timer("scrape_docs"), bench.memory("scrape_docs"):
            pages = scrape_all(scrape_config_path)

        # Track metrics
        bench.metric("pages_scraped", len(pages), "pages")

        # Time building
        with bench.timer("build_skill"), bench.memory("build_skill"):
            build_skill(scrape_config_path, pages)

    name = config.get("name", "scraping-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")


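# Illustrative "embedding" benchmark config for the runner below (hypothetical
# values; the keys are the ones read by that runner: "model" and
# "sample_texts", plus the shared "type" and "name"):
#
#   {
#       "type": "embedding",
#       "name": "embedding-smoke-test",
#       "model": "text-embedding-3-small",
#       "sample_texts": ["First sample text", "Second sample text"]
#   }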
def run_embedding_benchmark(runner, config):
    """Run embedding benchmark."""
    from ..embedding.generator import EmbeddingGenerator

    def benchmark_func(bench: Benchmark):
        generator = EmbeddingGenerator()

        model = config.get("model", "text-embedding-3-small")
        texts = config.get("sample_texts", ["Test text"])

        # Single embedding
        with bench.timer("single_embedding"):
            generator.generate(texts[0], model=model)

        # Batch embedding
        if len(texts) > 1:
            with bench.timer("batch_embedding"), bench.memory("batch_embedding"):
                embeddings = generator.generate_batch(texts, model=model)

            bench.metric(
                "embeddings_per_sec", len(embeddings) / bench.result.timings[-1].duration, "emb/sec"
            )

    name = config.get("name", "embedding-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")


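# Illustrative "storage" benchmark config for the runner below (hypothetical
# bucket name; "provider" and "bucket" are the only type-specific keys it
# reads, and "provider" defaults to "s3"):
#
#   {
#       "type": "storage",
#       "name": "s3-roundtrip",
#       "provider": "s3",
#       "bucket": "my-benchmark-bucket"
#   }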
def run_storage_benchmark(runner, config):
    """Run storage benchmark."""
    from .storage import get_storage_adaptor
    from tempfile import NamedTemporaryFile

    def benchmark_func(bench: Benchmark):
        provider = config.get("provider", "s3")
        bucket = config.get("bucket")

        storage = get_storage_adaptor(provider, bucket=bucket)

        # Create test file
        with NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
            f.write("Test data" * 1000)
            test_file = Path(f.name)

        try:
            # Upload benchmark
            with bench.timer("upload"):
                storage.upload_file(test_file, "benchmark_test.txt")

            # Download benchmark
            download_path = test_file.parent / "downloaded.txt"
            with bench.timer("download"):
                storage.download_file("benchmark_test.txt", download_path)

            # Cleanup
            storage.delete_file("benchmark_test.txt")
            download_path.unlink(missing_ok=True)

        finally:
            test_file.unlink(missing_ok=True)

    name = config.get("name", "storage-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")


def compare_command(args):
    """Compare two benchmarks."""
    runner = BenchmarkRunner()

    comparison = runner.compare(baseline_path=Path(args.baseline), current_path=Path(args.current))

    print(f"\n📊 Comparison: {comparison.name}\n")
    print(f"Overall: {comparison.overall_improvement}\n")

    if comparison.improvements:
        print("✅ Improvements:")
        for improvement in comparison.improvements:
            print(f" • {improvement}")

    if comparison.regressions:
        print("\n⚠️ Regressions:")
        for regression in comparison.regressions:
            print(f" • {regression}")

    if args.fail_on_regression and comparison.has_regressions:
        print("\n❌ Benchmark failed: regressions detected")
        sys.exit(1)


def list_command(args):
    """List saved benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    benchmarks = runner.list_benchmarks()

    if not benchmarks:
        print("No benchmarks found")
        return

    print(f"\n📊 Saved benchmarks ({len(benchmarks)}):\n")

    for bench in benchmarks:
        print(f"• {bench['name']}")
        print(f" Date: {bench['started_at']}")
        print(f" Duration: {bench['duration']:.2f}s")
        print(f" Operations: {bench['operations']}")
        print(f" Path: {bench['path']}\n")


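# Sketch of the saved report JSON that show_command below expects. This shape
# is inferred from the attributes accessed on BenchmarkReport (summary,
# timings, memory, metrics, recommendations), not from a documented schema;
# all field values here are hypothetical.
#
#   {
#       "summary": "...",
#       "timings": [{"operation": "scrape_docs", "duration": 12.3}],
#       "memory": [{"operation": "scrape_docs", "peak_mb": 256.0, "allocated_mb": 128.0}],
#       "metrics": [{"name": "pages_scraped", "value": 42.0, "unit": "pages"}],
#       "recommendations": ["..."]
#   }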
def show_command(args):
    """Show benchmark details."""
    with open(args.path) as f:
        data = json.load(f)

    report = BenchmarkReport(**data)

    print(f"\n{report.summary}\n")

    if report.timings:
        print("⏱️ Timings:")
        for timing in sorted(report.timings, key=lambda t: t.duration, reverse=True):
            print(f" • {timing.operation}: {timing.duration:.2f}s")

    if report.memory:
        print("\n💾 Memory:")
        for mem in sorted(report.memory, key=lambda m: m.peak_mb, reverse=True):
            print(f" • {mem.operation}: {mem.peak_mb:.0f}MB peak ({mem.allocated_mb:+.0f}MB)")

    if report.metrics:
        print("\n📈 Metrics:")
        for metric in report.metrics:
            print(f" • {metric.name}: {metric.value:.2f} {metric.unit}")

    if report.recommendations:
        print("\n💡 Recommendations:")
        for rec in report.recommendations:
            print(f" • {rec}")


def cleanup_command(args):
    """Cleanup old benchmarks."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    runner.cleanup_old(keep_latest=args.keep)

    print("✅ Cleanup complete")


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Performance benchmarking suite",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run scraping benchmark
  skill-seekers-benchmark run --config benchmarks/scraping.json

  # Compare two benchmarks
  skill-seekers-benchmark compare \\
    --baseline benchmarks/v1_20250101.json \\
    --current benchmarks/v2_20250115.json

  # List all benchmarks
  skill-seekers-benchmark list

  # Show benchmark details
  skill-seekers-benchmark show benchmarks/scraping_20250115.json

  # Cleanup old benchmarks
  skill-seekers-benchmark cleanup --keep 5
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Run command
    run_parser = subparsers.add_parser("run", help="Run benchmark")
    run_parser.add_argument("--config", required=True, help="Benchmark config file")
    run_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Output directory (default: benchmarks)"
    )

    # Compare command
    compare_parser = subparsers.add_parser("compare", help="Compare two benchmarks")
    compare_parser.add_argument("--baseline", required=True, help="Baseline benchmark")
    compare_parser.add_argument("--current", required=True, help="Current benchmark")
    compare_parser.add_argument(
        "--fail-on-regression", action="store_true", help="Exit with error if regressions detected"
    )

    # List command
    list_parser = subparsers.add_parser("list", help="List saved benchmarks")
    list_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )

    # Show command
    show_parser = subparsers.add_parser("show", help="Show benchmark details")
    show_parser.add_argument("path", help="Path to benchmark file")

    # Cleanup command
    cleanup_parser = subparsers.add_parser("cleanup", help="Cleanup old benchmarks")
    cleanup_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )
    cleanup_parser.add_argument(
        "--keep",
        type=int,
        default=5,
        help="Number of latest benchmarks to keep per name (default: 5)",
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == "run":
            run_command(args)
        elif args.command == "compare":
            compare_command(args)
        elif args.command == "list":
            list_command(args)
        elif args.command == "show":
            show_command(args)
        elif args.command == "cleanup":
            cleanup_command(args)
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()