Files
skill-seekers-reference/src/skill_seekers/cli/benchmark_cli.py
yusyus 0265de5816 style: Format all Python files with ruff
- Formatted 103 files to comply with ruff format requirements
- No code logic changes, only formatting/whitespace
- Fixes CI formatting check failures
2026-02-08 14:42:27 +03:00

301 lines
9.1 KiB
Python

#!/usr/bin/env python3
"""
Performance benchmarking CLI.
Measure and analyze performance of scraping, embedding, and storage operations.
"""
import sys
import argparse
import json
from pathlib import Path
from ..benchmark import Benchmark, BenchmarkRunner, BenchmarkReport
def run_command(args):
    """Run a benchmark described by the JSON config file in ``args.config``.

    Dispatches on the config's "type" field; unknown types abort with exit 1.
    """
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    # Benchmark parameters come from a user-supplied JSON file.
    with open(args.config) as f:
        config = json.load(f)

    # Map benchmark type -> runner function; default type is "custom",
    # which has no handler and therefore errors out below.
    handlers = {
        "scraping": run_scraping_benchmark,
        "embedding": run_embedding_benchmark,
        "storage": run_storage_benchmark,
    }
    benchmark_type = config.get("type", "custom")
    handler = handlers.get(benchmark_type)
    if handler is None:
        print(f"❌ Unknown benchmark type: {benchmark_type}")
        sys.exit(1)
    handler(runner, config)
def run_scraping_benchmark(runner, config):
    """Benchmark the documentation scrape + skill build pipeline.

    Times (and memory-profiles) ``scrape_all`` and ``build_skill`` using the
    scrape config referenced by ``config["scrape_config"]``.
    """
    from .doc_scraper import scrape_all, build_skill

    scrape_config_path = config.get("scrape_config")

    def benchmark_func(bench: Benchmark):
        # Phase 1: scrape all documentation pages, tracking time and memory.
        with bench.timer("scrape_docs"), bench.memory("scrape_docs"):
            pages = scrape_all(scrape_config_path)
        bench.metric("pages_scraped", len(pages), "pages")

        # Phase 2: assemble the skill from the scraped pages.
        with bench.timer("build_skill"), bench.memory("build_skill"):
            build_skill(scrape_config_path, pages)

    report = runner.run(config.get("name", "scraping-benchmark"), benchmark_func)
    print(f"\n{report.summary}")
def run_embedding_benchmark(runner, config):
    """Benchmark single and batch embedding generation.

    Config keys: ``model`` (default "text-embedding-3-small"),
    ``sample_texts`` (list of strings; batch timing only runs for >1 text),
    ``name`` (report name).
    """
    from ..embedding.generator import EmbeddingGenerator

    def benchmark_func(bench: Benchmark):
        generator = EmbeddingGenerator()
        model = config.get("model", "text-embedding-3-small")
        texts = config.get("sample_texts", ["Test text"])

        # Latency of a single embedding call.
        with bench.timer("single_embedding"):
            generator.generate(texts[0], model=model)

        # Batch throughput, only meaningful with more than one text.
        if len(texts) > 1:
            with bench.timer("batch_embedding"), bench.memory("batch_embedding"):
                embeddings = generator.generate_batch(texts, model=model)
            # The batch timing is the most recent entry in result.timings.
            bench.metric(
                "embeddings_per_sec",
                len(embeddings) / bench.result.timings[-1].duration,
                "emb/sec",
            )

    report = runner.run(config.get("name", "embedding-benchmark"), benchmark_func)
    print(f"\n{report.summary}")
def run_storage_benchmark(runner, config):
    """Benchmark upload/download round-trips through a storage adaptor.

    Config keys: ``provider`` (default "s3"), ``bucket``, ``name``.

    Creates a small temp file, times its upload and download, and cleans up
    both the remote object and all local files.
    """
    from .storage import get_storage_adaptor
    from tempfile import NamedTemporaryFile

    def benchmark_func(bench: Benchmark):
        provider = config.get("provider", "s3")
        bucket = config.get("bucket")
        storage = get_storage_adaptor(provider, bucket=bucket)

        # Create a ~9 KB local test file to push through the adaptor.
        with NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
            f.write("Test data" * 1000)
            test_file = Path(f.name)

        download_path = test_file.parent / "downloaded.txt"
        try:
            # Upload benchmark
            with bench.timer("upload"):
                storage.upload_file(test_file, "benchmark_test.txt")
            try:
                # Download benchmark
                with bench.timer("download"):
                    storage.download_file("benchmark_test.txt", download_path)
            finally:
                # Fix: delete the remote object in a finally block so a failed
                # download no longer leaks "benchmark_test.txt" in the bucket
                # (previously cleanup only ran when the download succeeded).
                storage.delete_file("benchmark_test.txt")
        finally:
            # Local cleanup always runs, even on upload/download failure.
            download_path.unlink(missing_ok=True)
            test_file.unlink(missing_ok=True)

    name = config.get("name", "storage-benchmark")
    report = runner.run(name, benchmark_func)
    print(f"\n{report.summary}")
def compare_command(args):
    """Compare a baseline benchmark against a current one and print the diff.

    Exits with status 1 when ``--fail-on-regression`` is set and any
    regressions are present.
    """
    runner = BenchmarkRunner()
    comparison = runner.compare(
        baseline_path=Path(args.baseline),
        current_path=Path(args.current),
    )

    print(f"\n📊 Comparison: {comparison.name}\n")
    print(f"Overall: {comparison.overall_improvement}\n")

    if comparison.improvements:
        print("✅ Improvements:")
        for item in comparison.improvements:
            print(f"{item}")

    if comparison.regressions:
        print("\n⚠️ Regressions:")
        for item in comparison.regressions:
            print(f"{item}")

    # CI gate: turn detected regressions into a non-zero exit code on demand.
    if args.fail_on_regression and comparison.has_regressions:
        print("\n❌ Benchmark failed: regressions detected")
        sys.exit(1)
def list_command(args):
    """Print a summary of every saved benchmark in the output directory."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))
    benchmarks = runner.list_benchmarks()

    # Early return keeps the happy path flat.
    if not benchmarks:
        print("No benchmarks found")
        return

    print(f"\n📊 Saved benchmarks ({len(benchmarks)}):\n")
    for entry in benchmarks:
        print(f"{entry['name']}")
        print(f" Date: {entry['started_at']}")
        print(f" Duration: {entry['duration']:.2f}s")
        print(f" Operations: {entry['operations']}")
        print(f" Path: {entry['path']}\n")
def show_command(args):
    """Load a saved benchmark report from JSON and pretty-print its sections.

    Sections (timings, memory, metrics, recommendations) are printed only
    when non-empty; timings and memory are sorted largest-first.
    """
    with open(args.path) as f:
        report = BenchmarkReport(**json.load(f))

    print(f"\n{report.summary}\n")

    if report.timings:
        print("⏱️ Timings:")
        # Slowest operations first.
        for timing in sorted(report.timings, key=lambda t: t.duration, reverse=True):
            print(f"{timing.operation}: {timing.duration:.2f}s")

    if report.memory:
        print("\n💾 Memory:")
        # Highest peak usage first; allocated delta shown signed.
        for mem in sorted(report.memory, key=lambda m: m.peak_mb, reverse=True):
            print(f"{mem.operation}: {mem.peak_mb:.0f}MB peak ({mem.allocated_mb:+.0f}MB)")

    if report.metrics:
        print("\n📈 Metrics:")
        for metric in report.metrics:
            print(f"{metric.name}: {metric.value:.2f} {metric.unit}")

    if report.recommendations:
        print("\n💡 Recommendations:")
        for rec in report.recommendations:
            print(f"{rec}")
def cleanup_command(args):
    """Delete old benchmark files, keeping the ``args.keep`` newest per name."""
    BenchmarkRunner(output_dir=Path(args.output_dir)).cleanup_old(keep_latest=args.keep)
    print("✅ Cleanup complete")
def main():
    """CLI entry point: build the argument parser and dispatch to a subcommand.

    Prints help and exits 1 when no subcommand is given; any exception from a
    subcommand is reported to stderr and converted to exit status 1.
    """
    parser = argparse.ArgumentParser(
        description="Performance benchmarking suite",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Run scraping benchmark
skill-seekers-benchmark run --config benchmarks/scraping.json
# Compare two benchmarks
skill-seekers-benchmark compare \\
--baseline benchmarks/v1_20250101.json \\
--current benchmarks/v2_20250115.json
# List all benchmarks
skill-seekers-benchmark list
# Show benchmark details
skill-seekers-benchmark show benchmarks/scraping_20250115.json
# Cleanup old benchmarks
skill-seekers-benchmark cleanup --keep 5
""",
    )
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # run: execute a benchmark from a JSON config.
    run_parser = subparsers.add_parser("run", help="Run benchmark")
    run_parser.add_argument("--config", required=True, help="Benchmark config file")
    run_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Output directory (default: benchmarks)"
    )

    # compare: diff two saved benchmark reports.
    compare_parser = subparsers.add_parser("compare", help="Compare two benchmarks")
    compare_parser.add_argument("--baseline", required=True, help="Baseline benchmark")
    compare_parser.add_argument("--current", required=True, help="Current benchmark")
    compare_parser.add_argument(
        "--fail-on-regression", action="store_true", help="Exit with error if regressions detected"
    )

    # list: enumerate saved benchmark reports.
    list_parser = subparsers.add_parser("list", help="List saved benchmarks")
    list_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )

    # show: pretty-print one saved report.
    show_parser = subparsers.add_parser("show", help="Show benchmark details")
    show_parser.add_argument("path", help="Path to benchmark file")

    # cleanup: prune old reports.
    cleanup_parser = subparsers.add_parser("cleanup", help="Cleanup old benchmarks")
    cleanup_parser.add_argument(
        "--output-dir", "-o", default="benchmarks", help="Benchmark directory (default: benchmarks)"
    )
    cleanup_parser.add_argument(
        "--keep",
        type=int,
        default=5,
        help="Number of latest benchmarks to keep per name (default: 5)",
    )

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)

    # Subcommand dispatch table (replaces an if/elif chain); the parser only
    # ever yields these five command names.
    dispatch = {
        "run": run_command,
        "compare": compare_command,
        "list": list_command,
        "show": show_command,
        "cleanup": cleanup_command,
    }
    handler = dispatch.get(args.command)
    try:
        if handler is not None:
            handler(args)
    except Exception as e:
        # Top-level boundary: surface any subcommand failure as exit code 1.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()