"""
|
|
Tests for benchmarking suite.
|
|
"""
|
|
|
|
import time
|
|
import json
|
|
from datetime import datetime
|
|
|
|
|
|
from skill_seekers.benchmark import (
|
|
Benchmark,
|
|
BenchmarkResult,
|
|
BenchmarkRunner,
|
|
BenchmarkReport,
|
|
Metric,
|
|
)
|
|
from skill_seekers.benchmark.models import TimingResult, MemoryUsage
|
|
|
|
|
|
class TestBenchmarkResult:
|
|
"""Test BenchmarkResult class."""
|
|
|
|
def test_result_initialization(self):
|
|
"""Test result initialization."""
|
|
result = BenchmarkResult("test-benchmark")
|
|
|
|
assert result.name == "test-benchmark"
|
|
assert isinstance(result.started_at, datetime)
|
|
assert result.finished_at is None
|
|
assert result.timings == []
|
|
assert result.memory == []
|
|
assert result.metrics == []
|
|
assert result.system_info == {}
|
|
assert result.recommendations == []
|
|
|
|
def test_add_timing(self):
|
|
"""Test adding timing result."""
|
|
result = BenchmarkResult("test")
|
|
|
|
timing = TimingResult(operation="test_op", duration=1.5, iterations=1, avg_duration=1.5)
|
|
|
|
result.add_timing(timing)
|
|
|
|
assert len(result.timings) == 1
|
|
assert result.timings[0].operation == "test_op"
|
|
assert result.timings[0].duration == 1.5
|
|
|
|
def test_add_memory(self):
|
|
"""Test adding memory usage."""
|
|
result = BenchmarkResult("test")
|
|
|
|
usage = MemoryUsage(
|
|
operation="test_op", before_mb=100.0, after_mb=150.0, peak_mb=160.0, allocated_mb=50.0
|
|
)
|
|
|
|
result.add_memory(usage)
|
|
|
|
assert len(result.memory) == 1
|
|
assert result.memory[0].operation == "test_op"
|
|
assert result.memory[0].allocated_mb == 50.0
|
|
|
|
def test_add_metric(self):
|
|
"""Test adding custom metric."""
|
|
result = BenchmarkResult("test")
|
|
|
|
metric = Metric(name="pages_per_sec", value=12.5, unit="pages/sec")
|
|
|
|
result.add_metric(metric)
|
|
|
|
assert len(result.metrics) == 1
|
|
assert result.metrics[0].name == "pages_per_sec"
|
|
assert result.metrics[0].value == 12.5
|
|
|
|
def test_add_recommendation(self):
|
|
"""Test adding recommendation."""
|
|
result = BenchmarkResult("test")
|
|
|
|
result.add_recommendation("Consider caching")
|
|
|
|
assert len(result.recommendations) == 1
|
|
assert result.recommendations[0] == "Consider caching"
|
|
|
|
def test_set_system_info(self):
|
|
"""Test collecting system info."""
|
|
result = BenchmarkResult("test")
|
|
|
|
result.set_system_info()
|
|
|
|
assert "cpu_count" in result.system_info
|
|
assert "memory_total_gb" in result.system_info
|
|
assert result.system_info["cpu_count"] > 0
|
|
|
|
def test_to_report(self):
|
|
"""Test report generation."""
|
|
result = BenchmarkResult("test")
|
|
|
|
timing = TimingResult(operation="test_op", duration=1.0, iterations=1, avg_duration=1.0)
|
|
result.add_timing(timing)
|
|
|
|
report = result.to_report()
|
|
|
|
assert isinstance(report, BenchmarkReport)
|
|
assert report.name == "test"
|
|
assert report.finished_at is not None
|
|
assert len(report.timings) == 1
|
|
assert report.total_duration > 0
|
|
|
|
|
|
class TestBenchmark:
|
|
"""Test Benchmark class."""
|
|
|
|
def test_benchmark_initialization(self):
|
|
"""Test benchmark initialization."""
|
|
benchmark = Benchmark("test")
|
|
|
|
assert benchmark.name == "test"
|
|
assert isinstance(benchmark.result, BenchmarkResult)
|
|
|
|
def test_timer_context_manager(self):
|
|
"""Test timer context manager."""
|
|
benchmark = Benchmark("test")
|
|
|
|
with benchmark.timer("operation"):
|
|
time.sleep(0.1)
|
|
|
|
assert len(benchmark.result.timings) == 1
|
|
assert benchmark.result.timings[0].operation == "operation"
|
|
assert benchmark.result.timings[0].duration >= 0.1
|
|
|
|
def test_timer_with_iterations(self):
|
|
"""Test timer with iterations."""
|
|
benchmark = Benchmark("test")
|
|
|
|
with benchmark.timer("operation", iterations=5):
|
|
time.sleep(0.05)
|
|
|
|
timing = benchmark.result.timings[0]
|
|
assert timing.iterations == 5
|
|
assert timing.avg_duration < timing.duration
|
|
|
|
def test_memory_context_manager(self):
|
|
"""Test memory context manager."""
|
|
benchmark = Benchmark("test")
|
|
|
|
with benchmark.memory("operation"):
|
|
# Allocate some memory
|
|
pass
|
|
|
|
assert len(benchmark.result.memory) == 1
|
|
assert benchmark.result.memory[0].operation == "operation"
|
|
assert benchmark.result.memory[0].allocated_mb >= 0
|
|
|
|
def test_measure_function(self):
|
|
"""Test measure function."""
|
|
benchmark = Benchmark("test")
|
|
|
|
def slow_function(x):
|
|
time.sleep(0.1)
|
|
return x * 2
|
|
|
|
result = benchmark.measure(slow_function, 5, operation="multiply")
|
|
|
|
assert result == 10
|
|
assert len(benchmark.result.timings) == 1
|
|
assert benchmark.result.timings[0].operation == "multiply"
|
|
|
|
def test_measure_with_memory_tracking(self):
|
|
"""Test measure with memory tracking."""
|
|
benchmark = Benchmark("test")
|
|
|
|
def allocate_memory():
|
|
return [0] * 1000000
|
|
|
|
benchmark.measure(allocate_memory, operation="allocate", track_memory=True)
|
|
|
|
assert len(benchmark.result.timings) == 1
|
|
assert len(benchmark.result.memory) == 1
|
|
|
|
def test_timed_decorator(self):
|
|
"""Test timed decorator."""
|
|
benchmark = Benchmark("test")
|
|
|
|
@benchmark.timed("decorated_func")
|
|
def my_function(x):
|
|
time.sleep(0.05)
|
|
return x + 1
|
|
|
|
result = my_function(5)
|
|
|
|
assert result == 6
|
|
assert len(benchmark.result.timings) == 1
|
|
assert benchmark.result.timings[0].operation == "decorated_func"
|
|
|
|
def test_timed_decorator_with_memory(self):
|
|
"""Test timed decorator with memory tracking."""
|
|
benchmark = Benchmark("test")
|
|
|
|
@benchmark.timed("memory_func", track_memory=True)
|
|
def allocate():
|
|
return [0] * 1000000
|
|
|
|
allocate()
|
|
|
|
assert len(benchmark.result.timings) == 1
|
|
assert len(benchmark.result.memory) == 1
|
|
|
|
def test_metric_recording(self):
|
|
"""Test metric recording."""
|
|
benchmark = Benchmark("test")
|
|
|
|
benchmark.metric("throughput", 125.5, "ops/sec")
|
|
|
|
assert len(benchmark.result.metrics) == 1
|
|
assert benchmark.result.metrics[0].name == "throughput"
|
|
assert benchmark.result.metrics[0].value == 125.5
|
|
|
|
def test_recommendation_recording(self):
|
|
"""Test recommendation recording."""
|
|
benchmark = Benchmark("test")
|
|
|
|
benchmark.recommend("Use batch processing")
|
|
|
|
assert len(benchmark.result.recommendations) == 1
|
|
assert "batch" in benchmark.result.recommendations[0].lower()
|
|
|
|
def test_report_generation(self):
|
|
"""Test report generation."""
|
|
benchmark = Benchmark("test")
|
|
|
|
with benchmark.timer("op1"):
|
|
time.sleep(0.05)
|
|
|
|
benchmark.metric("count", 10, "items")
|
|
|
|
report = benchmark.report()
|
|
|
|
assert isinstance(report, BenchmarkReport)
|
|
assert report.name == "test"
|
|
assert len(report.timings) == 1
|
|
assert len(report.metrics) == 1
|
|
|
|
def test_save_report(self, tmp_path):
|
|
"""Test saving report to file."""
|
|
benchmark = Benchmark("test")
|
|
|
|
with benchmark.timer("operation"):
|
|
time.sleep(0.05)
|
|
|
|
output_path = tmp_path / "benchmark.json"
|
|
benchmark.save(output_path)
|
|
|
|
assert output_path.exists()
|
|
|
|
# Verify contents
|
|
with open(output_path) as f:
|
|
data = json.load(f)
|
|
|
|
assert data["name"] == "test"
|
|
assert len(data["timings"]) == 1
|
|
|
|
def test_analyze_bottlenecks(self):
|
|
"""Test bottleneck analysis."""
|
|
benchmark = Benchmark("test")
|
|
|
|
# Create operations with different durations
|
|
with benchmark.timer("fast"):
|
|
time.sleep(0.01)
|
|
|
|
with benchmark.timer("slow"):
|
|
time.sleep(0.2)
|
|
|
|
benchmark.analyze()
|
|
|
|
# Should have recommendation about bottleneck
|
|
assert len(benchmark.result.recommendations) > 0
|
|
assert any("bottleneck" in r.lower() for r in benchmark.result.recommendations)
|
|
|
|
def test_analyze_high_memory(self):
|
|
"""Test high memory usage detection."""
|
|
benchmark = Benchmark("test")
|
|
|
|
# Simulate high memory usage
|
|
usage = MemoryUsage(
|
|
operation="allocate",
|
|
before_mb=100.0,
|
|
after_mb=1200.0,
|
|
peak_mb=1500.0,
|
|
allocated_mb=1100.0,
|
|
)
|
|
benchmark.result.add_memory(usage)
|
|
|
|
benchmark.analyze()
|
|
|
|
# Should have recommendation about memory
|
|
assert len(benchmark.result.recommendations) > 0
|
|
assert any("memory" in r.lower() for r in benchmark.result.recommendations)
|
|
|
|
|
|
class TestBenchmarkRunner:
|
|
"""Test BenchmarkRunner class."""
|
|
|
|
def test_runner_initialization(self, tmp_path):
|
|
"""Test runner initialization."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
assert runner.output_dir == tmp_path
|
|
assert runner.output_dir.exists()
|
|
|
|
def test_run_benchmark(self, tmp_path):
|
|
"""Test running single benchmark."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
def test_benchmark(bench):
|
|
with bench.timer("operation"):
|
|
time.sleep(0.05)
|
|
|
|
report = runner.run("test", test_benchmark, save=True)
|
|
|
|
assert isinstance(report, BenchmarkReport)
|
|
assert report.name == "test"
|
|
assert len(report.timings) == 1
|
|
|
|
# Check file was saved
|
|
saved_files = list(tmp_path.glob("test_*.json"))
|
|
assert len(saved_files) == 1
|
|
|
|
def test_run_benchmark_no_save(self, tmp_path):
|
|
"""Test running benchmark without saving."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
def test_benchmark(bench):
|
|
with bench.timer("operation"):
|
|
time.sleep(0.05)
|
|
|
|
report = runner.run("test", test_benchmark, save=False)
|
|
|
|
assert isinstance(report, BenchmarkReport)
|
|
|
|
# No files should be saved
|
|
saved_files = list(tmp_path.glob("*.json"))
|
|
assert len(saved_files) == 0
|
|
|
|
def test_run_suite(self, tmp_path):
|
|
"""Test running benchmark suite."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
def bench1(bench):
|
|
with bench.timer("op1"):
|
|
time.sleep(0.02)
|
|
|
|
def bench2(bench):
|
|
with bench.timer("op2"):
|
|
time.sleep(0.03)
|
|
|
|
reports = runner.run_suite({"test1": bench1, "test2": bench2})
|
|
|
|
assert len(reports) == 2
|
|
assert "test1" in reports
|
|
assert "test2" in reports
|
|
|
|
# Check both files saved
|
|
saved_files = list(tmp_path.glob("*.json"))
|
|
assert len(saved_files) == 2
|
|
|
|
def test_compare_benchmarks(self, tmp_path):
|
|
"""Test comparing benchmarks."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
# Create baseline
|
|
def baseline_bench(bench):
|
|
with bench.timer("operation"):
|
|
time.sleep(0.1)
|
|
|
|
runner.run("baseline", baseline_bench, save=True)
|
|
baseline_path = list(tmp_path.glob("baseline_*.json"))[0]
|
|
|
|
# Create faster version
|
|
def improved_bench(bench):
|
|
with bench.timer("operation"):
|
|
time.sleep(0.05)
|
|
|
|
runner.run("improved", improved_bench, save=True)
|
|
improved_path = list(tmp_path.glob("improved_*.json"))[0]
|
|
|
|
# Compare
|
|
from skill_seekers.benchmark.models import ComparisonReport
|
|
|
|
comparison = runner.compare(baseline_path, improved_path)
|
|
|
|
assert isinstance(comparison, ComparisonReport)
|
|
assert comparison.speedup_factor > 1.0
|
|
assert len(comparison.improvements) > 0
|
|
|
|
def test_list_benchmarks(self, tmp_path):
|
|
"""Test listing benchmarks."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
# Create some benchmarks
|
|
def test_bench(bench):
|
|
with bench.timer("op"):
|
|
time.sleep(0.02)
|
|
|
|
runner.run("bench1", test_bench, save=True)
|
|
runner.run("bench2", test_bench, save=True)
|
|
|
|
benchmarks = runner.list_benchmarks()
|
|
|
|
assert len(benchmarks) == 2
|
|
assert all("name" in b for b in benchmarks)
|
|
assert all("duration" in b for b in benchmarks)
|
|
|
|
def test_get_latest(self, tmp_path):
|
|
"""Test getting latest benchmark."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
def test_bench(bench):
|
|
with bench.timer("op"):
|
|
time.sleep(0.02)
|
|
|
|
# Run same benchmark twice
|
|
runner.run("test", test_bench, save=True)
|
|
time.sleep(0.1) # Ensure different timestamps
|
|
runner.run("test", test_bench, save=True)
|
|
|
|
latest = runner.get_latest("test")
|
|
|
|
assert latest is not None
|
|
assert "test_" in latest.name
|
|
|
|
def test_get_latest_not_found(self, tmp_path):
|
|
"""Test getting latest when benchmark doesn't exist."""
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
latest = runner.get_latest("nonexistent")
|
|
|
|
assert latest is None
|
|
|
|
def test_cleanup_old(self, tmp_path):
|
|
"""Test cleaning up old benchmarks."""
|
|
import os
|
|
|
|
runner = BenchmarkRunner(output_dir=tmp_path)
|
|
|
|
# Create 10 benchmark files with different timestamps
|
|
base_time = time.time()
|
|
for i in range(10):
|
|
filename = f"test_{i:08d}.json"
|
|
file_path = tmp_path / filename
|
|
|
|
# Create minimal valid report
|
|
report_data = {
|
|
"name": "test",
|
|
"started_at": datetime.utcnow().isoformat(),
|
|
"finished_at": datetime.utcnow().isoformat(),
|
|
"total_duration": 1.0,
|
|
"timings": [],
|
|
"memory": [],
|
|
"metrics": [],
|
|
"system_info": {},
|
|
"recommendations": [],
|
|
}
|
|
|
|
with open(file_path, "w") as f:
|
|
json.dump(report_data, f)
|
|
|
|
# Set different modification times
|
|
mtime = base_time - (10 - i) * 60 # Older files have older mtimes
|
|
os.utime(file_path, (mtime, mtime))
|
|
|
|
# Verify we have 10 files
|
|
assert len(list(tmp_path.glob("test_*.json"))) == 10
|
|
|
|
# Keep only latest 3
|
|
runner.cleanup_old(keep_latest=3)
|
|
|
|
remaining = list(tmp_path.glob("test_*.json"))
|
|
assert len(remaining) == 3
|
|
|
|
# Verify we kept the newest files (7, 8, 9)
|
|
remaining_names = {f.stem for f in remaining}
|
|
assert "test_00000007" in remaining_names or "test_00000008" in remaining_names
|
|
|
|
|
|
class TestBenchmarkModels:
|
|
"""Test benchmark model classes."""
|
|
|
|
def test_timing_result_model(self):
|
|
"""Test TimingResult model."""
|
|
timing = TimingResult(operation="test", duration=1.5, iterations=10, avg_duration=0.15)
|
|
|
|
assert timing.operation == "test"
|
|
assert timing.duration == 1.5
|
|
assert timing.iterations == 10
|
|
assert timing.avg_duration == 0.15
|
|
|
|
def test_memory_usage_model(self):
|
|
"""Test MemoryUsage model."""
|
|
usage = MemoryUsage(
|
|
operation="allocate", before_mb=100.0, after_mb=200.0, peak_mb=250.0, allocated_mb=100.0
|
|
)
|
|
|
|
assert usage.operation == "allocate"
|
|
assert usage.allocated_mb == 100.0
|
|
assert usage.peak_mb == 250.0
|
|
|
|
def test_metric_model(self):
|
|
"""Test Metric model."""
|
|
metric = Metric(name="throughput", value=125.5, unit="ops/sec")
|
|
|
|
assert metric.name == "throughput"
|
|
assert metric.value == 125.5
|
|
assert metric.unit == "ops/sec"
|
|
assert isinstance(metric.timestamp, datetime)
|
|
|
|
def test_benchmark_report_summary(self):
|
|
"""Test BenchmarkReport summary property."""
|
|
report = BenchmarkReport(
|
|
name="test",
|
|
started_at=datetime.utcnow(),
|
|
finished_at=datetime.utcnow(),
|
|
total_duration=5.0,
|
|
timings=[TimingResult(operation="op1", duration=2.0, iterations=1, avg_duration=2.0)],
|
|
memory=[
|
|
MemoryUsage(
|
|
operation="op1",
|
|
before_mb=100.0,
|
|
after_mb=200.0,
|
|
peak_mb=250.0,
|
|
allocated_mb=100.0,
|
|
)
|
|
],
|
|
metrics=[],
|
|
system_info={},
|
|
recommendations=[],
|
|
)
|
|
|
|
summary = report.summary
|
|
|
|
assert "test" in summary
|
|
assert "5.00s" in summary
|
|
assert "250.0MB" in summary
|
|
|
|
def test_comparison_report_has_regressions(self):
|
|
"""Test ComparisonReport has_regressions property."""
|
|
from skill_seekers.benchmark.models import ComparisonReport
|
|
|
|
baseline = BenchmarkReport(
|
|
name="baseline",
|
|
started_at=datetime.utcnow(),
|
|
finished_at=datetime.utcnow(),
|
|
total_duration=5.0,
|
|
timings=[],
|
|
memory=[],
|
|
metrics=[],
|
|
system_info={},
|
|
recommendations=[],
|
|
)
|
|
|
|
current = BenchmarkReport(
|
|
name="current",
|
|
started_at=datetime.utcnow(),
|
|
finished_at=datetime.utcnow(),
|
|
total_duration=10.0,
|
|
timings=[],
|
|
memory=[],
|
|
metrics=[],
|
|
system_info={},
|
|
recommendations=[],
|
|
)
|
|
|
|
comparison = ComparisonReport(
|
|
name="test",
|
|
baseline=baseline,
|
|
current=current,
|
|
improvements=[],
|
|
regressions=["Slower performance"],
|
|
speedup_factor=0.5,
|
|
memory_change_mb=0.0,
|
|
)
|
|
|
|
assert comparison.has_regressions is True
|
|
|
|
def test_comparison_report_overall_improvement(self):
|
|
"""Test ComparisonReport overall_improvement property."""
|
|
from skill_seekers.benchmark.models import ComparisonReport
|
|
|
|
baseline = BenchmarkReport(
|
|
name="baseline",
|
|
started_at=datetime.utcnow(),
|
|
finished_at=datetime.utcnow(),
|
|
total_duration=10.0,
|
|
timings=[],
|
|
memory=[],
|
|
metrics=[],
|
|
system_info={},
|
|
recommendations=[],
|
|
)
|
|
|
|
current = BenchmarkReport(
|
|
name="current",
|
|
started_at=datetime.utcnow(),
|
|
finished_at=datetime.utcnow(),
|
|
total_duration=5.0,
|
|
timings=[],
|
|
memory=[],
|
|
metrics=[],
|
|
system_info={},
|
|
recommendations=[],
|
|
)
|
|
|
|
comparison = ComparisonReport(
|
|
name="test",
|
|
baseline=baseline,
|
|
current=current,
|
|
improvements=[],
|
|
regressions=[],
|
|
speedup_factor=2.0,
|
|
memory_change_mb=0.0,
|
|
)
|
|
|
|
improvement = comparison.overall_improvement
|
|
|
|
assert "100.0% faster" in improvement
|
|
assert "✅" in improvement
|