skill-seekers-reference/tests/test_benchmark.py

"""
Tests for benchmarking suite.
"""
import time
import json
from datetime import datetime
from skill_seekers.benchmark import (
Benchmark,
BenchmarkResult,
BenchmarkRunner,
BenchmarkReport,
Metric,
)
from skill_seekers.benchmark.models import TimingResult, MemoryUsage
class TestBenchmarkResult:
"""Test BenchmarkResult class."""
def test_result_initialization(self):
"""Test result initialization."""
result = BenchmarkResult("test-benchmark")
assert result.name == "test-benchmark"
assert isinstance(result.started_at, datetime)
assert result.finished_at is None
assert result.timings == []
assert result.memory == []
assert result.metrics == []
assert result.system_info == {}
assert result.recommendations == []
def test_add_timing(self):
"""Test adding timing result."""
result = BenchmarkResult("test")
timing = TimingResult(operation="test_op", duration=1.5, iterations=1, avg_duration=1.5)
result.add_timing(timing)
assert len(result.timings) == 1
assert result.timings[0].operation == "test_op"
assert result.timings[0].duration == 1.5
def test_add_memory(self):
"""Test adding memory usage."""
result = BenchmarkResult("test")
usage = MemoryUsage(
operation="test_op", before_mb=100.0, after_mb=150.0, peak_mb=160.0, allocated_mb=50.0
)
result.add_memory(usage)
assert len(result.memory) == 1
assert result.memory[0].operation == "test_op"
assert result.memory[0].allocated_mb == 50.0
def test_add_metric(self):
"""Test adding custom metric."""
result = BenchmarkResult("test")
metric = Metric(name="pages_per_sec", value=12.5, unit="pages/sec")
result.add_metric(metric)
assert len(result.metrics) == 1
assert result.metrics[0].name == "pages_per_sec"
assert result.metrics[0].value == 12.5
def test_add_recommendation(self):
"""Test adding recommendation."""
result = BenchmarkResult("test")
result.add_recommendation("Consider caching")
assert len(result.recommendations) == 1
assert result.recommendations[0] == "Consider caching"
def test_set_system_info(self):
"""Test collecting system info."""
result = BenchmarkResult("test")
result.set_system_info()
assert "cpu_count" in result.system_info
assert "memory_total_gb" in result.system_info
assert result.system_info["cpu_count"] > 0
def test_to_report(self):
"""Test report generation."""
result = BenchmarkResult("test")
timing = TimingResult(operation="test_op", duration=1.0, iterations=1, avg_duration=1.0)
result.add_timing(timing)
report = result.to_report()
assert isinstance(report, BenchmarkReport)
assert report.name == "test"
assert report.finished_at is not None
assert len(report.timings) == 1
assert report.total_duration > 0
class TestBenchmark:
"""Test Benchmark class."""
def test_benchmark_initialization(self):
"""Test benchmark initialization."""
benchmark = Benchmark("test")
assert benchmark.name == "test"
assert isinstance(benchmark.result, BenchmarkResult)
def test_timer_context_manager(self):
"""Test timer context manager."""
benchmark = Benchmark("test")
with benchmark.timer("operation"):
time.sleep(0.1)
assert len(benchmark.result.timings) == 1
assert benchmark.result.timings[0].operation == "operation"
assert benchmark.result.timings[0].duration >= 0.1
def test_timer_with_iterations(self):
"""Test timer with iterations."""
benchmark = Benchmark("test")
with benchmark.timer("operation", iterations=5):
time.sleep(0.05)
timing = benchmark.result.timings[0]
assert timing.iterations == 5
assert timing.avg_duration < timing.duration
def test_memory_context_manager(self):
"""Test memory context manager."""
benchmark = Benchmark("test")
with benchmark.memory("operation"):
# Allocate some memory
pass
assert len(benchmark.result.memory) == 1
assert benchmark.result.memory[0].operation == "operation"
assert benchmark.result.memory[0].allocated_mb >= 0
def test_measure_function(self):
"""Test measure function."""
benchmark = Benchmark("test")
def slow_function(x):
time.sleep(0.1)
return x * 2
result = benchmark.measure(slow_function, 5, operation="multiply")
assert result == 10
assert len(benchmark.result.timings) == 1
assert benchmark.result.timings[0].operation == "multiply"
def test_measure_with_memory_tracking(self):
"""Test measure with memory tracking."""
benchmark = Benchmark("test")
def allocate_memory():
return [0] * 1000000
benchmark.measure(allocate_memory, operation="allocate", track_memory=True)
assert len(benchmark.result.timings) == 1
assert len(benchmark.result.memory) == 1
def test_timed_decorator(self):
"""Test timed decorator."""
benchmark = Benchmark("test")
@benchmark.timed("decorated_func")
def my_function(x):
time.sleep(0.05)
return x + 1
result = my_function(5)
assert result == 6
assert len(benchmark.result.timings) == 1
assert benchmark.result.timings[0].operation == "decorated_func"
def test_timed_decorator_with_memory(self):
"""Test timed decorator with memory tracking."""
benchmark = Benchmark("test")
@benchmark.timed("memory_func", track_memory=True)
def allocate():
return [0] * 1000000
allocate()
assert len(benchmark.result.timings) == 1
assert len(benchmark.result.memory) == 1
def test_metric_recording(self):
"""Test metric recording."""
benchmark = Benchmark("test")
benchmark.metric("throughput", 125.5, "ops/sec")
assert len(benchmark.result.metrics) == 1
assert benchmark.result.metrics[0].name == "throughput"
assert benchmark.result.metrics[0].value == 125.5
def test_recommendation_recording(self):
"""Test recommendation recording."""
benchmark = Benchmark("test")
benchmark.recommend("Use batch processing")
assert len(benchmark.result.recommendations) == 1
assert "batch" in benchmark.result.recommendations[0].lower()
def test_report_generation(self):
"""Test report generation."""
benchmark = Benchmark("test")
with benchmark.timer("op1"):
time.sleep(0.05)
benchmark.metric("count", 10, "items")
report = benchmark.report()
assert isinstance(report, BenchmarkReport)
assert report.name == "test"
assert len(report.timings) == 1
assert len(report.metrics) == 1
def test_save_report(self, tmp_path):
"""Test saving report to file."""
benchmark = Benchmark("test")
with benchmark.timer("operation"):
time.sleep(0.05)
output_path = tmp_path / "benchmark.json"
benchmark.save(output_path)
assert output_path.exists()
# Verify contents
with open(output_path) as f:
data = json.load(f)
assert data["name"] == "test"
assert len(data["timings"]) == 1
def test_analyze_bottlenecks(self):
"""Test bottleneck analysis."""
benchmark = Benchmark("test")
# Create operations with different durations
with benchmark.timer("fast"):
time.sleep(0.01)
with benchmark.timer("slow"):
time.sleep(0.2)
benchmark.analyze()
# Should have recommendation about bottleneck
assert len(benchmark.result.recommendations) > 0
assert any("bottleneck" in r.lower() for r in benchmark.result.recommendations)
def test_analyze_high_memory(self):
"""Test high memory usage detection."""
benchmark = Benchmark("test")
# Simulate high memory usage
usage = MemoryUsage(
operation="allocate",
before_mb=100.0,
after_mb=1200.0,
peak_mb=1500.0,
allocated_mb=1100.0,
)
benchmark.result.add_memory(usage)
benchmark.analyze()
# Should have recommendation about memory
assert len(benchmark.result.recommendations) > 0
assert any("memory" in r.lower() for r in benchmark.result.recommendations)
class TestBenchmarkRunner:
"""Test BenchmarkRunner class."""
def test_runner_initialization(self, tmp_path):
"""Test runner initialization."""
runner = BenchmarkRunner(output_dir=tmp_path)
assert runner.output_dir == tmp_path
assert runner.output_dir.exists()
def test_run_benchmark(self, tmp_path):
"""Test running single benchmark."""
runner = BenchmarkRunner(output_dir=tmp_path)
def test_benchmark(bench):
with bench.timer("operation"):
time.sleep(0.05)
report = runner.run("test", test_benchmark, save=True)
assert isinstance(report, BenchmarkReport)
assert report.name == "test"
assert len(report.timings) == 1
# Check file was saved
saved_files = list(tmp_path.glob("test_*.json"))
assert len(saved_files) == 1
def test_run_benchmark_no_save(self, tmp_path):
"""Test running benchmark without saving."""
runner = BenchmarkRunner(output_dir=tmp_path)
def test_benchmark(bench):
with bench.timer("operation"):
time.sleep(0.05)
report = runner.run("test", test_benchmark, save=False)
assert isinstance(report, BenchmarkReport)
# No files should be saved
saved_files = list(tmp_path.glob("*.json"))
assert len(saved_files) == 0
def test_run_suite(self, tmp_path):
"""Test running benchmark suite."""
runner = BenchmarkRunner(output_dir=tmp_path)
def bench1(bench):
with bench.timer("op1"):
time.sleep(0.02)
def bench2(bench):
with bench.timer("op2"):
time.sleep(0.03)
reports = runner.run_suite({"test1": bench1, "test2": bench2})
assert len(reports) == 2
assert "test1" in reports
assert "test2" in reports
# Check both files saved
saved_files = list(tmp_path.glob("*.json"))
assert len(saved_files) == 2
def test_compare_benchmarks(self, tmp_path):
"""Test comparing benchmarks."""
runner = BenchmarkRunner(output_dir=tmp_path)
# Create baseline
def baseline_bench(bench):
with bench.timer("operation"):
time.sleep(0.1)
runner.run("baseline", baseline_bench, save=True)
baseline_path = list(tmp_path.glob("baseline_*.json"))[0]
# Create faster version
def improved_bench(bench):
with bench.timer("operation"):
time.sleep(0.05)
runner.run("improved", improved_bench, save=True)
improved_path = list(tmp_path.glob("improved_*.json"))[0]
# Compare
from skill_seekers.benchmark.models import ComparisonReport
comparison = runner.compare(baseline_path, improved_path)
assert isinstance(comparison, ComparisonReport)
assert comparison.speedup_factor > 1.0
assert len(comparison.improvements) > 0
def test_list_benchmarks(self, tmp_path):
"""Test listing benchmarks."""
runner = BenchmarkRunner(output_dir=tmp_path)
# Create some benchmarks
def test_bench(bench):
with bench.timer("op"):
time.sleep(0.02)
runner.run("bench1", test_bench, save=True)
runner.run("bench2", test_bench, save=True)
benchmarks = runner.list_benchmarks()
assert len(benchmarks) == 2
assert all("name" in b for b in benchmarks)
assert all("duration" in b for b in benchmarks)
def test_get_latest(self, tmp_path):
"""Test getting latest benchmark."""
runner = BenchmarkRunner(output_dir=tmp_path)
def test_bench(bench):
with bench.timer("op"):
time.sleep(0.02)
# Run same benchmark twice
runner.run("test", test_bench, save=True)
time.sleep(0.1) # Ensure different timestamps
runner.run("test", test_bench, save=True)
latest = runner.get_latest("test")
assert latest is not None
assert "test_" in latest.name
def test_get_latest_not_found(self, tmp_path):
"""Test getting latest when benchmark doesn't exist."""
runner = BenchmarkRunner(output_dir=tmp_path)
latest = runner.get_latest("nonexistent")
assert latest is None
def test_cleanup_old(self, tmp_path):
"""Test cleaning up old benchmarks."""
import os
runner = BenchmarkRunner(output_dir=tmp_path)
# Create 10 benchmark files with different timestamps
base_time = time.time()
for i in range(10):
filename = f"test_{i:08d}.json"
file_path = tmp_path / filename
# Create minimal valid report
report_data = {
"name": "test",
"started_at": datetime.utcnow().isoformat(),
"finished_at": datetime.utcnow().isoformat(),
"total_duration": 1.0,
"timings": [],
"memory": [],
"metrics": [],
"system_info": {},
"recommendations": [],
}
with open(file_path, "w") as f:
json.dump(report_data, f)
# Set different modification times
mtime = base_time - (10 - i) * 60 # Older files have older mtimes
os.utime(file_path, (mtime, mtime))
# Verify we have 10 files
assert len(list(tmp_path.glob("test_*.json"))) == 10
# Keep only latest 3
runner.cleanup_old(keep_latest=3)
remaining = list(tmp_path.glob("test_*.json"))
assert len(remaining) == 3
# Verify we kept the newest files (7, 8, 9)
remaining_names = {f.stem for f in remaining}
assert "test_00000007" in remaining_names or "test_00000008" in remaining_names
class TestBenchmarkModels:
"""Test benchmark model classes."""
def test_timing_result_model(self):
"""Test TimingResult model."""
timing = TimingResult(operation="test", duration=1.5, iterations=10, avg_duration=0.15)
assert timing.operation == "test"
assert timing.duration == 1.5
assert timing.iterations == 10
assert timing.avg_duration == 0.15
def test_memory_usage_model(self):
"""Test MemoryUsage model."""
usage = MemoryUsage(
operation="allocate", before_mb=100.0, after_mb=200.0, peak_mb=250.0, allocated_mb=100.0
)
assert usage.operation == "allocate"
assert usage.allocated_mb == 100.0
assert usage.peak_mb == 250.0
def test_metric_model(self):
"""Test Metric model."""
metric = Metric(name="throughput", value=125.5, unit="ops/sec")
assert metric.name == "throughput"
assert metric.value == 125.5
assert metric.unit == "ops/sec"
assert isinstance(metric.timestamp, datetime)
def test_benchmark_report_summary(self):
"""Test BenchmarkReport summary property."""
report = BenchmarkReport(
name="test",
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=5.0,
timings=[TimingResult(operation="op1", duration=2.0, iterations=1, avg_duration=2.0)],
memory=[
MemoryUsage(
operation="op1",
before_mb=100.0,
after_mb=200.0,
peak_mb=250.0,
allocated_mb=100.0,
)
],
metrics=[],
system_info={},
recommendations=[],
)
summary = report.summary
assert "test" in summary
assert "5.00s" in summary
assert "250.0MB" in summary
def test_comparison_report_has_regressions(self):
"""Test ComparisonReport has_regressions property."""
from skill_seekers.benchmark.models import ComparisonReport
baseline = BenchmarkReport(
name="baseline",
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=5.0,
timings=[],
memory=[],
metrics=[],
system_info={},
recommendations=[],
)
current = BenchmarkReport(
name="current",
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=10.0,
timings=[],
memory=[],
metrics=[],
system_info={},
recommendations=[],
)
comparison = ComparisonReport(
name="test",
baseline=baseline,
current=current,
improvements=[],
regressions=["Slower performance"],
speedup_factor=0.5,
memory_change_mb=0.0,
)
assert comparison.has_regressions is True
def test_comparison_report_overall_improvement(self):
"""Test ComparisonReport overall_improvement property."""
from skill_seekers.benchmark.models import ComparisonReport
baseline = BenchmarkReport(
name="baseline",
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=10.0,
timings=[],
memory=[],
metrics=[],
system_info={},
recommendations=[],
)
current = BenchmarkReport(
name="current",
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=5.0,
timings=[],
memory=[],
metrics=[],
system_info={},
recommendations=[],
)
comparison = ComparisonReport(
name="test",
baseline=baseline,
current=current,
improvements=[],
regressions=[],
speedup_factor=2.0,
memory_change_mb=0.0,
)
improvement = comparison.overall_improvement
assert "100.0% faster" in improvement
assert "" in improvement