**Bug fixes (run_experiment.py):**
- Fix broken revert logic: was saving HEAD as pre_commit (no-op revert), now uses git reset --hard HEAD~1 for correct rollback
- Remove broken --loop mode (agent IS the loop, script handles one iteration)
- Fix shell injection: all git commands use subprocess list form
- Replace shell tail with Python file read

**Bug fixes (other scripts):**
- setup_experiment.py: fix shell injection in git branch creation, remove dead --skip-baseline flag, fix evaluator docstring parsing
- log_results.py: fix 6 falsy-zero bugs (baseline=0 treated as None), add domain_filter to CSV/markdown export, move import time to top
- evaluators: add FileNotFoundError handling, fix output format mismatch in llm_judge_copy, add peak_kb on macOS, add ValueError handling

**Plugin packaging (NEW):**
- plugin.json, settings.json, CLAUDE.md for plugin registry
- 5 slash commands: /ar:setup, /ar:run, /ar:loop, /ar:status, /ar:resume
- /ar:loop supports user-selected intervals (10m, 1h, daily, weekly, monthly)
- experiment-runner agent for autonomous loop iterations
- Registered in marketplace.json as plugin #20

**SKILL.md rewrite:**
- Replace ambiguous "Loop Protocol" with clear "Agent Protocol"
- Add results.tsv format spec, strategy escalation, self-improvement
- Replace "NEVER STOP" with resumable stopping logic

**Docs & sync:**
- Codex (157 skills), Gemini (229 items), convert.sh all pick up the skill
- 6 new MkDocs pages, mkdocs.yml nav updated
- Counts updated: 17 agents, 22 slash commands

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
#!/usr/bin/env python3
"""Measure file, bundle, or Docker image size.

DO NOT MODIFY after experiment starts — this is the fixed evaluator."""

import os
|
|
import subprocess
|
|
import sys
|
|
|
|
# --- CONFIGURE ONE OF THESE ---
# The optional settings below are detected by name at runtime, so an option is
# enabled simply by uncommenting its assignment. If several are active, the
# measurement precedence is DOCKER_IMAGE, then TARGET_DIR, then TARGET_FILE.

# Option 1: File size
TARGET_FILE = "dist/main.js"

# Option 2: Directory size (uncomment to use)
# TARGET_DIR = "dist/"

# Option 3: Docker image (uncomment to use)
# DOCKER_IMAGE = "myapp:latest"
# DOCKER_BUILD_CMD = "docker build -t myapp:latest ."

# Option 4: Build first, then measure (uncomment to use)
# BUILD_CMD = "npm run build"
# --- END CONFIG ---

# Build if needed
def run_build(cmd):
    """Run the configured build command and abort the evaluation if it fails.

    Args:
        cmd: Shell command string (e.g. "npm run build"). It is executed with
            shell=True because it is a developer-configured command line, not
            untrusted input.

    Exits the process with status 1 after printing the first 200 characters of
    the command's stderr when the command returns a non-zero status.
    """
    result = subprocess.run(cmd, shell=True, capture_output=True)
    if result.returncode != 0:
        # errors="replace": build tools may emit non-UTF-8 bytes; a strict
        # decode would raise UnicodeDecodeError and mask the build failure.
        detail = result.stderr.decode(errors="replace")[:200]
        print(f"Build failed: {detail}", file=sys.stderr)
        sys.exit(1)


# BUILD_CMD only exists when it has been uncommented in the config section,
# so look it up by name rather than referencing it directly.
if "BUILD_CMD" in globals():
    run_build(globals()["BUILD_CMD"])

# Measure
# Compute size_bytes for the configured target. Optional settings exist only
# when uncommented in the config section, so they are looked up by name.
# Precedence: DOCKER_IMAGE, then TARGET_DIR, then TARGET_FILE.
config = globals()

if "DOCKER_IMAGE" in config:
    if "DOCKER_BUILD_CMD" in config:
        # shell=True: the build command is a developer-configured shell string.
        build = subprocess.run(
            config["DOCKER_BUILD_CMD"],
            shell=True, capture_output=True, text=True, errors="replace",
        )
        if build.returncode != 0:
            # Without this check a failed build would leave a stale (or
            # missing) image to be measured as if it were the new result.
            print(f"Docker build failed: {build.stderr[:200]}", file=sys.stderr)
            sys.exit(1)

    try:
        # Argument-list form: the Go template braces need no shell quoting,
        # and the image name is never interpreted by a shell.
        result = subprocess.run(
            ["docker", "image", "inspect", config["DOCKER_IMAGE"],
             "--format", "{{.Size}}"],
            capture_output=True, text=True, errors="replace",
        )
    except OSError as exc:
        # Raised when the docker executable itself cannot be started.
        print(f"Could not run docker: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(f"docker image inspect failed: {result.stderr[:200]}", file=sys.stderr)
        sys.exit(1)

    try:
        size_bytes = int(result.stdout.strip())
    except ValueError:
        print(f"Could not parse size from: {result.stdout[:100]}", file=sys.stderr)
        sys.exit(1)

elif "TARGET_DIR" in config:
    target_dir = config["TARGET_DIR"]
    if not os.path.isdir(target_dir):
        # os.walk yields nothing for a missing directory, which would report
        # a size of 0 instead of signalling that the target does not exist.
        print(f"Target not found: {target_dir}", file=sys.stderr)
        sys.exit(1)
    # Sum the size of every regular entry below the directory, recursively.
    size_bytes = sum(
        os.path.getsize(os.path.join(dirpath, name))
        for dirpath, _, names in os.walk(target_dir)
        for name in names
    )

elif "TARGET_FILE" in config and os.path.exists(config["TARGET_FILE"]):
    size_bytes = os.path.getsize(config["TARGET_FILE"])

else:
    # Either the file is missing or no target option is active at all.
    missing = config.get("TARGET_FILE", "<no target configured>")
    print(f"Target not found: {missing}", file=sys.stderr)
    sys.exit(1)

# Report the measurement on stdout as "name: value" lines: the exact byte
# count, then the same size in KiB (one decimal) and MiB (two decimals).
size_kb = size_bytes / 1024
size_mb = size_bytes / (1024 * 1024)

print(f"size_bytes: {size_bytes}")
print(f"size_kb: {size_kb:.1f}")
print(f"size_mb: {size_mb:.2f}")