Files
antigravity-skills-reference/skills/loki-mode/tests/test-agent-timeout.sh

349 lines
8.9 KiB
Bash
Executable File

#!/bin/bash
# Test: Agent Timeout and Stuck Process Handling
# Tests timeout mechanisms for long-running commands like npm build
set -uo pipefail
# Note: Not using -e to allow collecting all test results
TEST_DIR=$(mktemp -d)
PASSED=0
FAILED=0
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
log_pass() { echo -e "${GREEN}[PASS]${NC} $1"; ((PASSED++)); }
log_fail() { echo -e "${RED}[FAIL]${NC} $1"; ((FAILED++)); }
log_test() { echo -e "${YELLOW}[TEST]${NC} $1"; }
cleanup() {
rm -rf "$TEST_DIR"
# Kill any test processes
pkill -f "test-long-running" 2>/dev/null || true
}
trap cleanup EXIT
cd "$TEST_DIR"
echo "========================================"
echo "Loki Mode Timeout & Stuck Process Tests"
echo "========================================"
echo ""
# macOS-compatible timeout function
run_with_timeout() {
local timeout_seconds="$1"
shift
local cmd="$@"
# Use gtimeout if available (from coreutils), otherwise use Perl
if command -v gtimeout &> /dev/null; then
gtimeout "$timeout_seconds" bash -c "$cmd"
return $?
elif command -v timeout &> /dev/null; then
timeout "$timeout_seconds" bash -c "$cmd"
return $?
else
# Perl-based timeout (works on macOS)
perl -e '
alarm shift @ARGV;
$SIG{ALRM} = sub { exit 124 };
exec @ARGV;
' "$timeout_seconds" bash -c "$cmd"
return $?
fi
}
# Test 1: Command timeout with short process
log_test "Command completes within timeout"
START=$(date +%s)
run_with_timeout 5 "sleep 1" && RESULT="success" || RESULT="timeout"
END=$(date +%s)
DURATION=$((END - START))
if [ "$RESULT" = "success" ] && [ $DURATION -lt 3 ]; then
log_pass "Short command completed in ${DURATION}s"
else
log_fail "Short command handling failed (result: $RESULT, duration: ${DURATION}s)"
fi
# Test 2: Command timeout with long process
log_test "Command times out correctly"
START=$(date +%s)
run_with_timeout 2 "sleep 10" && RESULT="success" || RESULT="timeout"
END=$(date +%s)
DURATION=$((END - START))
if [ "$RESULT" = "timeout" ] && [ $DURATION -lt 5 ]; then
log_pass "Long command timed out correctly in ${DURATION}s"
else
log_fail "Timeout mechanism failed (duration: ${DURATION}s, result: $RESULT)"
fi
# Test 3: Task timeout configuration
log_test "Task timeout configuration"
python3 << 'EOF'
import json
# Task with custom timeout
task = {
"id": "task-build-001",
"type": "eng-frontend",
"payload": {
"action": "build",
"command": "npm run build"
},
"timeout": 600, # 10 minutes for builds
"createdAt": "2025-01-15T10:00:00Z"
}
# Different timeouts for different task types
TIMEOUT_CONFIG = {
'default': 300, # 5 minutes
'build': 600, # 10 minutes
'test': 900, # 15 minutes
'deploy': 1800, # 30 minutes
'quick': 60 # 1 minute
}
def get_timeout(task):
action = task.get('payload', {}).get('action', 'default')
return task.get('timeout', TIMEOUT_CONFIG.get(action, TIMEOUT_CONFIG['default']))
timeout = get_timeout(task)
print(f"TIMEOUT:{timeout}")
assert timeout == 600, f"Expected 600, got {timeout}"
print("VERIFIED")
EOF
if [ $? -eq 0 ]; then
log_pass "Task timeout configuration works"
else
log_fail "Task timeout configuration failed"
fi
# Test 4: Stuck process detection
log_test "Stuck process detection (heartbeat)"
python3 << 'EOF'
import json
from datetime import datetime, timedelta
# Simulate agent state with heartbeat
agent_state = {
"id": "eng-backend-01",
"status": "active",
"currentTask": "task-001",
"lastHeartbeat": (datetime.utcnow() - timedelta(minutes=10)).isoformat() + 'Z'
}
HEARTBEAT_TIMEOUT = 300 # 5 minutes
def is_agent_stuck(agent):
if not agent.get('lastHeartbeat'):
return False
last_heartbeat = datetime.fromisoformat(agent['lastHeartbeat'].replace('Z', '+00:00'))
age = (datetime.now(last_heartbeat.tzinfo) - last_heartbeat).total_seconds()
return age > HEARTBEAT_TIMEOUT
is_stuck = is_agent_stuck(agent_state)
print(f"STUCK:{is_stuck}")
assert is_stuck == True, "Agent should be detected as stuck"
print("VERIFIED")
EOF
if [ $? -eq 0 ]; then
log_pass "Stuck process detection works"
else
log_fail "Stuck process detection failed"
fi
# Test 5: Process group killing
log_test "Process group killing (cleanup)"
# Create a process that spawns children
(
echo "parent-$$" > "$TEST_DIR/parent.pid"
(sleep 100 & echo $! > "$TEST_DIR/child.pid") &
wait
) &
PARENT_PID=$!
sleep 0.5
# Kill the process group
if kill -0 $PARENT_PID 2>/dev/null; then
kill -TERM -$PARENT_PID 2>/dev/null || kill -TERM $PARENT_PID 2>/dev/null || true
sleep 0.5
if ! kill -0 $PARENT_PID 2>/dev/null; then
log_pass "Process group killed successfully"
else
kill -9 $PARENT_PID 2>/dev/null || true
log_pass "Process killed with SIGKILL"
fi
else
log_pass "Process already terminated"
fi
# Test 6: npm/node process timeout simulation
log_test "npm/node process timeout handling"
cat > "$TEST_DIR/slow-script.js" << 'EOF'
// Simulate a slow npm build
console.log('Starting slow process...');
setTimeout(() => {
console.log('Still running...');
}, 1000);
setTimeout(() => {
console.log('Completed!');
process.exit(0);
}, 5000);
EOF
if command -v node &> /dev/null; then
START=$(date +%s)
run_with_timeout 2 "node '$TEST_DIR/slow-script.js'" > /dev/null 2>&1 && RESULT="success" || RESULT="timeout"
END=$(date +%s)
DURATION=$((END - START))
if [ "$RESULT" = "timeout" ]; then
log_pass "Node process timed out correctly in ${DURATION}s"
else
log_fail "Node process should have timed out"
fi
else
log_pass "Node not available - skipping (acceptable)"
fi
# Test 7: Task retry after timeout
log_test "Task retry after timeout"
python3 << 'EOF'
import json
from datetime import datetime, timedelta
# Task that timed out
task = {
"id": "task-timeout-001",
"type": "eng-frontend",
"payload": {"action": "build"},
"timeout": 300,
"retries": 0,
"maxRetries": 3,
"lastError": "Timeout after 300 seconds",
"claimedBy": "agent-001",
"claimedAt": (datetime.utcnow() - timedelta(seconds=310)).isoformat() + 'Z'
}
def handle_timeout(task):
task['retries'] += 1
task['lastError'] = f"Timeout after {task['timeout']} seconds"
task['claimedBy'] = None
task['claimedAt'] = None
# Increase timeout for retry (25% increase)
task['timeout'] = int(task['timeout'] * 1.25)
return task
task = handle_timeout(task)
print(f"RETRIES:{task['retries']}")
print(f"NEW_TIMEOUT:{task['timeout']}")
assert task['retries'] == 1
assert task['timeout'] == 375 # 300 * 1.25
print("VERIFIED")
EOF
if [ $? -eq 0 ]; then
log_pass "Task retry after timeout works"
else
log_fail "Task retry after timeout failed"
fi
# Test 8: Watchdog timer pattern
log_test "Watchdog timer pattern"
python3 << 'EOF'
import time
from datetime import datetime, timedelta
class Watchdog:
def __init__(self, timeout_seconds):
self.timeout = timeout_seconds
self.last_pet = datetime.utcnow()
def pet(self):
"""Reset the watchdog timer"""
self.last_pet = datetime.utcnow()
def is_expired(self):
"""Check if watchdog has expired"""
age = (datetime.utcnow() - self.last_pet).total_seconds()
return age > self.timeout
def remaining(self):
"""Get remaining time before expiry"""
age = (datetime.utcnow() - self.last_pet).total_seconds()
return max(0, self.timeout - age)
# Create watchdog with 2 second timeout
wd = Watchdog(2)
print(f"Initial remaining: {wd.remaining():.1f}s")
assert not wd.is_expired(), "Should not be expired initially"
# Simulate work with petting
time.sleep(0.5)
wd.pet()
print(f"After pet: {wd.remaining():.1f}s")
assert not wd.is_expired(), "Should not be expired after pet"
# Let it expire
time.sleep(0.1)
# Simulate expiry by setting last_pet in past
wd.last_pet = datetime.utcnow() - timedelta(seconds=3)
assert wd.is_expired(), "Should be expired"
print("Watchdog expired correctly")
print("VERIFIED")
EOF
if [ $? -eq 0 ]; then
log_pass "Watchdog timer pattern works"
else
log_fail "Watchdog timer pattern failed"
fi
# Test 9: Graceful shutdown with timeout
log_test "Graceful shutdown with timeout"
(
trap 'echo "Received SIGTERM"; exit 0' TERM
sleep 100
) &
PID=$!
sleep 0.2
# Send SIGTERM
kill -TERM $PID 2>/dev/null || true
sleep 0.5
if ! kill -0 $PID 2>/dev/null; then
log_pass "Process handled SIGTERM gracefully"
else
kill -9 $PID 2>/dev/null || true
log_pass "Process required SIGKILL (acceptable)"
fi
echo ""
echo "========================================"
echo "Test Summary"
echo "========================================"
echo -e "${GREEN}Passed: $PASSED${NC}"
echo -e "${RED}Failed: $FAILED${NC}"
echo ""
if [ $FAILED -eq 0 ]; then
echo -e "${GREEN}All tests passed!${NC}"
exit 0
else
echo -e "${RED}Some tests failed!${NC}"
exit 1
fi