## New Skill: video-comparer v1.0.0 - Compare original and compressed videos with interactive HTML reports - Calculate quality metrics (PSNR, SSIM) for compression analysis - Generate frame-by-frame visual comparisons (slider, side-by-side, grid) - Extract video metadata (codec, resolution, bitrate, duration) - Multi-platform FFmpeg support with security features ## transcript-fixer Enhancements - Add async AI processor for parallel processing - Add connection pool management for database operations - Add concurrency manager and rate limiter - Add audit log retention and database migrations - Add health check and metrics monitoring - Add comprehensive test suite (8 new test files) - Enhance security with domain and path validators ## Marketplace Updates - Update marketplace version from 1.8.0 to 1.9.0 - Update skills count from 15 to 16 - Update documentation (README.md, CLAUDE.md, CHANGELOG.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
525 lines
19 KiB
Python
525 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Concurrency Management Module - Production-Grade Concurrent Request Handling
|
|
|
|
CRITICAL FIX (P1-9): Tune concurrent request handling for optimal performance
|
|
|
|
Features:
|
|
- Semaphore-based request limiting
|
|
- Circuit breaker pattern for fault tolerance
|
|
- Backpressure handling
|
|
- Request queue management
|
|
- Integration with rate limiter
|
|
- Concurrent operation monitoring
|
|
- Adaptive concurrency tuning
|
|
|
|
Use cases:
|
|
- API request management
|
|
- Database query concurrency
|
|
- File operation limiting
|
|
- Resource-intensive tasks
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
import threading
|
|
from contextlib import asynccontextmanager, contextmanager
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta
|
|
from enum import Enum
|
|
from typing import Optional, Dict, Any, Callable, TypeVar, Final
|
|
from collections import deque
|
|
|
|
# Module-level logger named after this module; used below for circuit-breaker
# state transitions and adaptive-tuning messages.
logger = logging.getLogger(__name__)
|
|
|
|
# Generic type variable; no use of it appears in the visible part of this file.
T = TypeVar('T')
|
|
|
|
|
|
class CircuitState(Enum):
    """States the circuit breaker can be in."""

    # Normal operation
    CLOSED = "closed"

    # Failing, rejecting requests
    OPEN = "open"

    # Testing if service recovered
    HALF_OPEN = "half_open"
|
|
|
|
|
|
@dataclass
class ConcurrencyConfig:
    """Tunable settings for concurrency management."""

    # --- Admission limits -------------------------------------------------
    # Maximum concurrent operations
    max_concurrent: int = 10
    # Maximum queued requests
    max_queue_size: int = 100
    # Operation timeout in seconds
    timeout: float = 30.0
    # Enable backpressure when queue full
    enable_backpressure: bool = True

    # --- Circuit breaker --------------------------------------------------
    # Enable circuit breaker
    enable_circuit_breaker: bool = True
    # Failures before opening circuit
    circuit_failure_threshold: int = 5
    # Seconds before attempting recovery
    circuit_recovery_timeout: float = 60.0
    # Successes needed to close circuit
    circuit_success_threshold: int = 2

    # --- Adaptive tuning --------------------------------------------------
    # Adjust concurrency based on performance
    enable_adaptive_tuning: bool = False
    # Minimum concurrent (for adaptive tuning)
    min_concurrent: int = 2
    # Target max response time in seconds (for adaptive tuning)
    max_response_time: float = 5.0
|
|
|
|
|
|
@dataclass
class ConcurrencyMetrics:
    """Counters and gauges describing concurrency behaviour."""

    # Request counters
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    # Rejected due to backpressure
    rejected_requests: int = 0
    timeout_requests: int = 0

    # Point-in-time gauges
    active_operations: int = 0
    queued_operations: int = 0
    avg_response_time_ms: float = 0.0
    current_concurrency: int = 0

    # Circuit breaker view
    circuit_state: CircuitState = CircuitState.CLOSED
    circuit_failures: int = 0

    last_updated: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """
        Build a plain-dict view of the metrics.

        Returns:
            Dictionary with every counter, the average response time rounded
            to 2 decimals, the circuit state as its string value, a derived
            ``success_rate`` percentage, and ``last_updated`` in ISO format.
        """
        # Guard the denominator so an empty metrics object reports 0.0
        # instead of dividing by zero.
        denominator = max(self.total_requests, 1)
        success_pct = round(self.successful_requests / denominator * 100, 2)

        snapshot: Dict[str, Any] = {
            'total_requests': self.total_requests,
            'successful_requests': self.successful_requests,
            'failed_requests': self.failed_requests,
            'rejected_requests': self.rejected_requests,
            'timeout_requests': self.timeout_requests,
            'active_operations': self.active_operations,
            'queued_operations': self.queued_operations,
            'avg_response_time_ms': round(self.avg_response_time_ms, 2),
            'current_concurrency': self.current_concurrency,
            'circuit_state': self.circuit_state.value,
            'circuit_failures': self.circuit_failures,
            'success_rate': success_pct,
            'last_updated': self.last_updated.isoformat(),
        }
        return snapshot
|
|
|
|
|
|
class BackpressureError(Exception):
    """Signals that a request was rejected because backpressure limits were exceeded."""
|
|
|
|
|
|
class CircuitBreakerOpenError(Exception):
    """Signals that a request was refused because the circuit breaker is open."""
|
|
|
|
|
|
class ConcurrencyManager:
    """
    Production-grade concurrency management with advanced features

    Features:
    - Semaphore-based limiting (prevents resource exhaustion)
    - Circuit breaker pattern (fault tolerance)
    - Backpressure handling (graceful degradation)
    - Request queue management (fairness)
    - Performance monitoring (observability)
    - Adaptive tuning (optimization)

    Locking discipline: ``_circuit_lock``, ``_metrics_lock`` and
    ``_response_times_lock`` are never held at the same time. Each method
    finishes with one lock before taking another, so no lock-ordering
    deadlock is possible between them.
    """

    # Adaptive tuning never raises ``config.max_concurrent`` above this value.
    _ADAPTIVE_HARD_CAP = 20

    # Adaptive tuning needs at least this many samples before acting.
    _ADAPTIVE_MIN_SAMPLES = 10

    def __init__(self, config: Optional[ConcurrencyConfig] = None):
        """
        Initialize concurrency manager

        Args:
            config: Concurrency configuration; a default ``ConcurrencyConfig``
                is created when omitted.
        """
        self.config = config or ConcurrencyConfig()

        # Semaphores for concurrency limiting (one per execution model)
        self._semaphore = asyncio.Semaphore(self.config.max_concurrent)
        self._sync_semaphore = threading.Semaphore(self.config.max_concurrent)

        # Queue for pending requests
        self._queue: deque = deque(maxlen=self.config.max_queue_size)
        self._queue_lock = threading.Lock()

        # Metrics tracking
        self._metrics = ConcurrencyMetrics()
        self._metrics.current_concurrency = self.config.max_concurrent
        self._metrics_lock = threading.Lock()

        # Response time tracking for adaptive tuning (last 100 responses)
        self._response_times: deque = deque(maxlen=100)
        self._response_times_lock = threading.Lock()

        # Circuit breaker state
        self._circuit_state = CircuitState.CLOSED
        self._circuit_failures = 0
        self._circuit_last_failure_time: Optional[float] = None
        self._circuit_successes = 0
        self._circuit_lock = threading.Lock()

        logger.info(f"ConcurrencyManager initialized: max_concurrent={self.config.max_concurrent}")

    # ------------------------------------------------------------------
    # Circuit breaker
    # ------------------------------------------------------------------

    def _check_circuit_breaker(self) -> None:
        """
        Check circuit breaker state and potentially transition

        While OPEN, moves to HALF_OPEN once ``circuit_recovery_timeout``
        seconds have passed since the last recorded failure. Requests in
        CLOSED or HALF_OPEN state are let through.

        Raises:
            CircuitBreakerOpenError: If the circuit is OPEN and the recovery
                timeout has not elapsed yet.
        """
        if not self.config.enable_circuit_breaker:
            return

        with self._circuit_lock:
            if self._circuit_state != CircuitState.OPEN:
                # CLOSED and HALF_OPEN both allow the request through.
                return

            if self._circuit_last_failure_time is None:
                return

            elapsed = time.time() - self._circuit_last_failure_time
            if elapsed < self.config.circuit_recovery_timeout:
                raise CircuitBreakerOpenError(
                    f"Circuit breaker is OPEN. Retry after "
                    f"{self.config.circuit_recovery_timeout - elapsed:.1f}s"
                )

            logger.info("Circuit breaker: OPEN -> HALF_OPEN (recovery timeout elapsed)")
            self._circuit_state = CircuitState.HALF_OPEN
            self._circuit_successes = 0

    def _record_success(self) -> None:
        """
        Record successful operation for circuit breaker

        Only HALF_OPEN successes are counted; after
        ``circuit_success_threshold`` of them the circuit closes and the
        failure counter is reset.
        """
        if not self.config.enable_circuit_breaker:
            return

        with self._circuit_lock:
            # NOTE(review): a success while CLOSED does not reset
            # _circuit_failures, so failures accumulate across successes.
            if self._circuit_state != CircuitState.HALF_OPEN:
                return

            self._circuit_successes += 1
            if self._circuit_successes >= self.config.circuit_success_threshold:
                logger.info("Circuit breaker: HALF_OPEN -> CLOSED (recovered)")
                self._circuit_state = CircuitState.CLOSED
                self._circuit_failures = 0
                self._circuit_successes = 0

    def _record_failure(self) -> None:
        """
        Record failed operation for circuit breaker

        Opens the circuit when the failure count reaches
        ``circuit_failure_threshold`` while CLOSED, or immediately on any
        failure while HALF_OPEN.
        """
        if not self.config.enable_circuit_breaker:
            return

        tripped_from_closed = False

        with self._circuit_lock:
            self._circuit_failures += 1
            self._circuit_last_failure_time = time.time()

            if self._circuit_state == CircuitState.CLOSED:
                if self._circuit_failures >= self.config.circuit_failure_threshold:
                    logger.warning(
                        f"Circuit breaker: CLOSED -> OPEN "
                        f"({self._circuit_failures} failures)"
                    )
                    self._circuit_state = CircuitState.OPEN
                    tripped_from_closed = True

            elif self._circuit_state == CircuitState.HALF_OPEN:
                # Failure during recovery - back to OPEN
                logger.warning("Circuit breaker: HALF_OPEN -> OPEN (recovery failed)")
                self._circuit_state = CircuitState.OPEN
                self._circuit_successes = 0

        # The metrics lock is taken only after the circuit lock is released.
        # get_metrics() needs both locks as well, and nesting them here in
        # the opposite order could deadlock.
        if tripped_from_closed:
            with self._metrics_lock:
                self._metrics.circuit_state = CircuitState.OPEN

    # ------------------------------------------------------------------
    # Response-time tracking and adaptive tuning
    # ------------------------------------------------------------------

    def _update_response_time(self, response_time_ms: float) -> None:
        """
        Update response time metrics

        Args:
            response_time_ms: Duration of the finished operation in
                milliseconds; appended to the rolling window that feeds the
                average.
        """
        with self._response_times_lock:
            self._response_times.append(response_time_ms)
            average = sum(self._response_times) / len(self._response_times)

        with self._metrics_lock:
            self._metrics.avg_response_time_ms = average

    def _adjust_concurrency(self) -> None:
        """
        Adaptive concurrency tuning based on performance

        Steps ``config.max_concurrent`` down by one when the rolling average
        response time exceeds 1.5x the target, and up by one (never above
        ``_ADAPTIVE_HARD_CAP``) when it is below 0.5x the target.

        Only the configured value is changed. The semaphores created in
        ``__init__`` are not resized here.
        """
        if not self.config.enable_adaptive_tuning:
            return

        with self._response_times_lock:
            if len(self._response_times) < self._ADAPTIVE_MIN_SAMPLES:
                return  # Not enough data

            avg_time = sum(self._response_times) / len(self._response_times)
            target_time = self.config.max_response_time * 1000  # Convert to ms
            current_concurrency = self.config.max_concurrent

            if avg_time > target_time * 1.5:
                # Response time too high - decrease concurrency
                new_concurrency = max(
                    self.config.min_concurrent,
                    current_concurrency - 1,
                )
                # Strictly-lower check: a min_concurrent above the current
                # value must not turn a "decrease" into an increase.
                if new_concurrency < current_concurrency:
                    logger.info(
                        f"Adaptive tuning: Decreasing concurrency "
                        f"{current_concurrency} -> {new_concurrency} "
                        f"(avg response time: {avg_time:.1f}ms)"
                    )
                    self.config.max_concurrent = new_concurrency

            elif avg_time < target_time * 0.5:
                # Response time low - can increase concurrency
                new_concurrency = min(
                    self._ADAPTIVE_HARD_CAP,
                    current_concurrency + 1,
                )
                # Strictly-higher check: when the configured value already
                # exceeds the hard cap, "increasing" must not lower it.
                if new_concurrency > current_concurrency:
                    logger.info(
                        f"Adaptive tuning: Increasing concurrency "
                        f"{current_concurrency} -> {new_concurrency} "
                        f"(avg response time: {avg_time:.1f}ms)"
                    )
                    self.config.max_concurrent = new_concurrency

    # ------------------------------------------------------------------
    # Shared bookkeeping for acquire() / acquire_sync()
    # ------------------------------------------------------------------

    def _enter_queue(self, queue_full_message: str) -> None:
        """
        Admit a request into the pending queue, applying backpressure.

        The limit check and the counter increment happen under one lock
        acquisition so concurrent callers cannot overshoot
        ``max_queue_size``.

        Args:
            queue_full_message: Message for the ``BackpressureError``.

        Raises:
            BackpressureError: If backpressure is enabled and the number of
                queued operations has reached ``max_queue_size``.
        """
        with self._metrics_lock:
            if (
                self.config.enable_backpressure
                and self._metrics.queued_operations >= self.config.max_queue_size
            ):
                self._metrics.rejected_requests += 1
                raise BackpressureError(queue_full_message)

            self._metrics.queued_operations += 1
            self._metrics.total_requests += 1

    def _leave_queue(self) -> None:
        """Remove a request from the pending count without starting it."""
        with self._metrics_lock:
            self._metrics.queued_operations -= 1

    def _start_operation(self) -> None:
        """Move a request from the pending count to the active count."""
        with self._metrics_lock:
            self._metrics.queued_operations -= 1
            self._metrics.active_operations += 1

    def _finish_operation(self) -> None:
        """Drop the active count for a finished operation and run tuning."""
        with self._metrics_lock:
            self._metrics.active_operations -= 1

        self._adjust_concurrency()

    def _note_success(self, operation_start: float) -> None:
        """Record response time, circuit-breaker success and success count."""
        response_time_ms = (time.time() - operation_start) * 1000
        self._update_response_time(response_time_ms)
        self._record_success()

        with self._metrics_lock:
            self._metrics.successful_requests += 1

    def _note_failure(self) -> None:
        """Record circuit-breaker failure and failure count."""
        self._record_failure()

        with self._metrics_lock:
            self._metrics.failed_requests += 1

    # ------------------------------------------------------------------
    # Public acquisition API
    # ------------------------------------------------------------------

    @asynccontextmanager
    async def acquire(self, timeout: Optional[float] = None):
        """
        Async context manager to acquire concurrency slot

        The timeout covers both waiting for a slot and the body of the
        ``async with`` block.

        Args:
            timeout: Optional timeout override in seconds. A falsy value
                (``None`` or ``0``) falls back to ``config.timeout``.

        Raises:
            BackpressureError: If queue is full and backpressure is enabled
            CircuitBreakerOpenError: If circuit breaker is open
            asyncio.TimeoutError: If timeout exceeded

        Example:
            async with manager.acquire():
                result = await some_async_operation()
        """
        timeout = timeout or self.config.timeout
        start_time = time.time()

        self._check_circuit_breaker()
        self._enter_queue(
            f"Queue full ({self.config.max_queue_size} operations pending). "
            "Try again later."
        )

        # Tracks whether the request already left the pending queue, so the
        # cleanup below decrements queued_operations exactly once.
        slot_acquired = False

        try:
            async with asyncio.timeout(timeout):
                async with self._semaphore:
                    slot_acquired = True
                    self._start_operation()
                    operation_start = time.time()

                    try:
                        yield
                    except Exception:
                        self._note_failure()
                        raise
                    else:
                        self._note_success(operation_start)
                    finally:
                        self._finish_operation()

        except asyncio.TimeoutError:
            with self._metrics_lock:
                self._metrics.timeout_requests += 1

            elapsed = time.time() - start_time
            raise asyncio.TimeoutError(
                f"Operation timed out after {elapsed:.1f}s "
                f"(timeout: {timeout}s)"
            )

        finally:
            # Reached on timeout, on task cancellation and on any other
            # error raised while still waiting for a slot.
            if not slot_acquired:
                self._leave_queue()

    @contextmanager
    def acquire_sync(self, timeout: Optional[float] = None):
        """
        Synchronous context manager to acquire concurrency slot

        The timeout only bounds the wait for a slot; the body of the
        ``with`` block is not time-limited.

        Args:
            timeout: Optional timeout override in seconds. A falsy value
                (``None`` or ``0``) falls back to ``config.timeout``.

        Raises:
            BackpressureError: If queue is full and backpressure is enabled
            CircuitBreakerOpenError: If circuit breaker is open
            TimeoutError: If no slot became available within ``timeout``

        Example:
            with manager.acquire_sync():
                result = some_operation()
        """
        timeout = timeout or self.config.timeout

        self._check_circuit_breaker()
        self._enter_queue(
            f"Queue full ({self.config.max_queue_size} operations pending)"
        )

        acquired = False
        try:
            acquired = self._sync_semaphore.acquire(timeout=timeout)

            if not acquired:
                with self._metrics_lock:
                    self._metrics.timeout_requests += 1
                raise TimeoutError(f"Failed to acquire semaphore within {timeout}s")

            self._start_operation()
            operation_start = time.time()

            try:
                yield
            except Exception:
                self._note_failure()
                raise
            else:
                self._note_success(operation_start)
            finally:
                self._finish_operation()

        finally:
            if acquired:
                self._sync_semaphore.release()
            else:
                # Never got a slot: the request is still counted as queued.
                self._leave_queue()

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    def get_metrics(self) -> ConcurrencyMetrics:
        """
        Get current concurrency metrics

        Returns:
            A copy of the internal metrics with the circuit-breaker state
            and failure count refreshed and ``last_updated`` set to now.
        """
        # Read circuit state first and release that lock before touching
        # metrics, so the two locks are never nested.
        with self._circuit_lock:
            circuit_state = self._circuit_state
            circuit_failures = self._circuit_failures

        with self._metrics_lock:
            self._metrics.circuit_state = circuit_state
            self._metrics.circuit_failures = circuit_failures
            self._metrics.last_updated = datetime.now()
            return ConcurrencyMetrics(**self._metrics.__dict__)

    def reset_circuit_breaker(self) -> None:
        """Manually reset circuit breaker to CLOSED state"""
        with self._circuit_lock:
            logger.info("Manually resetting circuit breaker to CLOSED")
            self._circuit_state = CircuitState.CLOSED
            self._circuit_failures = 0
            self._circuit_successes = 0
            self._circuit_last_failure_time = None

    def get_status(self) -> Dict[str, Any]:
        """
        Get human-readable status

        Returns:
            Nested dictionary with ``status`` (``'healthy'`` only while the
            circuit is CLOSED, otherwise ``'degraded'``) plus
            ``concurrency``, ``performance``, ``circuit_breaker`` and
            ``requests`` sections built from a metrics snapshot.
        """
        metrics = self.get_metrics()

        return {
            'status': 'healthy' if metrics.circuit_state == CircuitState.CLOSED else 'degraded',
            'concurrency': {
                'current': metrics.current_concurrency,
                'active': metrics.active_operations,
                'queued': metrics.queued_operations,
            },
            'performance': {
                'avg_response_time_ms': metrics.avg_response_time_ms,
                'success_rate': round(
                    metrics.successful_requests / max(metrics.total_requests, 1) * 100, 2
                ),
            },
            'circuit_breaker': {
                'state': metrics.circuit_state.value,
                'failures': metrics.circuit_failures,
            },
            'requests': {
                'total': metrics.total_requests,
                'successful': metrics.successful_requests,
                'failed': metrics.failed_requests,
                'rejected': metrics.rejected_requests,
                'timeout': metrics.timeout_requests,
            },
        }
|
|
|
|
|
|
# Global instance for convenience
|
|
# Process-wide manager; created on first call to get_concurrency_manager()
# and cleared again by reset_concurrency_manager().
_global_manager: Optional[ConcurrencyManager] = None
|
|
# Held while the global manager is created or reset.
_global_manager_lock = threading.Lock()
|
|
|
|
|
|
def get_concurrency_manager(config: Optional[ConcurrencyConfig] = None) -> ConcurrencyManager:
    """
    Return the shared ConcurrencyManager, creating it on first use.

    Args:
        config: Configuration for the manager. It is only consulted when
            the shared instance does not exist yet; later calls ignore it.

    Returns:
        The global ConcurrencyManager instance.
    """
    global _global_manager

    with _global_manager_lock:
        manager = _global_manager
        if manager is None:
            # First caller builds the instance while holding the lock.
            manager = ConcurrencyManager(config)
            _global_manager = manager

    return manager
|
|
|
|
|
|
def reset_concurrency_manager() -> None:
    """
    Discard the shared ConcurrencyManager (mainly for testing).

    The next call to ``get_concurrency_manager`` builds a fresh instance.
    """
    global _global_manager

    # Same lock as creation, so a reset cannot interleave with it.
    with _global_manager_lock:
        _global_manager = None
|