Files
claude-code-skills-reference/transcript-fixer/scripts/utils/concurrency_manager.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

525 lines
19 KiB
Python

#!/usr/bin/env python3
"""
Concurrency Management Module - Production-Grade Concurrent Request Handling
CRITICAL FIX (P1-9): Tune concurrent request handling for optimal performance
Features:
- Semaphore-based request limiting
- Circuit breaker pattern for fault tolerance
- Backpressure handling
- Request queue management
- Integration with rate limiter
- Concurrent operation monitoring
- Adaptive concurrency tuning
Use cases:
- API request management
- Database query concurrency
- File operation limiting
- Resource-intensive tasks
"""
from __future__ import annotations
import asyncio
import logging
import time
import threading
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import Optional, Dict, Any, Callable, TypeVar, Final
from collections import deque
# Module-level logger, named after this module per the logging convention.
logger = logging.getLogger(__name__)
# Generic type variable for annotating wrapped callables/results.
T = TypeVar('T')
class CircuitState(Enum):
    """States of the circuit-breaker state machine."""

    # Requests flow normally.
    CLOSED = "closed"
    # Too many failures; requests are rejected outright.
    OPEN = "open"
    # Probation: limited requests are let through to test recovery.
    HALF_OPEN = "half_open"
@dataclass
class ConcurrencyConfig:
    """Tunable knobs for the concurrency manager."""

    # Hard limit on simultaneously running operations.
    max_concurrent: int = 10
    # Upper bound on requests waiting for a slot.
    max_queue_size: int = 100
    # Per-operation timeout, in seconds.
    timeout: float = 30.0
    # Reject new work once the queue is full.
    enable_backpressure: bool = True
    # Trip a circuit breaker after repeated failures.
    enable_circuit_breaker: bool = True
    # Consecutive failures before the circuit opens.
    circuit_failure_threshold: int = 5
    # Seconds to wait before probing a tripped circuit.
    circuit_recovery_timeout: float = 60.0
    # Probe successes required to close the circuit again.
    circuit_success_threshold: int = 2
    # Grow/shrink concurrency based on observed latency.
    enable_adaptive_tuning: bool = False
    # Floor used when adaptive tuning shrinks concurrency.
    min_concurrent: int = 2
    # Latency target in seconds used by adaptive tuning.
    max_response_time: float = 5.0
@dataclass
class ConcurrencyMetrics:
    """Snapshot of concurrency-manager counters for monitoring."""

    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    # Requests turned away because the pending queue was full.
    rejected_requests: int = 0
    timeout_requests: int = 0
    active_operations: int = 0
    queued_operations: int = 0
    avg_response_time_ms: float = 0.0
    current_concurrency: int = 0
    circuit_state: CircuitState = CircuitState.CLOSED
    circuit_failures: int = 0
    last_updated: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the metrics to a JSON-friendly dictionary."""
        # Guard the success-rate division against zero total requests.
        attempted = max(self.total_requests, 1)
        payload: Dict[str, Any] = {
            'total_requests': self.total_requests,
            'successful_requests': self.successful_requests,
            'failed_requests': self.failed_requests,
            'rejected_requests': self.rejected_requests,
            'timeout_requests': self.timeout_requests,
            'active_operations': self.active_operations,
            'queued_operations': self.queued_operations,
            'avg_response_time_ms': round(self.avg_response_time_ms, 2),
            'current_concurrency': self.current_concurrency,
            'circuit_state': self.circuit_state.value,
            'circuit_failures': self.circuit_failures,
            'success_rate': round(self.successful_requests / attempted * 100, 2),
            'last_updated': self.last_updated.isoformat(),
        }
        return payload
class BackpressureError(Exception):
    """Signals that the pending-request queue is full and the call was rejected."""
class CircuitBreakerOpenError(Exception):
    """Signals that the circuit breaker is OPEN and the call was rejected."""
class ConcurrencyManager:
    """
    Production-grade concurrency management with advanced features

    Features:
    - Semaphore-based limiting (prevents resource exhaustion)
    - Circuit breaker pattern (fault tolerance)
    - Backpressure handling (graceful degradation)
    - Request queue management (fairness)
    - Performance monitoring (observability)
    - Adaptive tuning (optimization)

    Lock ordering: when both locks are needed, take ``_circuit_lock``
    BEFORE ``_metrics_lock`` (as ``_record_failure`` does). Every method
    here follows that order; violating it can deadlock.
    """

    def __init__(self, config: Optional[ConcurrencyConfig] = None):
        """
        Initialize concurrency manager

        Args:
            config: Concurrency configuration; defaults to ConcurrencyConfig().
        """
        self.config = config or ConcurrencyConfig()
        # Semaphores for concurrency limiting (async and sync entry points)
        self._semaphore = asyncio.Semaphore(self.config.max_concurrent)
        self._sync_semaphore = threading.Semaphore(self.config.max_concurrent)
        # Queue for pending requests
        self._queue: deque = deque(maxlen=self.config.max_queue_size)
        self._queue_lock = threading.Lock()
        # Metrics tracking
        self._metrics = ConcurrencyMetrics()
        self._metrics.current_concurrency = self.config.max_concurrent
        self._metrics_lock = threading.Lock()
        # Response time samples for adaptive tuning (last 100 responses)
        self._response_times: deque = deque(maxlen=100)
        self._response_times_lock = threading.Lock()
        # Circuit breaker state
        self._circuit_state = CircuitState.CLOSED
        self._circuit_failures = 0
        self._circuit_last_failure_time: Optional[float] = None
        self._circuit_successes = 0
        self._circuit_lock = threading.Lock()
        logger.info(f"ConcurrencyManager initialized: max_concurrent={self.config.max_concurrent}")

    def _check_circuit_breaker(self) -> None:
        """Check circuit breaker state, transitioning OPEN -> HALF_OPEN when due.

        Raises:
            CircuitBreakerOpenError: If the circuit is OPEN and the recovery
                timeout has not yet elapsed.
        """
        if not self.config.enable_circuit_breaker:
            return
        with self._circuit_lock:
            if self._circuit_state == CircuitState.OPEN:
                # Check if recovery timeout has elapsed
                if self._circuit_last_failure_time:
                    elapsed = time.time() - self._circuit_last_failure_time
                    if elapsed >= self.config.circuit_recovery_timeout:
                        logger.info("Circuit breaker: OPEN -> HALF_OPEN (recovery timeout elapsed)")
                        self._circuit_state = CircuitState.HALF_OPEN
                        self._circuit_successes = 0
                    else:
                        raise CircuitBreakerOpenError(
                            f"Circuit breaker is OPEN. Retry after "
                            f"{self.config.circuit_recovery_timeout - elapsed:.1f}s"
                        )
            elif self._circuit_state == CircuitState.HALF_OPEN:
                # In half-open state, allow limited requests through
                pass

    def _record_success(self) -> None:
        """Record a successful operation; may close a HALF_OPEN circuit."""
        if not self.config.enable_circuit_breaker:
            return
        with self._circuit_lock:
            if self._circuit_state == CircuitState.HALF_OPEN:
                self._circuit_successes += 1
                if self._circuit_successes >= self.config.circuit_success_threshold:
                    logger.info("Circuit breaker: HALF_OPEN -> CLOSED (recovered)")
                    self._circuit_state = CircuitState.CLOSED
                    self._circuit_failures = 0
                    self._circuit_successes = 0

    def _record_failure(self) -> None:
        """Record a failed operation; may open the circuit.

        Lock order here (circuit -> metrics) is the canonical order for the
        whole class.
        """
        if not self.config.enable_circuit_breaker:
            return
        with self._circuit_lock:
            self._circuit_failures += 1
            self._circuit_last_failure_time = time.time()
            if self._circuit_state == CircuitState.CLOSED:
                if self._circuit_failures >= self.config.circuit_failure_threshold:
                    logger.warning(
                        f"Circuit breaker: CLOSED -> OPEN "
                        f"({self._circuit_failures} failures)"
                    )
                    self._circuit_state = CircuitState.OPEN
                    with self._metrics_lock:
                        self._metrics.circuit_state = CircuitState.OPEN
            elif self._circuit_state == CircuitState.HALF_OPEN:
                # Failure during recovery - back to OPEN
                logger.warning("Circuit breaker: HALF_OPEN -> OPEN (recovery failed)")
                self._circuit_state = CircuitState.OPEN
                self._circuit_successes = 0

    def _update_response_time(self, response_time_ms: float) -> None:
        """Append a response-time sample and refresh the rolling average."""
        with self._response_times_lock:
            self._response_times.append(response_time_ms)
            # Update average over the (bounded) sample window
            if len(self._response_times) > 0:
                avg = sum(self._response_times) / len(self._response_times)
                with self._metrics_lock:
                    self._metrics.avg_response_time_ms = avg

    def _adjust_concurrency(self) -> None:
        """Adaptive concurrency tuning based on observed response times."""
        if not self.config.enable_adaptive_tuning:
            return
        with self._response_times_lock:
            if len(self._response_times) < 10:
                return  # Not enough data
            avg_time = sum(self._response_times) / len(self._response_times)
            target_time = self.config.max_response_time * 1000  # Convert to ms
            current_concurrency = self.config.max_concurrent
            if avg_time > target_time * 1.5:
                # Response time too high - decrease concurrency
                new_concurrency = max(
                    self.config.min_concurrent,
                    current_concurrency - 1
                )
                if new_concurrency != current_concurrency:
                    logger.info(
                        f"Adaptive tuning: Decreasing concurrency "
                        f"{current_concurrency} -> {new_concurrency} "
                        f"(avg response time: {avg_time:.1f}ms)"
                    )
                    self.config.max_concurrent = new_concurrency
                    # Note: Can't easily adjust asyncio.Semaphore,
                    # would need to recreate it
            elif avg_time < target_time * 0.5:
                # Response time low - can increase concurrency
                new_concurrency = min(
                    20,  # Hard cap
                    current_concurrency + 1
                )
                if new_concurrency != current_concurrency:
                    logger.info(
                        f"Adaptive tuning: Increasing concurrency "
                        f"{current_concurrency} -> {new_concurrency} "
                        f"(avg response time: {avg_time:.1f}ms)"
                    )
                    self.config.max_concurrent = new_concurrency

    @asynccontextmanager
    async def acquire(self, timeout: Optional[float] = None):
        """
        Async context manager to acquire concurrency slot

        Args:
            timeout: Optional timeout override (an explicit 0 is honored;
                only None falls back to the configured default)
        Raises:
            BackpressureError: If queue is full and backpressure is enabled
            CircuitBreakerOpenError: If circuit breaker is open
            asyncio.TimeoutError: If timeout exceeded (covers semaphore wait
                AND the wrapped operation, per the config's "operation timeout")
        Example:
            async with manager.acquire():
                result = await some_async_operation()
        """
        # FIX: `timeout or default` treated an explicit 0 as "use default"
        timeout = timeout if timeout is not None else self.config.timeout
        start_time = time.time()
        # Check circuit breaker
        self._check_circuit_breaker()
        # Check backpressure
        if self.config.enable_backpressure:
            with self._metrics_lock:
                if self._metrics.queued_operations >= self.config.max_queue_size:
                    self._metrics.rejected_requests += 1
                    raise BackpressureError(
                        f"Queue full ({self.config.max_queue_size} operations pending). "
                        "Try again later."
                    )
        # Update queue metrics
        with self._metrics_lock:
            self._metrics.queued_operations += 1
            self._metrics.total_requests += 1
        # Tracks whether the queued -> active transition already happened, so
        # error paths decrement queued_operations exactly once (the original
        # double-decremented it when the timeout fired AFTER acquisition).
        dequeued = False
        try:
            # asyncio.timeout requires Python 3.11+
            async with asyncio.timeout(timeout):
                async with self._semaphore:
                    # Update active metrics
                    with self._metrics_lock:
                        self._metrics.queued_operations -= 1
                        self._metrics.active_operations += 1
                    dequeued = True
                    operation_start = time.time()
                    try:
                        yield
                        # Record success
                        response_time_ms = (time.time() - operation_start) * 1000
                        self._update_response_time(response_time_ms)
                        self._record_success()
                        with self._metrics_lock:
                            self._metrics.successful_requests += 1
                    except Exception:
                        # Record failure
                        self._record_failure()
                        with self._metrics_lock:
                            self._metrics.failed_requests += 1
                        raise
                    finally:
                        # Update active metrics
                        with self._metrics_lock:
                            self._metrics.active_operations -= 1
                        # Adaptive tuning
                        self._adjust_concurrency()
        except asyncio.TimeoutError:
            with self._metrics_lock:
                self._metrics.timeout_requests += 1
                # Only undo the queued count if we never reached the
                # queued -> active transition above.
                if not dequeued:
                    self._metrics.queued_operations -= 1
            elapsed = time.time() - start_time
            raise asyncio.TimeoutError(
                f"Operation timed out after {elapsed:.1f}s "
                f"(timeout: {timeout}s)"
            )
        except asyncio.CancelledError:
            # FIX: external cancellation while waiting on the semaphore used
            # to leak a queued_operations increment.
            if not dequeued:
                with self._metrics_lock:
                    self._metrics.queued_operations -= 1
            raise

    @contextmanager
    def acquire_sync(self, timeout: Optional[float] = None):
        """
        Synchronous context manager to acquire concurrency slot

        Args:
            timeout: Optional timeout override (an explicit 0 is honored;
                only None falls back to the configured default)
        Raises:
            BackpressureError: If queue is full and backpressure is enabled
            CircuitBreakerOpenError: If circuit breaker is open
            TimeoutError: If the semaphore cannot be acquired in time
        Example:
            with manager.acquire_sync():
                result = some_operation()
        """
        timeout = timeout if timeout is not None else self.config.timeout
        start_time = time.time()
        # Check circuit breaker
        self._check_circuit_breaker()
        # Check backpressure
        if self.config.enable_backpressure:
            with self._metrics_lock:
                if self._metrics.queued_operations >= self.config.max_queue_size:
                    self._metrics.rejected_requests += 1
                    raise BackpressureError(
                        f"Queue full ({self.config.max_queue_size} operations pending)"
                    )
        # Update queue metrics
        with self._metrics_lock:
            self._metrics.queued_operations += 1
            self._metrics.total_requests += 1
        acquired = False
        try:
            # Acquire semaphore with timeout
            acquired = self._sync_semaphore.acquire(timeout=timeout)
            if not acquired:
                raise TimeoutError(f"Failed to acquire semaphore within {timeout}s")
            # Update active metrics
            with self._metrics_lock:
                self._metrics.queued_operations -= 1
                self._metrics.active_operations += 1
            operation_start = time.time()
            try:
                yield
                # Record success
                response_time_ms = (time.time() - operation_start) * 1000
                self._update_response_time(response_time_ms)
                self._record_success()
                with self._metrics_lock:
                    self._metrics.successful_requests += 1
            except Exception:
                # Record failure
                self._record_failure()
                with self._metrics_lock:
                    self._metrics.failed_requests += 1
                raise
            finally:
                # Update active metrics
                with self._metrics_lock:
                    self._metrics.active_operations -= 1
                # Adaptive tuning (added for parity with the async path;
                # a no-op unless enable_adaptive_tuning is set)
                self._adjust_concurrency()
        finally:
            if acquired:
                self._sync_semaphore.release()
            else:
                # Acquisition failed: account the timeout and undo the queued count
                with self._metrics_lock:
                    self._metrics.timeout_requests += 1
                    self._metrics.queued_operations -= 1

    def get_metrics(self) -> ConcurrencyMetrics:
        """Get a point-in-time copy of the current concurrency metrics.

        Returns:
            A fresh ConcurrencyMetrics snapshot (safe for the caller to keep).
        """
        # FIX: snapshot circuit state FIRST. The original nested
        # _circuit_lock inside _metrics_lock, the reverse of the order used
        # by _record_failure, which could deadlock under contention.
        with self._circuit_lock:
            circuit_state = self._circuit_state
            circuit_failures = self._circuit_failures
        with self._metrics_lock:
            self._metrics.circuit_state = circuit_state
            self._metrics.circuit_failures = circuit_failures
            # Keep this in sync with adaptive tuning, which mutates
            # config.max_concurrent after __init__.
            self._metrics.current_concurrency = self.config.max_concurrent
            self._metrics.last_updated = datetime.now()
            return ConcurrencyMetrics(**self._metrics.__dict__)

    def reset_circuit_breaker(self) -> None:
        """Manually reset circuit breaker to CLOSED state"""
        with self._circuit_lock:
            logger.info("Manually resetting circuit breaker to CLOSED")
            self._circuit_state = CircuitState.CLOSED
            self._circuit_failures = 0
            self._circuit_successes = 0
            self._circuit_last_failure_time = None
            # Reflect the reset in the metrics view as well
            # (circuit -> metrics lock order, matching _record_failure)
            with self._metrics_lock:
                self._metrics.circuit_state = CircuitState.CLOSED
                self._metrics.circuit_failures = 0

    def get_status(self) -> Dict[str, Any]:
        """Get human-readable status grouped by concern.

        Returns:
            Dict with 'status', 'concurrency', 'performance',
            'circuit_breaker', and 'requests' sections.
        """
        metrics = self.get_metrics()
        return {
            'status': 'healthy' if metrics.circuit_state == CircuitState.CLOSED else 'degraded',
            'concurrency': {
                'current': metrics.current_concurrency,
                'active': metrics.active_operations,
                'queued': metrics.queued_operations,
            },
            'performance': {
                'avg_response_time_ms': metrics.avg_response_time_ms,
                'success_rate': round(
                    metrics.successful_requests / max(metrics.total_requests, 1) * 100, 2
                )
            },
            'circuit_breaker': {
                'state': metrics.circuit_state.value,
                'failures': metrics.circuit_failures,
            },
            'requests': {
                'total': metrics.total_requests,
                'successful': metrics.successful_requests,
                'failed': metrics.failed_requests,
                'rejected': metrics.rejected_requests,
                'timeout': metrics.timeout_requests,
            }
        }
# Process-wide singleton shared by callers that do not need a custom manager.
_global_manager: Optional[ConcurrencyManager] = None
_global_manager_lock = threading.Lock()


def get_concurrency_manager(config: Optional[ConcurrencyConfig] = None) -> ConcurrencyManager:
    """
    Return the process-wide ConcurrencyManager, creating it on first use.

    Args:
        config: Optional configuration; honored only when the singleton is
            first created (subsequent calls ignore it)

    Returns:
        The shared ConcurrencyManager instance
    """
    global _global_manager
    with _global_manager_lock:
        if _global_manager is None:
            _global_manager = ConcurrencyManager(config)
        return _global_manager


def reset_concurrency_manager() -> None:
    """Drop the global manager so the next call rebuilds it (testing aid)."""
    global _global_manager
    with _global_manager_lock:
        _global_manager = None