Files
claude-code-skills-reference/transcript-fixer/scripts/utils/rate_limiter.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

442 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Rate Limiting Module
CRITICAL FIX (P1-8): Production-grade rate limiting for API protection
Features:
- Token Bucket algorithm (smooth rate limiting)
- Sliding Window algorithm (precise rate limiting)
- Fixed Window algorithm (simple, memory-efficient)
- Thread-safe operations
- Burst support
- Multiple rate limit tiers
- Metrics integration
Use cases:
- API rate limiting (e.g., 100 requests/minute)
- Resource protection (e.g., max 5 concurrent DB connections)
- DoS prevention
- Cost control (e.g., limit API calls)
"""
from __future__ import annotations
import logging
import threading
import time
from collections import deque
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Deque, Final
from contextlib import contextmanager
logger = logging.getLogger(__name__)
class RateLimitStrategy(Enum):
    """Available rate limiting strategies."""
    TOKEN_BUCKET = "token_bucket"      # smooth average rate with burst allowance
    SLIDING_WINDOW = "sliding_window"  # precise rolling-window request counting
    FIXED_WINDOW = "fixed_window"      # simple per-window counter


@dataclass
class RateLimitConfig:
    """
    Rate limit configuration.

    Attributes:
        max_requests: Maximum requests allowed per window (must be > 0).
        window_seconds: Length of the time window in seconds (must be > 0).
        strategy: Algorithm used to enforce the limit.
        burst_size: Burst allowance for the token bucket strategy; defaults
            to ``max_requests`` (allow full burst) when not provided.

    Raises:
        ValueError: If any numeric field is not positive.
    """
    max_requests: int
    window_seconds: float
    strategy: RateLimitStrategy = RateLimitStrategy.TOKEN_BUCKET
    burst_size: Optional[int] = None

    def __post_init__(self):
        """Validate configuration and apply the burst-size default."""
        if self.max_requests <= 0:
            raise ValueError("max_requests must be positive")
        if self.window_seconds <= 0:
            raise ValueError("window_seconds must be positive")
        if self.burst_size is None:
            # Default burst size = max_requests (allow full burst)
            self.burst_size = self.max_requests
        elif self.burst_size <= 0:
            # FIX: a zero/negative burst_size was previously accepted and
            # produced a token bucket that could never grant a request.
            raise ValueError("burst_size must be positive")
class RateLimitExceeded(Exception):
    """
    Raised when a rate limit is exceeded.

    Attributes:
        retry_after: Suggested number of seconds to wait before retrying.
    """

    def __init__(self, message: str, retry_after: float):
        # Seconds the caller should wait before retrying the operation.
        self.retry_after = retry_after
        super().__init__(message)
class TokenBucketLimiter:
    """
    Token Bucket algorithm implementation.

    Tokens accumulate at ``refill_rate`` per second up to ``capacity``;
    each request consumes tokens, so bursts up to the bucket capacity are
    allowed while the long-run average rate is enforced.

    Properties:
    - Smooth rate limiting
    - Allows bursts up to bucket capacity
    - Memory efficient (O(1))
    - Fast (O(1) per request)
    - Thread-safe: all mutable state is guarded by an internal lock

    Use for: API rate limiting, general request throttling
    """

    def __init__(self, config: RateLimitConfig):
        """
        Args:
            config: Rate limit configuration; ``burst_size`` (falling back to
                ``max_requests``) becomes the bucket capacity.
        """
        self.config = config
        self.capacity = config.burst_size or config.max_requests
        # Sustained rate, in tokens per second.
        self.refill_rate = config.max_requests / config.window_seconds
        self._tokens = float(self.capacity)
        # FIX: use a monotonic clock so wall-clock adjustments (NTP, DST)
        # cannot corrupt the refill accounting.
        self._last_refill = time.monotonic()
        self._lock = threading.Lock()
        logger.debug(
            f"TokenBucket initialized: capacity={self.capacity}, "
            f"refill_rate={self.refill_rate:.2f}/s"
        )

    def _refill(self) -> None:
        """Add tokens earned since the last refill. Caller must hold the lock."""
        now = time.monotonic()
        elapsed = now - self._last_refill
        self._tokens = min(self.capacity, self._tokens + elapsed * self.refill_rate)
        self._last_refill = now

    def acquire(self, tokens: int = 1, blocking: bool = True, timeout: Optional[float] = None) -> bool:
        """
        Acquire tokens from bucket.

        Args:
            tokens: Number of tokens to acquire (default: 1)
            blocking: If True, wait for tokens. If False, return immediately
            timeout: Maximum time to wait (seconds). None = wait forever

        Returns:
            True if tokens acquired, False if rate limit exceeded (non-blocking only)

        Raises:
            ValueError: If ``tokens`` is not positive, or a blocking call asks
                for more tokens than the bucket can ever hold.
            RateLimitExceeded: If the timeout expires in blocking mode
        """
        if tokens <= 0:
            raise ValueError("tokens must be positive")
        # FIX: a blocking request for more tokens than the capacity could
        # never be satisfied and previously spun forever.
        if blocking and tokens > self.capacity:
            raise ValueError(
                f"requested {tokens} tokens but bucket capacity is {self.capacity}"
            )
        start_time = time.monotonic()
        while True:
            with self._lock:
                self._refill()
                if self._tokens >= tokens:
                    # Sufficient tokens available
                    self._tokens -= tokens
                    return True
                if not blocking:
                    # Non-blocking mode - return immediately
                    return False
                # FIX: snapshot the balance while still holding the lock so
                # the error message and wait estimate are consistent.
                available = self._tokens
                retry_after = (tokens - available) / self.refill_rate
            # Check timeout outside the lock.
            if timeout is not None and time.monotonic() - start_time >= timeout:
                raise RateLimitExceeded(
                    f"Rate limit exceeded: need {tokens} tokens, have {available:.1f}",
                    retry_after=retry_after
                )
            # Wait before retry, but re-check at least every 100ms so a
            # refill (or the timeout) is noticed promptly.
            wait_time = min(retry_after, 0.1)
            if timeout is not None:
                wait_time = min(wait_time, timeout - (time.monotonic() - start_time))
            if wait_time > 0:
                time.sleep(wait_time)

    def get_available_tokens(self) -> float:
        """Get current number of available tokens (after a refill)."""
        with self._lock:
            self._refill()
            return self._tokens

    def reset(self) -> None:
        """Reset to full capacity."""
        with self._lock:
            self._tokens = float(self.capacity)
            self._last_refill = time.monotonic()
class SlidingWindowLimiter:
    """
    Sliding Window algorithm implementation.

    Keeps a timestamp per request and counts only those inside the rolling
    window, so there is no fixed-window "boundary problem".

    Properties:
    - Precise rate limiting
    - Memory: O(max_requests)
    - Fast: O(n) per request, where n = requests in window
    - Thread-safe: the timestamp deque is guarded by an internal lock

    Use for: Strict rate limits, billing, quota enforcement
    """

    def __init__(self, config: RateLimitConfig):
        """
        Args:
            config: Rate limit configuration (``max_requests`` per
                ``window_seconds``).
        """
        self.config = config
        self.max_requests = config.max_requests
        self.window_seconds = config.window_seconds
        # FIX: monotonic timestamps of requests currently inside the window —
        # immune to wall-clock adjustments, unlike time.time().
        self._timestamps: Deque[float] = deque()
        self._lock = threading.Lock()
        logger.debug(
            f"SlidingWindow initialized: max_requests={self.max_requests}, "
            f"window={self.window_seconds}s"
        )

    def _cleanup_old_timestamps(self, now: float) -> None:
        """Drop timestamps older than the window. Caller must hold the lock."""
        cutoff = now - self.window_seconds
        while self._timestamps and self._timestamps[0] < cutoff:
            self._timestamps.popleft()

    def acquire(self, tokens: int = 1, blocking: bool = True, timeout: Optional[float] = None) -> bool:
        """
        Acquire tokens (check if request allowed).

        Args:
            tokens: Number of requests to make (usually 1)
            blocking: If True, wait for capacity. If False, return immediately
            timeout: Maximum time to wait (seconds)

        Returns:
            True if allowed, False if rate limit exceeded (non-blocking only)

        Raises:
            ValueError: If ``tokens`` is not positive, or a blocking call asks
                for more tokens than the window can ever admit.
            RateLimitExceeded: If the timeout expires in blocking mode
        """
        if tokens <= 0:
            raise ValueError("tokens must be positive")
        # FIX: a blocking request for more tokens than the window limit could
        # never be satisfied and previously spun forever.
        if blocking and tokens > self.max_requests:
            raise ValueError(
                f"requested {tokens} tokens but window limit is {self.max_requests}"
            )
        start_time = time.monotonic()
        while True:
            now = time.monotonic()
            with self._lock:
                self._cleanup_old_timestamps(now)
                current_count = len(self._timestamps)
                if current_count + tokens <= self.max_requests:
                    # Allowed - record one timestamp per token.
                    self._timestamps.extend([now] * tokens)
                    return True
                if not blocking:
                    # Non-blocking mode
                    return False
                # FIX: compute retry_after (when the oldest request falls out
                # of the window) while still holding the lock.
                if self._timestamps:
                    retry_after = self._timestamps[0] + self.window_seconds - now
                else:
                    retry_after = 0.1
            # Check timeout outside the lock.
            if timeout is not None and time.monotonic() - start_time >= timeout:
                raise RateLimitExceeded(
                    f"Rate limit exceeded: {current_count}/{self.max_requests} "
                    f"requests in {self.window_seconds}s window",
                    retry_after=max(retry_after, 0.1)
                )
            # Wait before retry, but re-check at least every 100ms.
            wait_time = min(retry_after, 0.1)
            if timeout is not None:
                wait_time = min(wait_time, timeout - (time.monotonic() - start_time))
            if wait_time > 0:
                time.sleep(wait_time)

    def get_current_count(self) -> int:
        """Get current request count in window."""
        with self._lock:
            self._cleanup_old_timestamps(time.monotonic())
            return len(self._timestamps)

    def reset(self) -> None:
        """Reset (clear all timestamps)."""
        with self._lock:
            self._timestamps.clear()
class RateLimiter:
    """
    Main rate limiter with configurable strategy.

    Thin facade that delegates to a strategy-specific implementation
    (token bucket or sliding window) selected from the config.

    CRITICAL FIX (P1-8): Thread-safe rate limiting for production use
    """

    def __init__(self, config: RateLimitConfig):
        """
        Args:
            config: Rate limit configuration; ``config.strategy`` selects the
                underlying implementation.

        Raises:
            ValueError: If the configured strategy has no implementation
                (e.g. FIXED_WINDOW is declared but not yet supported here).
        """
        self.config = config
        # Dispatch table: strategy -> implementation class.
        implementations = {
            RateLimitStrategy.TOKEN_BUCKET: TokenBucketLimiter,
            RateLimitStrategy.SLIDING_WINDOW: SlidingWindowLimiter,
        }
        impl_cls = implementations.get(config.strategy)
        if impl_cls is None:
            raise ValueError(f"Unsupported strategy: {config.strategy}")
        self._impl = impl_cls(config)
        logger.info(
            f"RateLimiter created: {config.strategy.value}, "
            f"{config.max_requests}/{config.window_seconds}s"
        )

    def acquire(self, tokens: int = 1, blocking: bool = True, timeout: Optional[float] = None) -> bool:
        """
        Acquire permission to proceed.

        Args:
            tokens: Number of requests (default: 1)
            blocking: Wait for availability (default: True)
            timeout: Maximum wait time in seconds (default: None = forever)

        Returns:
            True if allowed, False if rate limit exceeded (non-blocking only)

        Raises:
            RateLimitExceeded: If rate limit exceeded in blocking mode
        """
        return self._impl.acquire(tokens=tokens, blocking=blocking, timeout=timeout)

    @contextmanager
    def limit(self, tokens: int = 1):
        """
        Context manager for rate-limited operations.

        Usage:
            with rate_limiter.limit():
                # Make API call
                response = client.post(...)

        Raises:
            RateLimitExceeded: If rate limit exceeded
        """
        # Tokens are consumed up front; nothing to release afterwards.
        self.acquire(tokens=tokens, blocking=True)
        yield

    def check_available(self) -> bool:
        """Report whether one request would currently be allowed (non-blocking)."""
        return self.acquire(tokens=1, blocking=False)

    def reset(self) -> None:
        """Reset the underlying limiter's state."""
        self._impl.reset()

    def get_info(self) -> dict:
        """Return the limiter's configuration plus strategy-specific state."""
        snapshot = {
            'strategy': self.config.strategy.value,
            'max_requests': self.config.max_requests,
            'window_seconds': self.config.window_seconds,
        }
        impl = self._impl
        if isinstance(impl, TokenBucketLimiter):
            snapshot['available_tokens'] = impl.get_available_tokens()
            snapshot['capacity'] = impl.capacity
        elif isinstance(impl, SlidingWindowLimiter):
            snapshot['current_count'] = impl.get_current_count()
        return snapshot
# Predefined rate limit configurations
class RateLimitPresets:
    """Common rate limit configurations"""
    # API rate limits
    # 10 requests/minute — for expensive or strictly metered endpoints.
    API_CONSERVATIVE = RateLimitConfig(
        max_requests=10,
        window_seconds=60.0,
        strategy=RateLimitStrategy.TOKEN_BUCKET
    )
    # 60 requests/minute (1/s average) — a typical default.
    API_MODERATE = RateLimitConfig(
        max_requests=60,
        window_seconds=60.0,
        strategy=RateLimitStrategy.TOKEN_BUCKET
    )
    # 100 requests/minute — for cheap, high-throughput endpoints.
    API_AGGRESSIVE = RateLimitConfig(
        max_requests=100,
        window_seconds=60.0,
        strategy=RateLimitStrategy.TOKEN_BUCKET
    )
    # Burst limits
    # Average 50/minute but bucket capacity 100, so short spikes pass.
    BURST_ALLOWED = RateLimitConfig(
        max_requests=50,
        window_seconds=60.0,
        burst_size=100,  # Allow double burst
        strategy=RateLimitStrategy.TOKEN_BUCKET
    )
    # Strict limits (sliding window)
    # Exactly 100 per rolling 60s — no boundary leakage, no burst allowance.
    STRICT_LIMIT = RateLimitConfig(
        max_requests=100,
        window_seconds=60.0,
        strategy=RateLimitStrategy.SLIDING_WINDOW
    )
# Global rate limiters
# Registry of process-wide named limiters, populated lazily by
# get_rate_limiter(); all access is guarded by _limiters_lock.
_global_limiters: dict[str, RateLimiter] = {}
_limiters_lock = threading.Lock()
def get_rate_limiter(name: str, config: Optional[RateLimitConfig] = None) -> RateLimiter:
    """
    Look up a named rate limiter, creating it on first use.

    Args:
        name: Unique name for this rate limiter
        config: Rate limit configuration (required if creating new;
            ignored when the limiter already exists)

    Returns:
        RateLimiter instance registered under ``name``

    Raises:
        ValueError: If ``name`` is unknown and no config was supplied
    """
    with _limiters_lock:
        limiter = _global_limiters.get(name)
        if limiter is None:
            if config is None:
                raise ValueError(f"Rate limiter '{name}' not found and no config provided")
            limiter = RateLimiter(config)
            _global_limiters[name] = limiter
            logger.info(f"Created global rate limiter: {name}")
        return limiter
def reset_all_limiters() -> None:
    """Reset all global rate limiters (mainly for testing)"""
    # Hold the registry lock so no limiter is created or fetched mid-reset.
    with _limiters_lock:
        for limiter in _global_limiters.values():
            limiter.reset()
        logger.info("Reset all rate limiters")