Files
claude-code-skills-reference/transcript-fixer/scripts/core/connection_pool.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

376 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Thread-Safe SQLite Connection Pool
CRITICAL FIX: Replaces unsafe check_same_thread=False pattern
ISSUE: Critical-1 in Engineering Excellence Plan
This module provides:
1. Thread-safe connection pooling
2. Proper connection lifecycle management
3. Timeout and limit enforcement
4. WAL mode for better concurrency
5. Explicit connection cleanup
Author: Chief Engineer (20 years experience)
Date: 2025-10-28
Priority: P0 - Critical
"""
from __future__ import annotations
import sqlite3
import threading
import queue
import logging
from pathlib import Path
from contextlib import contextmanager
from typing import Optional, Final
from dataclasses import dataclass
from datetime import datetime
# Module-level logger; handlers/level are configured by the application.
logger = logging.getLogger(__name__)
# Constants (immutable, explicit) — `Final` marks them as non-reassignable for type checkers.
MAX_CONNECTIONS: Final[int] = 5 # Limit to prevent file descriptor exhaustion
CONNECTION_TIMEOUT: Final[float] = 30.0 # 30s timeout instead of infinite (passed to sqlite3.connect)
POOL_TIMEOUT: Final[float] = 5.0 # Max wait time for available connection (queue.get timeout)
BUSY_TIMEOUT: Final[int] = 30000 # SQLite busy timeout in milliseconds (PRAGMA busy_timeout)
@dataclass
class PoolStatistics:
    """Point-in-time snapshot of connection pool state, for monitoring/health checks."""
    total_connections: int  # configured pool size (max_connections)
    active_connections: int  # connections currently checked out by callers
    waiting_threads: int  # threads blocked waiting for a connection
    total_acquired: int  # cumulative successful acquisitions since pool creation
    total_released: int  # cumulative returns to the pool since pool creation
    total_timeouts: int  # cumulative acquisition timeouts (pool-exhausted events)
    created_at: datetime  # when the pool object was constructed
class PoolExhaustedError(Exception):
    """
    Raised when no pooled connection becomes available within the pool timeout.

    Indicates all connections are in use; callers may retry, reduce
    concurrency, or the pool size may need to be increased.
    """
    pass
class ConnectionPool:
    """
    Thread-safe connection pool for SQLite.

    Design Decisions:
        1. Fixed pool size - prevents resource exhaustion
        2. Queue-based - FIFO fairness, no thread starvation
        3. WAL mode - allows concurrent reads, better performance
        4. Explicit timeouts - prevents infinite hangs
        5. Statistics tracking - enables monitoring

    Usage:
        pool = ConnectionPool(db_path, max_connections=5)
        with pool.get_connection() as conn:
            conn.execute("SELECT * FROM table")
        # Cleanup when done
        pool.close_all()

    Thread Safety:
        - Each connection is held by at most one thread at a time; the
          blocking queue is the synchronization point.
        - Connections are created with check_same_thread=False because a
          pooled connection legitimately migrates between threads
          (created in one thread, used by whichever thread acquires it).
          sqlite3's own check would raise ProgrammingError on every
          cross-thread acquisition; exclusive access is instead
          guaranteed by the pool itself.
    """

    def __init__(
        self,
        db_path: Path,
        max_connections: int = MAX_CONNECTIONS,
        connection_timeout: float = CONNECTION_TIMEOUT,
        pool_timeout: float = POOL_TIMEOUT
    ):
        """
        Initialize connection pool.

        Args:
            db_path: Path to SQLite database file
            max_connections: Maximum number of connections (default: 5)
            connection_timeout: SQLite connection timeout in seconds (default: 30)
            pool_timeout: Max wait time for available connection (default: 5)

        Raises:
            ValueError: If max_connections < 1 or timeouts < 0
            FileNotFoundError: If db_path parent directory doesn't exist
        """
        # Input validation (fail fast, clear errors)
        if max_connections < 1:
            raise ValueError(f"max_connections must be >= 1, got {max_connections}")
        if connection_timeout < 0:
            raise ValueError(f"connection_timeout must be >= 0, got {connection_timeout}")
        if pool_timeout < 0:
            raise ValueError(f"pool_timeout must be >= 0, got {pool_timeout}")

        self.db_path = Path(db_path)
        if not self.db_path.parent.exists():
            raise FileNotFoundError(f"Database directory doesn't exist: {self.db_path.parent}")

        self.max_connections = max_connections
        self.connection_timeout = connection_timeout
        self.pool_timeout = pool_timeout

        # Thread-safe queue holding the idle connections
        self._pool: queue.Queue[sqlite3.Connection] = queue.Queue(maxsize=max_connections)

        # Lock for pool initialization (create connections once)
        self._init_lock = threading.Lock()
        self._initialized = False

        # Statistics (for monitoring and debugging)
        self._stats_lock = threading.Lock()
        self._total_acquired = 0
        self._total_released = 0
        self._total_timeouts = 0
        # FIX: track threads actually blocked in get_connection(); previously
        # get_statistics() reported the idle-connection count as "waiting threads".
        self._waiting_threads = 0
        self._created_at = datetime.now()

        logger.info(
            "Connection pool initialized",
            extra={
                "db_path": str(self.db_path),
                "max_connections": self.max_connections,
                "connection_timeout": self.connection_timeout,
                "pool_timeout": self.pool_timeout
            }
        )

    def _initialize_pool(self) -> None:
        """
        Create initial connections (lazy initialization).

        Called on first use, not in __init__, so the database file can be
        created after the pool object. Double-checked under _init_lock so
        concurrent first users initialize exactly once.

        Raises:
            sqlite3.Error: If any connection cannot be created (partial
                pool is cleaned up before re-raising).
        """
        with self._init_lock:
            if self._initialized:
                return
            logger.debug(f"Creating {self.max_connections} database connections")
            for i in range(self.max_connections):
                try:
                    conn = self._create_connection()
                    # Queue was sized to max_connections, so this never blocks.
                    self._pool.put(conn, block=False)
                    logger.debug(f"Created connection {i+1}/{self.max_connections}")
                except Exception as e:
                    logger.error(f"Failed to create connection {i+1}: {e}", exc_info=True)
                    # Cleanup partial initialization
                    self._cleanup_partial_pool()
                    raise
            self._initialized = True
            logger.info(f"Connection pool ready with {self.max_connections} connections")

    def _cleanup_partial_pool(self) -> None:
        """Close and drain any connections created before a failed initialization."""
        while not self._pool.empty():
            try:
                conn = self._pool.get(block=False)
                conn.close()
            except queue.Empty:
                break
            except Exception as e:
                logger.warning(f"Error closing connection during cleanup: {e}")

    def _create_connection(self) -> sqlite3.Connection:
        """
        Create a new SQLite connection with optimal settings.

        Settings explained:
            1. check_same_thread=False - required for pooling: the pool hands
               each connection to one thread at a time, and sqlite3's built-in
               check would reject that legitimate cross-thread hand-off with
               ProgrammingError. Exclusivity is enforced by the queue instead.
            2. timeout - prevent infinite waits on database locks
            3. isolation_level='DEFERRED' - explicit transaction control
            4. WAL mode - better concurrency (allows concurrent reads)
            5. busy_timeout - how long SQLite waits on a locked resource

        Returns:
            Configured SQLite connection

        Raises:
            sqlite3.Error: If connection creation fails
        """
        try:
            conn = sqlite3.connect(
                str(self.db_path),
                # FIX: was check_same_thread=True, which made every use of a
                # pooled connection from a non-creator thread raise
                # sqlite3.ProgrammingError. The pool guarantees single-thread
                # access at any instant, which is what makes False safe here.
                check_same_thread=False,
                timeout=self.connection_timeout,
                isolation_level='DEFERRED'  # Explicit transaction control
            )
            # Enable Write-Ahead Logging for better concurrency:
            # WAL allows multiple readers + one writer simultaneously.
            conn.execute('PRAGMA journal_mode=WAL')
            # Set busy timeout (how long to wait on locks)
            conn.execute(f'PRAGMA busy_timeout={BUSY_TIMEOUT}')
            # Enable foreign key constraints (off by default in SQLite)
            conn.execute('PRAGMA foreign_keys=ON')
            # Use Row factory for dict-like access
            conn.row_factory = sqlite3.Row
            logger.debug(f"Created connection to {self.db_path}")
            return conn
        except sqlite3.Error as e:
            logger.error(f"Failed to create connection: {e}", exc_info=True)
            raise

    @contextmanager
    def get_connection(self):
        """
        Get a connection from the pool (context manager).

        This is the main API. Always use with 'with' statement:
            with pool.get_connection() as conn:
                conn.execute("SELECT * FROM table")

        Thread Safety:
            - Blocks until a connection is available (up to pool_timeout)
            - Connection is returned to the pool automatically
            - Safe to use from multiple threads

        Yields:
            sqlite3.Connection: Database connection

        Raises:
            PoolExhaustedError: If no connection available within timeout
        """
        # Lazy initialization (only create connections when first needed)
        if not self._initialized:
            self._initialize_pool()

        conn = None
        acquired_at = datetime.now()
        try:
            # Wait for available connection (blocks up to pool_timeout seconds)
            with self._stats_lock:
                self._waiting_threads += 1
            try:
                conn = self._pool.get(timeout=self.pool_timeout)
                logger.debug("Connection acquired from pool")
                # Update statistics
                with self._stats_lock:
                    self._total_acquired += 1
            except queue.Empty:
                # Pool exhausted, all connections in use
                with self._stats_lock:
                    self._total_timeouts += 1
                    timeouts = self._total_timeouts
                logger.error(
                    "Connection pool exhausted",
                    extra={
                        "pool_size": self.max_connections,
                        "timeout": self.pool_timeout,
                        "total_timeouts": timeouts
                    }
                )
                raise PoolExhaustedError(
                    f"No connection available within {self.pool_timeout}s. "
                    f"Pool size: {self.max_connections}. "
                    f"Consider increasing pool size or reducing concurrency."
                )
            finally:
                # We are no longer waiting, whether we got a connection or not.
                with self._stats_lock:
                    self._waiting_threads -= 1

            # Yield connection to caller
            yield conn
        finally:
            # CRITICAL: Always return a connection to the pool
            if conn is not None:
                self._release(conn, acquired_at)

    def _release(self, conn: sqlite3.Connection, acquired_at: datetime) -> None:
        """
        Return a connection to the pool, replacing it if it is broken.

        FIX: previously a failure while returning a connection closed it
        without replacement, permanently shrinking the pool until it was
        exhausted. A fresh connection is now created in that case.
        """
        try:
            # Rollback any uncommitted transaction
            # This ensures clean state for next user
            conn.rollback()
        except Exception as e:
            logger.error(f"Failed to return connection to pool: {e}", exc_info=True)
            try:
                conn.close()
            except Exception:
                pass
            # Best effort: keep the pool at full strength with a replacement.
            try:
                conn = self._create_connection()
            except Exception as create_err:
                logger.error(
                    f"Failed to create replacement connection: {create_err}",
                    exc_info=True
                )
                return
        try:
            # Return to pool (never blocks: queue capacity == max_connections)
            self._pool.put(conn, block=False)
            # Update statistics
            with self._stats_lock:
                self._total_released += 1
            duration_ms = (datetime.now() - acquired_at).total_seconds() * 1000
            logger.debug(f"Connection returned to pool (held for {duration_ms:.1f}ms)")
        except Exception as e:
            # Should never happen; log and close so the fd is not leaked.
            logger.error(f"Failed to return connection to pool: {e}", exc_info=True)
            try:
                conn.close()
            except Exception:
                pass

    def get_statistics(self) -> PoolStatistics:
        """
        Get current pool statistics.

        Useful for monitoring and debugging. Can expose via
        health check endpoint or metrics.

        Returns:
            PoolStatistics with current state
        """
        with self._stats_lock:
            idle = self._pool.qsize()  # snapshot; approximate under concurrency
            return PoolStatistics(
                total_connections=self.max_connections,
                active_connections=self.max_connections - idle,
                # FIX: report actual blocked threads, not the idle-connection count
                waiting_threads=self._waiting_threads,
                total_acquired=self._total_acquired,
                total_released=self._total_released,
                total_timeouts=self._total_timeouts,
                created_at=self._created_at
            )

    def close_all(self) -> None:
        """
        Close all idle connections in the pool.

        Call this on application shutdown to ensure clean cleanup.
        Connections currently checked out are not closed here; they are
        closed by _release if returning them fails after shutdown.

        Thread Safety:
            Safe to call from any thread, but only call once.
        """
        logger.info("Closing connection pool")
        closed_count = 0
        error_count = 0
        # Close all connections in pool
        while not self._pool.empty():
            try:
                conn = self._pool.get(block=False)
                conn.close()
                closed_count += 1
            except queue.Empty:
                break
            except Exception as e:
                logger.warning(f"Error closing connection: {e}")
                error_count += 1
        logger.info(
            f"Connection pool closed: {closed_count} connections closed, {error_count} errors"
        )
        self._initialized = False

    def __enter__(self) -> ConnectionPool:
        """Support using pool as context manager"""
        return self

    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object | None) -> bool:
        """Cleanup on context exit; exceptions are not suppressed."""
        self.close_all()
        return False