Files
claude-code-skills-reference/transcript-fixer/scripts/utils/config.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

539 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Configuration Management Module
CRITICAL FIX (P1-5): Production-grade configuration management
Features:
- Centralized configuration (single source of truth)
- Environment-based config (dev/staging/prod)
- Type-safe access with validation
- Multiple config sources (env vars, files, defaults)
- Config schema validation
- Secure secrets management
Use cases:
- Application configuration
- Environment-specific settings
- API keys and secrets management
- Path configuration
- Feature flags
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Optional, Dict, Any, Final
logger = logging.getLogger(__name__)
class Environment(Enum):
    """Deployment environment the application runs in."""

    # Each value is the lowercase name looked up via ``Environment(<str>)``
    # when the environment is parsed from a string.
    DEVELOPMENT = "development"
    STAGING = "staging"
    PRODUCTION = "production"
    TEST = "test"
@dataclass
class DatabaseConfig:
    """Database location and connection settings.

    Attributes:
        path: Location of the database file. ``str`` values are accepted and
            a leading ``~`` is expanded to the user's home directory.
        max_connections: Maximum number of connections; must be positive.
        connection_timeout: Connection timeout; must be positive.
        enable_wal_mode: Whether Write-Ahead Logging should be enabled.

    Raises:
        ValueError: If ``max_connections`` or ``connection_timeout`` is not
            positive.

    Side effects:
        Creates the parent directory of ``path`` if it does not exist.
    """

    path: Path
    max_connections: int = 5
    connection_timeout: float = 30.0
    enable_wal_mode: bool = True  # Write-Ahead Logging for better concurrency

    def __post_init__(self):
        """Validate the numeric settings, then normalise and prepare ``path``."""
        # Validate first so that an invalid config never touches the disk.
        if self.max_connections <= 0:
            raise ValueError("max_connections must be positive")
        if self.connection_timeout <= 0:
            raise ValueError("connection_timeout must be positive")

        # Expand "~" explicitly: Path("~/x") is otherwise treated as a
        # relative path and mkdir() would create a literal "~" directory
        # under the current working directory.
        self.path = Path(self.path).expanduser()

        # Ensure database directory exists
        self.path.parent.mkdir(parents=True, exist_ok=True)
@dataclass
class APIConfig:
    """API client settings: credentials, endpoint, timeout and retry policy.

    Attributes:
        api_key: Secret API key, or ``None`` when not configured. Excluded
            from ``repr()`` so the secret does not end up in logs or
            tracebacks that print the config object.
        base_url: Optional API base URL.
        timeout: Request timeout; must be positive.
        max_retries: Number of retries; must be non-negative.
        retry_backoff: Exponential backoff base in seconds; must be
            non-negative.

    Raises:
        ValueError: If ``timeout`` is not positive, or ``max_retries`` or
            ``retry_backoff`` is negative.
    """

    # repr=False keeps the key out of the auto-generated __repr__; equality
    # comparison and positional construction are unaffected.
    api_key: Optional[str] = field(default=None, repr=False)
    base_url: Optional[str] = None
    timeout: float = 60.0
    max_retries: int = 3
    retry_backoff: float = 1.0  # Exponential backoff base (seconds)

    def __post_init__(self):
        """Validate API configuration."""
        if self.timeout <= 0:
            raise ValueError("timeout must be positive")
        if self.max_retries < 0:
            raise ValueError("max_retries must be non-negative")
        if self.retry_backoff < 0:
            raise ValueError("retry_backoff must be non-negative")
@dataclass
class PathConfig:
    """Directories used by the application.

    Attributes:
        config_dir: Configuration directory.
        data_dir: Data directory.
        log_dir: Log directory.
        cache_dir: Cache directory.

    Every value may be given as ``str`` or ``Path``; a leading ``~`` is
    expanded to the user's home directory.

    Side effects:
        Creates each directory (including parents) if it does not exist.
    """

    config_dir: Path
    data_dir: Path
    log_dir: Path
    cache_dir: Path

    def __post_init__(self):
        """Normalise all four attributes to expanded ``Path`` objects and create them."""
        names = ("config_dir", "data_dir", "log_dir", "cache_dir")

        # Normalise everything before touching the disk. expanduser() matters
        # here: Path("~/x") is a relative path, so without it mkdir() would
        # create a literal "~" directory under the current working directory.
        for name in names:
            setattr(self, name, Path(getattr(self, name)).expanduser())

        # Create all directories
        for name in names:
            getattr(self, name).mkdir(parents=True, exist_ok=True)
@dataclass
class ResourceLimits:
    """Resource limits configuration.

    Every limit must be strictly positive.

    Raises:
        ValueError: If any limit is zero or negative. The message has the
            form ``"<field> must be positive"``.
    """

    max_text_length: int = 1_000_000  # 1MB max text
    max_file_size: int = 10_000_000  # 10MB max file
    max_concurrent_tasks: int = 10
    max_memory_mb: int = 512
    rate_limit_requests: int = 100
    rate_limit_window_seconds: float = 60.0

    def __post_init__(self):
        """Validate resource limits."""
        # All six limits are checked, in declaration order, so a zero or
        # negative memory or rate-limit setting is rejected up front instead
        # of being passed on to whatever consumes it.
        for name in (
            "max_text_length",
            "max_file_size",
            "max_concurrent_tasks",
            "max_memory_mb",
            "rate_limit_requests",
            "rate_limit_window_seconds",
        ):
            if getattr(self, name) <= 0:
                raise ValueError(f"{name} must be positive")
@dataclass
class FeatureFlags:
    """Boolean switches that turn optional functionality on or off.

    Every flag defaults to enabled except ``enable_auto_approval``.
    """

    enable_learning: bool = True
    enable_metrics: bool = True
    enable_health_checks: bool = True
    enable_rate_limiting: bool = True
    enable_caching: bool = True
    # Auto-approve learned suggestions; off unless explicitly enabled.
    enable_auto_approval: bool = False
@dataclass
class Config:
    """
    Main configuration class - single source of truth for all configuration.

    A ``Config`` is populated from exactly one source at a time:

    - ``Config.from_env()`` reads environment variables,
    - ``Config.from_file(path)`` reads a JSON file,
    - ``Config()`` uses the built-in defaults.

    Values missing from the chosen source fall back to the defaults. The
    sources are not layered on top of each other: ``from_file`` does not
    consult environment variables and ``from_env`` does not read a file.
    """

    # Environment
    environment: Environment = Environment.DEVELOPMENT

    # Sub-configurations
    database: DatabaseConfig = field(default_factory=lambda: DatabaseConfig(
        path=Path.home() / ".transcript-fixer" / "corrections.db"
    ))
    api: APIConfig = field(default_factory=APIConfig)
    paths: PathConfig = field(default_factory=lambda: PathConfig(
        config_dir=Path.home() / ".transcript-fixer",
        data_dir=Path.home() / ".transcript-fixer" / "data",
        log_dir=Path.home() / ".transcript-fixer" / "logs",
        cache_dir=Path.home() / ".transcript-fixer" / "cache",
    ))
    resources: ResourceLimits = field(default_factory=ResourceLimits)
    features: FeatureFlags = field(default_factory=FeatureFlags)

    # Application metadata
    app_name: str = "transcript-fixer"
    app_version: str = "1.0.0"
    debug: bool = False

    def __post_init__(self):
        """Log which environment the configuration was created for."""
        logger.debug(f"Config initialized for environment: {self.environment.value}")

    # ------------------------------------------------------------------
    # Private parsing helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _env_flag(name: str, default: bool) -> bool:
        """Read a boolean environment variable.

        ``1/true/yes/on`` mean True and ``0/false/no/off`` mean False
        (case-insensitive, surrounding whitespace ignored). An unset or
        unrecognised value yields *default*.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        value = raw.strip().lower()
        if value in ("1", "true", "yes", "on"):
            return True
        if value in ("0", "false", "no", "off"):
            return False
        return default

    @staticmethod
    def _env_number(name: str, default, cast):
        """Read a numeric environment variable with *cast* (``int``/``float``).

        Raises:
            ValueError: If the variable is set but cannot be converted. The
                message names the offending variable.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return cast(raw)
        except ValueError as exc:
            raise ValueError(f"Invalid value for {name}: {raw!r}") from exc

    @staticmethod
    def _section(data: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return the JSON object stored under *key*, or ``{}`` if absent/null.

        Raises:
            ValueError: If the value is present but is not a JSON object.
        """
        section = data.get(key)
        if section is None:
            return {}
        if not isinstance(section, dict):
            raise ValueError(f"Config section '{key}' must be a JSON object")
        return section

    @staticmethod
    def _pick(section: Dict[str, Any], keys) -> Dict[str, Any]:
        """Return only the entries of *section* whose key is in *keys*.

        Absent keys are omitted so the dataclass defaults apply.
        """
        return {key: section[key] for key in keys if key in section}

    # ------------------------------------------------------------------
    # Constructors
    # ------------------------------------------------------------------

    @classmethod
    def from_env(cls) -> Config:
        """
        Create configuration from environment variables.

        Environment variables:
        - TRANSCRIPT_FIXER_ENV: Environment (development/staging/production/test)
        - TRANSCRIPT_FIXER_CONFIG_DIR: Config directory path
        - TRANSCRIPT_FIXER_DB_PATH: Database path
        - TRANSCRIPT_FIXER_DB_MAX_CONNECTIONS: Max database connections (int)
        - GLM_API_KEY: API key for GLM service
        - ANTHROPIC_API_KEY: Alternative API key (used if GLM_API_KEY is unset/empty)
        - ANTHROPIC_BASE_URL: API base URL
        - TRANSCRIPT_FIXER_API_TIMEOUT: API timeout (float)
        - TRANSCRIPT_FIXER_MAX_CONCURRENT: Max concurrent tasks (int)
        - TRANSCRIPT_FIXER_RATE_LIMIT: Rate limit request count (int)
        - TRANSCRIPT_FIXER_ENABLE_LEARNING: Learning flag (default on)
        - TRANSCRIPT_FIXER_ENABLE_METRICS: Metrics flag (default on)
        - TRANSCRIPT_FIXER_AUTO_APPROVE: Auto-approval flag (default off)
        - TRANSCRIPT_FIXER_DEBUG: Enable debug mode (default off)

        Boolean variables accept 1/true/yes/on and 0/false/no/off.

        Returns:
            Config instance with values from environment variables

        Raises:
            ValueError: If a numeric variable is not a valid number, or a
                sub-configuration rejects a value.
        """
        # Parse environment
        env_str = os.getenv("TRANSCRIPT_FIXER_ENV", "development").strip().lower()
        try:
            environment = Environment(env_str)
        except ValueError:
            logger.warning(f"Invalid environment '{env_str}', defaulting to development")
            environment = Environment.DEVELOPMENT

        # Parse debug flag
        debug = cls._env_flag("TRANSCRIPT_FIXER_DEBUG", False)

        # Parse paths. "~" is expanded here because values coming from
        # .env files or quoted shell assignments are not expanded by a shell.
        config_dir = Path(os.getenv(
            "TRANSCRIPT_FIXER_CONFIG_DIR",
            str(Path.home() / ".transcript-fixer")
        )).expanduser()

        # Database config
        db_path = Path(os.getenv(
            "TRANSCRIPT_FIXER_DB_PATH",
            str(config_dir / "corrections.db")
        )).expanduser()
        database = DatabaseConfig(
            path=db_path,
            max_connections=cls._env_number("TRANSCRIPT_FIXER_DB_MAX_CONNECTIONS", 5, int),
        )

        # API config
        api = APIConfig(
            api_key=os.getenv("GLM_API_KEY") or os.getenv("ANTHROPIC_API_KEY"),
            base_url=os.getenv("ANTHROPIC_BASE_URL"),
            timeout=cls._env_number("TRANSCRIPT_FIXER_API_TIMEOUT", 60.0, float),
        )

        # Path config
        paths = PathConfig(
            config_dir=config_dir,
            data_dir=config_dir / "data",
            log_dir=config_dir / "logs",
            cache_dir=config_dir / "cache",
        )

        # Resource limits
        resources = ResourceLimits(
            max_concurrent_tasks=cls._env_number("TRANSCRIPT_FIXER_MAX_CONCURRENT", 10, int),
            rate_limit_requests=cls._env_number("TRANSCRIPT_FIXER_RATE_LIMIT", 100, int),
        )

        # Feature flags
        features = FeatureFlags(
            enable_learning=cls._env_flag("TRANSCRIPT_FIXER_ENABLE_LEARNING", True),
            enable_metrics=cls._env_flag("TRANSCRIPT_FIXER_ENABLE_METRICS", True),
            enable_auto_approval=cls._env_flag("TRANSCRIPT_FIXER_AUTO_APPROVE", False),
        )

        return cls(
            environment=environment,
            database=database,
            api=api,
            paths=paths,
            resources=resources,
            features=features,
            debug=debug,
        )

    @classmethod
    def from_file(cls, config_path: Path) -> Config:
        """
        Load configuration from JSON file.

        Keys that are absent from the file fall back to the defaults.
        A leading ``~`` in any path value is expanded to the home directory.

        Args:
            config_path: Path to JSON config file

        Returns:
            Config instance with values from file

        Raises:
            FileNotFoundError: If config file doesn't exist
            ValueError: If config file is invalid (malformed JSON, top level
                or a section is not a JSON object, or a value has the wrong
                type or is out of range)
        """
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in config file: {e}") from e

        if not isinstance(data, dict):
            raise ValueError(f"Config file must contain a JSON object: {config_path}")

        try:
            return cls._from_mapping(data)
        except TypeError as e:
            # Wrongly-typed values (e.g. a string where a number is expected)
            # surface as TypeError from the sub-config validation; report
            # them as the documented ValueError.
            raise ValueError(f"Invalid value in config file {config_path}: {e}") from e

    @classmethod
    def _from_mapping(cls, data: Dict[str, Any]) -> Config:
        """Build a Config from the already-parsed top-level JSON object."""
        default_root = Path.home() / ".transcript-fixer"

        # Parse environment (case-insensitive, like from_env)
        env_str = data.get("environment", "development")
        if isinstance(env_str, str):
            env_str = env_str.strip().lower()
        try:
            environment = Environment(env_str)
        except ValueError:
            logger.warning(f"Invalid environment '{env_str}', defaulting to development")
            environment = Environment.DEVELOPMENT

        # Parse database config
        db_data = cls._section(data, "database")
        database = DatabaseConfig(
            path=Path(db_data.get("path", str(default_root / "corrections.db"))).expanduser(),
            **cls._pick(db_data, ("max_connections", "connection_timeout", "enable_wal_mode")),
        )

        # Parse API config
        api = APIConfig(**cls._pick(
            cls._section(data, "api"),
            ("api_key", "base_url", "timeout", "max_retries", "retry_backoff"),
        ))

        # Parse path config; sub-directories default to children of config_dir
        paths_data = cls._section(data, "paths")
        config_dir = Path(paths_data.get("config_dir", str(default_root))).expanduser()
        paths = PathConfig(
            config_dir=config_dir,
            data_dir=Path(paths_data.get("data_dir", str(config_dir / "data"))).expanduser(),
            log_dir=Path(paths_data.get("log_dir", str(config_dir / "logs"))).expanduser(),
            cache_dir=Path(paths_data.get("cache_dir", str(config_dir / "cache"))).expanduser(),
        )

        # Parse resource limits
        resources = ResourceLimits(**cls._pick(
            cls._section(data, "resources"),
            (
                "max_text_length",
                "max_file_size",
                "max_concurrent_tasks",
                "max_memory_mb",
                "rate_limit_requests",
                "rate_limit_window_seconds",
            ),
        ))

        # Parse feature flags
        features = FeatureFlags(**cls._pick(
            cls._section(data, "features"),
            (
                "enable_learning",
                "enable_metrics",
                "enable_health_checks",
                "enable_rate_limiting",
                "enable_caching",
                "enable_auto_approval",
            ),
        ))

        return cls(
            environment=environment,
            database=database,
            api=api,
            paths=paths,
            resources=resources,
            features=features,
            debug=data.get("debug", False),
        )

    # ------------------------------------------------------------------
    # Persistence and validation
    # ------------------------------------------------------------------

    def save_to_file(self, config_path: Path) -> None:
        """
        Save configuration to JSON file.

        All sub-configuration fields are written so that a file produced
        here loads back through ``from_file`` without losing settings.

        NOTE: the API key is written to the file in plain text.

        Args:
            config_path: Path to save config file
        """
        config_path = Path(config_path)
        config_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "environment": self.environment.value,
            "database": {
                "path": str(self.database.path),
                "max_connections": self.database.max_connections,
                "connection_timeout": self.database.connection_timeout,
                "enable_wal_mode": self.database.enable_wal_mode,
            },
            "api": {
                "api_key": self.api.api_key,
                "base_url": self.api.base_url,
                "timeout": self.api.timeout,
                "max_retries": self.api.max_retries,
                "retry_backoff": self.api.retry_backoff,
            },
            "paths": {
                "config_dir": str(self.paths.config_dir),
                "data_dir": str(self.paths.data_dir),
                "log_dir": str(self.paths.log_dir),
                "cache_dir": str(self.paths.cache_dir),
            },
            "resources": {
                "max_text_length": self.resources.max_text_length,
                "max_file_size": self.resources.max_file_size,
                "max_concurrent_tasks": self.resources.max_concurrent_tasks,
                "max_memory_mb": self.resources.max_memory_mb,
                "rate_limit_requests": self.resources.rate_limit_requests,
                "rate_limit_window_seconds": self.resources.rate_limit_window_seconds,
            },
            "features": {
                "enable_learning": self.features.enable_learning,
                "enable_metrics": self.features.enable_metrics,
                "enable_health_checks": self.features.enable_health_checks,
                "enable_rate_limiting": self.features.enable_rate_limiting,
                "enable_caching": self.features.enable_caching,
                "enable_auto_approval": self.features.enable_auto_approval,
            },
            "debug": self.debug,
        }

        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Configuration saved to {config_path}")

    def validate(self) -> tuple[list[str], list[str]]:
        """
        Validate configuration completeness and correctness.

        A missing API key is an error in production and a warning in every
        other environment.

        Returns:
            Tuple of (errors, warnings)
        """
        errors: list[str] = []
        warnings: list[str] = []

        # Check API key
        if not self.api.api_key:
            if self.environment == Environment.PRODUCTION:
                errors.append("API key is required in production environment")
            else:
                warnings.append("API key not set (required for AI corrections)")

        # Check database path
        if not self.database.path.parent.exists():
            errors.append(f"Database directory doesn't exist: {self.database.path.parent}")

        # Check paths exist (all four configured directories)
        for name in ("config_dir", "data_dir", "log_dir", "cache_dir"):
            path = getattr(self.paths, name)
            if not path.exists():
                warnings.append(f"{name} doesn't exist: {path}")

        # Check resource limits are reasonable
        if self.resources.max_concurrent_tasks > 50:
            warnings.append(f"max_concurrent_tasks is very high: {self.resources.max_concurrent_tasks}")

        return errors, warnings

    # ------------------------------------------------------------------
    # Convenience accessors
    # ------------------------------------------------------------------

    def get_database_url(self) -> str:
        """Get database connection URL (``sqlite:///`` followed by the database path)."""
        return f"sqlite:///{self.database.path}"

    def is_production(self) -> bool:
        """Check if running in production"""
        return self.environment == Environment.PRODUCTION

    def is_development(self) -> bool:
        """Check if running in development"""
        return self.environment == Environment.DEVELOPMENT
# Process-wide configuration instance; populated lazily by get_config() or
# explicitly via set_config(), and cleared by reset_config().
_config: Optional[Config] = None


def get_config() -> Config:
    """
    Return the global configuration, creating it on first use.

    On the first call the configuration is loaded from environment
    variables and validated; validation errors and warnings are logged,
    not raised. Later calls return the same instance.

    Returns:
        The global Config instance
    """
    global _config

    # Already initialised: nothing to load or validate.
    if _config is not None:
        return _config

    # Load from environment by default
    _config = Config.from_env()
    logger.info(f"Configuration loaded: {_config.environment.value}")

    # Validate once, at load time
    problems, notices = _config.validate()
    if problems:
        logger.error(f"Configuration errors: {problems}")
    if notices:
        logger.warning(f"Configuration warnings: {notices}")

    return _config
def set_config(config: Config) -> None:
    """
    Replace the global configuration instance.

    Intended for tests or for installing a manually built configuration.

    Args:
        config: Config instance to use globally from now on
    """
    global _config

    _config = config
    logger.info(f"Configuration set: {config.environment.value}")
def reset_config() -> None:
    """Clear the global configuration instance (mainly for testing)."""
    global _config

    _config = None
    logger.debug("Configuration reset")
# Example configuration file template (JSON), written verbatim by
# create_example_config().
CONFIG_FILE_TEMPLATE: Final[str] = """{
"environment": "development",
"database": {
"path": "~/.transcript-fixer/corrections.db",
"max_connections": 5,
"connection_timeout": 30.0
},
"api": {
"api_key": "your-api-key-here",
"base_url": null,
"timeout": 60.0,
"max_retries": 3
},
"paths": {
"config_dir": "~/.transcript-fixer",
"data_dir": "~/.transcript-fixer/data",
"log_dir": "~/.transcript-fixer/logs",
"cache_dir": "~/.transcript-fixer/cache"
},
"resources": {
"max_text_length": 1000000,
"max_file_size": 10000000,
"max_concurrent_tasks": 10
},
"features": {
"enable_learning": true,
"enable_metrics": true,
"enable_auto_approval": false
},
"debug": false
}
"""


def create_example_config(output_path: Path) -> None:
    """
    Write the example configuration template to a file.

    Missing parent directories are created; an existing file is overwritten.

    Args:
        output_path: Path to write example config
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(CONFIG_FILE_TEMPLATE, encoding='utf-8')
    logger.info(f"Example config created: {target}")