Release v1.9.0: Add video-comparer skill and enhance transcript-fixer

## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
daymade
2025-10-30 00:23:12 +08:00
parent bd0aa12004
commit 9b724f33e3
49 changed files with 15357 additions and 270 deletions

View File

@@ -0,0 +1,478 @@
#!/usr/bin/env python3
"""
Path Validation and Security
CRITICAL FIX: Prevents path traversal and symlink attacks
ISSUE: Critical-5 in Engineering Excellence Plan
This module provides:
1. Path whitelist validation
2. Path traversal prevention (../)
3. Symlink attack detection
4. File extension validation
5. Directory containment checks
Author: Chief Engineer
Date: 2025-10-28
Priority: P0 - Critical
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Set, Optional, Final, List
import logging
logger = logging.getLogger(__name__)

# Allowed base directories (whitelist).
# Only files under these directories can be accessed.
ALLOWED_BASE_DIRS: Final[Set[Path]] = {
    Path.home() / ".transcript-fixer",  # Config/data directory
    Path.home() / "Downloads",  # Common download location
    Path.home() / "Documents",  # Common documents location
    Path.home() / "Desktop",  # Desktop files
    Path("/tmp"),  # Temporary files
}

# Allowed file extensions for reading
ALLOWED_READ_EXTENSIONS: Final[Set[str]] = {
    '.md',  # Markdown
    '.txt',  # Text
    '.html',  # HTML output
    '.json',  # JSON config
    '.sql',  # SQL schema
}

# Allowed file extensions for writing
ALLOWED_WRITE_EXTENSIONS: Final[Set[str]] = {
    '.md',  # Markdown output
    '.html',  # HTML diff
    '.db',  # SQLite database
    '.log',  # Log files
}

# Dangerous patterns to reject. Each entry is matched as a raw substring of
# the caller-supplied path string, before any Path parsing happens.
DANGEROUS_PATTERNS: Final[List[str]] = [
    '..',  # Parent directory traversal
    '\x00',  # Null byte
    '\n',  # Newline injection
    '\r',  # Carriage return injection
]


class PathValidationError(Exception):
    """Raised when a path fails any of the validation checks."""


class PathValidator:
    """
    Validates file paths for security.

    Prevents:
    - Path traversal attacks (../)
    - Symlink attacks
    - Access outside whitelisted directories
    - Dangerous file types
    - Null byte injection

    Usage:
        validator = PathValidator()
        safe_path = validator.validate_input_path("/path/to/file.md")
        safe_output = validator.validate_output_path("/path/to/output.md")
    """

    # The upward symlink walk stops at these directories. They are
    # system-owned and may legitimately be symlinks (on macOS /etc, /var and
    # /tmp point into /private), so flagging them would reject every path
    # beneath them.
    _SYMLINK_WALK_STOP_DIRS = frozenset({
        Path('/'),
        Path('/usr'),
        Path('/etc'),
        Path('/var'),
        Path('/tmp'),
    })

    def __init__(
        self,
        allowed_base_dirs: Optional[Set[Path]] = None,
        allowed_read_extensions: Optional[Set[str]] = None,
        allowed_write_extensions: Optional[Set[str]] = None,
        allow_symlinks: bool = False
    ):
        """
        Initialize path validator.

        Each whitelist argument falls back to the corresponding module-level
        default when it is None or empty. The validator keeps its own copy of
        every set, so later changes (e.g. add_allowed_directory) affect only
        this instance and never the module defaults or the caller's set.

        Args:
            allowed_base_dirs: Whitelist of allowed base directories
            allowed_read_extensions: Allowed file extensions for reading
            allowed_write_extensions: Allowed file extensions for writing
            allow_symlinks: Allow symlinks (default: False for security)
        """
        # set(...) copies: without it add_allowed_directory() would mutate
        # the shared ALLOWED_BASE_DIRS constant for every validator.
        self.allowed_base_dirs: Set[Path] = set(
            allowed_base_dirs or ALLOWED_BASE_DIRS
        )
        self.allowed_read_extensions: Set[str] = set(
            allowed_read_extensions or ALLOWED_READ_EXTENSIONS
        )
        self.allowed_write_extensions: Set[str] = set(
            allowed_write_extensions or ALLOWED_WRITE_EXTENSIONS
        )
        self.allow_symlinks = allow_symlinks

    def _check_dangerous_patterns(self, path_str: str) -> None:
        """
        Check for dangerous patterns in path string.

        Args:
            path_str: Path string to check

        Raises:
            PathValidationError: If dangerous pattern found
        """
        for pattern in DANGEROUS_PATTERNS:
            if pattern in path_str:
                raise PathValidationError(
                    f"Dangerous pattern '{pattern}' detected in path: {path_str}"
                )

    @staticmethod
    def _is_within(path: Path, base: Path) -> bool:
        """Return True if *path* equals *base* or lies beneath it (purely lexical)."""
        try:
            path.relative_to(base)
        except ValueError:
            return False
        return True

    def _is_under_allowed_directory(self, path: Path) -> bool:
        """
        Check if path is under any allowed base directory.

        Each whitelist entry is tried both as configured and in resolved
        form. Callers pass a resolved path, so an entry that is itself a
        symlink would otherwise never match.

        Args:
            path: Resolved path to check

        Returns:
            True if path is under allowed directory
        """
        for allowed_dir in self.allowed_base_dirs:
            if self._is_within(path, allowed_dir):
                return True
            try:
                canonical_dir = allowed_dir.resolve()
            except (OSError, RuntimeError):
                # Entry cannot be resolved (e.g. symlink loop); the lexical
                # comparison above is all we can do for it.
                continue
            if self._is_within(path, canonical_dir):
                return True
        return False

    def _check_symlink(self, path: Path) -> None:
        """
        Check for symlink attacks.

        Rejects the path if it, or any ancestor directory below the stop
        directories, is a symlink. Does nothing when symlinks are allowed.
        Must be called with an unresolved path; resolving first would hide
        the symlinks this check is looking for.

        Args:
            path: Path to check

        Raises:
            PathValidationError: If symlink detected and not allowed
        """
        if self.allow_symlinks:
            return

        if path.is_symlink():
            raise PathValidationError(
                f"Symlink detected and not allowed: {path}"
            )

        # Walk up the parents until the filesystem root or a stop directory.
        current = path.parent
        while current != current.parent:
            if current in self._SYMLINK_WALK_STOP_DIRS:
                break
            if current.is_symlink():
                raise PathValidationError(
                    f"Symlink in path hierarchy detected: {current}"
                )
            current = current.parent

    def _validate_extension(
        self,
        path: Path,
        allowed_extensions: Set[str],
        operation: str
    ) -> None:
        """
        Validate file extension.

        The comparison is case-insensitive on the path's final suffix.

        Args:
            path: Path to validate
            allowed_extensions: Set of allowed extensions
            operation: Operation name (for error message)

        Raises:
            PathValidationError: If extension not allowed
        """
        extension = path.suffix.lower()
        if extension not in allowed_extensions:
            raise PathValidationError(
                f"File extension '{extension}' not allowed for {operation}. "
                f"Allowed: {sorted(allowed_extensions)}"
            )

    def validate_input_path(self, path_str: str) -> Path:
        """
        Validate an input file path for reading.

        Security checks:
        1. No dangerous patterns (.., null bytes, etc.)
        2. Path resolves to absolute path
        3. No symlinks (unless explicitly allowed)
        4. Under allowed base directory
        5. Allowed file extension for reading
        6. File exists

        Args:
            path_str: Path string to validate

        Returns:
            Validated, resolved Path object

        Raises:
            PathValidationError: If validation fails

        Example:
            >>> validator = PathValidator()
            >>> safe_path = validator.validate_input_path("~/Documents/file.md")
            >>> # Returns: Path('/home/username/Documents/file.md') or similar
        """
        # Check dangerous patterns in raw string
        self._check_dangerous_patterns(path_str)

        # Make absolute but do NOT resolve yet: the symlink check below needs
        # the path exactly as the caller spelled it.
        try:
            path = Path(path_str).expanduser().absolute()
        except Exception as e:
            raise PathValidationError(f"Invalid path format: {path_str}") from e

        if not path.exists():
            raise PathValidationError(f"File does not exist: {path}")

        if not path.is_file():
            raise PathValidationError(f"Path is not a file: {path}")

        # CRITICAL: Check for symlinks BEFORE resolving
        self._check_symlink(path)

        # Now resolve to get canonical path
        path = path.resolve()

        if not self._is_under_allowed_directory(path):
            raise PathValidationError(
                f"Path not under allowed directories: {path}\n"
                f"Allowed directories: {[str(d) for d in self.allowed_base_dirs]}"
            )

        self._validate_extension(path, self.allowed_read_extensions, "reading")

        logger.info("Input path validated: %s", path)
        return path

    def validate_output_path(self, path_str: str, create_parent: bool = True) -> Path:
        """
        Validate an output file path for writing.

        Security checks:
        1. No dangerous patterns
        2. No symlinks in the path as given (unless explicitly allowed)
        3. Path resolves to absolute path
        4. Under allowed base directory
        5. Allowed file extension for writing
        6. Parent directory exists or can be created

        The parent directory is created only after every other check has
        passed, so a rejected path leaves nothing behind on disk.

        Args:
            path_str: Path string to validate
            create_parent: Create parent directory if it doesn't exist

        Returns:
            Validated, resolved Path object

        Raises:
            PathValidationError: If validation fails

        Example:
            >>> validator = PathValidator()
            >>> safe_path = validator.validate_output_path("~/Documents/output.md")
            >>> # Returns: Path('/home/username/Documents/output.md') or similar
        """
        self._check_dangerous_patterns(path_str)

        try:
            unresolved = Path(path_str).expanduser().absolute()
        except Exception as e:
            raise PathValidationError(f"Invalid path format: {path_str}") from e

        # The symlink check must see the unresolved path: after resolve()
        # every symlink has been followed and the check could never fire.
        # An existing output file that is itself a symlink is rejected too.
        self._check_symlink(unresolved)

        try:
            path = unresolved.resolve()
        except Exception as e:
            raise PathValidationError(f"Invalid path format: {path_str}") from e

        parent = path.parent
        parent_missing = not parent.exists()
        if parent_missing:
            if not create_parent:
                raise PathValidationError(
                    f"Parent directory does not exist: {parent}"
                )
            if not self._is_under_allowed_directory(parent):
                raise PathValidationError(
                    f"Parent directory not under allowed directories: {parent}"
                )

        if not self._is_under_allowed_directory(path):
            raise PathValidationError(
                f"Path not under allowed directories: {path}\n"
                f"Allowed directories: {[str(d) for d in self.allowed_base_dirs]}"
            )

        self._validate_extension(path, self.allowed_write_extensions, "writing")

        # Only now touch the filesystem.
        if parent_missing:
            try:
                parent.mkdir(parents=True, exist_ok=True)
            except OSError as e:
                raise PathValidationError(
                    f"Failed to create parent directory: {parent}"
                ) from e
            logger.info("Created parent directory: %s", parent)

        logger.info("Output path validated: %s", path)
        return path

    def add_allowed_directory(self, directory: str | Path) -> None:
        """
        Add a directory to this validator's whitelist.

        The directory is user-expanded and resolved before being stored.
        Only this instance is affected.

        Args:
            directory: Directory path to add

        Example:
            >>> validator.add_allowed_directory("/home/username/Projects")
        """
        dir_path = Path(directory).expanduser().resolve()
        self.allowed_base_dirs.add(dir_path)
        logger.info("Added allowed directory: %s", dir_path)

    def is_path_safe(self, path_str: str, for_writing: bool = False) -> bool:
        """
        Check if a path is safe without raising exceptions.

        When for_writing is True the output checks run with
        create_parent=False, so this method never creates directories.

        Args:
            path_str: Path to check
            for_writing: Check for writing (vs reading)

        Returns:
            True if path is safe

        Example:
            >>> if validator.is_path_safe("~/Documents/file.md"):
            ...     process_file()
        """
        try:
            if for_writing:
                self.validate_output_path(path_str, create_parent=False)
            else:
                self.validate_input_path(path_str)
        except (PathValidationError, OSError):
            # A filesystem error while probing also means "not safe".
            return False
        return True
# Process-wide validator, built lazily on first request.
_global_validator: Optional[PathValidator] = None


def get_validator() -> PathValidator:
    """
    Return the shared PathValidator, creating it on first use.

    The instance is built with default settings and cached in the module
    global, so every later call returns the same object.

    Returns:
        Global PathValidator instance

    Example:
        >>> validator = get_validator()
        >>> safe_path = validator.validate_input_path("file.md")
    """
    global _global_validator
    if _global_validator is not None:
        return _global_validator
    _global_validator = PathValidator()
    return _global_validator
# Convenience functions
def validate_input_path(path_str: str) -> Path:
    """Validate an input path with the global validator and return the result."""
    shared = get_validator()
    return shared.validate_input_path(path_str)
def validate_output_path(path_str: str, create_parent: bool = True) -> Path:
    """Validate an output path with the global validator and return the result."""
    shared = get_validator()
    return shared.validate_output_path(path_str, create_parent)
def add_allowed_directory(directory: str | Path) -> None:
    """Register an extra allowed directory on the global validator."""
    shared = get_validator()
    shared.add_allowed_directory(directory)
# Example usage and testing
if __name__ == "__main__":
    # Local import: only this self-test needs it.
    import tempfile

    logging.basicConfig(level=logging.INFO)
    print("=== Testing PathValidator ===\n")

    # Run everything inside a throwaway directory so the self-test can never
    # overwrite or delete real user files; the context manager removes the
    # directory even if a check raises unexpectedly.
    with tempfile.TemporaryDirectory() as scratch:
        # Resolve so the whitelist entry is in the same canonical form as
        # the resolved paths the validator compares against it.
        scratch_dir = Path(scratch).resolve()
        validator = PathValidator(allowed_base_dirs={scratch_dir})

        # Test 1: Valid input path (create a test file first)
        print("Test 1: Valid input path")
        test_file = scratch_dir / "test.md"
        test_file.write_text("test")
        try:
            result = validator.validate_input_path(str(test_file))
            print(f"✓ Valid: {result}\n")
        except PathValidationError as e:
            print(f"✗ Failed: {e}\n")

        # Test 2: Path traversal attack
        print("Test 2: Path traversal attack")
        try:
            result = validator.validate_input_path("../../etc/passwd")
            print(f"✗ Should have failed: {result}\n")
        except PathValidationError as e:
            print(f"✓ Correctly rejected: {e}\n")

        # Test 3: Invalid extension
        print("Test 3: Invalid extension")
        dangerous_file = scratch_dir / "script.sh"
        dangerous_file.write_text("#!/bin/bash")
        try:
            result = validator.validate_input_path(str(dangerous_file))
            print(f"✗ Should have failed: {result}\n")
        except PathValidationError as e:
            print(f"✓ Correctly rejected: {e}\n")

        # Test 4: Valid output path
        print("Test 4: Valid output path")
        try:
            result = validator.validate_output_path(str(scratch_dir / "output.html"))
            print(f"✓ Valid: {result}\n")
        except PathValidationError as e:
            print(f"✗ Failed: {e}\n")

        # Test 5: Null byte injection
        print("Test 5: Null byte injection")
        try:
            result = validator.validate_input_path("file.md\x00.txt")
            print(f"✗ Should have failed: {result}\n")
        except PathValidationError as e:
            print(f"✓ Correctly rejected: {e}\n")

    print("=== All tests completed ===")