Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0 - Compare original and compressed videos with interactive HTML reports - Calculate quality metrics (PSNR, SSIM) for compression analysis - Generate frame-by-frame visual comparisons (slider, side-by-side, grid) - Extract video metadata (codec, resolution, bitrate, duration) - Multi-platform FFmpeg support with security features ## transcript-fixer Enhancements - Add async AI processor for parallel processing - Add connection pool management for database operations - Add concurrency manager and rate limiter - Add audit log retention and database migrations - Add health check and metrics monitoring - Add comprehensive test suite (8 new test files) - Enhance security with domain and path validators ## Marketplace Updates - Update marketplace version from 1.8.0 to 1.9.0 - Update skills count from 15 to 16 - Update documentation (README.md, CLAUDE.md, CHANGELOG.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
302
transcript-fixer/scripts/tests/test_domain_validator.py
Normal file
302
transcript-fixer/scripts/tests/test_domain_validator.py
Normal file
@@ -0,0 +1,302 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Suite for Domain Validator
|
||||
|
||||
CRITICAL FIX VERIFICATION: Tests for Critical-3
|
||||
Purpose: Verify SQL injection prevention and input validation
|
||||
|
||||
Test Coverage:
|
||||
1. Domain whitelist validation
|
||||
2. Source whitelist validation
|
||||
3. Text sanitization
|
||||
4. Confidence validation
|
||||
5. SQL injection attack prevention
|
||||
6. DoS prevention (length limits)
|
||||
|
||||
Author: Chief Engineer
|
||||
Priority: P0 - Critical
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from utils.domain_validator import (
|
||||
validate_domain,
|
||||
validate_source,
|
||||
sanitize_text_field,
|
||||
validate_correction_inputs,
|
||||
validate_confidence,
|
||||
is_safe_sql_identifier,
|
||||
ValidationError,
|
||||
VALID_DOMAINS,
|
||||
VALID_SOURCES,
|
||||
MAX_FROM_TEXT_LENGTH,
|
||||
MAX_TO_TEXT_LENGTH,
|
||||
)
|
||||
|
||||
|
||||
class TestDomainValidation:
|
||||
"""Test domain whitelist validation"""
|
||||
|
||||
def test_valid_domains(self):
|
||||
"""Test all valid domains are accepted"""
|
||||
for domain in VALID_DOMAINS:
|
||||
result = validate_domain(domain)
|
||||
assert result == domain
|
||||
|
||||
def test_case_insensitive(self):
|
||||
"""Test domain validation is case-insensitive"""
|
||||
assert validate_domain("GENERAL") == "general"
|
||||
assert validate_domain("General") == "general"
|
||||
assert validate_domain("embodied_AI") == "embodied_ai"
|
||||
|
||||
def test_whitespace_trimmed(self):
|
||||
"""Test whitespace is trimmed"""
|
||||
assert validate_domain(" general ") == "general"
|
||||
assert validate_domain("\ngeneral\t") == "general"
|
||||
|
||||
def test_sql_injection_domain(self):
|
||||
"""CRITICAL: Test SQL injection is rejected"""
|
||||
malicious_inputs = [
|
||||
"general'; DROP TABLE corrections--",
|
||||
"general' OR '1'='1",
|
||||
"'; DELETE FROM corrections WHERE '1'='1",
|
||||
"general\"; DROP TABLE--",
|
||||
"1' UNION SELECT * FROM corrections--",
|
||||
]
|
||||
|
||||
for malicious in malicious_inputs:
|
||||
with pytest.raises(ValidationError, match="Invalid domain"):
|
||||
validate_domain(malicious)
|
||||
|
||||
def test_empty_domain(self):
|
||||
"""Test empty domain is rejected"""
|
||||
with pytest.raises(ValidationError, match="cannot be empty"):
|
||||
validate_domain("")
|
||||
|
||||
with pytest.raises(ValidationError, match="cannot be empty"):
|
||||
validate_domain(" ")
|
||||
|
||||
|
||||
class TestSourceValidation:
|
||||
"""Test source whitelist validation"""
|
||||
|
||||
def test_valid_sources(self):
|
||||
"""Test all valid sources are accepted"""
|
||||
for source in VALID_SOURCES:
|
||||
result = validate_source(source)
|
||||
assert result == source
|
||||
|
||||
def test_invalid_source(self):
|
||||
"""Test invalid source is rejected"""
|
||||
with pytest.raises(ValidationError, match="Invalid source"):
|
||||
validate_source("hacked")
|
||||
|
||||
with pytest.raises(ValidationError, match="Invalid source"):
|
||||
validate_source("'; DROP TABLE--")
|
||||
|
||||
|
||||
class TestTextSanitization:
|
||||
"""Test text field sanitization"""
|
||||
|
||||
def test_valid_text(self):
|
||||
"""Test normal text passes"""
|
||||
text = "Hello world!"
|
||||
result = sanitize_text_field(text, 100, "test")
|
||||
assert result == text
|
||||
|
||||
def test_length_limit(self):
|
||||
"""Test length limit is enforced"""
|
||||
long_text = "a" * 1000
|
||||
with pytest.raises(ValidationError, match="too long"):
|
||||
sanitize_text_field(long_text, 100, "test")
|
||||
|
||||
def test_null_byte_rejection(self):
|
||||
"""CRITICAL: Test null bytes are rejected (can break SQLite)"""
|
||||
malicious = "hello\x00world"
|
||||
with pytest.raises(ValidationError, match="null bytes"):
|
||||
sanitize_text_field(malicious, 100, "test")
|
||||
|
||||
def test_control_characters(self):
|
||||
"""Test control characters are removed"""
|
||||
text_with_controls = "hello\x01\x02world\x1f"
|
||||
result = sanitize_text_field(text_with_controls, 100, "test")
|
||||
assert result == "helloworld"
|
||||
|
||||
def test_whitespace_preserved(self):
|
||||
"""Test normal whitespace is preserved"""
|
||||
text = "hello\tworld\ntest\r\nline"
|
||||
result = sanitize_text_field(text, 100, "test")
|
||||
assert "\t" in result
|
||||
assert "\n" in result
|
||||
|
||||
def test_empty_after_sanitization(self):
|
||||
"""Test rejects text that becomes empty after sanitization"""
|
||||
with pytest.raises(ValidationError, match="empty after sanitization"):
|
||||
sanitize_text_field(" ", 100, "test")
|
||||
|
||||
|
||||
class TestCorrectionInputsValidation:
|
||||
"""Test full correction validation"""
|
||||
|
||||
def test_valid_inputs(self):
|
||||
"""Test valid inputs pass"""
|
||||
result = validate_correction_inputs(
|
||||
from_text="teh",
|
||||
to_text="the",
|
||||
domain="general",
|
||||
source="manual",
|
||||
notes="Typo fix",
|
||||
added_by="test_user"
|
||||
)
|
||||
|
||||
assert result[0] == "teh"
|
||||
assert result[1] == "the"
|
||||
assert result[2] == "general"
|
||||
assert result[3] == "manual"
|
||||
assert result[4] == "Typo fix"
|
||||
assert result[5] == "test_user"
|
||||
|
||||
def test_invalid_domain_in_full_validation(self):
|
||||
"""Test invalid domain is rejected in full validation"""
|
||||
with pytest.raises(ValidationError, match="Invalid domain"):
|
||||
validate_correction_inputs(
|
||||
from_text="test",
|
||||
to_text="test",
|
||||
domain="hacked'; DROP--",
|
||||
source="manual"
|
||||
)
|
||||
|
||||
def test_text_too_long(self):
|
||||
"""Test excessively long text is rejected"""
|
||||
long_text = "a" * (MAX_FROM_TEXT_LENGTH + 1)
|
||||
|
||||
with pytest.raises(ValidationError, match="too long"):
|
||||
validate_correction_inputs(
|
||||
from_text=long_text,
|
||||
to_text="test",
|
||||
domain="general",
|
||||
source="manual"
|
||||
)
|
||||
|
||||
def test_optional_fields_none(self):
|
||||
"""Test optional fields can be None"""
|
||||
result = validate_correction_inputs(
|
||||
from_text="test",
|
||||
to_text="test",
|
||||
domain="general",
|
||||
source="manual",
|
||||
notes=None,
|
||||
added_by=None
|
||||
)
|
||||
|
||||
assert result[4] is None # notes
|
||||
assert result[5] is None # added_by
|
||||
|
||||
|
||||
class TestConfidenceValidation:
|
||||
"""Test confidence score validation"""
|
||||
|
||||
def test_valid_confidence(self):
|
||||
"""Test valid confidence values"""
|
||||
assert validate_confidence(0.0) == 0.0
|
||||
assert validate_confidence(0.5) == 0.5
|
||||
assert validate_confidence(1.0) == 1.0
|
||||
|
||||
def test_confidence_out_of_range(self):
|
||||
"""Test out-of-range confidence is rejected"""
|
||||
with pytest.raises(ValidationError, match="between 0.0 and 1.0"):
|
||||
validate_confidence(-0.1)
|
||||
|
||||
with pytest.raises(ValidationError, match="between 0.0 and 1.0"):
|
||||
validate_confidence(1.1)
|
||||
|
||||
with pytest.raises(ValidationError, match="between 0.0 and 1.0"):
|
||||
validate_confidence(100.0)
|
||||
|
||||
def test_confidence_type_check(self):
|
||||
"""Test non-numeric confidence is rejected"""
|
||||
with pytest.raises(ValidationError, match="must be a number"):
|
||||
validate_confidence("high") # type: ignore
|
||||
|
||||
|
||||
class TestSQLIdentifierValidation:
|
||||
"""Test SQL identifier safety checks"""
|
||||
|
||||
def test_safe_identifiers(self):
|
||||
"""Test valid SQL identifiers"""
|
||||
assert is_safe_sql_identifier("table_name")
|
||||
assert is_safe_sql_identifier("_private")
|
||||
assert is_safe_sql_identifier("Column123")
|
||||
|
||||
def test_unsafe_identifiers(self):
|
||||
"""Test unsafe SQL identifiers are rejected"""
|
||||
assert not is_safe_sql_identifier("table-name") # Hyphen
|
||||
assert not is_safe_sql_identifier("123table") # Starts with number
|
||||
assert not is_safe_sql_identifier("table name") # Space
|
||||
assert not is_safe_sql_identifier("table; DROP") # Semicolon
|
||||
assert not is_safe_sql_identifier("table' OR") # Quote
|
||||
|
||||
def test_empty_identifier(self):
|
||||
"""Test empty identifier is rejected"""
|
||||
assert not is_safe_sql_identifier("")
|
||||
|
||||
def test_too_long_identifier(self):
|
||||
"""Test excessively long identifier is rejected"""
|
||||
long_id = "a" * 65
|
||||
assert not is_safe_sql_identifier(long_id)
|
||||
|
||||
|
||||
class TestSecurityScenarios:
|
||||
"""Test realistic attack scenarios"""
|
||||
|
||||
def test_sql_injection_via_from_text(self):
|
||||
"""Test SQL injection via from_text is handled safely"""
|
||||
# These should be sanitized, not cause SQL injection
|
||||
malicious_from = "test'; DROP TABLE corrections--"
|
||||
|
||||
# Should NOT raise exception - text fields allow any content
|
||||
# They're protected by parameterized queries
|
||||
result = validate_correction_inputs(
|
||||
from_text=malicious_from,
|
||||
to_text="safe",
|
||||
domain="general",
|
||||
source="manual"
|
||||
)
|
||||
|
||||
assert result[0] == malicious_from # Text preserved as-is
|
||||
|
||||
def test_dos_via_long_input(self):
|
||||
"""Test DoS prevention via length limits"""
|
||||
# Attempt to create extremely long input
|
||||
dos_text = "a" * 10000
|
||||
|
||||
with pytest.raises(ValidationError, match="too long"):
|
||||
validate_correction_inputs(
|
||||
from_text=dos_text,
|
||||
to_text="test",
|
||||
domain="general",
|
||||
source="manual"
|
||||
)
|
||||
|
||||
def test_domain_bypass_attempts(self):
|
||||
"""Test various domain bypass attempts"""
|
||||
bypass_attempts = [
|
||||
"general\x00hacked", # Null byte injection
|
||||
"general\nmalicious", # Newline injection
|
||||
"general -- comment", # SQL comment
|
||||
"general' UNION", # SQL union
|
||||
]
|
||||
|
||||
for attempt in bypass_attempts:
|
||||
with pytest.raises(ValidationError):
|
||||
validate_domain(attempt)
|
||||
|
||||
|
||||
# Run tests with: pytest -v test_domain_validator.py
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "--tb=short"])
|
||||
Reference in New Issue
Block a user