Files
claude-code-skills-reference/transcript-fixer/scripts/tests/test_domain_validator.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

303 lines
9.8 KiB
Python

#!/usr/bin/env python3
"""
Test Suite for Domain Validator
CRITICAL FIX VERIFICATION: Tests for Critical-3
Purpose: Verify SQL injection prevention and input validation
Test Coverage:
1. Domain whitelist validation
2. Source whitelist validation
3. Text sanitization
4. Confidence validation
5. SQL injection attack prevention
6. DoS prevention (length limits)
Author: Chief Engineer
Priority: P0 - Critical
"""
import pytest
import sys
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from utils.domain_validator import (
validate_domain,
validate_source,
sanitize_text_field,
validate_correction_inputs,
validate_confidence,
is_safe_sql_identifier,
ValidationError,
VALID_DOMAINS,
VALID_SOURCES,
MAX_FROM_TEXT_LENGTH,
MAX_TO_TEXT_LENGTH,
)
class TestDomainValidation:
    """Checks of ``validate_domain`` against the domain whitelist."""

    def test_valid_domains(self):
        """Every entry of VALID_DOMAINS is returned unchanged."""
        for allowed in VALID_DOMAINS:
            assert validate_domain(allowed) == allowed

    def test_case_insensitive(self):
        """Mixed-case spellings come back in lower case."""
        expectations = (
            ("GENERAL", "general"),
            ("General", "general"),
            ("embodied_AI", "embodied_ai"),
        )
        for raw, expected in expectations:
            assert validate_domain(raw) == expected

    def test_whitespace_trimmed(self):
        """Surrounding whitespace does not appear in the returned domain."""
        for padded in (" general ", "\ngeneral\t"):
            assert validate_domain(padded) == "general"

    def test_sql_injection_domain(self):
        """CRITICAL: SQL injection payloads raise an "Invalid domain" error."""
        payloads = (
            "general'; DROP TABLE corrections--",
            "general' OR '1'='1",
            "'; DELETE FROM corrections WHERE '1'='1",
            "general\"; DROP TABLE--",
            "1' UNION SELECT * FROM corrections--",
        )
        for payload in payloads:
            with pytest.raises(ValidationError, match="Invalid domain"):
                validate_domain(payload)

    def test_empty_domain(self):
        """Empty and whitespace-only domains are rejected."""
        for blank in ("", " "):
            with pytest.raises(ValidationError, match="cannot be empty"):
                validate_domain(blank)
class TestSourceValidation:
    """Checks of ``validate_source`` against the source whitelist."""

    def test_valid_sources(self):
        """Every entry of VALID_SOURCES is returned unchanged."""
        for allowed in VALID_SOURCES:
            assert validate_source(allowed) == allowed

    def test_invalid_source(self):
        """Unknown sources, including an injection payload, are rejected."""
        for rejected in ("hacked", "'; DROP TABLE--"):
            with pytest.raises(ValidationError, match="Invalid source"):
                validate_source(rejected)
class TestTextSanitization:
    """Checks of ``sanitize_text_field`` on free-text input."""

    def test_valid_text(self):
        """Ordinary text is returned unchanged."""
        plain = "Hello world!"
        assert sanitize_text_field(plain, 100, "test") == plain

    def test_length_limit(self):
        """Text longer than the supplied limit is rejected."""
        oversized = "a" * 1000
        with pytest.raises(ValidationError, match="too long"):
            sanitize_text_field(oversized, 100, "test")

    def test_null_byte_rejection(self):
        """CRITICAL: text containing a null byte is rejected (can break SQLite)."""
        with pytest.raises(ValidationError, match="null bytes"):
            sanitize_text_field("hello\x00world", 100, "test")

    def test_control_characters(self):
        """Control characters are dropped from the result."""
        cleaned = sanitize_text_field("hello\x01\x02world\x1f", 100, "test")
        assert cleaned == "helloworld"

    def test_whitespace_preserved(self):
        """Tabs and newlines survive sanitization."""
        cleaned = sanitize_text_field("hello\tworld\ntest\r\nline", 100, "test")
        for kept in ("\t", "\n"):
            assert kept in cleaned

    def test_empty_after_sanitization(self):
        """Input that sanitizes down to nothing is rejected."""
        with pytest.raises(ValidationError, match="empty after sanitization"):
            sanitize_text_field(" ", 100, "test")
class TestCorrectionInputsValidation:
    """End-to-end checks of ``validate_correction_inputs``."""

    def test_valid_inputs(self):
        """Well-formed inputs come back positionally, in call order."""
        validated = validate_correction_inputs(
            from_text="teh",
            to_text="the",
            domain="general",
            source="manual",
            notes="Typo fix",
            added_by="test_user",
        )
        expected_fields = (
            "teh",
            "the",
            "general",
            "manual",
            "Typo fix",
            "test_user",
        )
        # Index one position at a time so only item access is required of
        # the returned object.
        for position, expected in enumerate(expected_fields):
            assert validated[position] == expected

    def test_invalid_domain_in_full_validation(self):
        """A malicious domain is rejected by the combined validator too."""
        arguments = {
            "from_text": "test",
            "to_text": "test",
            "domain": "hacked'; DROP--",
            "source": "manual",
        }
        with pytest.raises(ValidationError, match="Invalid domain"):
            validate_correction_inputs(**arguments)

    def test_text_too_long(self):
        """from_text one character over MAX_FROM_TEXT_LENGTH is rejected."""
        oversized = "a" * (MAX_FROM_TEXT_LENGTH + 1)
        with pytest.raises(ValidationError, match="too long"):
            validate_correction_inputs(
                from_text=oversized,
                to_text="test",
                domain="general",
                source="manual",
            )

    def test_optional_fields_none(self):
        """``notes`` and ``added_by`` may be None and stay None."""
        validated = validate_correction_inputs(
            from_text="test",
            to_text="test",
            domain="general",
            source="manual",
            notes=None,
            added_by=None,
        )
        # Position 4 holds notes, position 5 holds added_by.
        for position in (4, 5):
            assert validated[position] is None
class TestConfidenceValidation:
    """Checks of ``validate_confidence``."""

    def test_valid_confidence(self):
        """Both endpoints and the midpoint of [0.0, 1.0] are returned as-is."""
        for score in (0.0, 0.5, 1.0):
            assert validate_confidence(score) == score

    def test_confidence_out_of_range(self):
        """Scores below 0.0 or above 1.0 are rejected."""
        for score in (-0.1, 1.1, 100.0):
            with pytest.raises(ValidationError, match="between 0.0 and 1.0"):
                validate_confidence(score)

    def test_confidence_type_check(self):
        """A non-numeric score is rejected."""
        with pytest.raises(ValidationError, match="must be a number"):
            validate_confidence("high")  # type: ignore
class TestSQLIdentifierValidation:
    """Checks of ``is_safe_sql_identifier``."""

    def test_safe_identifiers(self):
        """Plain, underscore-prefixed and alphanumeric names are accepted."""
        for name in ("table_name", "_private", "Column123"):
            assert is_safe_sql_identifier(name)

    def test_unsafe_identifiers(self):
        """Names containing disallowed characters are rejected."""
        rejected_names = (
            "table-name",   # Hyphen
            "123table",     # Starts with number
            "table name",   # Space
            "table; DROP",  # Semicolon
            "table' OR",    # Quote
        )
        for name in rejected_names:
            assert not is_safe_sql_identifier(name)

    def test_empty_identifier(self):
        """The empty string is not a safe identifier."""
        assert not is_safe_sql_identifier("")

    def test_too_long_identifier(self):
        """A 65-character identifier is rejected."""
        assert not is_safe_sql_identifier("a" * 65)
class TestSecurityScenarios:
    """Attack-style inputs exercised through the public validators."""

    def test_sql_injection_via_from_text(self):
        """An injection payload in from_text is accepted and kept verbatim."""
        # No exception is expected here: free-text fields accept arbitrary
        # content. Per the original author's note, safety for these fields
        # comes from parameterized queries rather than from this validator.
        payload = "test'; DROP TABLE corrections--"
        validated = validate_correction_inputs(
            from_text=payload,
            to_text="safe",
            domain="general",
            source="manual",
        )
        assert validated[0] == payload  # Text preserved as-is

    def test_dos_via_long_input(self):
        """A 10000-character from_text is rejected as too long."""
        flood = "a" * 10000
        with pytest.raises(ValidationError, match="too long"):
            validate_correction_inputs(
                from_text=flood,
                to_text="test",
                domain="general",
                source="manual",
            )

    def test_domain_bypass_attempts(self):
        """A valid domain followed by extra content is still rejected."""
        disguised_domains = (
            "general\x00hacked",   # Null byte injection
            "general\nmalicious",  # Newline injection
            "general -- comment",  # SQL comment
            "general' UNION",      # SQL union
        )
        for candidate in disguised_domains:
            with pytest.raises(ValidationError):
                validate_domain(candidate)
# Run tests with: pytest -v test_domain_validator.py
# (or execute this file directly: python test_domain_validator.py)
if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the process.
    # Forward it so that a direct run with failing tests does not exit 0,
    # which would hide the failure from shells and CI.
    sys.exit(pytest.main([__file__, "-v", "--tb=short"]))