#!/usr/bin/env python3
"""
Security Utilities

CRITICAL FIX: Secure handling of sensitive data
ISSUE: Critical-2 in Engineering Excellence Plan

This module provides:
1. Secret masking for logs
2. Secure memory handling
3. API key validation
4. Input sanitization

Author: Chief Engineer
Date: 2025-10-28
Priority: P0 - Critical
"""

from __future__ import annotations

import re
import ctypes  # noqa: F401  (kept: importers may rely on this module-level name)
import sys  # noqa: F401  (kept: importers may rely on this module-level name)
from typing import Optional, Final  # noqa: F401

# Constants
MIN_API_KEY_LENGTH: Final[int] = 20  # Minimum reasonable API key length
MASK_PREFIX_LENGTH: Final[int] = 4   # Show first 4 chars
MASK_SUFFIX_LENGTH: Final[int] = 4   # Show last 4 chars

# Placeholder returned whenever a secret is too short to show partially.
_FULLY_MASKED: Final[str] = "***"

# Compiled once at import time instead of on every call.
_API_KEY_RE: Final = re.compile(r'\b[A-Za-z0-9._-]{20,}\b')
_BEARER_RE: Final = re.compile(
    r'Authorization:\s*Bearer\s+([A-Za-z0-9._-]+)',
    flags=re.IGNORECASE,
)

# str.translate table used by sanitize_for_logging: C0 controls, DEL and C1
# controls are dropped; tab is kept; CR/LF are rewritten as visible escapes so
# one log call can never produce more than one log line.
_LOG_TRANSLATION: Final[dict] = {
    **{cp: None for cp in range(0x00, 0x20)},
    **{cp: None for cp in range(0x7F, 0xA0)},
    0x09: "\t",
    0x0A: "\\n",
    0x0D: "\\r",
}


def mask_secret(secret: str, visible_chars: int = 4) -> str:
    """
    Safely mask secrets for logging.

    CRITICAL: Never log full secrets. Always use this function.

    Args:
        secret: The secret to mask (API key, token, password)
        visible_chars: Number of chars to show at start/end (default: 4)

    Returns:
        Masked string like "7fb3...WDPR", or "***" when the secret is empty,
        when ``visible_chars`` is not positive, or when showing
        ``visible_chars`` at both ends would reveal the entire secret.

    Examples:
        >>> mask_secret("7fb3ab7b186242288fe93a27227b7149.bJCOEAsUfejvWDPR")
        '7fb3...WDPR'
        >>> mask_secret("short")
        '***'
        >>> mask_secret("")
        '***'
        >>> mask_secret("abcdefgh")
        '***'
    """
    # visible_chars <= 0 must be rejected explicitly: secret[-0:] is the
    # whole string, and negative values slice away only a few characters.
    if not secret or visible_chars <= 0:
        return _FULLY_MASKED

    # "<=" rather than "<": at exactly 2 * visible_chars the prefix and
    # suffix together would spell out the complete secret.
    if len(secret) <= 2 * visible_chars:
        return _FULLY_MASKED

    return f"{secret[:visible_chars]}...{secret[-visible_chars:]}"


def mask_secret_in_text(text: str, secret: str) -> str:
    """
    Replace all occurrences of secret in text with masked version.

    Useful for sanitizing error messages, logs, etc.

    Args:
        text: Text that might contain secrets
        secret: The secret to mask

    Returns:
        Text with secret masked. ``text`` is returned unchanged when either
        argument is empty.

    Examples:
        >>> text = "API key example-fake-key-1234567890abcdef.test failed"
        >>> secret = "example-fake-key-1234567890abcdef.test"
        >>> mask_secret_in_text(text, secret)
        'API key exam...test failed'
    """
    if not secret or not text:
        return text

    return text.replace(secret, mask_secret(secret))


def validate_api_key(key: str) -> bool:
    """
    Validate API key format (basic checks).

    This doesn't verify if the key is valid with the API,
    just checks if it looks reasonable.

    Args:
        key: API key to validate

    Returns:
        True if key format is valid

    Checks:
        - Is a non-empty string
        - Minimum length (20 chars) after surrounding whitespace is removed
        - Contains at least one alphanumeric character
    """
    # Non-str input (None, bytes, ...) is simply "not a valid key".
    if not isinstance(key, str):
        return False

    candidate = key.strip()

    # Also rejects empty and whitespace-only keys, which strip to "".
    if len(candidate) < MIN_API_KEY_LENGTH:
        return False

    return any(ch.isalnum() for ch in candidate)


def sanitize_for_logging(text: str, max_length: int = 200) -> str:
    """
    Sanitize text for safe logging.

    Prevents:
    - Log injection attacks
    - Excessively long log entries
    - Binary data in logs
    - Control characters

    Args:
        text: Text to sanitize
        max_length: Maximum number of input characters kept before the
            "... (truncated)" marker is appended (default: 200). Negative
            values are treated as 0.

    Returns:
        Safe text for logging: control characters removed (tab is kept),
        newlines and carriage returns rendered as the two-character escapes
        ``\\n`` and ``\\r``.
    """
    if not text:
        return ""

    # Truncate first so the character pass below is bounded by max_length.
    limit = max(max_length, 0)
    if len(text) > limit:
        text = text[:limit] + "... (truncated)"

    # Single pass: drop control characters, escape CR/LF.
    return text.translate(_LOG_TRANSLATION)


def _mask_key_candidate(match: re.Match) -> str:
    """Return the masked form of a regex match if it passes validate_api_key."""
    candidate = match.group(0)
    if validate_api_key(candidate):
        return mask_secret(candidate)
    return candidate


def detect_and_mask_api_keys(text: str) -> str:
    """
    Automatically detect and mask potential API keys in text.

    Patterns detected:
    - Typical API key formats (alphanumeric + dots/dashes/underscores,
      20+ chars)
    - Bearer tokens in Authorization headers

    Args:
        text: Text that might contain API keys

    Returns:
        Text with API keys masked. Falsy input is returned unchanged.

    Warning:
        This is heuristic-based and may have false positives/negatives.
        Best practice: Don't let keys get into logs in the first place.
    """
    if not text:
        return text

    # Long key-shaped tokens anywhere in the text.
    text = _API_KEY_RE.sub(_mask_key_candidate, text)

    # Bearer tokens of any length (short ones are not caught above).
    return _BEARER_RE.sub(
        lambda m: f'Authorization: Bearer {mask_secret(m.group(1))}',
        text,
    )


def zero_memory(data: str | bytearray | memoryview) -> None:
    """
    Overwrite sensitive data in memory where that can be done safely.

    Writable buffers (``bytearray``, writable ``memoryview``) are filled with
    zero bytes in place. Immutable objects, including ``str``, are left
    untouched: a ``str`` cannot be wiped without writing through raw
    pointers into an object that other code may still reference, which
    corrupts those references and can write outside the object's allocation.

    For truly secure secret handling, consider:
    - Using memoryview/bytearray for mutable secrets (wiped by this function)
    - Storing secrets in kernel memory (OS features)
    - Hardware security modules (HSM)

    Args:
        data: Buffer to wipe. ``str`` is accepted for backward compatibility
            and is a no-op.

    Limitations:
        - Python strings are immutable and are never modified here
        - Copies of the data made elsewhere are not affected
        - This is NOT cryptographically secure erasure
    """
    if isinstance(data, str):
        return

    try:
        with memoryview(data) as view:
            if view.readonly:
                return
            # Flatten to raw bytes so any item format/shape is covered.
            with view.cast("B") as flat:
                flat[:] = bytes(len(flat))
    except (TypeError, ValueError, BufferError):
        # Not a buffer, or not a contiguous one: best-effort, nothing to wipe.
        return


class SecretStr:
    """
    Wrapper for secrets that prevents accidental logging.

    Usage:
        api_key = SecretStr("7fb3ab7b186242288fe93a27227b7149.bJCOEAsUfejvWDPR")
        print(api_key)  # Prints: SecretStr(7fb3...WDPR)
        print(api_key.get())  # Get actual value when needed

    This prevents accidentally logging secrets:
        logger.info(f"Using key: {api_key}")  # Safe! Automatically masked
    """

    def __init__(self, secret: str):
        """
        Initialize with secret value.

        Args:
            secret: The secret to wrap
        """
        self._secret = secret

    def get(self) -> str:
        """
        Get the actual secret value.

        Use this only when you need the real value.
        Never log the result!

        Returns:
            The actual secret
        """
        return self._secret

    def __str__(self) -> str:
        """String representation (masked)"""
        return f"SecretStr({mask_secret(self._secret)})"

    def __repr__(self) -> str:
        """Repr (masked)"""
        return f"SecretStr({mask_secret(self._secret)})"

    def __del__(self):
        """Hand the wrapped value to zero_memory on deletion.

        zero_memory never modifies a ``str``, so strings the caller still
        holds (including values returned by ``get()``) stay intact.
        """
        # getattr: __del__ also runs when __init__ raised before _secret
        # was assigned.
        zero_memory(getattr(self, "_secret", ""))


def _demo() -> None:
    """Print a short demonstration of the helpers using a fake key."""
    # Test masking (using fake example key for testing)
    api_key = "example-fake-key-for-testing-only-not-real"
    print(f"Original: {api_key}")
    print(f"Masked: {mask_secret(api_key)}")

    # Test in text
    text = f"Connection failed with key {api_key}"
    print(f"Sanitized: {mask_secret_in_text(text, api_key)}")

    # Test SecretStr
    secret = SecretStr(api_key)
    print(f"SecretStr: {secret}")  # Automatically masked

    # Test validation
    print(f"Valid: {validate_api_key(api_key)}")
    print(f"Invalid: {validate_api_key('short')}")

    # Test auto-detection
    log_text = f"ERROR: API request failed with key {api_key}"
    print(f"Auto-masked: {detect_and_mask_api_keys(log_text)}")


# Example usage and testing
if __name__ == "__main__":
    _demo()