-- Transcript Fixer Database Schema v2.0
-- Migration from JSON to SQLite for ACID compliance and scalability
-- Author: ISTJ Chief Engineer
-- Date: 2025-01-28
--
-- Every statement is idempotent (IF NOT EXISTS), so this script can be
-- re-run against an existing database. Keep each comment on its own line:
-- a `--` comment extends to the end of the physical line and would otherwise
-- swallow any statement that follows it.

-- SQLite leaves foreign key enforcement off unless it is enabled on the
-- connection; without it the ON DELETE CASCADE clauses below are not applied.
PRAGMA foreign_keys = ON;

-- ---------------------------------------------------------------------------
-- Table: corrections
-- One row per correction mapping (from_text -> to_text) with its metadata.
-- A given from_text may appear at most once per domain.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS corrections (
    id          INTEGER   PRIMARY KEY AUTOINCREMENT,
    from_text   TEXT      NOT NULL,
    to_text     TEXT      NOT NULL,
    domain      TEXT      NOT NULL DEFAULT 'general',
    -- Origin of the mapping; restricted to the three listed values.
    source      TEXT      NOT NULL
                CHECK (source IN ('manual', 'learned', 'imported')),
    -- Constrained to the closed interval [0.0, 1.0].
    confidence  REAL      NOT NULL DEFAULT 1.0
                CHECK (confidence >= 0.0 AND confidence <= 1.0),
    added_by    TEXT,
    added_at    TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    usage_count INTEGER   NOT NULL DEFAULT 0
                CHECK (usage_count >= 0),
    last_used   TIMESTAMP,
    notes       TEXT,
    -- Stored as 1/0; the schema defaults new rows to 1 (active).
    is_active   BOOLEAN   NOT NULL DEFAULT 1,
    UNIQUE (from_text, domain)
);

CREATE INDEX IF NOT EXISTS idx_corrections_domain
    ON corrections (domain);
CREATE INDEX IF NOT EXISTS idx_corrections_source
    ON corrections (source);
CREATE INDEX IF NOT EXISTS idx_corrections_added_at
    ON corrections (added_at);
CREATE INDEX IF NOT EXISTS idx_corrections_is_active
    ON corrections (is_active);
-- NOTE(review): UNIQUE (from_text, domain) already yields an index led by
-- from_text, so this one looks redundant; kept so the index name stays stable.
CREATE INDEX IF NOT EXISTS idx_corrections_from_text
    ON corrections (from_text);

-- ---------------------------------------------------------------------------
-- Table: context_rules
-- Regex-based context-aware correction rules. Each pattern is unique.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS context_rules (
    id          INTEGER   PRIMARY KEY AUTOINCREMENT,
    pattern     TEXT      NOT NULL UNIQUE,
    replacement TEXT      NOT NULL,
    description TEXT,
    -- NOTE(review): the DESC index below suggests higher values are applied
    -- first; confirm against the code that reads this table.
    priority    INTEGER   NOT NULL DEFAULT 0,
    is_active   BOOLEAN   NOT NULL DEFAULT 1,
    added_at    TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    added_by    TEXT
);

CREATE INDEX IF NOT EXISTS idx_context_rules_priority
    ON context_rules (priority DESC);
CREATE INDEX IF NOT EXISTS idx_context_rules_is_active
    ON context_rules (is_active);

-- ---------------------------------------------------------------------------
-- Table: correction_history
-- Audit log with one row per correction run.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS correction_history (
    id                INTEGER   PRIMARY KEY AUTOINCREMENT,
    filename          TEXT      NOT NULL,
    domain            TEXT      NOT NULL,
    run_timestamp     TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    original_length   INTEGER   NOT NULL
                      CHECK (original_length >= 0),
    stage1_changes    INTEGER   NOT NULL DEFAULT 0
                      CHECK (stage1_changes >= 0),
    stage2_changes    INTEGER   NOT NULL DEFAULT 0
                      CHECK (stage2_changes >= 0),
    model             TEXT,
    -- Nullable; when present it must be non-negative (milliseconds).
    execution_time_ms INTEGER
                      CHECK (execution_time_ms >= 0),
    success           BOOLEAN   NOT NULL DEFAULT 1,
    error_message     TEXT
);

CREATE INDEX IF NOT EXISTS idx_history_run_timestamp
    ON correction_history (run_timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_history_domain
    ON correction_history (domain);
CREATE INDEX IF NOT EXISTS idx_history_success
    ON correction_history (success);

-- ---------------------------------------------------------------------------
-- Table: correction_changes
-- Detailed changes made in each correction run. Rows are removed together
-- with their parent correction_history row (ON DELETE CASCADE).
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS correction_changes (
    id             INTEGER PRIMARY KEY AUTOINCREMENT,
    history_id     INTEGER NOT NULL,
    line_number    INTEGER,
    from_text      TEXT    NOT NULL,
    to_text        TEXT    NOT NULL,
    rule_type      TEXT    NOT NULL
                   CHECK (rule_type IN ('context', 'dictionary', 'ai')),
    -- No foreign key is declared for rule_id.
    -- NOTE(review): presumably it points at context_rules.id or
    -- corrections.id depending on rule_type; verify against the writer.
    rule_id        INTEGER,
    context_before TEXT,
    context_after  TEXT,
    FOREIGN KEY (history_id)
        REFERENCES correction_history (id)
        ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_changes_history_id
    ON correction_changes (history_id);
CREATE INDEX IF NOT EXISTS idx_changes_rule_type
    ON correction_changes (rule_type);

-- ---------------------------------------------------------------------------
-- Table: learned_suggestions
-- AI-learned patterns pending user review. The combination
-- (from_text, to_text, domain) is unique.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS learned_suggestions (
    id          INTEGER   PRIMARY KEY AUTOINCREMENT,
    from_text   TEXT      NOT NULL,
    to_text     TEXT      NOT NULL,
    domain      TEXT      NOT NULL DEFAULT 'general',
    -- Must be strictly positive; new rows start at 1.
    frequency   INTEGER   NOT NULL DEFAULT 1
                CHECK (frequency > 0),
    -- No default here: the writer must always supply a value in [0.0, 1.0].
    confidence  REAL      NOT NULL
                CHECK (confidence >= 0.0 AND confidence <= 1.0),
    first_seen  TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_seen   TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    status      TEXT      NOT NULL DEFAULT 'pending'
                CHECK (status IN ('pending', 'approved', 'rejected')),
    reviewed_at TIMESTAMP,
    reviewed_by TEXT,
    UNIQUE (from_text, to_text, domain)
);

CREATE INDEX IF NOT EXISTS idx_suggestions_status
    ON learned_suggestions (status);
-- Additional indexes on learned_suggestions (the table itself is created
-- earlier in this file).
CREATE INDEX IF NOT EXISTS idx_suggestions_domain
    ON learned_suggestions (domain);
CREATE INDEX IF NOT EXISTS idx_suggestions_confidence
    ON learned_suggestions (confidence DESC);
CREATE INDEX IF NOT EXISTS idx_suggestions_frequency
    ON learned_suggestions (frequency DESC);

-- ---------------------------------------------------------------------------
-- Table: suggestion_examples
-- Example occurrences of learned patterns. Rows are removed together with
-- their parent learned_suggestions row (ON DELETE CASCADE).
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS suggestion_examples (
    id            INTEGER   PRIMARY KEY AUTOINCREMENT,
    suggestion_id INTEGER   NOT NULL,
    filename      TEXT      NOT NULL,
    line_number   INTEGER,
    context       TEXT      NOT NULL,
    occurred_at   TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (suggestion_id)
        REFERENCES learned_suggestions (id)
        ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_examples_suggestion_id
    ON suggestion_examples (suggestion_id);

-- ---------------------------------------------------------------------------
-- Table: system_config
-- System configuration and preferences as key/value pairs. Every value is
-- stored as TEXT; value_type names how it is meant to be interpreted.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS system_config (
    key         TEXT      PRIMARY KEY,
    value       TEXT      NOT NULL,
    value_type  TEXT      NOT NULL
                CHECK (value_type IN ('string', 'int', 'float', 'boolean', 'json')),
    description TEXT,
    updated_at  TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Seed the default configuration. OR IGNORE skips any key that already
-- exists, so re-running this script does not overwrite customised values.
INSERT OR IGNORE INTO system_config (key, value, value_type, description)
VALUES
    ('schema_version', '2.0', 'string', 'Database schema version'),
    ('api_provider', 'GLM', 'string', 'API provider name'),
    ('api_model', 'GLM-4.6', 'string', 'Default AI model'),
    ('api_base_url', 'https://open.bigmodel.cn/api/anthropic', 'string', 'API endpoint URL'),
    ('default_domain', 'general', 'string', 'Default correction domain'),
    ('auto_learn_enabled', 'true', 'boolean', 'Enable automatic pattern learning'),
    ('backup_enabled', 'true', 'boolean', 'Create backups before operations'),
    ('learning_frequency_threshold', '3', 'int', 'Min frequency for learned suggestions'),
    ('learning_confidence_threshold', '0.8', 'float', 'Min confidence for learned suggestions'),
    ('history_retention_days', '90', 'int', 'Days to retain correction history'),
    ('max_correction_length', '1000', 'int', 'Maximum length for correction text');

-- ---------------------------------------------------------------------------
-- Table: audit_log
-- Comprehensive audit trail for all operations.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS audit_log (
    id            INTEGER   PRIMARY KEY AUTOINCREMENT,
    timestamp     TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    action        TEXT      NOT NULL,
    entity_type   TEXT      NOT NULL,
    -- No foreign key is declared; the referenced table depends on entity_type.
    entity_id     INTEGER,
    user          TEXT,
    details       TEXT,
    success       BOOLEAN   NOT NULL DEFAULT 1,
    error_message TEXT
);

CREATE INDEX IF NOT EXISTS idx_audit_timestamp
    ON audit_log (timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_audit_action
    ON audit_log (action);
CREATE INDEX IF NOT EXISTS idx_audit_entity_type
    ON audit_log (entity_type);
CREATE INDEX IF NOT EXISTS idx_audit_success
    ON audit_log (success);

-- ---------------------------------------------------------------------------
-- View: active_corrections
-- Corrections with is_active = 1, ordered by domain and then from_text.
-- ---------------------------------------------------------------------------
CREATE VIEW IF NOT EXISTS active_corrections AS
SELECT
    id,
    from_text,
    to_text,
    domain,
    source,
    confidence,
    usage_count,
    last_used,
    added_at
FROM corrections
WHERE is_active = 1
ORDER BY
    domain,
    from_text;

-- ---------------------------------------------------------------------------
-- View: pending_suggestions
-- Suggestions whose status is 'pending', each with the number of recorded
-- examples. The LEFT JOIN keeps suggestions that have no examples;
-- COUNT(e.id) reports 0 for them.
-- ---------------------------------------------------------------------------
CREATE VIEW IF NOT EXISTS pending_suggestions AS
SELECT
    s.id,
    s.from_text,
    s.to_text,
    s.domain,
    s.frequency,
    s.confidence,
    s.first_seen,
    s.last_seen,
    COUNT(e.id) AS example_count
FROM learned_suggestions AS s
LEFT JOIN suggestion_examples AS e
    ON s.id = e.suggestion_id
WHERE s.status = 'pending'
GROUP BY s.id
ORDER BY
    s.confidence DESC,
    s.frequency DESC;

-- ---------------------------------------------------------------------------
-- View: correction_statistics
-- Per-domain statistics over active corrections only. COUNT over a CASE with
-- no ELSE counts just the rows matching that source, because non-matching
-- rows evaluate to NULL and COUNT skips NULLs.
-- ---------------------------------------------------------------------------
CREATE VIEW IF NOT EXISTS correction_statistics AS
SELECT
    domain,
    COUNT(*) AS total_corrections,
    COUNT(CASE WHEN source = 'manual' THEN 1 END) AS manual_count,
    COUNT(CASE WHEN source = 'learned' THEN 1 END) AS learned_count,
    COUNT(CASE WHEN source = 'imported' THEN 1 END) AS imported_count,
    SUM(usage_count) AS total_usage,
    MAX(added_at) AS last_updated
FROM corrections
WHERE is_active = 1
GROUP BY domain;