Release v1.8.0: Add transcript-fixer skill
## New Skill: transcript-fixer v1.0.0

Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning.

**Features:**
- Two-stage correction pipeline (dictionary + AI)
- Automatic pattern detection and learning
- Domain-specific dictionaries (general, embodied_ai, finance, medical)
- SQLite-based correction repository
- Team collaboration with import/export
- GLM API integration for AI corrections
- Cost optimization through dictionary promotion

**Use cases:**
- Correcting meeting notes, lecture recordings, or interview transcripts
- Fixing Chinese/English homophone errors and technical terminology
- Building domain-specific correction dictionaries
- Improving transcript accuracy through iterative learning

**Documentation:**
- Complete workflow guides in references/
- SQL query templates
- Troubleshooting guide
- Team collaboration patterns
- API setup instructions

**Marketplace updates:**
- Updated marketplace to v1.8.0
- Added transcript-fixer plugin (category: productivity)
- Updated README.md with skill description and use cases
- Updated CLAUDE.md with skill listing and counts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
141
transcript-fixer/scripts/utils/validation.py
Normal file
141
transcript-fixer/scripts/utils/validation.py
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validation Utility - Configuration Health Checker
|
||||
|
||||
SINGLE RESPONSIBILITY: Validate transcript-fixer configuration and its SQLite database
|
||||
|
||||
Features:
|
||||
- Check directory structure
|
||||
- Validate the SQLite corrections database (opens it and checks expected tables)
|
||||
- Check environment variables
|
||||
- Report statistics and health status
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Handle imports for both standalone and package usage
|
||||
try:
|
||||
from core import CorrectionRepository, CorrectionService
|
||||
except ImportError:
|
||||
# Fallback for when run from scripts directory directly
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from core import CorrectionRepository, CorrectionService
|
||||
|
||||
|
||||
def validate_configuration() -> tuple[list[str], list[str]]:
    """
    Validate transcript-fixer configuration.

    Checks, in order:
      1. ``~/.transcript-fixer`` exists and is a directory. If not, an error
         is recorded and the remaining checks are skipped.
      2. ``corrections.db`` inside that directory, when present, can be
         opened, reports statistics, and contains every expected table.
         A missing database file is recorded as a warning only.
      3. The ``GLM_API_KEY`` environment variable is set (warning if not).

    Progress for each check is printed to stdout as it runs.

    Returns:
        Tuple of (errors, warnings) as string lists
    """
    config_dir = Path.home() / ".transcript-fixer"
    db_path = config_dir / "corrections.db"

    errors: list[str] = []
    warnings: list[str] = []

    print("🔍 Validating transcript-fixer configuration...\n")

    # Check directory exists
    if not config_dir.exists():
        errors.append(f"Configuration directory not found: {config_dir}")
        print(f"❌ {errors[-1]}")
        print("\n💡 Run: python fix_transcription.py --init")
        return errors, warnings

    # A regular file at this path would satisfy exists() but can never hold
    # the database, so report it instead of declaring the directory healthy.
    if not config_dir.is_dir():
        errors.append(f"Configuration path is not a directory: {config_dir}")
        print(f"❌ {errors[-1]}")
        return errors, warnings

    print(f"✅ Configuration directory exists: {config_dir}")

    # Validate SQLite database
    if db_path.exists():
        try:
            repository = CorrectionRepository(db_path)
            service = CorrectionService(repository)
            try:
                # Query basic stats
                stats = service.get_statistics()
                print(f"✅ Database valid: {stats['total_corrections']} corrections")

                # Check tables exist
                # NOTE(review): this reaches into a private repository method;
                # switch to a public accessor if the repository offers one.
                conn = repository._get_connection()
                cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
                tables = {row[0] for row in cursor.fetchall()}

                expected_tables = [
                    'corrections', 'context_rules', 'correction_history',
                    'correction_changes', 'learned_suggestions', 'suggestion_examples',
                    'system_config', 'audit_log',
                ]

                missing_tables = [t for t in expected_tables if t not in tables]
                if missing_tables:
                    errors.append(f"Database missing tables: {missing_tables}")
                    print(f"❌ {errors[-1]}")
                else:
                    print(f"✅ All {len(expected_tables)} tables present")
            finally:
                # Always release the service, even when a check above raised;
                # previously a failing query skipped close() entirely.
                service.close()
        except Exception as e:
            # Top-level boundary for the health check: any failure while
            # opening or inspecting the database is reported, not raised.
            errors.append(f"Database validation failed: {e}")
            print(f"❌ {errors[-1]}")
    else:
        warnings.append("Database not found (will be created on first use)")
        print(f"⚠️ Database not found: {db_path}")

    # Check API key
    api_key = os.getenv("GLM_API_KEY")
    if not api_key:
        warnings.append("GLM_API_KEY environment variable not set")
        print("⚠️ GLM_API_KEY not set (required for Stage 2 AI corrections)")
    else:
        print("✅ GLM_API_KEY is set")

    return errors, warnings
|
||||
|
||||
|
||||
def print_validation_summary(errors: list[str], warnings: list[str]) -> int:
    """
    Print the closing validation report and return the process exit code.

    Errors take precedence: when any are present only the errors are listed
    and the warnings are not repeated. Warnings alone still count as a valid
    configuration.

    Returns:
        1 when at least one error was recorded, otherwise 0
    """
    rule = "=" * 60
    print("\n" + rule)

    # Build the report lines for the applicable outcome, then emit them
    # followed by the closing rule.
    if errors:
        status = 1
        report = [f"❌ {len(errors)} error(s) found:"]
        report.extend(f" - {err}" for err in errors)
        report.append("\n💡 Fix errors and run --validate again")
    elif warnings:
        status = 0
        report = [f"⚠️ {len(warnings)} warning(s):"]
        report.extend(f" - {warn}" for warn in warnings)
        report.append("\n✅ Configuration is valid (with warnings)")
    else:
        status = 0
        report = ["✅ All checks passed! Configuration is valid."]

    report.append(rule)
    for line in report:
        print(line)

    return status
|
||||
|
||||
|
||||
def main():
    """Standalone entry point: validate, print the summary, exit with its status code."""
    # validate_configuration() yields (errors, warnings), which is exactly the
    # positional argument order print_validation_summary() expects.
    sys.exit(print_validation_summary(*validate_configuration()))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user