## New Skill: transcript-fixer v1.0.0

Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning.

**Features:**
- Two-stage correction pipeline (dictionary + AI)
- Automatic pattern detection and learning
- Domain-specific dictionaries (general, embodied_ai, finance, medical)
- SQLite-based correction repository
- Team collaboration with import/export
- GLM API integration for AI corrections
- Cost optimization through dictionary promotion

**Use cases:**
- Correcting meeting notes, lecture recordings, or interview transcripts
- Fixing Chinese/English homophone errors and technical terminology
- Building domain-specific correction dictionaries
- Improving transcript accuracy through iterative learning

**Documentation:**
- Complete workflow guides in references/
- SQL query templates
- Troubleshooting guide
- Team collaboration patterns
- API setup instructions

**Marketplace updates:**
- Updated marketplace to v1.8.0
- Added transcript-fixer plugin (category: productivity)
- Updated README.md with skill description and use cases
- Updated CLAUDE.md with skill listing and counts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
154 lines
4.4 KiB
Python
154 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Example: Bulk Import Corrections to SQLite Database
|
|
|
|
This script demonstrates how to import corrections from various sources
|
|
into the transcript-fixer SQLite database.
|
|
|
|
Usage:
|
|
uv run scripts/examples/bulk_import.py
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from core import CorrectionRepository, CorrectionService
|
|
|
|
|
|
def import_from_dict():
    """Example: import corrections from a Python dictionary.

    Turns an in-memory mapping of misrecognized text -> corrected text into
    the list-of-dicts format passed to ``CorrectionService.import_corrections``
    (all tagged with the ``embodied_ai`` domain), imports it into the database
    at ``~/.transcript-fixer/corrections.db`` and prints the
    inserted/updated/skipped counts returned by the service.
    """
    # Initialize service
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    # try/finally so the service is closed even when the import raises.
    try:
        # Define corrections as dictionary (misrecognized -> corrected)
        corrections_dict = {
            "巨升智能": "具身智能",
            "巨升": "具身",
            "奇迹创坛": "奇绩创坛",
            # NOTE(review): this entry maps the text to itself, so it looks
            # like a no-op correction -- confirm whether that is intended.
            "火星营": "火星营",
            "矩阵公司": "初创公司",
            "股价": "框架",
            "三观": "三关",
        }

        # Convert to list format for import
        corrections_list = [
            {
                "from_text": from_text,
                "to_text": to_text,
                "domain": "embodied_ai",
                "source": "imported",
                "confidence": 1.0,
            }
            for from_text, to_text in corrections_dict.items()
        ]

        # Import
        inserted, updated, skipped = service.import_corrections(
            corrections=corrections_list,
            merge=True,
        )

        print("✅ Import complete:")
        print(f" - Inserted: {inserted}")
        print(f" - Updated: {updated}")
        print(f" - Skipped: {skipped}")
    finally:
        service.close()
|
|
|
|
|
|
def import_from_json_file(json_path=None):
    """Example: import corrections from an old (v1.0) JSON format file.

    The v1.0 layout is an object with a ``metadata`` object (optionally
    carrying a ``domains`` list) and a ``corrections`` object mapping
    misrecognized text to corrected text. Every correction is imported under
    the first listed domain, or ``"general"`` when no domain is given.

    Args:
        json_path: Optional path (``str`` or ``Path``) to a UTF-8 JSON file in
            the v1.0 layout. When omitted, a small built-in sample document is
            imported instead.

    Raises:
        OSError: If ``json_path`` is given but cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``corrections`` entry.
    """
    import json

    if json_path is None:
        # Sample JSON structure (v1.0 format)
        data = {
            "metadata": {
                "version": "1.0",
                "domains": ["embodied_ai"],
            },
            "corrections": {
                "巨升智能": "具身智能",
                "巨升": "具身",
            },
        }
    else:
        data = json.loads(Path(json_path).read_text(encoding="utf-8"))

    # Convert JSON to import format. ``or`` also covers an empty ``domains``
    # list, which would otherwise raise IndexError on ``[0]``.
    domains = data.get("metadata", {}).get("domains") or ["general"]
    domain = domains[0]
    corrections_list = [
        {
            "from_text": from_text,
            "to_text": to_text,
            "domain": domain,
            "source": "imported",
            "confidence": 1.0,
        }
        for from_text, to_text in data["corrections"].items()
    ]

    # Initialize service (after parsing, so a bad file never opens the DB)
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    # try/finally so the service is closed even when the import raises.
    try:
        # Import
        inserted, updated, skipped = service.import_corrections(
            corrections=corrections_list,
            merge=True,
        )

        print("✅ JSON import complete:")
        print(f" - Inserted: {inserted}")
        print(f" - Updated: {updated}")
        print(f" - Skipped: {skipped}")
    finally:
        service.close()
|
|
|
|
|
|
def add_context_rules():
    """Example: add context-aware regex rules directly.

    Opens the repository at ``~/.transcript-fixer/corrections.db`` and inserts
    a fixed set of ``(pattern, replacement, description, priority)`` rows into
    the ``context_rules`` table. ``INSERT OR IGNORE`` is used, so a row that
    conflicts with an existing one is skipped instead of raising.
    """
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)

    # (pattern, replacement, description, priority)
    rules = [
        ("巨升方向", "具身方向", "巨升→具身", 10),
        ("巨升现在", "具身现在", "巨升→具身", 10),
        ("近距离的去看", "近距离地去看", "的→地 副词修饰", 5),
        # NOTE(review): this rule maps the phrase to itself; its description
        # says "近距离" is correct here. Presumably it guards the phrase from
        # other rules -- confirm.
        ("近距离搏杀", "近距离搏杀", "这里的'近距离'是正确的", 5),
    ]

    # try/finally so the repository is closed even when an insert raises.
    try:
        # Add context rules via SQL.
        # NOTE(review): relies on the repository's private ``_transaction()``
        # context manager; presumably it commits on success -- confirm.
        with repository._transaction() as conn:
            for pattern, replacement, description, priority in rules:
                conn.execute("""
                    INSERT OR IGNORE INTO context_rules
                    (pattern, replacement, description, priority)
                    VALUES (?, ?, ?, ?)
                """, (pattern, replacement, description, priority))

        print("✅ Context rules added successfully")
    finally:
        repository.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the three examples in order, printing a
    # heading before each one and a verification hint at the end.
    separator = "=" * 60

    print("Transcript-Fixer Bulk Import Examples\n")
    print(separator)

    # (heading printed before the example, example function)
    examples = (
        # Example 1: Import from dictionary
        ("\n1. Importing from Python dictionary...", import_from_dict),
        # Example 2: Import from JSON file
        ("\n2. Importing from JSON format...", import_from_json_file),
        # Example 3: Add context rules
        ("\n3. Adding context rules...", add_context_rules),
    )
    for heading, run_example in examples:
        print(heading)
        run_example()

    print("\n" + separator)
    print("✅ All examples completed!")
    print("\nVerify with:")
    print(" sqlite3 ~/.transcript-fixer/corrections.db 'SELECT COUNT(*) FROM active_corrections;'")
|