Release v1.8.0: Add transcript-fixer skill
## New Skill: transcript-fixer v1.0.0 Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning. **Features:** - Two-stage correction pipeline (dictionary + AI) - Automatic pattern detection and learning - Domain-specific dictionaries (general, embodied_ai, finance, medical) - SQLite-based correction repository - Team collaboration with import/export - GLM API integration for AI corrections - Cost optimization through dictionary promotion **Use cases:** - Correcting meeting notes, lecture recordings, or interview transcripts - Fixing Chinese/English homophone errors and technical terminology - Building domain-specific correction dictionaries - Improving transcript accuracy through iterative learning **Documentation:** - Complete workflow guides in references/ - SQL query templates - Troubleshooting guide - Team collaboration patterns - API setup instructions **Marketplace updates:** - Updated marketplace to v1.8.0 - Added transcript-fixer plugin (category: productivity) - Updated README.md with skill description and use cases - Updated CLAUDE.md with skill listing and counts 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
153
transcript-fixer/scripts/examples/bulk_import.py
Normal file
153
transcript-fixer/scripts/examples/bulk_import.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Example: Bulk Import Corrections to SQLite Database
|
||||
|
||||
This script demonstrates how to import corrections from various sources
|
||||
into the transcript-fixer SQLite database.
|
||||
|
||||
Usage:
|
||||
uv run scripts/examples/bulk_import.py
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from core import CorrectionRepository, CorrectionService
|
||||
|
||||
|
||||
def import_from_dict():
    """Example: Import corrections from an in-memory Python dictionary.

    Builds a small embodied-AI correction dictionary, converts it to the
    list-of-dicts format expected by ``CorrectionService.import_corrections``,
    imports it into the user's SQLite database
    (``~/.transcript-fixer/corrections.db``), and prints a summary.
    """
    # Initialize service
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    # Define corrections as {wrong_text: corrected_text}
    corrections_dict = {
        "巨升智能": "具身智能",
        "巨升": "具身",
        "奇迹创坛": "奇绩创坛",
        # NOTE(review): identity mapping below (from == to) — presumably a
        # placeholder entry; confirm it is intentional.
        "火星营": "火星营",
        "矩阵公司": "初创公司",
        "股价": "框架",
        "三观": "三关"
    }

    # Convert to the list format expected by import_corrections()
    corrections_list = [
        {
            "from_text": from_text,
            "to_text": to_text,
            "domain": "embodied_ai",
            "source": "imported",
            "confidence": 1.0,
        }
        for from_text, to_text in corrections_dict.items()
    ]

    # Import; merge=True updates existing entries rather than skipping them
    inserted, updated, skipped = service.import_corrections(
        corrections=corrections_list,
        merge=True
    )

    print("✅ Import complete:")
    print(f"  - Inserted: {inserted}")
    print(f"  - Updated: {updated}")
    print(f"  - Skipped: {skipped}")

    service.close()
|
||||
|
||||
|
||||
def import_from_json_file():
    """Example: Import corrections from the old (v1.0) JSON format.

    Uses an inline sample of the v1.0 structure rather than reading a real
    file, flattens it into the list format expected by
    ``CorrectionService.import_corrections``, imports it, and prints a
    summary.
    """
    # Sample JSON structure (v1.0 format). Because the sample is an inline
    # literal, no json parsing is needed here (the original's local
    # `import json` was unused and has been removed).
    sample_json = {
        "metadata": {
            "version": "1.0",
            "domains": ["embodied_ai"],
        },
        "corrections": {
            "巨升智能": "具身智能",
            "巨升": "具身",
        }
    }

    # Initialize service
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    # v1.0 stores a list of domains; take the first, defaulting to "general"
    domain = sample_json["metadata"].get("domains", ["general"])[0]

    # Convert JSON mapping to the import list format
    corrections_list = [
        {
            "from_text": from_text,
            "to_text": to_text,
            "domain": domain,
            "source": "imported",
            "confidence": 1.0,
        }
        for from_text, to_text in sample_json["corrections"].items()
    ]

    # Import; merge=True updates existing entries rather than skipping them
    inserted, updated, skipped = service.import_corrections(
        corrections=corrections_list,
        merge=True
    )

    print("✅ JSON import complete:")
    print(f"  - Inserted: {inserted}")
    print(f"  - Updated: {updated}")
    print(f"  - Skipped: {skipped}")

    service.close()
|
||||
|
||||
|
||||
def add_context_rules():
    """Example: Add context-aware correction rules directly via SQL.

    Inserts a handful of (pattern, replacement, description, priority) rules
    into the ``context_rules`` table, skipping duplicates via
    ``INSERT OR IGNORE``.
    """
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)

    # (pattern, replacement, description, priority)
    rules = [
        ("巨升方向", "具身方向", "巨升→具身", 10),
        ("巨升现在", "具身现在", "巨升→具身", 10),
        ("近距离的去看", "近距离地去看", "的→地 副词修饰", 5),
        # Identity rule: pattern == replacement, used to mark this phrase
        # as already correct (see its description string).
        ("近距离搏杀", "近距离搏杀", "这里的'近距离'是正确的", 5),
    ]

    # NOTE(review): _transaction() is a private API of CorrectionRepository;
    # acceptable for an example script, but a public bulk-insert method
    # would be a safer long-term interface.
    with repository._transaction() as conn:
        # executemany runs the same parameterized INSERT once per rule tuple
        conn.executemany("""
            INSERT OR IGNORE INTO context_rules
            (pattern, replacement, description, priority)
            VALUES (?, ?, ?, ?)
        """, rules)

    print("✅ Context rules added successfully")
    repository.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every bulk-import example in sequence, printing a banner before
    # each one and a closing verification hint at the end.
    print("Transcript-Fixer Bulk Import Examples\n")
    print("=" * 60)

    examples = (
        ("1. Importing from Python dictionary...", import_from_dict),
        ("2. Importing from JSON format...", import_from_json_file),
        ("3. Adding context rules...", add_context_rules),
    )
    for banner, run_example in examples:
        print("\n" + banner)
        run_example()

    print("\n" + "=" * 60)
    print("✅ All examples completed!")
    print("\nVerify with:")
    print("  sqlite3 ~/.transcript-fixer/corrections.db 'SELECT COUNT(*) FROM active_corrections;'")
|
||||
Reference in New Issue
Block a user