Files
claude-code-skills-reference/transcript-fixer/scripts/examples/bulk_import.py
daymade bd0aa12004 Release v1.8.0: Add transcript-fixer skill
## New Skill: transcript-fixer v1.0.0

Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning.

**Features:**
- Two-stage correction pipeline (dictionary + AI)
- Automatic pattern detection and learning
- Domain-specific dictionaries (general, embodied_ai, finance, medical)
- SQLite-based correction repository
- Team collaboration with import/export
- GLM API integration for AI corrections
- Cost optimization through dictionary promotion

**Use cases:**
- Correcting meeting notes, lecture recordings, or interview transcripts
- Fixing Chinese/English homophone errors and technical terminology
- Building domain-specific correction dictionaries
- Improving transcript accuracy through iterative learning

**Documentation:**
- Complete workflow guides in references/
- SQL query templates
- Troubleshooting guide
- Team collaboration patterns
- API setup instructions

**Marketplace updates:**
- Updated marketplace to v1.8.0
- Added transcript-fixer plugin (category: productivity)
- Updated README.md with skill description and use cases
- Updated CLAUDE.md with skill listing and counts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-28 13:16:37 +08:00

154 lines
4.4 KiB
Python

#!/usr/bin/env python3
"""
Example: Bulk Import Corrections to SQLite Database
This script demonstrates how to import corrections from various sources
into the transcript-fixer SQLite database.
Usage:
uv run scripts/examples/bulk_import.py
"""
from pathlib import Path
from core import CorrectionRepository, CorrectionService
def import_from_dict():
    """Import a hard-coded dictionary of corrections into the SQLite database.

    Each ``{wrong_text: corrected_text}`` pair is converted into the record
    format handed to ``CorrectionService.import_corrections`` (tagged with
    the ``embodied_ai`` domain), and the inserted/updated/skipped counts it
    returns are printed.

    The service is closed even when the import raises, so a failed run does
    not leave the database handle open.
    """
    # Define corrections as dictionary
    corrections_dict = {
        "巨升智能": "具身智能",
        "巨升": "具身",
        "奇迹创坛": "奇绩创坛",
        # NOTE(review): source and target are identical, so this entry is a
        # no-op correction -- confirm the intended target text.
        "火星营": "火星营",
        "矩阵公司": "初创公司",
        "股价": "框架",
        "三观": "三关",
    }

    # Convert to list format for import
    corrections_list = [
        {
            "from_text": from_text,
            "to_text": to_text,
            "domain": "embodied_ai",
            "source": "imported",
            "confidence": 1.0,
        }
        for from_text, to_text in corrections_dict.items()
    ]

    # Initialize service
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    try:
        # Import
        inserted, updated, skipped = service.import_corrections(
            corrections=corrections_list,
            merge=True,
        )

        print("✅ Import complete:")
        print(f" - Inserted: {inserted}")
        print(f" - Updated: {updated}")
        print(f" - Skipped: {skipped}")
    finally:
        # Release the database handle on both the success and failure paths.
        service.close()
def import_from_json_file():
    """Import corrections laid out in the old (v1.0) JSON structure.

    No file is read here: an in-memory sample with the v1.0 shape
    (``metadata`` plus a flat ``corrections`` mapping) stands in for parsed
    JSON. The first entry of ``metadata.domains`` is used as the domain for
    every record, falling back to ``"general"`` when the metadata or the
    domain list is missing or empty.

    The service is closed even when the import raises.
    """
    # Sample JSON structure (v1.0 format)
    sample_json = {
        "metadata": {
            "version": "1.0",
            "domains": ["embodied_ai"],
        },
        "corrections": {
            "巨升智能": "具身智能",
            "巨升": "具身",
        },
    }

    # Convert JSON to import format.
    # `or` also covers an explicitly empty "domains" list, which a plain
    # .get() default would let through to an IndexError on [0].
    metadata = sample_json.get("metadata") or {}
    domain = (metadata.get("domains") or ["general"])[0]

    corrections_list = [
        {
            "from_text": from_text,
            "to_text": to_text,
            "domain": domain,
            "source": "imported",
            "confidence": 1.0,
        }
        for from_text, to_text in sample_json["corrections"].items()
    ]

    # Initialize service
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)
    service = CorrectionService(repository)

    try:
        # Import
        inserted, updated, skipped = service.import_corrections(
            corrections=corrections_list,
            merge=True,
        )

        print("✅ JSON import complete:")
        print(f" - Inserted: {inserted}")
        print(f" - Updated: {updated}")
        print(f" - Skipped: {skipped}")
    finally:
        # Release the database handle on both the success and failure paths.
        service.close()
def add_context_rules():
    """Insert example context-aware rules directly into ``context_rules``.

    Each rule is a ``(pattern, replacement, description, priority)`` tuple
    written with ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is skipped instead of raising.

    The repository is closed even when the transaction raises.
    """
    db_path = Path.home() / ".transcript-fixer" / "corrections.db"
    repository = CorrectionRepository(db_path)

    rules = [
        ("巨升方向", "具身方向", "巨升→具身", 10),
        ("巨升现在", "具身现在", "巨升→具身", 10),
        ("近距离的去看", "近距离地去看", "的→地 副词修饰", 5),
        # Pattern and replacement are deliberately identical: per its
        # description, this phrase is already correct.
        ("近距离搏杀", "近距离搏杀", "这里的'近距离'是正确的", 5),
    ]

    try:
        # Add context rules via SQL.
        # NOTE(review): `_transaction` is a private repository helper;
        # presumably it commits on clean exit and rolls back on error --
        # confirm against CorrectionRepository.
        with repository._transaction() as conn:
            for pattern, replacement, description, priority in rules:
                conn.execute(
                    """
                    INSERT OR IGNORE INTO context_rules
                    (pattern, replacement, description, priority)
                    VALUES (?, ?, ?, ?)
                    """,
                    (pattern, replacement, description, priority),
                )

        print("✅ Context rules added successfully")
    finally:
        # Release the database handle on both the success and failure paths.
        repository.close()
if __name__ == "__main__":
    # Script entry point: run each example in order between two banner
    # rules, then print a command for checking the result.
    rule = "=" * 60

    print("Transcript-Fixer Bulk Import Examples\n")
    print(rule)

    # (announcement, example function) pairs, executed top to bottom.
    examples = (
        ("\n1. Importing from Python dictionary...", import_from_dict),
        ("\n2. Importing from JSON format...", import_from_json_file),
        ("\n3. Adding context rules...", add_context_rules),
    )
    for announcement, run_example in examples:
        print(announcement)
        run_example()

    print("\n" + rule)
    print("✅ All examples completed!")
    print("\nVerify with:")
    print(" sqlite3 ~/.transcript-fixer/corrections.db 'SELECT COUNT(*) FROM active_corrections;'")