Release v1.8.0: Add transcript-fixer skill
## New Skill: transcript-fixer v1.0.0 Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning. **Features:** - Two-stage correction pipeline (dictionary + AI) - Automatic pattern detection and learning - Domain-specific dictionaries (general, embodied_ai, finance, medical) - SQLite-based correction repository - Team collaboration with import/export - GLM API integration for AI corrections - Cost optimization through dictionary promotion **Use cases:** - Correcting meeting notes, lecture recordings, or interview transcripts - Fixing Chinese/English homophone errors and technical terminology - Building domain-specific correction dictionaries - Improving transcript accuracy through iterative learning **Documentation:** - Complete workflow guides in references/ - SQL query templates - Troubleshooting guide - Team collaboration patterns - API setup instructions **Marketplace updates:** - Updated marketplace to v1.8.0 - Added transcript-fixer plugin (category: productivity) - Updated README.md with skill description and use cases - Updated CLAUDE.md with skill listing and counts 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
140
transcript-fixer/scripts/core/dictionary_processor.py
Normal file
140
transcript-fixer/scripts/core/dictionary_processor.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dictionary Processor - Stage 1: Dictionary-based Text Corrections
|
||||
|
||||
SINGLE RESPONSIBILITY: Apply dictionary and regex-based corrections to text
|
||||
|
||||
Features:
|
||||
- Apply simple dictionary replacements
|
||||
- Apply context-aware regex rules
|
||||
- Track all changes for history
|
||||
- Case-sensitive and insensitive matching
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class Change:
|
||||
"""Represents a single text change"""
|
||||
line_number: int
|
||||
from_text: str
|
||||
to_text: str
|
||||
rule_type: str # "dictionary" or "context_rule"
|
||||
rule_name: str
|
||||
|
||||
|
||||
class DictionaryProcessor:
|
||||
"""
|
||||
Stage 1 Processor: Apply dictionary-based corrections
|
||||
|
||||
Process:
|
||||
1. Apply context-aware regex rules first (more specific)
|
||||
2. Apply simple dictionary replacements (more general)
|
||||
3. Track all changes for learning
|
||||
"""
|
||||
|
||||
def __init__(self, corrections: Dict[str, str], context_rules: List[Dict]):
|
||||
"""
|
||||
Initialize processor with corrections and rules
|
||||
|
||||
Args:
|
||||
corrections: Dictionary of {wrong: correct} pairs
|
||||
context_rules: List of context-aware regex rules
|
||||
"""
|
||||
self.corrections = corrections
|
||||
self.context_rules = context_rules
|
||||
|
||||
def process(self, text: str) -> Tuple[str, List[Change]]:
|
||||
"""
|
||||
Apply all corrections to text
|
||||
|
||||
Returns:
|
||||
(corrected_text, list_of_changes)
|
||||
"""
|
||||
corrected_text = text
|
||||
all_changes = []
|
||||
|
||||
# Step 1: Apply context rules (more specific, higher priority)
|
||||
corrected_text, context_changes = self._apply_context_rules(corrected_text)
|
||||
all_changes.extend(context_changes)
|
||||
|
||||
# Step 2: Apply dictionary replacements (more general)
|
||||
corrected_text, dict_changes = self._apply_dictionary(corrected_text)
|
||||
all_changes.extend(dict_changes)
|
||||
|
||||
return corrected_text, all_changes
|
||||
|
||||
def _apply_context_rules(self, text: str) -> Tuple[str, List[Change]]:
|
||||
"""Apply context-aware regex rules"""
|
||||
changes = []
|
||||
corrected = text
|
||||
|
||||
for rule in self.context_rules:
|
||||
pattern = rule["pattern"]
|
||||
replacement = rule["replacement"]
|
||||
description = rule.get("description", "")
|
||||
|
||||
# Find all matches with their positions
|
||||
for match in re.finditer(pattern, corrected):
|
||||
line_num = corrected[:match.start()].count('\n') + 1
|
||||
changes.append(Change(
|
||||
line_number=line_num,
|
||||
from_text=match.group(0),
|
||||
to_text=replacement,
|
||||
rule_type="context_rule",
|
||||
rule_name=description or pattern
|
||||
))
|
||||
|
||||
# Apply replacement
|
||||
corrected = re.sub(pattern, replacement, corrected)
|
||||
|
||||
return corrected, changes
|
||||
|
||||
def _apply_dictionary(self, text: str) -> Tuple[str, List[Change]]:
|
||||
"""Apply simple dictionary replacements"""
|
||||
changes = []
|
||||
corrected = text
|
||||
|
||||
for wrong, correct in self.corrections.items():
|
||||
if wrong not in corrected:
|
||||
continue
|
||||
|
||||
# Find all occurrences
|
||||
occurrences = []
|
||||
start = 0
|
||||
while True:
|
||||
pos = corrected.find(wrong, start)
|
||||
if pos == -1:
|
||||
break
|
||||
line_num = corrected[:pos].count('\n') + 1
|
||||
occurrences.append(line_num)
|
||||
start = pos + len(wrong)
|
||||
|
||||
# Track changes
|
||||
for line_num in occurrences:
|
||||
changes.append(Change(
|
||||
line_number=line_num,
|
||||
from_text=wrong,
|
||||
to_text=correct,
|
||||
rule_type="dictionary",
|
||||
rule_name="corrections_dict"
|
||||
))
|
||||
|
||||
# Apply replacement
|
||||
corrected = corrected.replace(wrong, correct)
|
||||
|
||||
return corrected, changes
|
||||
|
||||
def get_summary(self, changes: List[Change]) -> Dict[str, int]:
|
||||
"""Generate summary statistics"""
|
||||
summary = {
|
||||
"total_changes": len(changes),
|
||||
"dictionary_changes": sum(1 for c in changes if c.rule_type == "dictionary"),
|
||||
"context_rule_changes": sum(1 for c in changes if c.rule_type == "context_rule")
|
||||
}
|
||||
return summary
|
||||
Reference in New Issue
Block a user