Release v1.8.0: Add transcript-fixer skill
## New Skill: transcript-fixer v1.0.0

Correct speech-to-text (ASR/STT) transcription errors through dictionary-based rules and AI-powered corrections with automatic pattern learning.

**Features:**
- Two-stage correction pipeline (dictionary + AI)
- Automatic pattern detection and learning
- Domain-specific dictionaries (general, embodied_ai, finance, medical)
- SQLite-based correction repository
- Team collaboration with import/export
- GLM API integration for AI corrections
- Cost optimization through dictionary promotion

**Use cases:**
- Correcting meeting notes, lecture recordings, or interview transcripts
- Fixing Chinese/English homophone errors and technical terminology
- Building domain-specific correction dictionaries
- Improving transcript accuracy through iterative learning

**Documentation:**
- Complete workflow guides in references/
- SQL query templates
- Troubleshooting guide
- Team collaboration patterns
- API setup instructions

**Marketplace updates:**
- Updated marketplace to v1.8.0
- Added transcript-fixer plugin (category: productivity)
- Updated README.md with skill description and use cases
- Updated CLAUDE.md with skill listing and counts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
102
transcript-fixer/scripts/utils/diff_formats/change_extractor.py
Normal file
102
transcript-fixer/scripts/utils/diff_formats/change_extractor.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Change extraction and summarization
|
||||
|
||||
SINGLE RESPONSIBILITY: Extract and summarize changes between text versions
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
|
||||
from .text_splitter import split_into_words
|
||||
|
||||
|
||||
def extract_changes(original: str, fixed: str) -> list[dict]:
    """
    Diff two texts token by token and describe every edit found.

    Both texts are tokenized with ``split_into_words`` and compared with
    ``difflib.SequenceMatcher``; spans reported as equal are skipped.

    Args:
        original: Text before correction
        fixed: Text after correction

    Returns:
        One dict per replace/delete/insert opcode, in opcode order, with
        the keys 'type', 'original', 'fixed', 'context_before' and
        'context_after'. Context is up to five tokens on each side, taken
        from the original tokens for replace/delete and from the fixed
        tokens for insert.
    """
    context_size = 5
    src_tokens = split_into_words(original)
    dst_tokens = split_into_words(fixed)

    def surrounding(tokens, start, end):
        # max() keeps a small `start` from turning into a negative
        # (wrap-around) slice index; the upper bound needs no clamp
        # because slicing already stops at the end of the list.
        before = ''.join(tokens[max(0, start - context_size):start])
        after = ''.join(tokens[end:end + context_size])
        return before, after

    matcher = difflib.SequenceMatcher(None, src_tokens, dst_tokens)
    changes = []

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag not in ('replace', 'delete', 'insert'):
            continue

        # Inserted text has no position in the original, so its context
        # is read from the fixed tokens instead.
        if tag == 'insert':
            before, after = surrounding(dst_tokens, j1, j2)
        else:
            before, after = surrounding(src_tokens, i1, i2)

        # difflib reports j1 == j2 for 'delete' and i1 == i2 for 'insert',
        # so the unused side of the change joins to '' on its own.
        changes.append({
            'type': tag,
            'original': ''.join(src_tokens[i1:i2]),
            'fixed': ''.join(dst_tokens[j1:j2]),
            'context_before': before,
            'context_after': after,
        })

    return changes
|
||||
|
||||
|
||||
def generate_change_summary(changes: list[dict]) -> str:
    """
    Render a list of changes as a human-readable summary.

    The output starts with a banner giving the total number of changes,
    followed by one numbered entry per change: its type label, the
    original and/or fixed text when non-empty, and the surrounding
    context with the change position marked.

    Args:
        changes: List of change dictionaries; each must provide the keys
            'type', 'original', 'fixed', 'context_before' and
            'context_after'

    Returns:
        Formatted summary string

    Raises:
        KeyError: If a change has a 'type' other than 'replace',
            'delete' or 'insert', or lacks one of the required keys
    """
    # Built once instead of on every loop iteration.
    type_labels = {
        'replace': '替换',
        'delete': '删除',
        'insert': '添加',
    }
    banner = "=" * 80
    lines = [banner, f"修改摘要 (共 {len(changes)} 处修改)", banner, ""]

    for index, change in enumerate(changes, 1):
        lines.append(f"[{index}] {type_labels[change['type']]}")

        if change['original']:
            lines.append(f" 原文: {change['original']}")
        if change['fixed']:
            lines.append(f" 修复: {change['fixed']}")

        # Show context only when there is some. The check has to look at
        # the surrounding text alone: once the marker is concatenated in,
        # the string is never blank and the line would always be emitted.
        before = change['context_before']
        after = change['context_after']
        if (before + after).strip():
            lines.append(f" 上下文: ...{before}【修改处】{after}...")

        lines.append("")

    return '\n'.join(lines)
|
||||
Reference in New Issue
Block a user