chore: bump transcript-fixer skill version

2026-04-06 08:50:10 +08:00
parent efda299a9e
commit 681994316b
8 changed files with 247 additions and 1 deletions
--- a/transcript-fixer/scripts/fix_transcript_enhanced.py
+++ b/transcript-fixer/scripts/fix_transcript_enhanced.py
@@ -1,4 +1,11 @@
 #!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "httpx>=0.24.0",
+#     "filelock>=3.13.0",
+# ]
+# ///
 """
 Enhanced transcript fixer wrapper with improved user experience.

--- a/transcript-fixer/scripts/fix_transcript_timestamps.py
+++ b/transcript-fixer/scripts/fix_transcript_timestamps.py
@@ -1,4 +1,8 @@
 #!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
 """Normalize and repair speaker timestamp lines in ASR transcripts.

 This script targets transcript lines shaped like:
--- a/transcript-fixer/scripts/fix_transcription.py
+++ b/transcript-fixer/scripts/fix_transcription.py
@@ -1,4 +1,11 @@
 #!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "httpx>=0.24.0",
+#     "filelock>=3.13.0",
+# ]
+# ///
 """
 Transcript Fixer - Main Entry Point

--- a/transcript-fixer/scripts/generate_word_diff.py
+++ b/transcript-fixer/scripts/generate_word_diff.py
@@ -1,4 +1,8 @@
 #!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
 """
 Generate Word-Level Diff HTML Comparison

--- a/transcript-fixer/scripts/split_transcript_sections.py
+++ b/transcript-fixer/scripts/split_transcript_sections.py
@@ -1,4 +1,8 @@
 #!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
 """Split a transcript into named sections and optionally rebase timestamps.

 Example:
--- a/transcript-fixer/scripts/utils/common_words.py
+++ b/transcript-fixer/scripts/utils/common_words.py
@@ -64,6 +64,10 @@ COMMON_WORDS_2CHAR: Set[str] = {
    "明确", "清晰", "具体", "详细", "准确", "完整", "稳定", "灵活",
    # --- Domain terms that look like ASR errors but are valid ---
    "线数", "曲线", "分母", "正面", "旗号", "无果", "演技",
+    # --- Common verb+一 patterns (打一个/来一个/做一下 etc.) ---
+    # "打一" caused a production false positive: "打一个锚" → "答疑个锚" (2026-04)
+    "打一", "来一", "做一", "写一", "给一", "拉一", "开一", "看一",
+    "跑一", "找一", "选一", "试一", "走一", "问一", "搞一", "聊一",
 }

 # Common 3+ character words that should also be protected.
@@ -88,6 +92,14 @@ COMMON_WORDS_3PLUS: Set[str] = {
    "保健品", "保健操", "医疗保健",
    "文化内涵",
    "无果而终",
+    # --- Common verb+一+measure-word patterns (prevent "打一"→X style miscorrections) ---
+    "打一个", "打一针", "打一下", "打一次", "打一把",
+    "来一个", "来一下", "来一次", "来一杯",
+    "做一个", "做一下", "做一次",
+    "写一个", "写一下", "写一篇",
+    "给一个", "看一下", "看一看", "看一遍",
+    "跑一下", "跑一遍", "跑一次",
+    "试一下", "试一试", "试一次",
    # --- Common Chinese idioms/phrases containing short words ---
    # These are needed to prevent idiom corruption
    "正面临", "正面对",
@@ -132,6 +144,8 @@ SUBSTRING_COLLISION_MAP: dict[str, list[str]] = {
    "保健": ["保健品", "保健操", "医疗保健"],
    # "内涵" common in compound words
    "内涵": ["内涵段子", "文化内涵"],
+    # "打一" is common in verb+一+measure-word phrases (2026-04 production false positive)
+    "打一": ["打一个", "打一针", "打一下", "打一次", "打一把"],
 }

 ALL_COMMON_WORDS: Set[str] = COMMON_WORDS_2CHAR | COMMON_WORDS_3PLUS