New scripts:
- fix_transcript_timestamps.py: Repair malformed timestamps (HH:MM:SS format)
- split_transcript_sections.py: Split transcript by keywords and rebase timestamps
- Automated tests for both scripts

Features:
- Timestamp validation and repair (handle missing colons, invalid ranges)
- Section splitting with custom names
- Rebase timestamps to 00:00:00 for each section
- Preserve speaker format and content integrity
- In-place editing with backup

Documentation updates:
- Add usage examples to SKILL.md
- Clarify dictionary iteration workflow (save stable patterns only)
- Update workflow guides with new script references
- Add script parameter documentation

Use cases:
- Fix ASR output with broken timestamps
- Split long meetings into focused sections
- Prepare sections for independent processing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
55 lines
1.5 KiB
Python
55 lines
1.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Tests for transcript timestamp normalization and rebasing."""
|
|
|
|
import sys
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
# Put the parent of this file's directory at the front of sys.path before the
# import below runs. NOTE(review): presumably fix_transcript_timestamps.py
# lives in that directory — confirm against the repository layout.
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from fix_transcript_timestamps import repair_timestamps
|
|
|
|
|
|
class TestFixTranscriptTimestamps(unittest.TestCase):
    """Checks for ``repair_timestamps`` on short speaker-labelled transcripts."""

    def test_rollover_fix(self):
        # MM:SS stamps that drop from 59:58 back to 00:05 are expected to come
        # out as 00:59:58 followed by 01:00:05, with no anomalies reported.
        transcript = "".join(
            (
                "甲 58:50\n",
                "内容 A\n",
                "乙 59:58\n",
                "内容 B\n",
                "丙 00:05\n",
                "内容 C\n",
            )
        )
        options = {
            "output_format": "hhmmss",
            "rollover_backjump_seconds": 15 * 60,
            "jitter_seconds": 5,
            "rebase_to_zero": False,
        }

        outcome = repair_timestamps(transcript, **options)

        for expected_line in ("甲 00:58:50", "乙 00:59:58", "丙 01:00:05"):
            self.assertIn(expected_line, outcome.repaired_text)
        self.assertEqual(len(outcome.anomalies), 0)

    def test_rebase_to_zero(self):
        # With rebase_to_zero=True the first stamp (01:31:10) is expected to
        # become 00:00:00 and the one two seconds later to become 00:00:02.
        transcript = "".join(
            (
                "甲 01:31:10\n",
                "内容 A\n",
                "乙 01:31:12\n",
                "内容 B\n",
            )
        )
        options = {
            "output_format": "hhmmss",
            "rollover_backjump_seconds": 15 * 60,
            "jitter_seconds": 5,
            "rebase_to_zero": True,
        }

        outcome = repair_timestamps(transcript, **options)

        for expected_line in ("甲 00:00:00", "乙 00:00:02"):
            self.assertIn(expected_line, outcome.repaired_text)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly rather than imported: hand control to unittest's
    # command-line entry point so the tests in this module are run.
    unittest.main()
|