Files
claude-code-skills-reference/transcript-fixer/scripts/fix_transcript_enhanced.py
daymade 9b724f33e3 Release v1.9.0: Add video-comparer skill and enhance transcript-fixer
## New Skill: video-comparer v1.0.0
- Compare original and compressed videos with interactive HTML reports
- Calculate quality metrics (PSNR, SSIM) for compression analysis
- Generate frame-by-frame visual comparisons (slider, side-by-side, grid)
- Extract video metadata (codec, resolution, bitrate, duration)
- Multi-platform FFmpeg support with security features

## transcript-fixer Enhancements
- Add async AI processor for parallel processing
- Add connection pool management for database operations
- Add concurrency manager and rate limiter
- Add audit log retention and database migrations
- Add health check and metrics monitoring
- Add comprehensive test suite (8 new test files)
- Enhance security with domain and path validators

## Marketplace Updates
- Update marketplace version from 1.8.0 to 1.9.0
- Update skills count from 15 to 16
- Update documentation (README.md, CLAUDE.md, CHANGELOG.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 00:23:12 +08:00

257 lines
9.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Enhanced transcript fixer wrapper with improved user experience.
Features:
- Custom output directory support
- Automatic HTML diff opening in browser
- Smart API key detection from shell config files
- Progress feedback
CRITICAL FIX: Now uses secure API key handling (Critical-2)
"""
import argparse
import os
import subprocess
import sys
from pathlib import Path
# CRITICAL FIX: Import secure secret handling
sys.path.insert(0, str(Path(__file__).parent))
from utils.security import mask_secret, SecretStr, validate_api_key
# CRITICAL FIX: Import path validation (Critical-5)
from utils.path_validator import PathValidator, PathValidationError, add_allowed_directory
# Module-level PathValidator instance; main() calls its validate_input_path()
# on the user-supplied input file before any processing starts.
path_validator = PathValidator()
def find_glm_api_key():
    """
    Locate a GLM API key in the user's shell startup files.

    Scans ~/.zshrc, ~/.bashrc, ~/.bash_profile and ~/.profile, in that order,
    for a line mentioning both ANTHROPIC_BASE_URL and bigmodel.cn. That line,
    the two lines before it and the two lines after it are then inspected for
    an ``export`` statement whose text contains TOKEN or KEY. Commented-out
    exports are treated exactly like active ones. The first value accepted by
    ``validate_api_key`` is returned, and only its masked form is printed.

    Returns:
        str or None: API key if found, None otherwise
    """
    home = Path.home()
    rc_files = [
        home / name
        for name in (".zshrc", ".bashrc", ".bash_profile", ".profile")
    ]

    for rc_path in rc_files:
        if not rc_path.exists():
            continue

        try:
            with open(rc_path, 'r', encoding='utf-8') as handle:
                content = handle.readlines()

            for idx, text in enumerate(content):
                # Anchor: the line that points ANTHROPIC_BASE_URL at bigmodel.
                if 'ANTHROPIC_BASE_URL' not in text or 'bigmodel.cn' not in text:
                    continue

                # Candidate keys live within two lines either side of the anchor.
                window = content[max(0, idx - 2):min(len(content), idx + 3)]
                for neighbour in window:
                    # A leading '#' makes no difference: commented and
                    # uncommented exports are handled identically.
                    if 'export' not in neighbour:
                        continue
                    if 'TOKEN' not in neighbour and 'KEY' not in neighbour:
                        continue

                    _, separator, raw_value = neighbour.partition('=')
                    if not separator:
                        continue  # export without an assignment

                    key = raw_value.strip().strip('"').strip("'")
                    # CRITICAL FIX: Validate and mask API key
                    if validate_api_key(key):
                        print(f"✓ Found API key in {rc_path}: {mask_secret(key)}")
                        return key
        except Exception as e:
            # Best effort: an unreadable config must not stop the search.
            print(f"⚠️ Could not read {rc_path}: {e}", file=sys.stderr)
            continue

    return None
def open_html_in_browser(html_path):
    """
    Show an HTML file using the operating system's default handler.

    Uses ``open`` on macOS, ``os.startfile`` on Windows and ``xdg-open`` on
    every other platform. Any failure is reported on stdout together with a
    hint to open the file manually; nothing is raised to the caller.

    Args:
        html_path: Path to HTML file
    """
    if not Path(html_path).exists():
        print(f"⚠️ HTML file not found: {html_path}")
        return

    platform = sys.platform
    try:
        if platform == 'win32':
            # os.startfile hands the file to the Windows shell association
            # without spawning a command interpreter.
            os.startfile(html_path)
        else:
            # macOS ships `open`; everything else is assumed to have xdg-open.
            launcher = 'open' if platform == 'darwin' else 'xdg-open'
            subprocess.run([launcher, html_path], check=True)
        print(f"✓ Opened HTML diff in browser: {html_path}")
    except Exception as e:
        print(f"⚠️ Could not open browser: {e}")
        print(f" Please manually open: {html_path}")
def _build_arg_parser():
    """Create the argparse parser that defines this wrapper's command line."""
    parser = argparse.ArgumentParser(
        description="Enhanced transcript fixer with auto-open HTML diff",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Fix transcript and save to custom output directory
%(prog)s input.md --output ./corrected --auto-open
# Fix without opening browser
%(prog)s input.md --output ./corrected --no-auto-open
# Use specific domain
%(prog)s input.md --output ./corrected --domain embodied_ai
"""
    )
    parser.add_argument('input', help='Input transcript file (.md or .txt)')
    parser.add_argument('--output', '-o', help='Output directory (default: same as input file)')
    parser.add_argument('--domain', default='general',
                        choices=['general', 'embodied_ai', 'finance', 'medical'],
                        help='Domain for corrections (default: general)')
    parser.add_argument('--stage', type=int, default=3, choices=[1, 2, 3],
                        help='Processing stage: 1=dict, 2=AI, 3=both (default: 3)')
    # --auto-open is already the default; the flag exists so it can be spelled
    # out explicitly, and --no-auto-open turns it off.
    parser.add_argument('--auto-open', action='store_true', default=True,
                        help='Automatically open HTML diff in browser (default: True)')
    parser.add_argument('--no-auto-open', dest='auto_open', action='store_false',
                        help='Do not open HTML diff automatically')
    return parser


def _ensure_api_key():
    """Guarantee GLM_API_KEY is set in os.environ, or exit with status 1."""
    if os.environ.get('GLM_API_KEY'):
        return

    print("🔍 GLM_API_KEY not set, searching shell configs...")
    api_key = find_glm_api_key()
    if api_key:
        # Exported so the child process started later inherits it.
        os.environ['GLM_API_KEY'] = api_key
        return

    print("❌ GLM_API_KEY not found. Please set it or run with --stage 1")
    print(" Get API key from: https://open.bigmodel.cn/")
    sys.exit(1)


def _move_outputs(source_dir, output_dir, base_name):
    """Move the files generated for *base_name* from source_dir to output_dir."""
    import shutil  # local import: only needed when a custom output dir is used

    output_names = [
        f"{base_name}_stage1.md",
        f"{base_name}_stage2.md",
        f"{base_name}_对比.html",
        f"{base_name}_对比报告.md",
        f"{base_name}_修复报告.md",
    ]
    for filename in output_names:
        source = source_dir / filename
        if not source.exists():
            continue
        try:
            # shutil.move falls back to copy + delete when source and
            # destination are on different filesystems, where a plain
            # rename fails with a cross-device error.
            shutil.move(str(source), str(output_dir / filename))
        except OSError as e:
            print(f"⚠️ Could not move {filename}: {e}")
            continue
        print(f"{filename}")


def main():
    """
    Command-line entry point for the enhanced transcript fixer.

    Steps:
      1. Parse arguments and validate the input path with ``path_validator``.
      2. Resolve (and create) the output directory; defaults to the input
         file's directory.
      3. For stages 2 and 3, make sure GLM_API_KEY is available, searching
         the shell config files if it is not already set.
      4. Run ``fix_transcription.py`` through ``uv run --with httpx``.
      5. Move the generated files into the output directory when it differs
         from the input directory.
      6. Optionally open the HTML diff and print a summary of the files that
         were actually produced.

    Exits with status 1 on validation or setup failures, and with the child
    process's return code if processing fails.
    """
    args = _build_arg_parser().parse_args()

    # CRITICAL FIX: Validate input file with security checks
    try:
        # Add current directory to allowed paths (for user convenience)
        add_allowed_directory(Path.cwd())
        input_path = path_validator.validate_input_path(args.input)
        print(f"✓ Input file validated: {input_path}")
    except PathValidationError as e:
        print(f"❌ Input file validation failed: {e}")
        sys.exit(1)

    # CRITICAL FIX: Validate output directory
    if args.output:
        output_dir = Path(args.output).expanduser().absolute()
        try:
            # Allow the parent when it already exists, otherwise the target itself.
            parent = output_dir.parent
            add_allowed_directory(parent if parent.exists() else output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"✓ Output directory validated: {output_dir}")
        except PathValidationError as e:
            print(f"❌ Output directory validation failed: {e}")
            sys.exit(1)
        except OSError as e:
            # e.g. permission denied, or a regular file already has that name.
            print(f"❌ Could not create output directory {output_dir}: {e}")
            sys.exit(1)
    else:
        output_dir = input_path.parent

    # Stage 1 is dictionary-only; stages 2 and 3 need the API key.
    if args.stage in [2, 3]:
        _ensure_api_key()

    # The real work is done by the sibling script next to this wrapper.
    script_dir = Path(__file__).parent
    main_script = script_dir / "fix_transcription.py"
    if not main_script.exists():
        print(f"❌ Main script not found: {main_script}")
        sys.exit(1)

    cmd = [
        'uv', 'run', '--with', 'httpx',
        str(main_script),
        '--input', str(input_path),
        '--stage', str(args.stage),
        '--domain', args.domain
    ]
    print(f"📖 Processing: {input_path.name}")
    print(f"📁 Output directory: {output_dir}")
    print(f"🎯 Domain: {args.domain}")
    print(f"⚙️ Stage: {args.stage}")
    print()

    try:
        subprocess.run(cmd, check=True, cwd=script_dir.parent)
    except FileNotFoundError:
        # Raised when the `uv` executable itself cannot be found.
        print("❌ Could not run 'uv'. Please install uv and make sure it is on PATH")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"❌ Processing failed with exit code {e.returncode}")
        sys.exit(e.returncode)

    # Move output files to desired directory if different from input directory
    if output_dir != input_path.parent:
        print(f"\n📦 Moving output files to {output_dir}...")
        _move_outputs(input_path.parent, output_dir, input_path.stem)

    # Auto-open HTML diff
    if args.auto_open:
        html_file = output_dir / f"{input_path.stem}_对比.html"
        if html_file.exists():
            print("\n🌐 Opening HTML diff in browser...")
            open_html_in_browser(html_file)
        else:
            print("\n⚠️ HTML diff not generated (may require Stage 2/3)")

    print("\n✅ Processing complete!")
    print(f"\n📄 Output files in: {output_dir}")
    # List only files that exist, so a --stage 1 run does not advertise
    # stage-2 or HTML outputs that were never produced.
    summary = [
        (f"{input_path.stem}_stage1.md", "dictionary corrections"),
        (f"{input_path.stem}_stage2.md", "AI corrections - final version"),
        (f"{input_path.stem}_对比.html", "visual diff"),
    ]
    for filename, description in summary:
        if (output_dir / filename).exists():
            print(f" - {filename} ({description})")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()