claude-code-skills-reference/transcript-fixer/scripts/fix_transcript_enhanced.py

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "httpx>=0.24.0",
#     "filelock>=3.13.0",
# ]
# ///
"""
Enhanced transcript fixer wrapper with improved user experience.

Features:
- Custom output directory support
- Automatic HTML diff opening in browser
- Smart API key detection from shell config files
- Progress feedback

CRITICAL FIX (Critical-2): API keys found in shell config files are validated
and masked before being printed.
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path

# CRITICAL FIX: Import secure secret handling
sys.path.insert(0, str(Path(__file__).parent))
from utils.security import mask_secret, SecretStr, validate_api_key

# CRITICAL FIX: Import path validation (Critical-5)
from utils.path_validator import PathValidator, PathValidationError, add_allowed_directory

# Module-level PathValidator instance; main() uses it to validate the input
# transcript path before any processing starts.
path_validator = PathValidator()


def find_glm_api_key():
    """
    Locate a GLM API key by scanning the user's shell startup files.

    ~/.zshrc, ~/.bashrc, ~/.bash_profile and ~/.profile are scanned in that
    order for an anchor line mentioning both ANTHROPIC_BASE_URL and
    bigmodel.cn. The lines within two of the anchor (before or after) are
    then searched for an ``export`` whose text contains TOKEN or KEY.
    Commented-out exports are considered exactly like active ones.

    Returns:
        str or None: The first candidate accepted by validate_api_key,
        or None when no config file yields one.
    """
    rc_names = (".zshrc", ".bashrc", ".bash_profile", ".profile")
    candidate_files = [Path.home() / name for name in rc_names]

    for rc_path in candidate_files:
        if not rc_path.exists():
            continue

        try:
            with open(rc_path, 'r', encoding='utf-8') as handle:
                content = handle.readlines()

            for idx, text in enumerate(content):
                # Anchor: the line that points ANTHROPIC_BASE_URL at bigmodel
                if 'ANTHROPIC_BASE_URL' not in text or 'bigmodel.cn' not in text:
                    continue

                # Window of two lines before through two lines after the anchor
                window = content[max(0, idx - 2):min(len(content), idx + 3)]

                for neighbour in window:
                    # A leading '#' makes no difference: commented and active
                    # exports go through the same extraction.
                    if 'export' not in neighbour:
                        continue
                    if 'TOKEN' not in neighbour and 'KEY' not in neighbour:
                        continue

                    _, separator, raw_value = neighbour.partition('=')
                    if not separator:
                        continue

                    key = raw_value.strip().strip('"').strip("'")
                    # CRITICAL FIX: Validate the key and only ever print it masked
                    if validate_api_key(key):
                        print(f"✓ Found API key in {rc_path}: {mask_secret(key)}")
                        return key
        except Exception as err:
            # Best effort: report the problem and move on to the next file
            print(f"⚠️  Could not read {rc_path}: {err}", file=sys.stderr)
            continue

    return None


def open_html_in_browser(html_path):
    """
    Launch the platform's default handler for an HTML file.

    Uses ``open`` on macOS, ``os.startfile`` on Windows and ``xdg-open`` on
    every other platform. A missing file is reported and skipped; launcher
    failures are caught and reported together with a manual-open hint.

    Args:
        html_path: Path to HTML file
    """
    target_missing = not Path(html_path).exists()
    if target_missing:
        print(f"⚠️  HTML file not found: {html_path}")
        return

    platform = sys.platform
    try:
        if platform == 'win32':  # Windows
            # os.startfile hands the file to the shell's default handler
            import os
            os.startfile(html_path)
        else:
            # macOS ships `open`; everything else is assumed to have xdg-open
            launcher = 'open' if platform == 'darwin' else 'xdg-open'
            subprocess.run([launcher, html_path], check=True)
        print(f"✓ Opened HTML diff in browser: {html_path}")
    except Exception as launch_error:
        print(f"⚠️  Could not open browser: {launch_error}")
        print(f"   Please manually open: {html_path}")


def main():
    """
    Command-line entry point: validate paths, run the fixer, collect outputs.

    Steps:
      1. Parse arguments and validate the input file with ``path_validator``.
      2. Create the output directory (defaults to the input file's directory).
      3. For stages 2 and 3, make sure GLM_API_KEY is set, falling back to
         ``find_glm_api_key()``. The key is placed in ``os.environ`` so the
         child process inherits it.
      4. Run ``fix_transcription.py`` through ``uv run``.
      5. Move the generated files to the output directory when it differs
         from the input directory, then optionally open the HTML diff.

    Exits with status 1 on validation, setup or launch failures, and with the
    child's exit code when processing itself fails.
    """
    # Local import keeps this function self-contained; shutil.move is needed
    # because Path.rename cannot cross filesystem boundaries.
    import shutil

    parser = argparse.ArgumentParser(
        description="Enhanced transcript fixer with auto-open HTML diff",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Fix transcript and save to custom output directory
  %(prog)s input.md --output ./corrected --auto-open

  # Fix without opening browser
  %(prog)s input.md --output ./corrected --no-auto-open

  # Use specific domain
  %(prog)s input.md --output ./corrected --domain embodied_ai
        """
    )

    parser.add_argument('input', help='Input transcript file (.md or .txt)')
    parser.add_argument('--output', '-o', help='Output directory (default: same as input file)')
    parser.add_argument('--domain', default='general',
                       choices=['general', 'embodied_ai', 'finance', 'medical'],
                       help='Domain for corrections (default: general)')
    parser.add_argument('--stage', type=int, default=3, choices=[1, 2, 3],
                       help='Processing stage: 1=dict, 2=AI, 3=both (default: 3)')
    parser.add_argument('--auto-open', action='store_true', default=True,
                       help='Automatically open HTML diff in browser (default: True)')
    parser.add_argument('--no-auto-open', dest='auto_open', action='store_false',
                       help='Do not open HTML diff automatically')

    args = parser.parse_args()

    # CRITICAL FIX: Validate input file with security checks
    try:
        # Add current directory to allowed paths (for user convenience)
        add_allowed_directory(Path.cwd())

        input_path = path_validator.validate_input_path(args.input)
        print(f"✓ Input file validated: {input_path}")

    except PathValidationError as e:
        print(f"❌ Input file validation failed: {e}")
        sys.exit(1)

    # Prepare the output directory
    # NOTE(review): the output path is only registered as allowed and created;
    # it is never passed through path_validator itself — confirm that is intended.
    if args.output:
        try:
            output_dir_path = Path(args.output).expanduser().absolute()
            # Allow the parent when it already exists, otherwise the directory itself
            add_allowed_directory(output_dir_path.parent if output_dir_path.parent.exists() else output_dir_path)

            output_dir = output_dir_path
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"✓ Output directory validated: {output_dir}")

        except PathValidationError as e:
            print(f"❌ Output directory validation failed: {e}")
            sys.exit(1)
        except OSError as e:
            # e.g. permission denied, or a regular file already has that name
            print(f"❌ Could not create output directory {args.output}: {e}")
            sys.exit(1)
    else:
        output_dir = input_path.parent

    # Check/find API key if Stage 2 or 3
    if args.stage in [2, 3]:
        api_key = os.environ.get('GLM_API_KEY')
        if not api_key:
            print("🔍 GLM_API_KEY not set, searching shell configs...")
            api_key = find_glm_api_key()
            if api_key:
                # The child process inherits this environment
                os.environ['GLM_API_KEY'] = api_key
            else:
                print("❌ GLM_API_KEY not found. Please set it or run with --stage 1")
                print("   Get API key from: https://open.bigmodel.cn/")
                sys.exit(1)

    # Get script directory
    script_dir = Path(__file__).parent
    main_script = script_dir / "fix_transcription.py"

    if not main_script.exists():
        print(f"❌ Main script not found: {main_script}")
        sys.exit(1)

    # Build command
    cmd = [
        'uv', 'run', '--with', 'httpx',
        str(main_script),
        '--input', str(input_path),
        '--stage', str(args.stage),
        '--domain', args.domain
    ]

    print(f"📖 Processing: {input_path.name}")
    print(f"📁 Output directory: {output_dir}")
    print(f"🎯 Domain: {args.domain}")
    print(f"⚙️  Stage: {args.stage}")
    print()

    # Run main script
    try:
        subprocess.run(cmd, check=True, cwd=script_dir.parent)
    except subprocess.CalledProcessError as e:
        print(f"❌ Processing failed with exit code {e.returncode}")
        sys.exit(e.returncode)
    except OSError as e:
        # Raised when the executable itself cannot be started (e.g. uv missing)
        print(f"❌ Could not launch '{cmd[0]}': {e}")
        print("   Make sure uv is installed and available on PATH")
        sys.exit(1)

    base_name = input_path.stem

    # Move output files to desired directory if different from input directory.
    # Both sides are resolved so that the same directory spelled differently
    # (relative segments, symlinks) is not mistaken for a different one.
    if output_dir.resolve() != input_path.parent.resolve():
        print(f"\n📦 Moving output files to {output_dir}...")

        output_patterns = [
            f"{base_name}_stage1.md",
            f"{base_name}_stage2.md",
            f"{base_name}_对比.html",
            f"{base_name}_对比报告.md",
            f"{base_name}_修复报告.md",
        ]

        for pattern in output_patterns:
            source = input_path.parent / pattern
            if not source.exists():
                continue
            try:
                # shutil.move falls back to copy+delete across filesystems,
                # where a plain rename would raise OSError
                shutil.move(str(source), str(output_dir / pattern))
            except OSError as e:
                print(f"  ⚠️  Could not move {pattern}: {e}")
            else:
                print(f"  ✓ {pattern}")

    # Auto-open HTML diff
    if args.auto_open:
        html_file = output_dir / f"{base_name}_对比.html"
        if html_file.exists():
            print("\n🌐 Opening HTML diff in browser...")
            open_html_in_browser(html_file)
        else:
            print("\n⚠️  HTML diff not generated (may require Stage 2/3)")

    print("\n✅ Processing complete!")
    print(f"\n📄 Output files in: {output_dir}")

    # List only the files that are really there, so a --stage 1 run does not
    # advertise stage 2 / HTML outputs that were never produced.
    expected_outputs = [
        (f"{base_name}_stage1.md", "dictionary corrections"),
        (f"{base_name}_stage2.md", "AI corrections - final version"),
        (f"{base_name}_对比.html", "visual diff"),
    ]
    for filename, description in expected_outputs:
        if (output_dir / filename).exists():
            print(f"   - {filename} ({description})")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()