#!/usr/bin/env python3 """Unmix a repomix file to restore original file structure. Supports XML, Markdown, and JSON repomix output formats. """ import re import os import sys import json from pathlib import Path def unmix_xml(content, output_dir): """Extract files from repomix XML format.""" # Pattern: content file_pattern = r'\n(.*?)\n' matches = re.finditer(file_pattern, content, re.DOTALL) extracted_files = [] for match in matches: file_path = match.group(1) file_content = match.group(2) # Create full output path full_path = Path(output_dir) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) # Write the file with open(full_path, 'w', encoding='utf-8') as f: f.write(file_content) extracted_files.append(file_path) print(f"✓ Extracted: {file_path}") return extracted_files def unmix_markdown(content, output_dir): """Extract files from repomix Markdown format.""" # Pattern: ## File: path\n```\ncontent\n``` file_pattern = r'## File: ([^\n]+)\n```[^\n]*\n(.*?)\n```' matches = re.finditer(file_pattern, content, re.DOTALL) extracted_files = [] for match in matches: file_path = match.group(1).strip() file_content = match.group(2) # Create full output path full_path = Path(output_dir) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) # Write the file with open(full_path, 'w', encoding='utf-8') as f: f.write(file_content) extracted_files.append(file_path) print(f"✓ Extracted: {file_path}") return extracted_files def unmix_json(content, output_dir): """Extract files from repomix JSON format.""" try: data = json.loads(content) files = data.get('files', []) extracted_files = [] for file_entry in files: file_path = file_entry.get('path') file_content = file_entry.get('content', '') if not file_path: continue # Create full output path full_path = Path(output_dir) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) # Write the file with open(full_path, 'w', encoding='utf-8') as f: f.write(file_content) extracted_files.append(file_path) print(f"✓ Extracted: {file_path}") return extracted_files except json.JSONDecodeError as e: print(f"Error: Failed to parse JSON: {e}") return [] def detect_format(content): """Detect the repomix file format.""" # Check for XML format if '' in content: return 'xml' # Check for JSON format if content.strip().startswith('{') and '"files"' in content: return 'json' # Check for Markdown format if '## File:' in content: return 'markdown' return None def unmix_repomix(repomix_file, output_dir): """Extract files from a repomix file (auto-detects format).""" # Read the repomix file with open(repomix_file, 'r', encoding='utf-8') as f: content = f.read() # Detect format format_type = detect_format(content) if format_type is None: print("Error: Could not detect repomix format") print("Expected XML (), Markdown (## File:), or JSON format") return [] print(f"Detected format: {format_type.upper()}") # Extract based on format if format_type == 'xml': return unmix_xml(content, output_dir) elif format_type == 'markdown': return unmix_markdown(content, output_dir) elif format_type == 'json': return unmix_json(content, output_dir) return [] def main(): """Main entry point.""" if len(sys.argv) < 2: print("Usage: unmix_repomix.py [output_directory]") print() print("Arguments:") print(" repomix_file Path to the repomix output file (XML, Markdown, or JSON)") print(" output_directory Optional: Directory to extract files to (default: ./extracted)") print() print("Examples:") print(" unmix_repomix.py skills.xml /tmp/extracted-skills") print(" unmix_repomix.py repo-output.md") sys.exit(1) repomix_file = sys.argv[1] output_dir = sys.argv[2] if len(sys.argv) > 2 else "./extracted" # Validate input file exists if not os.path.exists(repomix_file): print(f"Error: File not found: {repomix_file}") sys.exit(1) print(f"Unmixing {repomix_file}...") print(f"Output directory: {output_dir}\n") # Extract files extracted = unmix_repomix(repomix_file, output_dir) if not extracted: print("\n⚠️ No files extracted!") print("Check that the input file is a valid repomix output file.") sys.exit(1) print(f"\n✅ Successfully extracted {len(extracted)} files!") print(f"\nExtracted files are in: {output_dir}") if __name__ == "__main__": main()