#!/usr/bin/env python3
"""
Conflict Detector for Multi-Source Skills

Detects conflicts between documentation and code:
- missing_in_docs: API exists in code but not documented
- missing_in_code: API documented but doesn't exist in code
- signature_mismatch: Different parameters/types between docs and code
- description_mismatch: Docs say one thing, code comments say another

Used by unified scraper to identify discrepancies before merging.
"""

import json
import logging
import re
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, asdict
from difflib import SequenceMatcher

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Keywords that the generic C-style signature pattern can mistake for a
# return type (e.g. "def foo(...)" would otherwise match with
# return_type="def" and overwrite the correctly-parsed Python entry).
_SIGNATURE_KEYWORDS = {
    'def', 'function', 'return', 'if', 'elif', 'else', 'while', 'for',
    'and', 'or', 'not', 'in', 'is', 'with', 'await', 'new', 'assert',
}


@dataclass
class Conflict:
    """Represents a single conflict between documentation and code."""
    type: str  # 'missing_in_docs', 'missing_in_code', 'signature_mismatch', 'description_mismatch'
    severity: str  # 'low', 'medium', 'high'
    api_name: str
    docs_info: Optional[Dict[str, Any]] = None
    code_info: Optional[Dict[str, Any]] = None
    difference: Optional[str] = None
    suggestion: Optional[str] = None


class ConflictDetector:
    """
    Detects conflicts between documentation and code sources.
    """

    def __init__(self, docs_data: Dict[str, Any], github_data: Dict[str, Any]):
        """
        Initialize conflict detector.

        Args:
            docs_data: Data from documentation scraper
            github_data: Data from GitHub scraper with code analysis
        """
        self.docs_data = docs_data
        self.github_data = github_data

        # Extract API information from both sources up front so the
        # detection passes only do dictionary comparisons.
        self.docs_apis = self._extract_docs_apis()
        self.code_apis = self._extract_code_apis()

        logger.info(f"Loaded {len(self.docs_apis)} APIs from documentation")
        logger.info(f"Loaded {len(self.code_apis)} APIs from code")

    def _extract_docs_apis(self) -> Dict[str, Dict[str, Any]]:
        """
        Extract API information from documentation data.

        Returns:
            Dict mapping API name to API info
        """
        apis = {}

        # Documentation structure varies, but typically has 'pages' or 'references'
        pages = self.docs_data.get('pages', {})

        # Look for API reference pages
        for url, page_data in pages.items():
            content = page_data.get('content', '')
            title = page_data.get('title', '')

            # Simple heuristic: if title or URL contains "api", "reference",
            # "class", "function" or "method" it might be an API page.
            if any(keyword in title.lower() or keyword in url.lower()
                   for keyword in ['api', 'reference', 'class', 'function', 'method']):

                # Extract API signatures from content (simplified)
                extracted_apis = self._parse_doc_content_for_apis(content, url)
                apis.update(extracted_apis)

        return apis

    def _parse_doc_content_for_apis(self, content: str, source_url: str) -> Dict[str, Dict]:
        """
        Parse documentation content to extract API signatures.

        This is a simplified approach - real implementation would need
        to understand the documentation format (Sphinx, JSDoc, etc.)

        Args:
            content: Raw page text to scan for signatures
            source_url: URL recorded as the signature's origin

        Returns:
            Dict mapping API name to a signature-info dict
        """
        apis = {}

        # Look for function/method signatures in code blocks.
        # Common patterns:
        # - function_name(param1, param2)
        # - ClassName.method_name(param1, param2)
        # - def function_name(param1: type, param2: type) -> return_type
        #
        # Patterns are ordered most-specific first; once a name has been
        # parsed by an earlier pattern, later (more generic) patterns must
        # NOT overwrite it.  Previously the generic C-style pattern also
        # matched "def foo(...)" and clobbered the Python parse with
        # return_type='def'.
        patterns = [
            # Python style: def name(params) -> return
            r'def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*(\w+))?',
            # JavaScript style: function name(params)
            r'function\s+(\w+)\s*\(([^)]*)\)',
            # C++ style: return_type name(params)
            r'(\w+)\s+(\w+)\s*\(([^)]*)\)',
            # Method style: ClassName.method_name(params)
            r'(\w+)\.(\w+)\s*\(([^)]*)\)'
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, content):
                groups = match.groups()

                # Parse based on pattern matched
                if 'def' in pattern:
                    # Python function
                    name = groups[0]
                    params_str = groups[1]
                    return_type = groups[2] if len(groups) > 2 else None
                elif 'function' in pattern:
                    # JavaScript function
                    name = groups[0]
                    params_str = groups[1]
                    return_type = None
                elif '.' in pattern:
                    # Class method
                    class_name = groups[0]
                    method_name = groups[1]
                    name = f"{class_name}.{method_name}"
                    params_str = groups[2] if len(groups) > 2 else groups[1]
                    return_type = None
                else:
                    # C++ function
                    return_type = groups[0]
                    name = groups[1]
                    params_str = groups[2]
                    # Guard: "def foo(...)" / "if foo(...)" etc. are not
                    # C-style signatures; the first word is a keyword.
                    if return_type in _SIGNATURE_KEYWORDS:
                        continue

                # First (most specific) pattern wins; never overwrite.
                if name in apis:
                    continue

                # Parse parameters
                params = self._parse_param_string(params_str)

                apis[name] = {
                    'name': name,
                    'parameters': params,
                    'return_type': return_type,
                    'source': source_url,
                    'raw_signature': match.group(0)
                }

        return apis

    def _parse_param_string(self, params_str: str) -> List[Dict]:
        """Parse parameter string into list of parameter dicts.

        Each dict has keys 'name', 'type' and 'default' (the latter two
        may be None).  Handles "name", "name: type", "name = default" and
        "name: type = default" forms; nested commas inside defaults are
        not supported by this simplified splitter.
        """
        if not params_str.strip():
            return []

        params = []
        for param in params_str.split(','):
            param = param.strip()
            if not param:
                continue

            # Try to extract name and type
            param_info = {'name': param, 'type': None, 'default': None}

            # Check for type annotation (: type)
            if ':' in param:
                parts = param.split(':', 1)
                param_info['name'] = parts[0].strip()
                type_part = parts[1].strip()

                # Check for default value (= value)
                if '=' in type_part:
                    type_str, default_str = type_part.split('=', 1)
                    param_info['type'] = type_str.strip()
                    param_info['default'] = default_str.strip()
                else:
                    param_info['type'] = type_part

            # Check for default without type (= value)
            elif '=' in param:
                parts = param.split('=', 1)
                param_info['name'] = parts[0].strip()
                param_info['default'] = parts[1].strip()

            params.append(param_info)

        return params

    def _extract_code_apis(self) -> Dict[str, Dict[str, Any]]:
        """
        Extract API information from GitHub code analysis.

        Returns:
            Dict mapping API name to API info
        """
        apis = {}

        code_analysis = self.github_data.get('code_analysis', {})
        if not code_analysis:
            return apis

        files = code_analysis.get('files', [])

        for file_info in files:
            file_path = file_info['file']

            # Extract classes and their methods
            for class_info in file_info.get('classes', []):
                class_name = class_info['name']

                # Add class itself
                apis[class_name] = {
                    'name': class_name,
                    'type': 'class',
                    'source': file_path,
                    'line': class_info.get('line_number'),
                    'base_classes': class_info.get('base_classes', []),
                    'docstring': class_info.get('docstring')
                }

                # Add methods, keyed "ClassName.method" to match the doc
                # extractor's method naming.
                for method in class_info.get('methods', []):
                    method_name = f"{class_name}.{method['name']}"
                    apis[method_name] = {
                        'name': method_name,
                        'type': 'method',
                        'parameters': method.get('parameters', []),
                        'return_type': method.get('return_type'),
                        'source': file_path,
                        'line': method.get('line_number'),
                        'docstring': method.get('docstring'),
                        'is_async': method.get('is_async', False)
                    }

            # Extract standalone functions
            for func_info in file_info.get('functions', []):
                func_name = func_info['name']
                apis[func_name] = {
                    'name': func_name,
                    'type': 'function',
                    'parameters': func_info.get('parameters', []),
                    'return_type': func_info.get('return_type'),
                    'source': file_path,
                    'line': func_info.get('line_number'),
                    'docstring': func_info.get('docstring'),
                    'is_async': func_info.get('is_async', False)
                }

        return apis

    def detect_all_conflicts(self) -> List[Conflict]:
        """
        Detect all types of conflicts.

        Returns:
            List of Conflict objects
        """
        logger.info("Detecting conflicts between documentation and code...")

        conflicts = []

        # 1. Find APIs missing in documentation
        conflicts.extend(self._find_missing_in_docs())

        # 2. Find APIs missing in code
        conflicts.extend(self._find_missing_in_code())

        # 3. Find signature mismatches
        conflicts.extend(self._find_signature_mismatches())

        logger.info(f"Found {len(conflicts)} conflicts total")

        return conflicts

    def _find_missing_in_docs(self) -> List[Conflict]:
        """Find APIs that exist in code but not in documentation."""
        conflicts = []

        for api_name, code_info in self.code_apis.items():
            # Simple name matching (can be enhanced with fuzzy matching)
            if api_name not in self.docs_apis:
                # Private/internal APIs are often intentionally undocumented,
                # so they only warrant low severity.
                is_private = api_name.startswith('_') or '__' in api_name
                severity = 'low' if is_private else 'medium'

                conflicts.append(Conflict(
                    type='missing_in_docs',
                    severity=severity,
                    api_name=api_name,
                    code_info=code_info,
                    difference=f"API exists in code ({code_info['source']}) but not found in documentation",
                    suggestion="Add documentation for this API" if not is_private else "Consider if this internal API should be documented"
                ))

        logger.info(f"Found {len(conflicts)} APIs missing in documentation")
        return conflicts

    def _find_missing_in_code(self) -> List[Conflict]:
        """Find APIs that are documented but don't exist in code."""
        conflicts = []

        for api_name, docs_info in self.docs_apis.items():
            if api_name not in self.code_apis:
                conflicts.append(Conflict(
                    type='missing_in_code',
                    severity='high',  # This is serious - documented but doesn't exist
                    api_name=api_name,
                    docs_info=docs_info,
                    difference=f"API documented ({docs_info.get('source', 'unknown')}) but not found in code",
                    suggestion="Update documentation to remove this API, or add it to codebase"
                ))

        logger.info(f"Found {len(conflicts)} APIs missing in code")
        return conflicts

    def _find_signature_mismatches(self) -> List[Conflict]:
        """Find APIs where signature differs between docs and code."""
        conflicts = []

        # Find APIs that exist in both
        common_apis = set(self.docs_apis.keys()) & set(self.code_apis.keys())

        for api_name in common_apis:
            docs_info = self.docs_apis[api_name]
            code_info = self.code_apis[api_name]

            # Compare signatures
            mismatch = self._compare_signatures(docs_info, code_info)

            if mismatch:
                conflicts.append(Conflict(
                    type='signature_mismatch',
                    severity=mismatch['severity'],
                    api_name=api_name,
                    docs_info=docs_info,
                    code_info=code_info,
                    difference=mismatch['difference'],
                    suggestion=mismatch['suggestion']
                ))

        logger.info(f"Found {len(conflicts)} signature mismatches")
        return conflicts

    def _compare_signatures(self, docs_info: Dict, code_info: Dict) -> Optional[Dict]:
        """
        Compare signatures between docs and code.

        Only the first detected discrepancy is reported per API.

        Returns:
            Dict with mismatch details if conflict found, None otherwise
        """
        docs_params = docs_info.get('parameters', [])
        code_params = code_info.get('parameters', [])

        # Compare parameter counts
        if len(docs_params) != len(code_params):
            return {
                'severity': 'medium',
                'difference': f"Parameter count mismatch: docs has {len(docs_params)}, code has {len(code_params)}",
                'suggestion': f"Documentation shows {len(docs_params)} parameters, but code has {len(code_params)}"
            }

        # Compare parameter names and types
        for i, (doc_param, code_param) in enumerate(zip(docs_params, code_params)):
            doc_name = doc_param.get('name', '')
            code_name = code_param.get('name', '')

            # Parameter name mismatch
            if doc_name != code_name:
                # Use fuzzy matching for slight variations
                similarity = SequenceMatcher(None, doc_name, code_name).ratio()
                if similarity < 0.8:  # Not similar enough
                    return {
                        'severity': 'medium',
                        'difference': f"Parameter {i+1} name mismatch: '{doc_name}' in docs vs '{code_name}' in code",
                        'suggestion': f"Update documentation to use parameter name '{code_name}'"
                    }

            # Type mismatch (docs store 'type', code analysis stores 'type_hint')
            doc_type = doc_param.get('type')
            code_type = code_param.get('type_hint')

            if doc_type and code_type and doc_type != code_type:
                return {
                    'severity': 'low',
                    'difference': f"Parameter '{doc_name}' type mismatch: '{doc_type}' in docs vs '{code_type}' in code",
                    'suggestion': f"Verify correct type for parameter '{doc_name}'"
                }

        # Compare return types if both have them
        docs_return = docs_info.get('return_type')
        code_return = code_info.get('return_type')

        if docs_return and code_return and docs_return != code_return:
            return {
                'severity': 'low',
                'difference': f"Return type mismatch: '{docs_return}' in docs vs '{code_return}' in code",
                'suggestion': "Verify correct return type"
            }

        return None

    def generate_summary(self, conflicts: List[Conflict]) -> Dict[str, Any]:
        """
        Generate summary statistics for conflicts.

        Args:
            conflicts: List of Conflict objects

        Returns:
            Summary dict with statistics
        """
        summary = {
            'total': len(conflicts),
            'by_type': {},
            'by_severity': {},
            'apis_affected': len(set(c.api_name for c in conflicts))
        }

        # Count by type
        for conflict_type in ['missing_in_docs', 'missing_in_code', 'signature_mismatch', 'description_mismatch']:
            count = sum(1 for c in conflicts if c.type == conflict_type)
            summary['by_type'][conflict_type] = count

        # Count by severity
        for severity in ['low', 'medium', 'high']:
            count = sum(1 for c in conflicts if c.severity == severity)
            summary['by_severity'][severity] = count

        return summary

    def save_conflicts(self, conflicts: List[Conflict], output_path: str):
        """
        Save conflicts to JSON file.

        Args:
            conflicts: List of Conflict objects
            output_path: Path to output JSON file
        """
        data = {
            'conflicts': [asdict(c) for c in conflicts],
            'summary': self.generate_summary(conflicts)
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"Conflicts saved to: {output_path}")


if __name__ == '__main__':
    import sys

    if len(sys.argv) < 3:
        # BUG FIX: the argument placeholders were missing from this message.
        print("Usage: python conflict_detector.py <docs_data.json> <github_data.json>")
        sys.exit(1)

    docs_file = sys.argv[1]
    github_file = sys.argv[2]

    # Load data
    with open(docs_file, 'r') as f:
        docs_data = json.load(f)

    with open(github_file, 'r') as f:
        github_data = json.load(f)

    # Detect conflicts
    detector = ConflictDetector(docs_data, github_data)
    conflicts = detector.detect_all_conflicts()

    # Print summary
    summary = detector.generate_summary(conflicts)
    print("\nšŸ“Š Conflict Summary:")
    print(f"   Total conflicts: {summary['total']}")
    print(f"   APIs affected: {summary['apis_affected']}")
    print("\n   By Type:")
    for conflict_type, count in summary['by_type'].items():
        if count > 0:
            print(f"      {conflict_type}: {count}")
    print("\n   By Severity:")
    for severity, count in summary['by_severity'].items():
        if count > 0:
            emoji = 'šŸ”“' if severity == 'high' else '🟔' if severity == 'medium' else '🟢'
            print(f"      {emoji} {severity}: {count}")

    # Save to file
    output_file = 'conflicts.json'
    detector.save_conflicts(conflicts, output_file)
    print(f"\nāœ… Full report saved to: {output_file}")
+""" + +import json +import logging +import subprocess +import tempfile +import os +from pathlib import Path +from typing import Dict, List, Any, Optional +from conflict_detector import Conflict, ConflictDetector + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class RuleBasedMerger: + """ + Rule-based API merger using deterministic rules. + + Rules: + 1. If API only in docs → Include with [DOCS_ONLY] tag + 2. If API only in code → Include with [UNDOCUMENTED] tag + 3. If both match perfectly → Include normally + 4. If conflict → Include both versions with [CONFLICT] tag, prefer code signature + """ + + def __init__(self, docs_data: Dict, github_data: Dict, conflicts: List[Conflict]): + """ + Initialize rule-based merger. + + Args: + docs_data: Documentation scraper data + github_data: GitHub scraper data + conflicts: List of detected conflicts + """ + self.docs_data = docs_data + self.github_data = github_data + self.conflicts = conflicts + + # Build conflict index for fast lookup + self.conflict_index = {c.api_name: c for c in conflicts} + + # Extract APIs from both sources + detector = ConflictDetector(docs_data, github_data) + self.docs_apis = detector.docs_apis + self.code_apis = detector.code_apis + + def merge_all(self) -> Dict[str, Any]: + """ + Merge all APIs using rule-based logic. 
+ + Returns: + Dict containing merged API data + """ + logger.info("Starting rule-based merge...") + + merged_apis = {} + + # Get all unique API names + all_api_names = set(self.docs_apis.keys()) | set(self.code_apis.keys()) + + for api_name in sorted(all_api_names): + merged_api = self._merge_single_api(api_name) + merged_apis[api_name] = merged_api + + logger.info(f"Merged {len(merged_apis)} APIs") + + return { + 'merge_mode': 'rule-based', + 'apis': merged_apis, + 'summary': { + 'total_apis': len(merged_apis), + 'docs_only': sum(1 for api in merged_apis.values() if api['status'] == 'docs_only'), + 'code_only': sum(1 for api in merged_apis.values() if api['status'] == 'code_only'), + 'matched': sum(1 for api in merged_apis.values() if api['status'] == 'matched'), + 'conflict': sum(1 for api in merged_apis.values() if api['status'] == 'conflict') + } + } + + def _merge_single_api(self, api_name: str) -> Dict[str, Any]: + """ + Merge a single API using rules. + + Args: + api_name: Name of the API to merge + + Returns: + Merged API dict + """ + in_docs = api_name in self.docs_apis + in_code = api_name in self.code_apis + has_conflict = api_name in self.conflict_index + + # Rule 1: Only in docs + if in_docs and not in_code: + conflict = self.conflict_index.get(api_name) + return { + 'name': api_name, + 'status': 'docs_only', + 'source': 'documentation', + 'data': self.docs_apis[api_name], + 'warning': 'This API is documented but not found in codebase', + 'conflict': conflict.__dict__ if conflict else None + } + + # Rule 2: Only in code + if in_code and not in_docs: + is_private = api_name.startswith('_') + conflict = self.conflict_index.get(api_name) + return { + 'name': api_name, + 'status': 'code_only', + 'source': 'code', + 'data': self.code_apis[api_name], + 'warning': 'This API exists in code but is not documented' if not is_private else 'Internal/private API', + 'conflict': conflict.__dict__ if conflict else None + } + + # Both exist - check for conflicts + 
docs_info = self.docs_apis[api_name] + code_info = self.code_apis[api_name] + + # Rule 3: Both match perfectly (no conflict) + if not has_conflict: + return { + 'name': api_name, + 'status': 'matched', + 'source': 'both', + 'docs_data': docs_info, + 'code_data': code_info, + 'merged_signature': self._create_merged_signature(code_info, docs_info), + 'merged_description': docs_info.get('docstring') or code_info.get('docstring') + } + + # Rule 4: Conflict exists - prefer code signature, keep docs description + conflict = self.conflict_index[api_name] + + return { + 'name': api_name, + 'status': 'conflict', + 'source': 'both', + 'docs_data': docs_info, + 'code_data': code_info, + 'conflict': conflict.__dict__, + 'resolution': 'prefer_code_signature', + 'merged_signature': self._create_merged_signature(code_info, docs_info), + 'merged_description': docs_info.get('docstring') or code_info.get('docstring'), + 'warning': conflict.difference + } + + def _create_merged_signature(self, code_info: Dict, docs_info: Dict) -> str: + """ + Create merged signature preferring code data. + + Args: + code_info: API info from code + docs_info: API info from docs + + Returns: + Merged signature string + """ + name = code_info.get('name', docs_info.get('name')) + params = code_info.get('parameters', docs_info.get('parameters', [])) + return_type = code_info.get('return_type', docs_info.get('return_type')) + + # Build parameter string + param_strs = [] + for param in params: + param_str = param['name'] + if param.get('type_hint'): + param_str += f": {param['type_hint']}" + if param.get('default'): + param_str += f" = {param['default']}" + param_strs.append(param_str) + + signature = f"{name}({', '.join(param_strs)})" + + if return_type: + signature += f" -> {return_type}" + + return signature + + +class ClaudeEnhancedMerger: + """ + Claude-enhanced API merger using local Claude Code. + + Opens Claude Code in a new terminal to intelligently reconcile conflicts. 
+ Uses the same approach as enhance_skill_local.py. + """ + + def __init__(self, docs_data: Dict, github_data: Dict, conflicts: List[Conflict]): + """ + Initialize Claude-enhanced merger. + + Args: + docs_data: Documentation scraper data + github_data: GitHub scraper data + conflicts: List of detected conflicts + """ + self.docs_data = docs_data + self.github_data = github_data + self.conflicts = conflicts + + # First do rule-based merge as baseline + self.rule_merger = RuleBasedMerger(docs_data, github_data, conflicts) + + def merge_all(self) -> Dict[str, Any]: + """ + Merge all APIs using Claude enhancement. + + Returns: + Dict containing merged API data + """ + logger.info("Starting Claude-enhanced merge...") + + # Create temporary workspace + workspace_dir = self._create_workspace() + + # Launch Claude Code for enhancement + logger.info("Launching Claude Code for intelligent merging...") + logger.info("Claude will analyze conflicts and create reconciled API reference") + + try: + self._launch_claude_merge(workspace_dir) + + # Read enhanced results + merged_data = self._read_merged_results(workspace_dir) + + logger.info("Claude-enhanced merge complete") + return merged_data + + except Exception as e: + logger.error(f"Claude enhancement failed: {e}") + logger.info("Falling back to rule-based merge") + return self.rule_merger.merge_all() + + def _create_workspace(self) -> str: + """ + Create temporary workspace with merge context. + + Returns: + Path to workspace directory + """ + workspace = tempfile.mkdtemp(prefix='skill_merge_') + logger.info(f"Created merge workspace: {workspace}") + + # Write context files for Claude + self._write_context_files(workspace) + + return workspace + + def _write_context_files(self, workspace: str): + """Write context files for Claude to analyze.""" + + # 1. 
Write conflicts summary + conflicts_file = os.path.join(workspace, 'conflicts.json') + with open(conflicts_file, 'w') as f: + json.dump({ + 'conflicts': [c.__dict__ for c in self.conflicts], + 'summary': { + 'total': len(self.conflicts), + 'by_type': self._count_by_field('type'), + 'by_severity': self._count_by_field('severity') + } + }, f, indent=2) + + # 2. Write documentation APIs + docs_apis_file = os.path.join(workspace, 'docs_apis.json') + detector = ConflictDetector(self.docs_data, self.github_data) + with open(docs_apis_file, 'w') as f: + json.dump(detector.docs_apis, f, indent=2) + + # 3. Write code APIs + code_apis_file = os.path.join(workspace, 'code_apis.json') + with open(code_apis_file, 'w') as f: + json.dump(detector.code_apis, f, indent=2) + + # 4. Write merge instructions for Claude + instructions = """# API Merge Task + +You are merging API documentation from two sources: +1. Official documentation (user-facing) +2. Source code analysis (implementation reality) + +## Context Files: +- `conflicts.json` - All detected conflicts between sources +- `docs_apis.json` - APIs from documentation +- `code_apis.json` - APIs from source code + +## Your Task: +For each conflict, reconcile the differences intelligently: + +1. **Prefer code signatures as source of truth** + - Use actual parameter names, types, defaults from code + - Code is what actually runs, docs might be outdated + +2. **Keep documentation descriptions** + - Docs are user-friendly, code comments might be technical + - Keep the docs' explanation of what the API does + +3. **Add implementation notes for discrepancies** + - If docs differ from code, explain the difference + - Example: "āš ļø The `snap` parameter exists in code but is not documented" + +4. **Flag missing APIs clearly** + - Missing in docs → Add [UNDOCUMENTED] tag + - Missing in code → Add [REMOVED] or [DOCS_ERROR] tag + +5. 
**Create unified API reference** + - One definitive signature per API + - Clear warnings about conflicts + - Implementation notes where helpful + +## Output Format: +Create `merged_apis.json` with this structure: + +```json +{ + "apis": { + "API.name": { + "signature": "final_signature_here", + "parameters": [...], + "return_type": "type", + "description": "user-friendly description", + "implementation_notes": "Any discrepancies or warnings", + "source": "both|docs_only|code_only", + "confidence": "high|medium|low" + } + } +} +``` + +Take your time to analyze each conflict carefully. The goal is to create the most accurate and helpful API reference possible. +""" + + instructions_file = os.path.join(workspace, 'MERGE_INSTRUCTIONS.md') + with open(instructions_file, 'w') as f: + f.write(instructions) + + logger.info(f"Wrote context files to {workspace}") + + def _count_by_field(self, field: str) -> Dict[str, int]: + """Count conflicts by a specific field.""" + counts = {} + for conflict in self.conflicts: + value = getattr(conflict, field) + counts[value] = counts.get(value, 0) + 1 + return counts + + def _launch_claude_merge(self, workspace: str): + """ + Launch Claude Code to perform merge. + + Similar to enhance_skill_local.py approach. + """ + # Create a script that Claude will execute + script_path = os.path.join(workspace, 'merge_script.sh') + + script_content = f"""#!/bin/bash +# Automatic merge script for Claude Code + +cd "{workspace}" + +echo "šŸ“Š Analyzing conflicts..." +cat conflicts.json | head -20 + +echo "" +echo "šŸ“– Documentation APIs: $(cat docs_apis.json | grep -c '\"name\"')" +echo "šŸ’» Code APIs: $(cat code_apis.json | grep -c '\"name\"')" +echo "" +echo "Please review the conflicts and create merged_apis.json" +echo "Follow the instructions in MERGE_INSTRUCTIONS.md" +echo "" +echo "When done, save merged_apis.json and close this terminal." + +# Wait for user to complete merge +read -p "Press Enter when merge is complete..." 
+""" + + with open(script_path, 'w') as f: + f.write(script_content) + + os.chmod(script_path, 0o755) + + # Open new terminal with Claude Code + # Try different terminal emulators + terminals = [ + ['x-terminal-emulator', '-e'], + ['gnome-terminal', '--'], + ['xterm', '-e'], + ['konsole', '-e'] + ] + + for terminal_cmd in terminals: + try: + cmd = terminal_cmd + ['bash', script_path] + subprocess.Popen(cmd) + logger.info(f"Opened terminal with {terminal_cmd[0]}") + break + except FileNotFoundError: + continue + + # Wait for merge to complete + merged_file = os.path.join(workspace, 'merged_apis.json') + logger.info(f"Waiting for merged results at: {merged_file}") + logger.info("Close the terminal when done to continue...") + + # Poll for file existence + import time + timeout = 3600 # 1 hour max + elapsed = 0 + while not os.path.exists(merged_file) and elapsed < timeout: + time.sleep(5) + elapsed += 5 + + if not os.path.exists(merged_file): + raise TimeoutError("Claude merge timed out after 1 hour") + + def _read_merged_results(self, workspace: str) -> Dict[str, Any]: + """Read merged results from workspace.""" + merged_file = os.path.join(workspace, 'merged_apis.json') + + if not os.path.exists(merged_file): + raise FileNotFoundError(f"Merged results not found: {merged_file}") + + with open(merged_file, 'r') as f: + merged_data = json.load(f) + + return { + 'merge_mode': 'claude-enhanced', + **merged_data + } + + +def merge_sources(docs_data_path: str, + github_data_path: str, + output_path: str, + mode: str = 'rule-based') -> Dict[str, Any]: + """ + Merge documentation and GitHub data. 
+ + Args: + docs_data_path: Path to documentation data JSON + github_data_path: Path to GitHub data JSON + output_path: Path to save merged output + mode: 'rule-based' or 'claude-enhanced' + + Returns: + Merged data dict + """ + # Load data + with open(docs_data_path, 'r') as f: + docs_data = json.load(f) + + with open(github_data_path, 'r') as f: + github_data = json.load(f) + + # Detect conflicts + detector = ConflictDetector(docs_data, github_data) + conflicts = detector.detect_all_conflicts() + + logger.info(f"Detected {len(conflicts)} conflicts") + + # Merge based on mode + if mode == 'claude-enhanced': + merger = ClaudeEnhancedMerger(docs_data, github_data, conflicts) + else: + merger = RuleBasedMerger(docs_data, github_data, conflicts) + + merged_data = merger.merge_all() + + # Save merged data + with open(output_path, 'w') as f: + json.dump(merged_data, f, indent=2, ensure_ascii=False) + + logger.info(f"Merged data saved to: {output_path}") + + return merged_data + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser(description='Merge documentation and code sources') + parser.add_argument('docs_data', help='Path to documentation data JSON') + parser.add_argument('github_data', help='Path to GitHub data JSON') + parser.add_argument('--output', '-o', default='merged_data.json', help='Output file path') + parser.add_argument('--mode', '-m', choices=['rule-based', 'claude-enhanced'], + default='rule-based', help='Merge mode') + + args = parser.parse_args() + + merged = merge_sources(args.docs_data, args.github_data, args.output, args.mode) + + # Print summary + summary = merged.get('summary', {}) + print(f"\nāœ… Merge complete ({merged.get('merge_mode')})") + print(f" Total APIs: {summary.get('total_apis', 0)}") + print(f" Matched: {summary.get('matched', 0)}") + print(f" Docs only: {summary.get('docs_only', 0)}") + print(f" Code only: {summary.get('code_only', 0)}") + print(f" Conflicts: {summary.get('conflict', 0)}") + 
print(f"\nšŸ“„ Saved to: {args.output}")