#!/usr/bin/env python3
|
|
"""
|
|
Source Merger for Multi-Source Skills
|
|
|
|
Merges documentation and code data intelligently with GitHub insights:
|
|
- Rule-based merge: Fast, deterministic rules
|
|
- Claude-enhanced merge: AI-powered reconciliation
|
|
|
|
Handles conflicts and creates unified API reference with GitHub metadata.
|
|
|
|
Multi-layer architecture (Phase 3):
|
|
- Layer 1: C3.x code (ground truth)
|
|
- Layer 2: HTML docs (official intent)
|
|
- Layer 3: GitHub docs (README/CONTRIBUTING)
|
|
- Layer 4: GitHub insights (issues)
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from typing import Any, Optional
|
|
|
|
from .conflict_detector import Conflict, ConflictDetector
|
|
|
|
# Import three-stream data classes (Phase 1)
|
|
try:
|
|
from .github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData
|
|
except ImportError:
|
|
# Fallback if github_fetcher not available
|
|
ThreeStreamData = None
|
|
CodeStream = None
|
|
DocsStream = None
|
|
InsightsStream = None
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def categorize_issues_by_topic(
    problems: list[dict], solutions: list[dict], topics: list[str]
) -> dict[str, list[dict]]:
    """
    Group GitHub issues under the topic keyword they match best.

    Each issue's lowercased title and labels are scanned for every topic's
    whitespace-separated keywords; the issue lands in the topic with the
    highest keyword hit count (earlier topics win ties), or in a catch-all
    "other" bucket when nothing matches.

    Args:
        problems: List of common problems (open issues with 5+ comments)
        solutions: List of known solutions (closed issues with comments)
        topics: List of topic keywords to match against

    Returns:
        Dict mapping topic to relevant issues; empty buckets are dropped.
    """
    buckets: dict[str, list[dict]] = {topic: [] for topic in topics}
    buckets["other"] = []

    for issue in problems + solutions:
        # Build the searchable haystack from title + labels.
        title = issue.get("title", "").lower()
        labels = " ".join(label.lower() for label in issue.get("labels", []))
        haystack = f"{title} {labels}"

        # Score every topic; keep the first topic with the highest score.
        best_topic, best_score = None, 0
        for topic in topics:
            score = sum(keyword in haystack for keyword in topic.lower().split())
            if score > best_score:
                best_topic, best_score = topic, score

        buckets[best_topic if best_score > 0 else "other"].append(issue)

    # Drop categories that attracted no issues.
    return {name: bucket for name, bucket in buckets.items() if bucket}
|
|
|
|
|
|
def generate_hybrid_content(
    api_data: dict,
    github_docs: dict | None,
    github_insights: dict | None,
    conflicts: list[Conflict],
) -> dict[str, Any]:
    """
    Build an enriched API reference combining merged API data with GitHub context.

    Args:
        api_data: Merged API data
        github_docs: GitHub docs stream (README, CONTRIBUTING, docs/*.md)
        github_insights: GitHub insights stream (metadata, issues, labels)
        conflicts: List of detected conflicts

    Returns:
        Hybrid content dict with the API reference, GitHub context layers,
        a conflict summary, and (when insights exist) per-API issue links.
    """
    result: dict[str, Any] = {"api_reference": api_data, "github_context": {}}
    context = result["github_context"]

    # Layer 3: repository documentation (README / CONTRIBUTING / docs dir).
    if github_docs:
        context["docs"] = {
            "readme": github_docs.get("readme"),
            "contributing": github_docs.get("contributing"),
            "docs_files_count": len(github_docs.get("docs_files", [])),
        }

    # Layer 4: repository insights (metadata, issues, labels).
    if github_insights:
        meta = github_insights.get("metadata", {})
        context["metadata"] = {
            "stars": meta.get("stars", 0),
            "forks": meta.get("forks", 0),
            "language": meta.get("language", "Unknown"),
            "description": meta.get("description", ""),
        }

        problems = github_insights.get("common_problems", [])
        solutions = github_insights.get("known_solutions", [])
        context["issues"] = {
            "common_problems_count": len(problems),
            "known_solutions_count": len(solutions),
            "top_problems": problems[:5],  # Top 5 most-discussed
            "top_solutions": solutions[:5],
        }

        context["top_labels"] = github_insights.get("top_labels", [])

    # Tally conflicts by type and by severity.
    by_type: dict[str, int] = {}
    by_severity: dict[str, int] = {}
    for conflict in conflicts:
        by_type[conflict.type] = by_type.get(conflict.type, 0) + 1
        by_severity[conflict.severity] = by_severity.get(conflict.severity, 0) + 1

    result["conflict_summary"] = {
        "total_conflicts": len(conflicts),
        "by_type": by_type,
        "by_severity": by_severity,
    }

    # Cross-reference issues against individual APIs.
    if github_insights:
        result["issue_links"] = _match_issues_to_apis(
            api_data.get("apis", {}),
            github_insights.get("common_problems", []),
            github_insights.get("known_solutions", []),
        )

    return result
|
|
|
|
|
|
def _match_issues_to_apis(
|
|
apis: dict[str, dict], problems: list[dict], solutions: list[dict]
|
|
) -> dict[str, list[dict]]:
|
|
"""
|
|
Match GitHub issues to specific APIs by keyword matching.
|
|
|
|
Args:
|
|
apis: Dict of API data keyed by name
|
|
problems: List of common problems
|
|
solutions: List of known solutions
|
|
|
|
Returns:
|
|
Dict mapping API names to relevant issues
|
|
"""
|
|
issue_links = {}
|
|
all_issues = problems + solutions
|
|
|
|
for api_name in apis:
|
|
# Extract searchable keywords from API name
|
|
api_keywords = api_name.lower().replace("_", " ").split(".")
|
|
|
|
matched_issues = []
|
|
for issue in all_issues:
|
|
title = issue.get("title", "").lower()
|
|
labels = [label.lower() for label in issue.get("labels", [])]
|
|
text = f"{title} {' '.join(labels)}"
|
|
|
|
# Check if any API keyword appears in issue
|
|
if any(keyword in text for keyword in api_keywords):
|
|
matched_issues.append(
|
|
{
|
|
"number": issue.get("number"),
|
|
"title": issue.get("title"),
|
|
"state": issue.get("state"),
|
|
"comments": issue.get("comments"),
|
|
}
|
|
)
|
|
|
|
if matched_issues:
|
|
issue_links[api_name] = matched_issues
|
|
|
|
return issue_links
|
|
|
|
|
|
class RuleBasedMerger:
    """
    Rule-based API merger using deterministic rules with GitHub insights.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)

    Rules:
    1. If API only in docs → Include with [DOCS_ONLY] tag
    2. If API only in code → Include with [UNDOCUMENTED] tag
    3. If both match perfectly → Include normally
    4. If conflict → Include both versions with [CONFLICT] tag, prefer code signature
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize rule-based merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # Conflict index keyed by API name for O(1) lookup during merging.
        self.conflict_index = {c.api_name: c for c in conflicts}

        # Reuse the detector's extraction so both sides see identical API sets.
        detector = ConflictDetector(docs_data, github_data)
        self.docs_apis = detector.docs_apis
        self.code_apis = detector.code_apis

        # Layers 3-4 are optional; both stay None when streams are absent.
        self.github_docs = None
        self.github_insights = None
        if github_streams:
            # Layer 3: GitHub docs
            if github_streams.docs_stream:
                self.github_docs = {
                    "readme": github_streams.docs_stream.readme,
                    "contributing": github_streams.docs_stream.contributing,
                    "docs_files": github_streams.docs_stream.docs_files,
                }

            # Layer 4: GitHub insights
            if github_streams.insights_stream:
                self.github_insights = {
                    "metadata": github_streams.insights_stream.metadata,
                    "common_problems": github_streams.insights_stream.common_problems,
                    "known_solutions": github_streams.insights_stream.known_solutions,
                    "top_labels": github_streams.insights_stream.top_labels,
                }

    def merge_all(self) -> dict[str, Any]:
        """
        Merge all APIs using rule-based logic with GitHub insights (Phase 3).

        Returns:
            Dict containing merged API data with hybrid content
        """
        logger.info("Starting rule-based merge with GitHub streams...")

        # Union of API names from both layers, merged in deterministic order.
        all_api_names = set(self.docs_apis.keys()) | set(self.code_apis.keys())
        merged_apis = {name: self._merge_single_api(name) for name in sorted(all_api_names)}

        logger.info(f"Merged {len(merged_apis)} APIs")

        # Single pass over statuses instead of four generator sweeps.
        status_counts: dict[str, int] = {}
        for api in merged_apis.values():
            status_counts[api["status"]] = status_counts.get(api["status"], 0) + 1

        merged_data = {
            "merge_mode": "rule-based",
            "apis": merged_apis,
            "summary": {
                "total_apis": len(merged_apis),
                "docs_only": status_counts.get("docs_only", 0),
                "code_only": status_counts.get("code_only", 0),
                "matched": status_counts.get("matched", 0),
                "conflict": status_counts.get("conflict", 0),
            },
        }

        # Generate hybrid content if GitHub streams available (Phase 3)
        if self.github_streams:
            logger.info("Generating hybrid content with GitHub insights...")
            hybrid_content = generate_hybrid_content(
                api_data=merged_data,
                github_docs=self.github_docs,
                github_insights=self.github_insights,
                conflicts=self.conflicts,
            )

            # Merge hybrid content into result
            merged_data["github_context"] = hybrid_content.get("github_context", {})
            merged_data["conflict_summary"] = hybrid_content.get("conflict_summary", {})
            merged_data["issue_links"] = hybrid_content.get("issue_links", {})

            # BUG FIX: github_streams may carry a docs stream but no insights
            # stream, leaving self.github_insights as None; guard before .get()
            # to avoid an AttributeError while logging.
            insights = self.github_insights or {}
            logger.info(
                f"Added GitHub context: {len(insights.get('common_problems', []))} problems, "
                f"{len(insights.get('known_solutions', []))} solutions"
            )

        return merged_data

    def _merge_single_api(self, api_name: str) -> dict[str, Any]:
        """
        Merge a single API using rules.

        Args:
            api_name: Name of the API to merge

        Returns:
            Merged API dict with a ``status`` of docs_only, code_only,
            matched, or conflict.
        """
        in_docs = api_name in self.docs_apis
        in_code = api_name in self.code_apis

        # Rule 1: documented but absent from code.
        if in_docs and not in_code:
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "docs_only",
                "source": "documentation",
                "data": self.docs_apis[api_name],
                "warning": "This API is documented but not found in codebase",
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Rule 2: in code but undocumented (private APIs get a softer warning).
        if in_code and not in_docs:
            is_private = api_name.startswith("_")
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "code_only",
                "source": "code",
                "data": self.code_apis[api_name],
                "warning": "This API exists in code but is not documented"
                if not is_private
                else "Internal/private API",
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Both sources have the API.
        docs_info = self.docs_apis[api_name]
        code_info = self.code_apis[api_name]

        # Rule 3: both agree (no recorded conflict).
        if api_name not in self.conflict_index:
            return {
                "name": api_name,
                "status": "matched",
                "source": "both",
                "docs_data": docs_info,
                "code_data": code_info,
                "merged_signature": self._create_merged_signature(code_info, docs_info),
                "merged_description": docs_info.get("docstring") or code_info.get("docstring"),
            }

        # Rule 4: conflict — prefer the code signature, keep the docs description.
        conflict = self.conflict_index[api_name]
        return {
            "name": api_name,
            "status": "conflict",
            "source": "both",
            "docs_data": docs_info,
            "code_data": code_info,
            "conflict": conflict.__dict__,
            "resolution": "prefer_code_signature",
            "merged_signature": self._create_merged_signature(code_info, docs_info),
            "merged_description": docs_info.get("docstring") or code_info.get("docstring"),
            "warning": conflict.difference,
        }

    def _create_merged_signature(self, code_info: dict, docs_info: dict) -> str:
        """
        Create merged signature preferring code data.

        Args:
            code_info: API info from code
            docs_info: API info from docs

        Returns:
            Merged signature string, e.g. ``name(a: int = 0) -> str``
        """
        name = code_info.get("name", docs_info.get("name"))
        params = code_info.get("parameters", docs_info.get("parameters", []))
        return_type = code_info.get("return_type", docs_info.get("return_type"))

        # Build parameter string
        param_strs = []
        for param in params:
            param_str = param["name"]
            if param.get("type_hint"):
                param_str += f": {param['type_hint']}"
            # BUG FIX: compare against None so falsy defaults (0, False, "")
            # are still rendered — assumes absent/None means "no default" in
            # the scraper schema (TODO confirm against the scrapers).
            if param.get("default") is not None:
                param_str += f" = {param['default']}"
            param_strs.append(param_str)

        signature = f"{name}({', '.join(param_strs)})"

        if return_type:
            signature += f" -> {return_type}"

        return signature
|
|
|
|
|
|
class ClaudeEnhancedMerger:
    """
    Claude-enhanced API merger using local Claude Code with GitHub insights.

    Opens Claude Code in a new terminal to intelligently reconcile conflicts.
    Uses the same approach as enhance_skill_local.py.

    Workflow: write context files (conflicts, both API sets, instructions)
    into a temp workspace, open an interactive terminal there, then poll up
    to one hour for the resulting ``merged_apis.json``. Any failure falls
    back to the deterministic RuleBasedMerger result.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize Claude-enhanced merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # First do rule-based merge as baseline (used as the fallback result
        # whenever the interactive enhancement fails or times out).
        self.rule_merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams)

    def merge_all(self) -> dict[str, Any]:
        """
        Merge all APIs using Claude enhancement.

        Falls back to the rule-based merge on any exception (including the
        one-hour timeout raised by _launch_claude_merge).

        Returns:
            Dict containing merged API data
        """
        logger.info("Starting Claude-enhanced merge...")

        # Create temporary workspace
        workspace_dir = self._create_workspace()

        # Launch Claude Code for enhancement
        logger.info("Launching Claude Code for intelligent merging...")
        logger.info("Claude will analyze conflicts and create reconciled API reference")

        try:
            self._launch_claude_merge(workspace_dir)

            # Read enhanced results
            merged_data = self._read_merged_results(workspace_dir)

            logger.info("Claude-enhanced merge complete")
            return merged_data

        except Exception as e:
            # Broad catch is deliberate: this is a best-effort enhancement and
            # the rule-based result is always a safe fallback.
            logger.error(f"Claude enhancement failed: {e}")
            logger.info("Falling back to rule-based merge")
            return self.rule_merger.merge_all()

    def _create_workspace(self) -> str:
        """
        Create temporary workspace with merge context.

        NOTE(review): the workspace is never cleaned up afterwards — it is
        left on disk so results can be inspected; confirm this is intended.

        Returns:
            Path to workspace directory
        """
        workspace = tempfile.mkdtemp(prefix="skill_merge_")
        logger.info(f"Created merge workspace: {workspace}")

        # Write context files for Claude
        self._write_context_files(workspace)

        return workspace

    def _write_context_files(self, workspace: str) -> None:
        """Write conflicts, both API sets, and merge instructions for Claude."""

        # 1. Write conflicts summary
        conflicts_file = os.path.join(workspace, "conflicts.json")
        with open(conflicts_file, "w") as f:
            json.dump(
                {
                    "conflicts": [c.__dict__ for c in self.conflicts],
                    "summary": {
                        "total": len(self.conflicts),
                        "by_type": self._count_by_field("type"),
                        "by_severity": self._count_by_field("severity"),
                    },
                },
                f,
                indent=2,
            )

        # 2. Write documentation APIs
        docs_apis_file = os.path.join(workspace, "docs_apis.json")
        detector = ConflictDetector(self.docs_data, self.github_data)
        with open(docs_apis_file, "w") as f:
            json.dump(detector.docs_apis, f, indent=2)

        # 3. Write code APIs
        code_apis_file = os.path.join(workspace, "code_apis.json")
        with open(code_apis_file, "w") as f:
            json.dump(detector.code_apis, f, indent=2)

        # 4. Write merge instructions for Claude
        instructions = """# API Merge Task

You are merging API documentation from two sources:
1. Official documentation (user-facing)
2. Source code analysis (implementation reality)

## Context Files:
- `conflicts.json` - All detected conflicts between sources
- `docs_apis.json` - APIs from documentation
- `code_apis.json` - APIs from source code

## Your Task:
For each conflict, reconcile the differences intelligently:

1. **Prefer code signatures as source of truth**
   - Use actual parameter names, types, defaults from code
   - Code is what actually runs, docs might be outdated

2. **Keep documentation descriptions**
   - Docs are user-friendly, code comments might be technical
   - Keep the docs' explanation of what the API does

3. **Add implementation notes for discrepancies**
   - If docs differ from code, explain the difference
   - Example: "⚠️ The `snap` parameter exists in code but is not documented"

4. **Flag missing APIs clearly**
   - Missing in docs → Add [UNDOCUMENTED] tag
   - Missing in code → Add [REMOVED] or [DOCS_ERROR] tag

5. **Create unified API reference**
   - One definitive signature per API
   - Clear warnings about conflicts
   - Implementation notes where helpful

## Output Format:
Create `merged_apis.json` with this structure:

```json
{
  "apis": {
    "API.name": {
      "signature": "final_signature_here",
      "parameters": [...],
      "return_type": "type",
      "description": "user-friendly description",
      "implementation_notes": "Any discrepancies or warnings",
      "source": "both|docs_only|code_only",
      "confidence": "high|medium|low"
    }
  }
}
```

Take your time to analyze each conflict carefully. The goal is to create the most accurate and helpful API reference possible.
"""

        instructions_file = os.path.join(workspace, "MERGE_INSTRUCTIONS.md")
        with open(instructions_file, "w") as f:
            f.write(instructions)

        logger.info(f"Wrote context files to {workspace}")

    def _count_by_field(self, field: str) -> dict[str, int]:
        """Count conflicts grouped by one Conflict attribute (e.g. type, severity)."""
        counts = {}
        for conflict in self.conflicts:
            value = getattr(conflict, field)
            counts[value] = counts.get(value, 0) + 1
        return counts

    def _launch_claude_merge(self, workspace: str) -> None:
        """
        Launch Claude Code to perform merge.

        Similar to enhance_skill_local.py approach.

        Writes an interactive bash script into the workspace, opens it in the
        first available terminal emulator, then polls for merged_apis.json.

        Raises:
            TimeoutError: if merged_apis.json has not appeared within 1 hour.
        """
        # Create a script that Claude will execute
        script_path = os.path.join(workspace, "merge_script.sh")

        script_content = f"""#!/bin/bash
# Automatic merge script for Claude Code

cd "{workspace}"

echo "📊 Analyzing conflicts..."
cat conflicts.json | head -20

echo ""
echo "📖 Documentation APIs: $(cat docs_apis.json | grep -c '\\"name\\"')"
echo "💻 Code APIs: $(cat code_apis.json | grep -c '\\"name\\"')"
echo ""
echo "Please review the conflicts and create merged_apis.json"
echo "Follow the instructions in MERGE_INSTRUCTIONS.md"
echo ""
echo "When done, save merged_apis.json and close this terminal."

# Wait for user to complete merge
read -p "Press Enter when merge is complete..."
"""

        with open(script_path, "w") as f:
            f.write(script_content)

        # Make the helper script executable for the spawned terminal.
        os.chmod(script_path, 0o755)

        # Open new terminal with Claude Code.
        # Try different terminal emulators in preference order.
        terminals = [
            ["x-terminal-emulator", "-e"],
            ["gnome-terminal", "--"],
            ["xterm", "-e"],
            ["konsole", "-e"],
        ]

        for terminal_cmd in terminals:
            try:
                cmd = terminal_cmd + ["bash", script_path]
                subprocess.Popen(cmd)
                logger.info(f"Opened terminal with {terminal_cmd[0]}")
                break
            except FileNotFoundError:
                # Emulator not installed; try the next candidate.
                continue
        # NOTE(review): if no emulator launched, the loop falls through
        # silently and the poll below will just time out after an hour —
        # consider raising immediately in that case.

        # Wait for merge to complete
        merged_file = os.path.join(workspace, "merged_apis.json")
        logger.info(f"Waiting for merged results at: {merged_file}")
        logger.info("Close the terminal when done to continue...")

        # Poll for file existence
        import time

        timeout = 3600  # 1 hour max
        elapsed = 0
        while not os.path.exists(merged_file) and elapsed < timeout:
            time.sleep(5)
            elapsed += 5

        if not os.path.exists(merged_file):
            raise TimeoutError("Claude merge timed out after 1 hour")

    def _read_merged_results(self, workspace: str) -> dict[str, Any]:
        """Read merged_apis.json from the workspace and tag it with the merge mode."""
        merged_file = os.path.join(workspace, "merged_apis.json")

        if not os.path.exists(merged_file):
            raise FileNotFoundError(f"Merged results not found: {merged_file}")

        with open(merged_file) as f:
            merged_data = json.load(f)

        return {"merge_mode": "claude-enhanced", **merged_data}
|
|
|
|
|
|
def merge_sources(
    docs_data_path: str,
    github_data_path: str,
    output_path: str,
    mode: str = "rule-based",
    github_streams: Optional["ThreeStreamData"] = None,
) -> dict[str, Any]:
    """
    Merge documentation and GitHub data with optional GitHub streams (Phase 3).

    Multi-layer architecture:
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING) - from github_streams
    - Layer 4: GitHub insights (issues) - from github_streams

    Args:
        docs_data_path: Path to documentation data JSON
        github_data_path: Path to GitHub data JSON
        output_path: Path to save merged output
        mode: 'rule-based' or 'claude-enhanced'
        github_streams: Optional ThreeStreamData with docs and insights

    Returns:
        Merged data dict with hybrid content
    """

    def _load(path: str) -> dict:
        # Small helper: read one JSON input file.
        with open(path) as fh:
            return json.load(fh)

    docs_data = _load(docs_data_path)
    github_data = _load(github_data_path)

    # Detect conflicts between the two primary layers.
    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    logger.info(f"Detected {len(conflicts)} conflicts")

    # Log GitHub streams availability.
    if github_streams:
        logger.info("GitHub streams available for multi-layer merge")
        docs_stream = github_streams.docs_stream
        if docs_stream:
            logger.info(
                f" - Docs stream: README, {len(docs_stream.docs_files)} docs files"
            )
        insights_stream = github_streams.insights_stream
        if insights_stream:
            problems = len(insights_stream.common_problems)
            solutions = len(insights_stream.known_solutions)
            logger.info(f" - Insights stream: {problems} problems, {solutions} solutions")

    # Pick the merger implementation; anything but claude-enhanced is rule-based.
    merger_cls = ClaudeEnhancedMerger if mode == "claude-enhanced" else RuleBasedMerger
    merged_data = merger_cls(docs_data, github_data, conflicts, github_streams).merge_all()

    # Persist the merged result.
    with open(output_path, "w") as fh:
        json.dump(merged_data, fh, indent=2, ensure_ascii=False)
    logger.info(f"Merged data saved to: {output_path}")

    return merged_data
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: merge two JSON inputs and print a summary.
    parser = argparse.ArgumentParser(description="Merge documentation and code sources")
    parser.add_argument("docs_data", help="Path to documentation data JSON")
    parser.add_argument("github_data", help="Path to GitHub data JSON")
    parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
    parser.add_argument(
        "--mode",
        "-m",
        choices=["rule-based", "claude-enhanced"],
        default="rule-based",
        help="Merge mode",
    )
    args = parser.parse_args()

    merged = merge_sources(args.docs_data, args.github_data, args.output, args.mode)

    # Print summary of the merge outcome.
    summary = merged.get("summary", {})
    print(f"\n✅ Merge complete ({merged.get('merge_mode')})")
    for label, key in [
        ("Total APIs", "total_apis"),
        ("Matched", "matched"),
        ("Docs only", "docs_only"),
        ("Code only", "code_only"),
        ("Conflicts", "conflict"),
    ]:
        print(f" {label}: {summary.get(key, 0)}")
    print(f"\n📄 Saved to: {args.output}")
|