#!/usr/bin/env python3 """ Router Skill Generator with GitHub Integration (Phase 4) Creates a router/hub skill that intelligently directs queries to specialized sub-skills. Integrates GitHub insights (issues, metadata) for enhanced topic detection and routing. Phase 4 enhancements: - Enhanced topic definition using GitHub issue labels - Router template with repository stats and top issues - Sub-skill templates with "Common Issues" section - GitHub issue links for context """ import argparse import json import sys from pathlib import Path from typing import Any, Optional # Import three-stream data classes (Phase 1) try: from .github_fetcher import DocsStream, InsightsStream, ThreeStreamData from .markdown_cleaner import MarkdownCleaner from .merge_sources import categorize_issues_by_topic except ImportError: # Fallback if github_fetcher not available ThreeStreamData = None DocsStream = None InsightsStream = None categorize_issues_by_topic = None class RouterGenerator: """Generates router skills that direct to specialized sub-skills with GitHub integration""" def __init__( self, config_paths: list[str], router_name: str = None, github_streams: Optional["ThreeStreamData"] = None, ): """ Initialize router generator with optional GitHub streams. Args: config_paths: Paths to sub-skill config files router_name: Optional router skill name github_streams: Optional ThreeStreamData with docs and insights """ self.config_paths = [Path(p) for p in config_paths] self.configs = [self.load_config(p) for p in self.config_paths] self.router_name = router_name or self.infer_router_name() self.base_config = self.configs[0] # Use first as template self.github_streams = github_streams # Extract GitHub data if available self.github_metadata = None self.github_docs = None self.github_issues = None if github_streams and github_streams.insights_stream: self.github_metadata = github_streams.insights_stream.metadata self.github_issues = { "common_problems": github_streams.insights_stream.common_problems, "known_solutions": github_streams.insights_stream.known_solutions, "top_labels": github_streams.insights_stream.top_labels, } if github_streams and github_streams.docs_stream: self.github_docs = { "readme": github_streams.docs_stream.readme, "contributing": github_streams.docs_stream.contributing, } def load_config(self, path: Path) -> dict[str, Any]: """Load a config file""" try: with open(path) as f: return json.load(f) except Exception as e: print(f"❌ Error loading {path}: {e}") sys.exit(1) def infer_router_name(self) -> str: """Infer router name from sub-skill names""" # Find common prefix names = [cfg["name"] for cfg in self.configs] if not names: return "router" # Get common prefix before first dash first_name = names[0] if "-" in first_name: return first_name.split("-")[0] return first_name def extract_routing_keywords(self) -> dict[str, list[str]]: """ Extract keywords for routing to each skill (Phase 4 enhanced). Enhancement: Weight GitHub issue labels 2x in topic scoring. Uses C3.x patterns, examples, and GitHub insights for better routing. """ routing = {} for config in self.configs: name = config["name"] keywords = [] # Extract from categories (base weight: 1x) if "categories" in config: keywords.extend(config["categories"].keys()) # Extract from name (part after dash) if "-" in name: skill_topic = name.split("-", 1)[1] keywords.append(skill_topic) # Phase 4: Add GitHub issue labels (weight 2x by including twice) if self.github_issues: # Get top labels related to this skill topic top_labels = self.github_issues.get("top_labels", []) skill_keywords = set(keywords) for label_info in top_labels[:10]: # Top 10 labels label = label_info["label"].lower() # Check if label relates to any skill keyword if any( keyword.lower() in label or label in keyword.lower() for keyword in skill_keywords ): # Add twice for 2x weight keywords.append(label) keywords.append(label) # NEW: Extract skill-specific labels from individual issues skill_keywords_set = set(keywords) skill_specific_labels = self._extract_skill_specific_labels(name, skill_keywords_set) for label in skill_specific_labels: keywords.append(label) keywords.append(label) # 2x weight routing[name] = keywords return routing def _extract_skill_specific_labels(self, _skill_name: str, skill_keywords: set) -> list[str]: """ Extract labels from GitHub issues that match this specific skill. Scans all common_problems and known_solutions for issues whose labels match the skill's keywords, then extracts ALL labels from those issues. This provides richer, skill-specific routing keywords. Args: skill_name: Name of the skill skill_keywords: Set of keywords already associated with the skill Returns: List of skill-specific labels (excluding generic ones) """ if not self.github_issues: return [] common_problems = self.github_issues.get("common_problems", []) known_solutions = self.github_issues.get("known_solutions", []) all_issues = common_problems + known_solutions matching_labels = set() for issue in all_issues: issue_labels = issue.get("labels", []) issue_labels_lower = [label.lower() for label in issue_labels] # Check if this issue relates to the skill has_match = any( keyword.lower() in label or label in keyword.lower() for keyword in skill_keywords for label in issue_labels_lower ) if has_match: # Add ALL labels from this matching issue for label in issue_labels_lower: # Skip generic labels that don't add routing value if label not in [ "bug", "enhancement", "question", "help wanted", "good first issue", "documentation", "duplicate", ]: matching_labels.add(label) return list(matching_labels) def _generate_frontmatter(self, _routing_keywords: dict[str, list[str]]) -> str: """ Generate YAML frontmatter compliant with agentskills.io spec. Required fields: - name: router name (1-64 chars, lowercase-hyphen) - description: when to use (1-1024 chars, keyword-rich) Optional fields: - license: MIT (from config or default) - compatibility: Python version, dependencies """ # Build comprehensive description from all sub-skills all_topics = [] for config in self.configs: desc = config.get("description", "") # Extract key topics from description (simple extraction) topics = [word.strip() for word in desc.split(",") if word.strip()] all_topics.extend(topics[:2]) # Max 2 topics per skill # Create keyword-rich description unique_topics = list(dict.fromkeys(all_topics))[:7] # Top 7 unique topics if unique_topics: topics_str = ", ".join(unique_topics) description = ( f"{self.router_name.title()} framework. Use when working with: {topics_str}" ) else: description = ( f"Use when working with {self.router_name.title()} development and programming" ) # Truncate to 200 chars for performance (agentskills.io recommendation) if len(description) > 200: description = description[:197] + "..." # Extract license and compatibility license_info = "MIT" compatibility = "See sub-skills for specific requirements" # Try to get language-specific compatibility if GitHub metadata available if self.github_metadata: language = self.github_metadata.get("language", "") compatibility_map = { "Python": f"Python 3.10+, requires {self.router_name} package", "JavaScript": f"Node.js 18+, requires {self.router_name} package", "TypeScript": f"Node.js 18+, TypeScript 5+, requires {self.router_name} package", "Go": f"Go 1.20+, requires {self.router_name} package", "Rust": f"Rust 1.70+, requires {self.router_name} package", "Java": f"Java 17+, requires {self.router_name} package", } if language in compatibility_map: compatibility = compatibility_map[language] # Try to extract license if isinstance(self.github_metadata.get("license"), dict): license_info = self.github_metadata["license"].get("name", "MIT") frontmatter = f"""--- name: {self.router_name} description: {description} license: {license_info} compatibility: {compatibility} ---""" return frontmatter def _extract_clean_readme_section(self, readme: str) -> str: """ Extract and clean README quick start section. Args: readme: Full README content Returns: Cleaned quick start section (HTML removed, properly truncated) """ cleaner = MarkdownCleaner() # Extract first meaningful section (1500 chars soft limit - extends for complete code blocks) quick_start = cleaner.extract_first_section(readme, max_chars=1500) # Additional validation if len(quick_start) < 50: # Too short, probably just title # Try to get more content quick_start = cleaner.extract_first_section(readme, max_chars=2000) return quick_start def _extract_topic_from_skill(self, skill_name: str) -> str: """ Extract readable topic from skill name. Examples: - "fastmcp-oauth" -> "OAuth authentication" - "react-hooks" -> "React hooks" - "django-orm" -> "Django ORM" Args: skill_name: Skill name (e.g., "fastmcp-oauth") Returns: Readable topic string """ # Remove router name prefix if skill_name.startswith(f"{self.router_name}-"): topic = skill_name[len(self.router_name) + 1 :] else: topic = skill_name # Capitalize and add context topic = topic.replace("-", " ").title() # Add common suffixes for context topic_map = { "oauth": "OAuth authentication", "auth": "authentication", "async": "async patterns", "api": "API integration", "orm": "ORM queries", "hooks": "hooks", "routing": "routing", "testing": "testing", "2d": "2D development", "3d": "3D development", "scripting": "scripting", "physics": "physics", } topic_lower = topic.lower() for key, value in topic_map.items(): if key in topic_lower: return value return topic def _generate_dynamic_examples(self, routing_keywords: dict[str, list[str]]) -> str: """ Generate examples dynamically from actual sub-skill names and keywords. Creates 2-3 realistic examples showing: 1. Single skill activation 2. Different skill activation 3. Complex query routing (if 2+ skills) Args: routing_keywords: Dictionary mapping skill names to keywords Returns: Formatted examples section """ examples = [] # Get list of sub-skills skill_names = list(routing_keywords.keys()) if len(skill_names) == 0: return "" # Example 1: Single skill activation (first sub-skill) if len(skill_names) >= 1: first_skill = skill_names[0] first_keywords = routing_keywords[first_skill][:2] # Top 2 keywords # Extract topic from skill name topic = self._extract_topic_from_skill(first_skill) keyword = first_keywords[0] if first_keywords else topic examples.append( f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill' ) # Example 2: Different skill (second sub-skill if available) if len(skill_names) >= 2: second_skill = skill_names[1] second_keywords = routing_keywords[second_skill][:2] topic = self._extract_topic_from_skill(second_skill) keyword = second_keywords[0] if second_keywords else topic examples.append( f'**Q:** "Working with {keyword} in {self.router_name.title()}"\n**A:** Activates {second_skill} skill' ) # Example 3: Multi-skill activation (if 2+ skills) if len(skill_names) >= 2: skill_1 = skill_names[0] skill_2 = skill_names[1] topic_1 = self._extract_topic_from_skill(skill_1) topic_2 = self._extract_topic_from_skill(skill_2) examples.append( f'**Q:** "Combining {topic_1} with {topic_2}"\n**A:** Activates {skill_1} + {skill_2} skills' ) return "\n\n".join(examples) def _generate_examples_from_github(self, routing_keywords: dict[str, list[str]]) -> str: """ Generate examples from real GitHub issue titles. Uses actual user questions from GitHub issues to create realistic examples. Matches issues to skills based on labels for relevance. Fallback to keyword-based examples if no GitHub data available. Args: routing_keywords: Dictionary mapping skill names to keywords Returns: Formatted examples section with real user questions """ if not self.github_issues: return self._generate_dynamic_examples(routing_keywords) examples = [] common_problems = self.github_issues.get("common_problems", []) if not common_problems: return self._generate_dynamic_examples(routing_keywords) # Match issues to skills based on labels (generate up to 3 examples) for skill_name, keywords in list(routing_keywords.items())[:3]: skill_keywords_lower = [k.lower() for k in keywords] matched_issue = None # Find first issue matching this skill's keywords for issue in common_problems: issue_labels = [label.lower() for label in issue.get("labels", [])] if any(label in skill_keywords_lower for label in issue_labels): matched_issue = issue common_problems.remove(issue) # Don't reuse same issue break if matched_issue: title = matched_issue.get("title", "") question = self._convert_issue_to_question(title) examples.append(f'**Q:** "{question}"\n**A:** Activates {skill_name} skill') else: # Fallback to keyword-based example for this skill topic = self._extract_topic_from_skill(skill_name) keyword = keywords[0] if keywords else topic examples.append( f'**Q:** "Working with {keyword} in {self.router_name.title()}"\n' f"**A:** Activates {skill_name} skill" ) return ( "\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords) ) def _convert_issue_to_question(self, issue_title: str) -> str: """ Convert GitHub issue title to natural question format. Examples: - "OAuth fails on redirect" → "How do I fix OAuth redirect failures?" - "ApiKey Header documentation" → "How do I use ApiKey Header?" - "Add WebSocket support" → "How do I handle WebSocket support?" Args: issue_title: Raw GitHub issue title Returns: Natural question format suitable for examples """ title_lower = issue_title.lower() # Pattern 1: Error/Failure issues if "fail" in title_lower or "error" in title_lower or "issue" in title_lower: cleaned = issue_title.replace(" fails", "").replace(" errors", "").replace(" issue", "") return f"How do I fix {cleaned.lower()}?" # Pattern 2: Documentation requests if "documentation" in title_lower or "docs" in title_lower: cleaned = issue_title.replace(" documentation", "").replace(" docs", "") return f"How do I use {cleaned.lower()}?" # Pattern 3: Feature requests if title_lower.startswith("add ") or title_lower.startswith("added "): feature = issue_title.replace("Add ", "").replace("Added ", "") return f"How do I implement {feature.lower()}?" # Default: Generic question return f"How do I handle {issue_title.lower()}?" def _extract_common_patterns(self) -> list[dict[str, str]]: """ Extract problem-solution patterns from closed GitHub issues. Analyzes closed issues (known_solutions) to identify common patterns that users encountered and resolved. These patterns are shown in the Common Patterns section of the router skill. Returns: List of pattern dicts with 'problem', 'solution', 'issue_number' """ if not self.github_issues: return [] known_solutions = self.github_issues.get("known_solutions", []) if not known_solutions: return [] patterns = [] # Top 5 closed issues with most engagement (comments indicate usefulness) top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[ :5 ] for issue in top_solutions: title = issue.get("title", "") number = issue.get("number", 0) problem, solution = self._parse_issue_pattern(title) patterns.append({"problem": problem, "solution": solution, "issue_number": number}) return patterns def _parse_issue_pattern(self, issue_title: str) -> tuple: """ Parse issue title to extract problem-solution pattern. Analyzes the structure of closed issue titles to infer the problem and solution pattern. Common patterns include fixes, additions, and resolutions. Examples: - "Fixed OAuth redirect" → ("OAuth redirect not working", "See fix implementation") - "Added API key support" → ("Missing API key support", "Use API key support feature") - "Resolved timeout errors" → ("Timeout errors issue", "See resolution approach") Args: issue_title: Title of closed GitHub issue Returns: Tuple of (problem_description, solution_hint) """ title_lower = issue_title.lower() # Pattern 1: "Fixed X" → "X not working" / "See fix" if title_lower.startswith("fixed ") or title_lower.startswith("fix "): problem_text = issue_title.replace("Fixed ", "").replace("Fix ", "") return (f"{problem_text} not working", "See fix implementation details") # Pattern 2: "Resolved X" → "X issue" / "See resolution" if title_lower.startswith("resolved ") or title_lower.startswith("resolve "): problem_text = issue_title.replace("Resolved ", "").replace("Resolve ", "") return (f"{problem_text} issue", "See resolution approach") # Pattern 3: "Added X" → "Missing X" / "Use X" if title_lower.startswith("added ") or title_lower.startswith("add "): feature_text = issue_title.replace("Added ", "").replace("Add ", "") return (f"Missing {feature_text}", f"Use {feature_text} feature") # Default: Use title as-is return (issue_title, "See issue for solution details") def _detect_framework(self) -> str | None: """ Detect framework from router name and GitHub metadata. Identifies common frameworks (fastapi, django, react, etc.) from router name or repository description. Used to provide framework-specific hello world templates when README lacks code examples. Returns: Framework identifier (e.g., 'fastapi', 'django') or None if unknown """ router_lower = self.router_name.lower() framework_keywords = { "fastapi": "fastapi", "django": "django", "flask": "flask", "react": "react", "vue": "vue", "express": "express", "fastmcp": "fastmcp", "mcp": "fastmcp", } # Check router name first for keyword, framework in framework_keywords.items(): if keyword in router_lower: return framework # Check GitHub description if available if self.github_metadata: description = self.github_metadata.get("description", "").lower() for keyword, framework in framework_keywords.items(): if keyword in description: return framework return None def _get_framework_hello_world(self, framework: str) -> str: """ Get framework-specific hello world template. Provides basic installation + hello world code for common frameworks. Used as fallback when README doesn't contain code examples. Args: framework: Framework identifier (e.g., 'fastapi', 'react') Returns: Formatted Quick Start section with install + hello world code """ templates = { "fastapi": """## Quick Start ```bash pip install fastapi uvicorn ``` ```python from fastapi import FastAPI app = FastAPI() @app.get("/") def read_root(): return {"Hello": "World"} # Run: uvicorn main:app --reload ``` """, "fastmcp": """## Quick Start ```bash pip install fastmcp ``` ```python from fastmcp import FastMCP mcp = FastMCP("My Server") @mcp.tool() def greet(name: str) -> str: return f"Hello, {name}!" ``` """, "django": """## Quick Start ```bash pip install django django-admin startproject mysite cd mysite python manage.py runserver ``` Visit http://127.0.0.1:8000/ to see your Django app. """, "react": """## Quick Start ```bash npx create-react-app my-app cd my-app npm start ``` ```jsx function App() { return