#!/usr/bin/env python3 """ Unified Skill Builder Generates final skill structure from merged multi-source data: - SKILL.md with merged APIs and conflict warnings - references/ with organized content by source - Inline conflict markers (โš ๏ธ) - Separate conflicts summary section Supports mixed sources (documentation, GitHub, PDF) and highlights discrepancies transparently. """ import json import logging import os import shutil from pathlib import Path logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class UnifiedSkillBuilder: """ Builds unified skill from multi-source data. """ def __init__( self, config: dict, scraped_data: dict, merged_data: dict | None = None, conflicts: list | None = None, cache_dir: str | None = None, ): """ Initialize skill builder. Args: config: Unified config dict scraped_data: Dict of scraped data by source type merged_data: Merged API data (if conflicts were resolved) conflicts: List of detected conflicts cache_dir: Optional cache directory for intermediate files """ self.config = config self.scraped_data = scraped_data self.merged_data = merged_data self.conflicts = conflicts or [] self.cache_dir = cache_dir self.name = config["name"] self.description = config["description"] self.skill_dir = f"output/{self.name}" # Create directories os.makedirs(self.skill_dir, exist_ok=True) os.makedirs(f"{self.skill_dir}/references", exist_ok=True) os.makedirs(f"{self.skill_dir}/scripts", exist_ok=True) os.makedirs(f"{self.skill_dir}/assets", exist_ok=True) def build(self): """Build complete skill structure.""" logger.info(f"Building unified skill: {self.name}") # Generate main SKILL.md self._generate_skill_md() # Generate reference files by source self._generate_references() # Generate conflicts report (if any) if self.conflicts: self._generate_conflicts_report() logger.info(f"โœ… Unified skill built: {self.skill_dir}/") def _load_source_skill_mds(self) -> dict[str, str]: """Load standalone SKILL.md files from each source. Returns: Dict mapping source type to SKILL.md content e.g., {'documentation': '...', 'github': '...', 'pdf': '...'} """ skill_mds = {} # Determine base directory for source SKILL.md files sources_dir = Path(self.cache_dir) / "sources" if self.cache_dir else Path("output") # Load documentation SKILL.md docs_skill_path = sources_dir / f"{self.name}_docs" / "SKILL.md" if docs_skill_path.exists(): try: skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8") logger.debug( f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)" ) except OSError as e: logger.warning(f"Failed to read documentation SKILL.md: {e}") # Load ALL GitHub sources (multi-source support) github_sources = [] for github_dir in sources_dir.glob(f"{self.name}_github_*"): github_skill_path = github_dir / "SKILL.md" if github_skill_path.exists(): try: content = github_skill_path.read_text(encoding="utf-8") github_sources.append(content) logger.debug( f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)" ) except OSError as e: logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}") if github_sources: # Concatenate all GitHub sources with separator skill_mds["github"] = "\n\n---\n\n".join(github_sources) logger.debug(f"Combined {len(github_sources)} GitHub SKILL.md files") # Load ALL PDF sources (multi-source support) pdf_sources = [] for pdf_dir in sources_dir.glob(f"{self.name}_pdf_*"): pdf_skill_path = pdf_dir / "SKILL.md" if pdf_skill_path.exists(): try: content = pdf_skill_path.read_text(encoding="utf-8") pdf_sources.append(content) logger.debug(f"Loaded PDF SKILL.md from {pdf_dir.name} ({len(content)} chars)") except OSError as e: logger.warning(f"Failed to read PDF SKILL.md from {pdf_dir.name}: {e}") if pdf_sources: # Concatenate all PDF sources with separator skill_mds["pdf"] = "\n\n---\n\n".join(pdf_sources) logger.debug(f"Combined {len(pdf_sources)} PDF SKILL.md files") logger.info(f"Loaded {len(skill_mds)} source SKILL.md files") return skill_mds def _parse_skill_md_sections(self, skill_md: str) -> dict[str, str]: """Parse SKILL.md into sections by ## headers. Args: skill_md: Full SKILL.md content Returns: Dict mapping section name to content e.g., {'When to Use': '...', 'Quick Reference': '...'} """ sections = {} current_section = None current_content = [] lines = skill_md.split("\n") for line in lines: # Detect section header (## Header) if line.startswith("## "): # Save previous section if current_section: sections[current_section] = "\n".join(current_content).strip() # Start new section current_section = line[3:].strip() # Remove emoji and markdown formatting current_section = current_section.split("](")[0] # Remove links for emoji in [ "๐Ÿ“š", "๐Ÿ—๏ธ", "โš ๏ธ", "๐Ÿ”ง", "๐Ÿ“–", "๐Ÿ’ก", "๐ŸŽฏ", "๐Ÿ“Š", "๐Ÿ”", "โš™๏ธ", "๐Ÿงช", "๐Ÿ“", "๐Ÿ—‚๏ธ", "๐Ÿ“", "โšก", ]: current_section = current_section.replace(emoji, "").strip() current_content = [] elif current_section: # Accumulate content for current section current_content.append(line) # Save last section if current_section and current_content: sections[current_section] = "\n".join(current_content).strip() logger.debug(f"Parsed {len(sections)} sections from SKILL.md") return sections def _synthesize_docs_github(self, skill_mds: dict[str, str]) -> str: """Synthesize documentation + GitHub sources with weighted merge. Strategy: - Start with docs frontmatter and intro - Add GitHub metadata (stars, topics, language stats) - Merge "When to Use" from both sources - Merge "Quick Reference" from both sources - Include GitHub-specific sections (patterns, architecture) - Merge code examples (prioritize GitHub real usage) - Include Known Issues from GitHub - Fix placeholder text (httpx_docs โ†’ httpx) Args: skill_mds: Dict with 'documentation' and 'github' keys Returns: Synthesized SKILL.md content """ docs_sections = self._parse_skill_md_sections(skill_mds.get("documentation", "")) github_sections = self._parse_skill_md_sections(skill_mds.get("github", "")) # Extract GitHub metadata from full content _github_full = skill_mds.get("github", "") # Start with YAML frontmatter skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64] desc = self.description[:1024] if len(self.description) > 1024 else self.description content = f"""--- name: {skill_name} description: {desc} --- # {self.name.title()} {self.description} ## ๐Ÿ“š Sources This skill synthesizes knowledge from multiple sources: - โœ… **Official Documentation**: {self.config.get("sources", [{}])[0].get("base_url", "N/A")} - โœ… **GitHub Repository**: {[s for s in self.config.get("sources", []) if s.get("type") == "github"][0].get("repo", "N/A") if [s for s in self.config.get("sources", []) if s.get("type") == "github"] else "N/A"} """ # Add GitHub Description and Metadata if present if "Description" in github_sections: content += "## ๐Ÿ“ฆ About\n\n" content += github_sections["Description"] + "\n\n" # Add Repository Info from GitHub if "Repository Info" in github_sections: content += "### Repository Info\n\n" content += github_sections["Repository Info"] + "\n\n" # Add Language stats from GitHub if "Languages" in github_sections: content += "### Languages\n\n" content += github_sections["Languages"] + "\n\n" content += "## ๐Ÿ’ก When to Use This Skill\n\n" # Merge "When to Use" sections - Fix placeholder text when_to_use_added = False for key in ["When to Use This Skill", "When to Use"]: if key in docs_sections: # Fix placeholder text: httpx_docs โ†’ httpx when_content = docs_sections[key].replace("httpx_docs", self.name) when_content = when_content.replace("httpx_github", self.name) content += when_content + "\n\n" when_to_use_added = True break if "When to Use This Skill" in github_sections: if when_to_use_added: content += "**From repository analysis:**\n\n" content += github_sections["When to Use This Skill"] + "\n\n" # Quick Reference: Merge from both sources content += "## ๐ŸŽฏ Quick Reference\n\n" if "Quick Reference" in docs_sections: content += "**From Documentation:**\n\n" content += docs_sections["Quick Reference"] + "\n\n" if "Quick Reference" in github_sections: # Include GitHub's Quick Reference (contains design patterns summary) logger.info( f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)" ) content += github_sections["Quick Reference"] + "\n\n" else: logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!") # Design Patterns (GitHub only - C3.1 analysis) if "Design Patterns Detected" in github_sections: content += "### Design Patterns Detected\n\n" content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n" content += github_sections["Design Patterns Detected"] + "\n\n" # Code Examples: Prefer GitHub (real usage) content += "## ๐Ÿงช Code Examples\n\n" if "Code Examples" in github_sections: content += "**From Repository Tests:**\n\n" # Note: GitHub section already includes "*High-quality examples from codebase (C3.2)*" label content += github_sections["Code Examples"] + "\n\n" elif "Usage Examples" in github_sections: content += "**From Repository:**\n\n" content += github_sections["Usage Examples"] + "\n\n" if "Example Code Patterns" in docs_sections: content += "**From Documentation:**\n\n" content += docs_sections["Example Code Patterns"] + "\n\n" # API Reference: Include from both sources if "API Reference" in docs_sections or "API Reference" in github_sections: content += "## ๐Ÿ”ง API Reference\n\n" if "API Reference" in github_sections: # Note: GitHub section already includes "*Extracted from codebase analysis (C2.5)*" label content += github_sections["API Reference"] + "\n\n" if "API Reference" in docs_sections: content += "**Official API Documentation:**\n\n" content += docs_sections["API Reference"] + "\n\n" # Known Issues: GitHub only if "Known Issues" in github_sections: content += "## โš ๏ธ Known Issues\n\n" content += "*Recent issues from GitHub*\n\n" content += github_sections["Known Issues"] + "\n\n" # Recent Releases: GitHub only (include subsection if present) if "Recent Releases" in github_sections: # Recent Releases might be a subsection within Known Issues # Check if it's standalone releases_content = github_sections["Recent Releases"] if releases_content.strip() and not releases_content.startswith("###"): content += "### Recent Releases\n" content += releases_content + "\n\n" # Reference documentation content += "## ๐Ÿ“– Reference Documentation\n\n" content += "Organized by source:\n\n" content += "- [Documentation](references/documentation/)\n" content += "- [GitHub](references/github/)\n" content += "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n\n" # Footer content += "---\n\n" content += ( "*Synthesized from official documentation and codebase analysis by Skill Seekers*\n" ) return content def _synthesize_docs_github_pdf(self, skill_mds: dict[str, str]) -> str: """Synthesize all three sources: documentation + GitHub + PDF. Strategy: - Start with docs+github synthesis - Insert PDF chapters after Quick Reference - Add PDF key concepts as supplementary section Args: skill_mds: Dict with 'documentation', 'github', and 'pdf' keys Returns: Synthesized SKILL.md content """ # Start with docs+github synthesis base_content = self._synthesize_docs_github(skill_mds) pdf_sections = self._parse_skill_md_sections(skill_mds.get("pdf", "")) # Find insertion point after Quick Reference lines = base_content.split("\n") insertion_index = -1 for i, line in enumerate(lines): if line.startswith("## ๐Ÿงช Code Examples") or line.startswith("## ๐Ÿ”ง API Reference"): insertion_index = i break if insertion_index == -1: # Fallback: insert before Reference Documentation for i, line in enumerate(lines): if line.startswith("## ๐Ÿ“– Reference Documentation"): insertion_index = i break # Build PDF section pdf_content_lines = [] # Add Chapter Overview if "Chapter Overview" in pdf_sections: pdf_content_lines.append("## ๐Ÿ“š PDF Documentation Structure\n") pdf_content_lines.append("*From PDF analysis*\n") pdf_content_lines.append(pdf_sections["Chapter Overview"]) pdf_content_lines.append("\n") # Add Key Concepts if "Key Concepts" in pdf_sections: pdf_content_lines.append("## ๐Ÿ” Key Concepts\n") pdf_content_lines.append("*Extracted from PDF headings*\n") pdf_content_lines.append(pdf_sections["Key Concepts"]) pdf_content_lines.append("\n") # Insert PDF content if pdf_content_lines and insertion_index != -1: lines[insertion_index:insertion_index] = pdf_content_lines elif pdf_content_lines: # Append at end before footer footer_index = -1 for i, line in enumerate(lines): if line.startswith("---") and i > len(lines) - 5: footer_index = i break if footer_index != -1: lines[footer_index:footer_index] = pdf_content_lines # Update reference documentation to include PDF final_content = "\n".join(lines) final_content = final_content.replace( "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n", "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n- [PDF Documentation](references/pdf/)\n", ) return final_content def _generate_skill_md(self): """Generate main SKILL.md file using synthesis formulas. Strategy: 1. Try to load standalone SKILL.md from each source 2. If found, use synthesis formulas for rich content 3. If not found, fall back to legacy minimal generation """ skill_path = os.path.join(self.skill_dir, "SKILL.md") # Try to load source SKILL.md files skill_mds = self._load_source_skill_mds() # Determine synthesis strategy based on available sources has_docs = "documentation" in skill_mds has_github = "github" in skill_mds has_pdf = "pdf" in skill_mds content = None # Apply appropriate synthesis formula if has_docs and has_github and has_pdf: logger.info("Synthesizing: documentation + GitHub + PDF") content = self._synthesize_docs_github_pdf(skill_mds) elif has_docs and has_github: logger.info("Synthesizing: documentation + GitHub") content = self._synthesize_docs_github(skill_mds) elif has_docs and has_pdf: logger.info("Synthesizing: documentation + PDF") content = self._synthesize_docs_pdf(skill_mds) elif has_github and has_pdf: logger.info("Synthesizing: GitHub + PDF") content = self._synthesize_github_pdf(skill_mds) elif has_docs: logger.info("Using documentation SKILL.md as-is") content = skill_mds["documentation"] elif has_github: logger.info("Using GitHub SKILL.md as-is") content = skill_mds["github"] elif has_pdf: logger.info("Using PDF SKILL.md as-is") content = skill_mds["pdf"] # Fallback: generate minimal SKILL.md (legacy behavior) if not content: logger.warning("No source SKILL.md files found, generating minimal SKILL.md (legacy)") content = self._generate_minimal_skill_md() # Write final content with open(skill_path, "w", encoding="utf-8") as f: f.write(content) logger.info(f"Created SKILL.md ({len(content)} chars, ~{len(content.split())} words)") def _synthesize_docs_pdf(self, skill_mds: dict[str, str]) -> str: """Synthesize documentation + PDF sources. Strategy: - Start with docs SKILL.md - Insert PDF chapters and key concepts as supplementary sections Args: skill_mds: Dict with 'documentation' and 'pdf' keys Returns: Synthesized SKILL.md content """ docs_content = skill_mds["documentation"] pdf_sections = self._parse_skill_md_sections(skill_mds["pdf"]) lines = docs_content.split("\n") insertion_index = -1 # Find insertion point before Reference Documentation for i, line in enumerate(lines): if line.startswith("## ๐Ÿ“– Reference") or line.startswith("## Reference"): insertion_index = i break # Build PDF sections pdf_content_lines = [] if "Chapter Overview" in pdf_sections: pdf_content_lines.append("## ๐Ÿ“š PDF Documentation Structure\n") pdf_content_lines.append("*From PDF analysis*\n") pdf_content_lines.append(pdf_sections["Chapter Overview"]) pdf_content_lines.append("\n") if "Key Concepts" in pdf_sections: pdf_content_lines.append("## ๐Ÿ” Key Concepts\n") pdf_content_lines.append("*Extracted from PDF headings*\n") pdf_content_lines.append(pdf_sections["Key Concepts"]) pdf_content_lines.append("\n") # Insert PDF content if pdf_content_lines and insertion_index != -1: lines[insertion_index:insertion_index] = pdf_content_lines return "\n".join(lines) def _synthesize_github_pdf(self, skill_mds: dict[str, str]) -> str: """Synthesize GitHub + PDF sources. Strategy: - Start with GitHub SKILL.md (has C3.x analysis) - Add PDF documentation structure as supplementary section Args: skill_mds: Dict with 'github' and 'pdf' keys Returns: Synthesized SKILL.md content """ github_content = skill_mds["github"] pdf_sections = self._parse_skill_md_sections(skill_mds["pdf"]) lines = github_content.split("\n") insertion_index = -1 # Find insertion point before Reference Documentation for i, line in enumerate(lines): if line.startswith("## ๐Ÿ“– Reference") or line.startswith("## Reference"): insertion_index = i break # Build PDF sections pdf_content_lines = [] if "Chapter Overview" in pdf_sections: pdf_content_lines.append("## ๐Ÿ“š PDF Documentation Structure\n") pdf_content_lines.append("*From PDF analysis*\n") pdf_content_lines.append(pdf_sections["Chapter Overview"]) pdf_content_lines.append("\n") # Insert PDF content if pdf_content_lines and insertion_index != -1: lines[insertion_index:insertion_index] = pdf_content_lines return "\n".join(lines) def _generate_minimal_skill_md(self) -> str: """Generate minimal SKILL.md (legacy fallback behavior). Used when no source SKILL.md files are available. """ skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64] desc = self.description[:1024] if len(self.description) > 1024 else self.description content = f"""--- name: {skill_name} description: {desc} --- # {self.name.title()} {self.description} ## ๐Ÿ“š Sources This skill combines knowledge from multiple sources: """ # List sources for source in self.config.get("sources", []): source_type = source["type"] if source_type == "documentation": content += f"- โœ… **Documentation**: {source.get('base_url', 'N/A')}\n" content += f" - Pages: {source.get('max_pages', 'unlimited')}\n" elif source_type == "github": content += f"- โœ… **GitHub Repository**: {source.get('repo', 'N/A')}\n" content += f" - Code Analysis: {source.get('code_analysis_depth', 'surface')}\n" content += f" - Issues: {source.get('max_issues', 0)}\n" elif source_type == "pdf": content += f"- โœ… **PDF Document**: {source.get('path', 'N/A')}\n" # C3.x Architecture & Code Analysis section (if available) github_data = self.scraped_data.get("github", {}) # Handle both dict and list cases if isinstance(github_data, dict): github_data = github_data.get("data", {}) elif isinstance(github_data, list) and len(github_data) > 0: github_data = github_data[0].get("data", {}) else: github_data = {} if github_data.get("c3_analysis"): content += self._format_c3_summary_section(github_data["c3_analysis"]) # Data quality section if self.conflicts: content += "\n## โš ๏ธ Data Quality\n\n" content += f"**{len(self.conflicts)} conflicts detected** between sources.\n\n" # Count by type by_type = {} for conflict in self.conflicts: ctype = ( conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown") ) by_type[ctype] = by_type.get(ctype, 0) + 1 content += "**Conflict Breakdown:**\n" for ctype, count in by_type.items(): content += f"- {ctype}: {count}\n" content += "\nSee `references/conflicts.md` for detailed conflict information.\n" # Merged API section (if available) if self.merged_data: content += self._format_merged_apis() # Quick reference from each source content += "\n## ๐Ÿ“– Reference Documentation\n\n" content += "Organized by source:\n\n" for source in self.config.get("sources", []): source_type = source["type"] content += f"- [{source_type.title()}](references/{source_type}/)\n" # When to use this skill content += "\n## ๐Ÿ’ก When to Use This Skill\n\n" content += "Use this skill when you need to:\n" content += f"- Understand how to use {self.name}\n" content += "- Look up API documentation\n" content += "- Find usage examples\n" if "github" in self.scraped_data: content += "- Check for known issues or recent changes\n" content += "- Review release history\n" content += "\n---\n\n" content += "*Generated by Skill Seeker's unified multi-source scraper*\n" return content def _format_merged_apis(self) -> str: """Format merged APIs section with inline conflict warnings.""" if not self.merged_data: return "" content = "\n## ๐Ÿ”ง API Reference\n\n" content += "*Merged from documentation and code analysis*\n\n" apis = self.merged_data.get("apis", {}) if not apis: return content + "*No APIs to display*\n" # Group APIs by status matched = {k: v for k, v in apis.items() if v.get("status") == "matched"} conflicts = {k: v for k, v in apis.items() if v.get("status") == "conflict"} docs_only = {k: v for k, v in apis.items() if v.get("status") == "docs_only"} code_only = {k: v for k, v in apis.items() if v.get("status") == "code_only"} # Show matched APIs first if matched: content += "### โœ… Verified APIs\n\n" content += "*Documentation and code agree*\n\n" for _api_name, api_data in list(matched.items())[:10]: # Limit to first 10 content += self._format_api_entry(api_data, inline_conflict=False) # Show conflicting APIs with warnings if conflicts: content += "\n### โš ๏ธ APIs with Conflicts\n\n" content += "*Documentation and code differ*\n\n" for _api_name, api_data in list(conflicts.items())[:10]: content += self._format_api_entry(api_data, inline_conflict=True) # Show undocumented APIs if code_only: content += "\n### ๐Ÿ’ป Undocumented APIs\n\n" content += f"*Found in code but not in documentation ({len(code_only)} total)*\n\n" for _api_name, api_data in list(code_only.items())[:5]: content += self._format_api_entry(api_data, inline_conflict=False) # Show removed/missing APIs if docs_only: content += "\n### ๐Ÿ“– Documentation-Only APIs\n\n" content += f"*Documented but not found in code ({len(docs_only)} total)*\n\n" for _api_name, api_data in list(docs_only.items())[:5]: content += self._format_api_entry(api_data, inline_conflict=False) content += "\n*See references/api/ for complete API documentation*\n" return content def _format_api_entry(self, api_data: dict, inline_conflict: bool = False) -> str: """Format a single API entry.""" name = api_data.get("name", "Unknown") signature = api_data.get("merged_signature", name) description = api_data.get("merged_description", "") warning = api_data.get("warning", "") entry = f"#### `{signature}`\n\n" if description: entry += f"{description}\n\n" # Add inline conflict warning if inline_conflict and warning: entry += f"โš ๏ธ **Conflict**: {warning}\n\n" # Show both versions if available conflict = api_data.get("conflict", {}) if conflict: docs_info = conflict.get("docs_info") code_info = conflict.get("code_info") if docs_info and code_info: entry += "**Documentation says:**\n" entry += f"```\n{docs_info.get('raw_signature', 'N/A')}\n```\n\n" entry += "**Code implementation:**\n" entry += f"```\n{self._format_code_signature(code_info)}\n```\n\n" # Add source info source = api_data.get("source", "unknown") entry += f"*Source: {source}*\n\n" entry += "---\n\n" return entry def _format_code_signature(self, code_info: dict) -> str: """Format code signature for display.""" name = code_info.get("name", "") params = code_info.get("parameters", []) return_type = code_info.get("return_type") param_strs = [] for param in params: param_str = param.get("name", "") if param.get("type_hint"): param_str += f": {param['type_hint']}" if param.get("default"): param_str += f" = {param['default']}" param_strs.append(param_str) sig = f"{name}({', '.join(param_strs)})" if return_type: sig += f" -> {return_type}" return sig def _generate_references(self): """Generate reference files organized by source.""" logger.info("Generating reference files...") # Generate references for each source type (now lists) docs_list = self.scraped_data.get("documentation", []) if docs_list: self._generate_docs_references(docs_list) github_list = self.scraped_data.get("github", []) if github_list: self._generate_github_references(github_list) pdf_list = self.scraped_data.get("pdf", []) if pdf_list: self._generate_pdf_references(pdf_list) # Generate merged API reference if available if self.merged_data: self._generate_merged_api_reference() # Generate C3.x codebase analysis references if available (multi-source) github_list = self.scraped_data.get("github", []) for github_source in github_list: github_data = github_source.get("data", {}) if github_data.get("c3_analysis"): repo_id = github_source.get("repo_id", "unknown") self._generate_c3_analysis_references(repo_id=repo_id) def _generate_docs_references(self, docs_list: list[dict]): """Generate references from multiple documentation sources.""" # Skip if no documentation sources if not docs_list: return docs_dir = os.path.join(self.skill_dir, "references", "documentation") os.makedirs(docs_dir, exist_ok=True) all_copied_files: list[str] = [] # Process each documentation source for i, doc_source in enumerate(docs_list): source_id = doc_source.get("source_id", f"source_{i}") base_url = doc_source.get("base_url", "Unknown") refs_dir = doc_source.get("refs_dir", "") # Create subdirectory for this source source_dir = os.path.join(docs_dir, source_id) os.makedirs(source_dir, exist_ok=True) copied_files: list[str] = [] if refs_dir and os.path.isdir(refs_dir): for entry in sorted(os.listdir(refs_dir)): src_path = os.path.join(refs_dir, entry) dst_path = os.path.join(source_dir, entry) if not os.path.isfile(src_path): continue shutil.copy2(src_path, dst_path) copied_files.append(entry) # Create index for this source source_index_path = os.path.join(source_dir, "index.md") with open(source_index_path, "w", encoding="utf-8") as f: f.write(f"# Documentation: {source_id}\n\n") f.write(f"**Source**: {base_url}\n\n") f.write(f"**Pages**: {doc_source.get('total_pages', 'N/A')}\n\n") if copied_files: files_no_index = [p for p in copied_files if p.lower() != "index.md"] f.write("## Files\n\n") for filename in files_no_index: f.write(f"- [{filename}]({filename})\n") else: f.write("No reference files available.\n") all_copied_files.extend(copied_files) # Create main index index_path = os.path.join(docs_dir, "index.md") with open(index_path, "w", encoding="utf-8") as f: f.write("# Documentation References\n\n") f.write(f"Combined from {len(docs_list)} documentation sources.\n\n") f.write("## Sources\n\n") for doc_source in docs_list: source_id = doc_source.get("source_id", "unknown") base_url = doc_source.get("base_url", "Unknown") total_pages = doc_source.get("total_pages", "N/A") f.write( f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n" ) logger.info(f"Created documentation references ({len(docs_list)} sources)") def _generate_github_references(self, github_list: list[dict]): """Generate references from multiple GitHub sources.""" # Skip if no GitHub sources if not github_list: return github_dir = os.path.join(self.skill_dir, "references", "github") os.makedirs(github_dir, exist_ok=True) # Process each GitHub source for i, github_source in enumerate(github_list): repo = github_source.get("repo", f"repo_{i}") repo_id = github_source.get("repo_id", repo.replace("/", "_")) github_data = github_source.get("data", {}) # Create subdirectory for this repo repo_dir = os.path.join(github_dir, repo_id) os.makedirs(repo_dir, exist_ok=True) # Create README reference if github_data.get("readme"): readme_path = os.path.join(repo_dir, "README.md") with open(readme_path, "w", encoding="utf-8") as f: f.write(f"# Repository README: {repo}\n\n") f.write(github_data["readme"]) # Create issues reference if github_data.get("issues"): issues_path = os.path.join(repo_dir, "issues.md") with open(issues_path, "w", encoding="utf-8") as f: f.write(f"# GitHub Issues: {repo}\n\n") f.write(f"{len(github_data['issues'])} recent issues.\n\n") for issue in github_data["issues"][:20]: f.write(f"## #{issue['number']}: {issue['title']}\n\n") f.write(f"**State**: {issue['state']}\n") if issue.get("labels"): f.write(f"**Labels**: {', '.join(issue['labels'])}\n") f.write(f"**URL**: {issue.get('url', 'N/A')}\n\n") # Create releases reference if github_data.get("releases"): releases_path = os.path.join(repo_dir, "releases.md") with open(releases_path, "w", encoding="utf-8") as f: f.write(f"# Releases: {repo}\n\n") for release in github_data["releases"][:10]: f.write(f"## {release['tag_name']}: {release.get('name', 'N/A')}\n\n") f.write(f"**Published**: {release.get('published_at', 'N/A')[:10]}\n\n") if release.get("body"): f.write(release["body"][:500]) f.write("\n\n") # Create index for this repo repo_index_path = os.path.join(repo_dir, "index.md") repo_info = github_data.get("repo_info", {}) with open(repo_index_path, "w", encoding="utf-8") as f: f.write(f"# GitHub: {repo}\n\n") f.write(f"**Stars**: {repo_info.get('stars', 'N/A')}\n") f.write(f"**Language**: {repo_info.get('language', 'N/A')}\n") f.write(f"**Issues**: {len(github_data.get('issues', []))}\n") f.write(f"**Releases**: {len(github_data.get('releases', []))}\n\n") f.write("## Files\n\n") f.write("- [README.md](README.md)\n") if github_data.get("issues"): f.write("- [issues.md](issues.md)\n") if github_data.get("releases"): f.write("- [releases.md](releases.md)\n") # Create main index index_path = os.path.join(github_dir, "index.md") with open(index_path, "w", encoding="utf-8") as f: f.write("# GitHub References\n\n") f.write(f"Combined from {len(github_list)} GitHub repositories.\n\n") f.write("## Repositories\n\n") for github_source in github_list: repo = github_source.get("repo", "unknown") repo_id = github_source.get("repo_id", repo.replace("/", "_")) github_data = github_source.get("data", {}) repo_info = github_data.get("repo_info", {}) stars = repo_info.get("stars", "N/A") f.write(f"- [{repo}]({repo_id}/index.md) - {stars} stars\n") logger.info(f"Created GitHub references ({len(github_list)} repos)") def _generate_pdf_references(self, pdf_list: list[dict]): """Generate references from PDF sources.""" # Skip if no PDF sources if not pdf_list: return pdf_dir = os.path.join(self.skill_dir, "references", "pdf") os.makedirs(pdf_dir, exist_ok=True) # Create index index_path = os.path.join(pdf_dir, "index.md") with open(index_path, "w", encoding="utf-8") as f: f.write("# PDF Documentation\n\n") f.write(f"Reference from {len(pdf_list)} PDF document(s).\n\n") logger.info(f"Created PDF references ({len(pdf_list)} sources)") def _generate_merged_api_reference(self): """Generate merged API reference file.""" api_dir = os.path.join(self.skill_dir, "references", "api") os.makedirs(api_dir, exist_ok=True) api_path = os.path.join(api_dir, "merged_api.md") with open(api_path, "w") as f: f.write("# Merged API Reference\n\n") f.write("*Combined from documentation and code analysis*\n\n") apis = self.merged_data.get("apis", {}) for api_name in sorted(apis.keys()): api_data = apis[api_name] entry = self._format_api_entry(api_data, inline_conflict=True) f.write(entry) logger.info(f"Created merged API reference ({len(apis)} APIs)") def _generate_c3_analysis_references(self, repo_id: str = "github"): """Generate codebase analysis references (C3.5) for a specific GitHub source. Args: repo_id: Repository identifier (e.g., 'encode_httpx') for multi-source support """ # Find the correct github_source from the list github_list = self.scraped_data.get("github", []) github_source = None for source in github_list: if source.get("repo_id") == repo_id: github_source = source break if not github_source: logger.warning(f"GitHub source with repo_id '{repo_id}' not found") return github_data = github_source.get("data", {}) c3_data = github_data.get("c3_analysis") if not c3_data: return # Create unique directory per repo for multi-source support c3_dir = os.path.join(self.skill_dir, "references", "codebase_analysis", repo_id) os.makedirs(c3_dir, exist_ok=True) logger.info("Generating C3.x codebase analysis references...") # Generate ARCHITECTURE.md (main deliverable) self._generate_architecture_overview(c3_dir, c3_data, github_data) # Generate subdirectories for each C3.x component self._generate_pattern_references(c3_dir, c3_data.get("patterns")) self._generate_example_references(c3_dir, c3_data.get("test_examples")) self._generate_guide_references(c3_dir, c3_data.get("how_to_guides")) self._generate_config_references(c3_dir, c3_data.get("config_patterns")) self._copy_architecture_details(c3_dir, c3_data.get("architecture")) logger.info("โœ… Created codebase analysis references") def _generate_architecture_overview(self, c3_dir: str, c3_data: dict, github_data: dict): """Generate comprehensive ARCHITECTURE.md (C3.5 main deliverable).""" arch_path = os.path.join(c3_dir, "ARCHITECTURE.md") with open(arch_path, "w", encoding="utf-8") as f: f.write(f"# {self.name.title()} Architecture Overview\n\n") f.write("*Generated from C3.x automated codebase analysis*\n\n") # Section 1: Overview f.write("## 1. Overview\n\n") f.write(f"{self.description}\n\n") # Section 2: Architectural Patterns (C3.7) if c3_data.get("architecture"): arch = c3_data["architecture"] patterns = arch.get("patterns", []) if patterns: f.write("## 2. Architectural Patterns\n\n") f.write("*Detected architectural patterns from codebase structure*\n\n") for pattern in patterns[:5]: # Top 5 patterns f.write(f"### {pattern['pattern_name']}\n\n") f.write(f"- **Confidence**: {pattern['confidence']:.2f}\n") if pattern.get("framework"): f.write(f"- **Framework**: {pattern['framework']}\n") if pattern.get("evidence"): f.write(f"- **Evidence**: {', '.join(pattern['evidence'][:3])}\n") f.write("\n") # Section 3: Technology Stack f.write("## 3. Technology Stack\n\n") # Try to get languages from C3.7 architecture analysis first languages = {} if c3_data.get("architecture"): languages = c3_data["architecture"].get("languages", {}) # If no languages from C3.7, try to get from GitHub data # github_data already available from method scope if not languages and github_data.get("languages"): # GitHub data has languages as list, convert to dict with count 1 languages = dict.fromkeys(github_data["languages"], 1) if languages: f.write("**Languages Detected**:\n") for lang, count in sorted(languages.items(), key=lambda x: x[1], reverse=True)[:5]: if isinstance(count, int): f.write(f"- {lang}: {count} files\n") else: f.write(f"- {lang}\n") f.write("\n") # Add frameworks if available if c3_data.get("architecture"): frameworks = c3_data["architecture"].get("frameworks_detected", []) if frameworks: f.write("**Frameworks & Libraries**:\n") for fw in frameworks[:10]: f.write(f"- {fw}\n") f.write("\n") if not languages and not ( c3_data.get("architecture") and c3_data["architecture"].get("frameworks_detected") ): f.write("*Technology stack analysis not available*\n\n") # Section 4: Design Patterns (C3.1) if c3_data.get("patterns"): f.write("## 4. Design Patterns\n\n") f.write("*Classic design patterns identified in the codebase*\n\n") # Summarize pattern types pattern_summary = {} for file_data in c3_data["patterns"]: for pattern in file_data.get("patterns", []): ptype = pattern["pattern_type"] pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1 if pattern_summary: for ptype, count in sorted( pattern_summary.items(), key=lambda x: x[1], reverse=True ): f.write(f"- **{ptype}**: {count} instance(s)\n") f.write( "\n๐Ÿ“ See `references/codebase_analysis/patterns/` for detailed analysis.\n\n" ) else: f.write("*No design patterns detected.*\n\n") # Section 5: Configuration Overview (C3.4) if c3_data.get("config_patterns"): f.write("## 5. Configuration Overview\n\n") config = c3_data["config_patterns"] config_files = config.get("config_files", []) if config_files: f.write(f"**{len(config_files)} configuration file(s) detected**:\n\n") for cf in config_files[:10]: # Top 10 f.write(f"- **`{cf['relative_path']}`**: {cf['type']}\n") if cf.get("purpose"): f.write(f" - Purpose: {cf['purpose']}\n") # Add security warnings if available if config.get("ai_enhancements"): insights = config["ai_enhancements"].get("overall_insights", {}) security_issues = insights.get("security_issues_found", 0) if security_issues > 0: f.write( f"\n๐Ÿ” **Security Alert**: {security_issues} potential security issue(s) found in configurations.\n" ) if insights.get("recommended_actions"): f.write("\n**Recommended Actions**:\n") for action in insights["recommended_actions"][:5]: f.write(f"- {action}\n") f.write( "\n๐Ÿ“ See `references/codebase_analysis/configuration/` for details.\n\n" ) else: f.write("*No configuration files detected.*\n\n") # Section 6: Common Workflows (C3.3) if c3_data.get("how_to_guides"): f.write("## 6. Common Workflows\n\n") guides = c3_data["how_to_guides"].get("guides", []) if guides: f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n") for guide in guides[:10]: # Top 10 f.write(f"- {guide.get('title', 'Untitled Guide')}\n") f.write( "\n๐Ÿ“ See `references/codebase_analysis/guides/` for detailed tutorials.\n\n" ) else: f.write("*No workflow guides extracted.*\n\n") # Section 7: Usage Examples (C3.2) if c3_data.get("test_examples"): f.write("## 7. Usage Examples\n\n") examples = c3_data["test_examples"] total = examples.get("total_examples", 0) high_value = examples.get("high_value_count", 0) if total > 0: f.write(f"**{total} usage example(s) extracted from tests**:\n") f.write(f"- High-value examples: {high_value}\n") # Category breakdown if examples.get("examples_by_category"): f.write("\n**By Category**:\n") for cat, count in sorted( examples["examples_by_category"].items(), key=lambda x: x[1], reverse=True, ): f.write(f"- {cat}: {count}\n") f.write( "\n๐Ÿ“ See `references/codebase_analysis/examples/` for code samples.\n\n" ) else: f.write("*No test examples extracted.*\n\n") # Section 8: Entry Points & Directory Structure f.write("## 8. Entry Points & Directory Structure\n\n") f.write("*Analysis based on codebase organization*\n\n") if c3_data.get("architecture"): dir_struct = c3_data["architecture"].get("directory_structure", {}) if dir_struct: f.write("**Main Directories**:\n") for dir_name, file_count in sorted( dir_struct.items(), key=lambda x: x[1], reverse=True )[:15]: f.write(f"- `{dir_name}/`: {file_count} file(s)\n") f.write("\n") # Footer f.write("---\n\n") f.write( "*This architecture overview was automatically generated by C3.x codebase analysis.*\n" ) f.write("*Last updated: skill build time*\n") logger.info("๐Ÿ“ Created ARCHITECTURE.md") def _generate_pattern_references(self, c3_dir: str, patterns_data: dict): """Generate design pattern references (C3.1).""" if not patterns_data: return patterns_dir = os.path.join(c3_dir, "patterns") os.makedirs(patterns_dir, exist_ok=True) # Save JSON data json_path = os.path.join(patterns_dir, "detected_patterns.json") with open(json_path, "w", encoding="utf-8") as f: json.dump(patterns_data, f, indent=2, ensure_ascii=False) # Create summary markdown md_path = os.path.join(patterns_dir, "index.md") with open(md_path, "w", encoding="utf-8") as f: f.write("# Design Patterns\n\n") f.write("*Detected patterns from C3.1 analysis*\n\n") for file_data in patterns_data: patterns = file_data.get("patterns", []) if patterns: f.write(f"## {file_data['file_path']}\n\n") for p in patterns: f.write(f"### {p['pattern_type']}\n\n") if p.get("class_name"): f.write(f"- **Class**: `{p['class_name']}`\n") if p.get("confidence"): f.write(f"- **Confidence**: {p['confidence']:.2f}\n") if p.get("indicators"): f.write(f"- **Indicators**: {', '.join(p['indicators'][:3])}\n") f.write("\n") logger.info(f" โœ“ Design patterns: {len(patterns_data)} files") def _generate_example_references(self, c3_dir: str, examples_data: dict): """Generate test example references (C3.2).""" if not examples_data: return examples_dir = os.path.join(c3_dir, "examples") os.makedirs(examples_dir, exist_ok=True) # Save JSON data json_path = os.path.join(examples_dir, "test_examples.json") with open(json_path, "w", encoding="utf-8") as f: json.dump(examples_data, f, indent=2, ensure_ascii=False) # Create summary markdown md_path = os.path.join(examples_dir, "index.md") with open(md_path, "w", encoding="utf-8") as f: f.write("# Usage Examples\n\n") f.write("*Extracted from test files (C3.2)*\n\n") total = examples_data.get("total_examples", 0) high_value = examples_data.get("high_value_count", 0) f.write(f"**Total Examples**: {total}\n") f.write(f"**High-Value Examples**: {high_value}\n\n") # List high-value examples examples = examples_data.get("examples", []) high_value_examples = [e for e in examples if e.get("confidence", 0) > 0.7] if high_value_examples: f.write("## High-Value Examples\n\n") for ex in high_value_examples[:20]: # Top 20 f.write(f"### {ex.get('description', 'Example')}\n\n") f.write(f"- **Category**: {ex.get('category', 'unknown')}\n") f.write(f"- **Confidence**: {ex.get('confidence', 0):.2f}\n") f.write(f"- **File**: `{ex.get('file_path', 'N/A')}`\n") if ex.get("code_snippet"): f.write(f"\n```python\n{ex['code_snippet'][:300]}\n```\n") f.write("\n") logger.info(f" โœ“ Test examples: {total} total, {high_value} high-value") def _generate_guide_references(self, c3_dir: str, guides_data: dict): """Generate how-to guide references (C3.3).""" if not guides_data: return guides_dir = os.path.join(c3_dir, "guides") os.makedirs(guides_dir, exist_ok=True) # Save JSON collection data json_path = os.path.join(guides_dir, "guide_collection.json") with open(json_path, "w", encoding="utf-8") as f: json.dump(guides_data, f, indent=2, ensure_ascii=False) guides = guides_data.get("guides", []) # Create index md_path = os.path.join(guides_dir, "index.md") with open(md_path, "w", encoding="utf-8") as f: f.write("# How-To Guides\n\n") f.write("*Workflow tutorials extracted from codebase (C3.3)*\n\n") f.write(f"**Total Guides**: {len(guides)}\n\n") if guides: f.write("## Available Guides\n\n") for guide in guides: f.write( f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n" ) f.write("\n") # Save individual guide markdown files for guide in guides: guide_id = guide.get("id", "unknown") guide_path = os.path.join(guides_dir, f"guide_{guide_id}.md") with open(guide_path, "w", encoding="utf-8") as f: f.write(f"# {guide.get('title', 'Untitled Guide')}\n\n") if guide.get("description"): f.write(f"{guide['description']}\n\n") steps = guide.get("steps", []) if steps: f.write("## Steps\n\n") for i, step in enumerate(steps, 1): f.write(f"### {i}. {step.get('action', 'Step')}\n\n") if step.get("code_example"): lang = step.get("language", "python") f.write(f"```{lang}\n{step['code_example']}\n```\n\n") if step.get("explanation"): f.write(f"{step['explanation']}\n\n") logger.info(f" โœ“ How-to guides: {len(guides)}") def _generate_config_references(self, c3_dir: str, config_data: dict): """Generate configuration pattern references (C3.4).""" if not config_data: return config_dir = os.path.join(c3_dir, "configuration") os.makedirs(config_dir, exist_ok=True) # Save JSON data json_path = os.path.join(config_dir, "config_patterns.json") with open(json_path, "w", encoding="utf-8") as f: json.dump(config_data, f, indent=2, ensure_ascii=False) # Create summary markdown md_path = os.path.join(config_dir, "index.md") config_files = config_data.get("config_files", []) with open(md_path, "w", encoding="utf-8") as f: f.write("# Configuration Patterns\n\n") f.write("*Detected configuration files (C3.4)*\n\n") f.write(f"**Total Config Files**: {len(config_files)}\n\n") if config_files: f.write("## Configuration Files\n\n") for cf in config_files: f.write(f"### `{cf['relative_path']}`\n\n") f.write(f"- **Type**: {cf['type']}\n") f.write(f"- **Purpose**: {cf.get('purpose', 'N/A')}\n") f.write(f"- **Settings**: {len(cf.get('settings', []))}\n") # Show AI enhancements if available if cf.get("ai_enhancement"): enh = cf["ai_enhancement"] if enh.get("security_concern"): f.write(f"- **Security**: {enh['security_concern']}\n") if enh.get("best_practice"): f.write(f"- **Best Practice**: {enh['best_practice']}\n") f.write("\n") # Overall insights if config_data.get("ai_enhancements"): insights = config_data["ai_enhancements"].get("overall_insights", {}) if insights: f.write("## Overall Insights\n\n") if insights.get("security_issues_found"): f.write( f"๐Ÿ” **Security Issues**: {insights['security_issues_found']}\n\n" ) if insights.get("recommended_actions"): f.write("**Recommended Actions**:\n") for action in insights["recommended_actions"]: f.write(f"- {action}\n") f.write("\n") logger.info(f" โœ“ Configuration files: {len(config_files)}") def _copy_architecture_details(self, c3_dir: str, arch_data: dict): """Copy architectural pattern JSON details (C3.7).""" if not arch_data: return arch_dir = os.path.join(c3_dir, "architecture_details") os.makedirs(arch_dir, exist_ok=True) # Save full JSON data json_path = os.path.join(arch_dir, "architectural_patterns.json") with open(json_path, "w", encoding="utf-8") as f: json.dump(arch_data, f, indent=2, ensure_ascii=False) # Create summary markdown md_path = os.path.join(arch_dir, "index.md") with open(md_path, "w", encoding="utf-8") as f: f.write("# Architectural Patterns (Detailed)\n\n") f.write("*Comprehensive architectural analysis (C3.7)*\n\n") patterns = arch_data.get("patterns", []) if patterns: f.write("## Detected Patterns\n\n") for p in patterns: f.write(f"### {p['pattern_name']}\n\n") f.write(f"- **Confidence**: {p['confidence']:.2f}\n") if p.get("framework"): f.write(f"- **Framework**: {p['framework']}\n") if p.get("evidence"): f.write("- **Evidence**:\n") for e in p["evidence"][:5]: f.write(f" - {e}\n") f.write("\n") logger.info(f" โœ“ Architectural details: {len(patterns)} patterns") def _format_c3_summary_section(self, c3_data: dict) -> str: """Format C3.x analysis summary for SKILL.md.""" content = "\n## ๐Ÿ—๏ธ Architecture & Code Analysis\n\n" content += "*This skill includes comprehensive codebase analysis*\n\n" # Add architectural pattern summary if c3_data.get("architecture"): patterns = c3_data["architecture"].get("patterns", []) if patterns: top_pattern = patterns[0] content += f"**Primary Architecture**: {top_pattern['pattern_name']}" if top_pattern.get("framework"): content += f" ({top_pattern['framework']})" content += f" - Confidence: {top_pattern['confidence']:.0%}\n\n" # Add design patterns summary if c3_data.get("patterns"): total_patterns = sum(len(f.get("patterns", [])) for f in c3_data["patterns"]) if total_patterns > 0: content += f"**Design Patterns**: {total_patterns} detected\n" # Show top 3 pattern types pattern_summary = {} for file_data in c3_data["patterns"]: for pattern in file_data.get("patterns", []): ptype = pattern["pattern_type"] pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1 top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3] if top_patterns: content += ( f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n" ) content += "\n" # Add test examples summary if c3_data.get("test_examples"): total = c3_data["test_examples"].get("total_examples", 0) high_value = c3_data["test_examples"].get("high_value_count", 0) if total > 0: content += f"**Usage Examples**: {total} extracted from tests ({high_value} high-value)\n\n" # Add how-to guides summary if c3_data.get("how_to_guides"): guide_count = len(c3_data["how_to_guides"].get("guides", [])) if guide_count > 0: content += f"**How-To Guides**: {guide_count} workflow tutorials\n\n" # Add configuration summary if c3_data.get("config_patterns"): config_files = c3_data["config_patterns"].get("config_files", []) if config_files: content += f"**Configuration Files**: {len(config_files)} analyzed\n" # Add security warning if present if c3_data["config_patterns"].get("ai_enhancements"): insights = c3_data["config_patterns"]["ai_enhancements"].get( "overall_insights", {} ) security_issues = insights.get("security_issues_found", 0) if security_issues > 0: content += f"- ๐Ÿ” **Security Alert**: {security_issues} issue(s) detected\n" content += "\n" # Add link to ARCHITECTURE.md content += "๐Ÿ“– **See** `references/codebase_analysis/ARCHITECTURE.md` for complete architectural overview.\n\n" return content def _generate_conflicts_report(self): """Generate detailed conflicts report.""" conflicts_path = os.path.join(self.skill_dir, "references", "conflicts.md") with open(conflicts_path, "w") as f: f.write("# Conflict Report\n\n") f.write(f"Found **{len(self.conflicts)}** conflicts between sources.\n\n") # Group by severity high = [ c for c in self.conflicts if (hasattr(c, "severity") and c.severity == "high") or c.get("severity") == "high" ] medium = [ c for c in self.conflicts if (hasattr(c, "severity") and c.severity == "medium") or c.get("severity") == "medium" ] low = [ c for c in self.conflicts if (hasattr(c, "severity") and c.severity == "low") or c.get("severity") == "low" ] f.write("## Severity Breakdown\n\n") f.write(f"- ๐Ÿ”ด **High**: {len(high)} (action required)\n") f.write(f"- ๐ŸŸก **Medium**: {len(medium)} (review recommended)\n") f.write(f"- ๐ŸŸข **Low**: {len(low)} (informational)\n\n") # List high severity conflicts if high: f.write("## ๐Ÿ”ด High Severity\n\n") f.write("*These conflicts require immediate attention*\n\n") for conflict in high: api_name = ( conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown") ) diff = ( conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A") ) f.write(f"### {api_name}\n\n") f.write(f"**Issue**: {diff}\n\n") # List medium severity if medium: f.write("## ๐ŸŸก Medium Severity\n\n") for conflict in medium[:20]: # Limit to 20 api_name = ( conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown") ) diff = ( conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A") ) f.write(f"### {api_name}\n\n") f.write(f"{diff}\n\n") logger.info("Created conflicts report") if __name__ == "__main__": # Test with mock data import sys if len(sys.argv) < 2: print("Usage: python unified_skill_builder.py ") sys.exit(1) config_path = sys.argv[1] with open(config_path) as f: config = json.load(f) # Mock scraped data scraped_data = { "github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}} } builder = UnifiedSkillBuilder(config, scraped_data) builder.build() print(f"\nโœ… Test skill built in: output/{config['name']}/")