#!/usr/bin/env python3
"""
SKILL.md Enhancement Script
Uses platform AI APIs to improve SKILL.md by analyzing reference documentation.

Usage:
    # Claude (default)
    skill-seekers enhance output/react/
    skill-seekers enhance output/react/ --api-key sk-ant-...

    # Gemini
    skill-seekers enhance output/react/ --target gemini --api-key AIzaSy...

    # OpenAI
    skill-seekers enhance output/react/ --target openai --api-key sk-proj-...
"""

import argparse
import os
import sys
from pathlib import Path

# Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from skill_seekers.cli.constants import API_CONTENT_LIMIT, API_PREVIEW_LIMIT
from skill_seekers.cli.utils import read_reference_files

try:
    import anthropic
except ImportError:
    # Only SkillEnhancer needs the Anthropic SDK. Exiting here would also
    # break `--dry-run` and the adaptor-based path in main(), which do not
    # use it in this module. The error is raised lazily in
    # SkillEnhancer.__init__ instead.
    anthropic = None


class SkillEnhancer:
    """Enhance a skill directory's SKILL.md by asking Claude to rewrite it.

    The enhancer reads the reference files under ``<skill_dir>/references``,
    builds a single prompt from them plus the current SKILL.md (if any),
    sends it to the Anthropic Messages API and writes the answer back to
    ``<skill_dir>/SKILL.md`` after backing up the previous version.
    """

    # Maximum number of characters of a single reference file embedded
    # in the prompt; longer files are cut and marked as truncated.
    MAX_REFERENCE_CHARS = 30000

    def __init__(self, skill_dir, api_key=None):
        """Set up paths and the Anthropic client.

        Args:
            skill_dir: Path to the skill directory.
            api_key: Explicit API key. Falls back to the ANTHROPIC_API_KEY
                and then ANTHROPIC_AUTH_TOKEN environment variables.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
            ValueError: If no API key can be found.
        """
        if anthropic is None:
            raise ImportError(
                "anthropic package not installed. Install with: pip3 install anthropic"
            )

        self.skill_dir = Path(skill_dir)
        self.references_dir = self.skill_dir / "references"
        self.skill_md_path = self.skill_dir / "SKILL.md"

        # Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
        self.api_key = (
            api_key
            or os.environ.get("ANTHROPIC_API_KEY")
            or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        )
        if not self.api_key:
            raise ValueError(
                "No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
                "environment variable or use --api-key argument"
            )

        # Support custom base URL for alternative API endpoints
        client_kwargs = {"api_key": self.api_key}
        base_url = os.environ.get("ANTHROPIC_BASE_URL")
        if base_url:
            client_kwargs["base_url"] = base_url
            print(f"ℹ️ Using custom API base URL: {base_url}")

        self.client = anthropic.Anthropic(**client_kwargs)

    def read_current_skill_md(self):
        """Return the text of the existing SKILL.md, or None if it is missing."""
        if not self.skill_md_path.exists():
            return None
        return self.skill_md_path.read_text(encoding="utf-8")

    def enhance_skill_md(self, references, current_skill_md):
        """Ask Claude for an enhanced SKILL.md.

        Args:
            references: Mapping of reference filename to its metadata dict
                (see ``_build_enhancement_prompt`` for the keys that are read).
            current_skill_md: Current SKILL.md text, or None/empty.

        Returns:
            The text of the first response block that carries a ``text``
            attribute, or None when the API call fails or no such block
            with non-empty text exists.
        """
        prompt = self._build_enhancement_prompt(references, current_skill_md)

        print("\n🤖 Asking Claude to enhance SKILL.md...")
        print(f" Input: {len(prompt):,} characters")

        try:
            message = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=4096,
                temperature=0.3,
                messages=[{"role": "user", "content": prompt}],
            )

            # Newer SDK versions may include non-text blocks (e.g. a
            # ThinkingBlock) before the answer, so pick the first block
            # that actually has text instead of indexing content[0].
            enhanced_content = next(
                (block.text for block in message.content if hasattr(block, "text")),
                None,
            )
            if not enhanced_content:
                print("❌ Error: No text content found in API response")
                return None

            return enhanced_content

        except Exception as e:
            # Top-level API boundary: report and signal failure with None
            # so run() can print its own failure message.
            print(f"❌ Error calling Claude API: {e}")
            return None

    def _build_enhancement_prompt(self, references, current_skill_md):
        """Build the prompt for Claude with multi-source awareness.

        Args:
            references: Mapping of filename to a metadata dict. The keys
                read here are ``source``, ``path``, ``content``,
                ``confidence``, ``size`` and the optional ``repo_id``.
            current_skill_md: Current SKILL.md text, or None/empty.

        Returns:
            The complete prompt string.
        """
        skill_name = self.skill_dir.name

        sources_found = {meta["source"] for meta in references.values()}
        has_conflicts = any("conflicts" in meta["path"] for meta in references.values())

        source_types = ", ".join(sorted(sources_found))
        multi_source = "Yes" if len(sources_found) > 1 else "No"
        conflicts_note = "Yes - see conflicts.md in references" if has_conflicts else "No"
        fence_open = "```markdown" if current_skill_md else "(none - create from scratch)"
        fence_close = "```" if current_skill_md else ""
        current_text = current_skill_md or "No existing SKILL.md"

        parts = [
            f"""You are enhancing a Claude skill's SKILL.md file. This skill is about: {skill_name}

I've scraped documentation from multiple sources and organized it into reference files. Your job is to create an EXCELLENT SKILL.md that synthesizes knowledge from these sources.

SKILL OVERVIEW:
- Name: {skill_name}
- Source Types: {source_types}
- Multi-Source: {multi_source}
- Conflicts Detected: {conflicts_note}

CURRENT SKILL.MD:
{fence_open}
{current_text}
{fence_close}

SOURCE ANALYSIS:
This skill combines knowledge from {len(sources_found)} source type(s):

"""
        ]

        # Group references by (source_type, repo_id) for multi-source support.
        # A falsy repo_id is normalised to None (single-source case).
        by_source = {}
        for filename, metadata in references.items():
            key = (metadata["source"], metadata.get("repo_id") or None)
            by_source.setdefault(key, []).append((filename, metadata))

        # None and str cannot be compared, so sorting the raw tuples raises
        # TypeError when one source type has entries both with and without a
        # repo_id. Map None to "" for ordering only.
        ordered_keys = sorted(by_source, key=lambda k: (k[0], k[1] or ""))

        # Add source breakdown with repo identity
        for source, repo_id in ordered_keys:
            files = by_source[(source, repo_id)]
            if repo_id:
                parts.append(f"\n**{source.upper()} - {repo_id} ({len(files)} file(s))**\n")
            else:
                parts.append(f"\n**{source.upper()} ({len(files)} file(s))**\n")
            for filename, metadata in files[:5]:  # Top 5 per source
                parts.append(
                    f"- {filename} (confidence: {metadata['confidence']}, "
                    f"{metadata['size']:,} chars)\n"
                )
            if len(files) > 5:
                parts.append(f"- ... and {len(files) - 5} more\n")

        parts.append("\n\nREFERENCE DOCUMENTATION:\n")

        # Add references grouped by (source, repo_id) with metadata
        limit = self.MAX_REFERENCE_CHARS
        for source, repo_id in ordered_keys:
            if repo_id:
                parts.append(f"\n### {source.upper()} SOURCES - {repo_id}\n\n")
            else:
                parts.append(f"\n### {source.upper()} SOURCES\n\n")

            for filename, metadata in by_source[(source, repo_id)]:
                content = metadata["content"]
                # Limit the size of each embedded file
                if len(content) > limit:
                    content = content[:limit] + "\n\n[Content truncated for size...]"

                parts.append(f"\n#### {filename}\n")
                if repo_id:
                    parts.append(
                        f"*Source: {metadata['source']} ({repo_id}), "
                        f"Confidence: {metadata['confidence']}*\n\n"
                    )
                else:
                    parts.append(
                        f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
                    )
                parts.append(f"```markdown\n{content}\n```\n")

        parts.append(
            """
REFERENCE PRIORITY (when sources differ):
1. **Code patterns (codebase_analysis)**: Ground truth - what the code actually does
2. **Official documentation**: Intended API and usage patterns
3. **GitHub issues**: Real-world usage and known problems
4. **PDF documentation**: Additional context and tutorials

MULTI-REPOSITORY HANDLING:
"""
        )

        # Detect multiple repos from same source type
        repo_ids = {
            meta["repo_id"] for meta in references.values() if meta.get("repo_id")
        }
        if len(repo_ids) > 1:
            repo_list = ", ".join(sorted(repo_ids))
            parts.append(
                f"""
⚠️ MULTIPLE REPOSITORIES DETECTED: {repo_list}

This skill combines codebase analysis from {len(repo_ids)} different repositories.
Each repo has its own ARCHITECTURE.md, patterns, examples, and configuration.

When synthesizing:
- Clearly identify which content comes from which repo
- Compare and contrast patterns across repos (e.g., "httpx uses Strategy pattern 50 times, httpcore uses it 32 times")
- Highlight relationships (e.g., "httpx is a client library built on top of httpcore")
- Present examples from BOTH repos to show different use cases
- If repos serve different purposes, explain when to use each
"""
            )
        else:
            parts.append("\nSingle repository - standard synthesis applies.\n")

        parts.append(
            """

YOUR TASK:
Create an enhanced SKILL.md that synthesizes knowledge from multiple sources:

1. **Multi-Source Synthesis**
   - Acknowledge that this skill combines multiple sources
   - Highlight agreements between sources (builds confidence)
   - Note discrepancies transparently (if present)
   - Use source priority when synthesizing conflicting information

2. **Clear "When to Use This Skill" section**
   - Be SPECIFIC about trigger conditions
   - List concrete use cases
   - Include perspective from both docs AND real-world usage (if GitHub/codebase data available)

3. **Excellent Quick Reference section**
   - Extract 5-10 of the BEST, most practical code examples
   - Prefer examples from HIGH CONFIDENCE sources first
   - If code examples exist from codebase analysis, prioritize those (real usage)
   - If docs examples exist, include those too (official patterns)
   - Choose SHORT, clear examples (5-20 lines max)
   - Use proper language tags (cpp, python, javascript, json, etc.)
   - Add clear descriptions noting the source (e.g., "From official docs" or "From codebase")

4. **Detailed Reference Files description**
   - Explain what's in each reference file
   - Note the source type and confidence level
   - Help users navigate multi-source documentation

5. **Practical "Working with This Skill" section**
   - Clear guidance for beginners, intermediate, and advanced users
   - Navigation tips for multi-source references
   - How to resolve conflicts if present

6. **Key Concepts section** (if applicable)
   - Explain core concepts
   - Define important terminology
   - Reconcile differences between sources if needed

7. **Conflict Handling** (if conflicts detected)
   - Add a "Known Discrepancies" section
   - Explain major conflicts transparently
   - Provide guidance on which source to trust in each case

8. **Keep the frontmatter** (---\nname: ...\n---) intact

IMPORTANT:
- Extract REAL examples from the reference docs, don't make them up
- Prioritize HIGH CONFIDENCE sources when synthesizing
- Note source attribution when helpful (e.g., "Official docs say X, but codebase shows Y")
- Make discrepancies transparent, not hidden
- Prioritize SHORT, clear examples (5-20 lines max)
- Make it actionable and practical
- Don't be too verbose - be concise but useful
- Maintain the markdown structure for Claude skills
- Keep code examples properly formatted with language tags

OUTPUT:
Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
"""
        )

        return "".join(parts)

    def save_enhanced_skill_md(self, content):
        """Write *content* to SKILL.md, backing up any existing file first.

        The previous SKILL.md is moved to ``SKILL.md.backup``.
        """
        if self.skill_md_path.exists():
            backup_path = self.skill_md_path.with_suffix(".md.backup")
            # Path.replace overwrites an existing backup on every platform;
            # Path.rename raises FileExistsError on Windows when a backup
            # from an earlier run is still present.
            self.skill_md_path.replace(backup_path)
            print(f" 💾 Backed up original to: {backup_path.name}")

        # Save enhanced version
        self.skill_md_path.write_text(content, encoding="utf-8")
        print(" ✅ Saved enhanced SKILL.md")

    def run(self):
        """Run the full enhancement workflow.

        Returns:
            True when an enhanced SKILL.md was generated and saved, False
            when no reference files were found or the API step failed.
        """
        banner = "=" * 60
        print(f"\n{banner}")
        print(f"ENHANCING SKILL: {self.skill_dir.name}")
        print(f"{banner}\n")

        # Read reference files
        print("📖 Reading reference documentation...")
        references = read_reference_files(
            self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT
        )

        if not references:
            print("❌ No reference files found to analyze")
            return False

        # Analyze sources
        sources_found = {meta["source"] for meta in references.values()}
        total_size = sum(meta["size"] for meta in references.values())

        print(f" ✓ Read {len(references)} reference files")
        print(f" ✓ Sources: {', '.join(sorted(sources_found))}")
        print(f" ✓ Total size: {total_size:,} characters\n")

        # Read current SKILL.md
        current_skill_md = self.read_current_skill_md()
        if current_skill_md:
            print(f" ℹ Found existing SKILL.md ({len(current_skill_md)} chars)")
        else:
            print(" ℹ No existing SKILL.md, will create new one")

        # Enhance with Claude
        enhanced = self.enhance_skill_md(references, current_skill_md)
        if not enhanced:
            print("❌ Enhancement failed")
            return False

        print(f" ✓ Generated enhanced SKILL.md ({len(enhanced)} chars)\n")

        # Save
        print("💾 Saving enhanced SKILL.md...")
        self.save_enhanced_skill_md(enhanced)

        print("\n✅ Enhancement complete!")
        print("\nNext steps:")
        print(f" 1. Review: {self.skill_md_path}")
        print(
            f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}"
        )
        print(" 3. Package your skill:")
        print(f" skill-seekers package {self.skill_dir}/")

        return True


def main():
    """Command-line entry point.

    Parses arguments, validates the skill directory, handles ``--dry-run``
    and otherwise delegates the enhancement to the platform adaptor selected
    with ``--target``. Exits with status 0 on success and 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Enhance SKILL.md using platform AI APIs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Claude (default)
  export ANTHROPIC_API_KEY=sk-ant-...
  skill-seekers enhance output/react/

  # Gemini
  export GOOGLE_API_KEY=AIzaSy...
  skill-seekers enhance output/react/ --target gemini

  # OpenAI
  export OPENAI_API_KEY=sk-proj-...
  skill-seekers enhance output/react/ --target openai

  # With explicit API key
  skill-seekers enhance output/react/ --api-key sk-ant-...

  # Dry run
  skill-seekers enhance output/godot/ --dry-run
""",
    )

    parser.add_argument(
        "skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)"
    )
    parser.add_argument(
        "--api-key", type=str, help="Platform API key (or set environment variable)"
    )
    parser.add_argument(
        "--target",
        choices=["claude", "gemini", "openai"],
        default="claude",
        help="Target LLM platform (default: claude)",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Show what would be done without calling API"
    )

    args = parser.parse_args()

    # Validate skill directory
    skill_dir = Path(args.skill_dir)
    if not skill_dir.exists():
        print(f"❌ Error: Directory not found: {skill_dir}")
        sys.exit(1)

    if not skill_dir.is_dir():
        print(f"❌ Error: Not a directory: {skill_dir}")
        sys.exit(1)

    # Dry run mode
    if args.dry_run:
        refs_dir = skill_dir / "references"
        print("🔍 DRY RUN MODE")
        print(f" Would enhance: {skill_dir}")
        print(f" References: {refs_dir}")
        print(f" SKILL.md: {skill_dir / 'SKILL.md'}")

        if refs_dir.exists():
            # Sorted so the listing is stable across runs and filesystems.
            ref_files = sorted(refs_dir.glob("*.md"))
            print(f" Found {len(ref_files)} reference files:")
            for ref_file in ref_files:
                size = ref_file.stat().st_size
                print(f" - {ref_file.name} ({size:,} bytes)")

        print("\nTo actually run enhancement:")
        print(f" skill-seekers enhance {skill_dir}")
        return

    # Check if platform supports enhancement
    try:
        # Imported here so a broken/missing adaptor package is reported by
        # the ImportError handler below instead of failing at module import.
        from skill_seekers.cli.adaptors import get_adaptor

        adaptor = get_adaptor(args.target)

        if not adaptor.supports_enhancement():
            print(f"❌ Error: {adaptor.PLATFORM_NAME} does not support AI enhancement")
            print("\nSupported platforms for enhancement:")
            print(" - Claude AI (Anthropic)")
            print(" - Google Gemini")
            print(" - OpenAI ChatGPT")
            sys.exit(1)

        # Get API key: the command-line value wins over the environment
        api_key = args.api_key
        if not api_key:
            env_var = adaptor.get_env_var_name()
            api_key = os.environ.get(env_var, "").strip()

            if not api_key:
                print(f"❌ Error: {env_var} not set")
                print(f"\nSet your API key for {adaptor.PLATFORM_NAME}:")
                print(f" export {env_var}=...")
                print("Or provide it directly:")
                print(f" skill-seekers enhance {skill_dir} --target {args.target} --api-key ...")
                sys.exit(1)

        # Run enhancement using adaptor
        banner = "=" * 60
        print(f"\n{banner}")
        print(f"ENHANCING SKILL: {skill_dir}")
        print(f"Platform: {adaptor.PLATFORM_NAME}")
        print(f"{banner}\n")

        success = adaptor.enhance(skill_dir, api_key)

        if success:
            print("\n✅ Enhancement complete!")
            print("\nNext steps:")
            print(f" 1. Review: {skill_dir / 'SKILL.md'}")
            print(
                f" 2. If you don't like it, restore backup: {skill_dir / 'SKILL.md.backup'}"
            )
            print(" 3. Package your skill:")
            print(f" skill-seekers package {skill_dir}/ --target {args.target}")

        # SystemExit is not an Exception subclass, so the handlers below
        # do not intercept this (or the earlier sys.exit calls).
        sys.exit(0 if success else 1)

    except ImportError as e:
        print(f"❌ Error: {e}")
        print("\nAdaptor system not available. Reinstall skill-seekers.")
        sys.exit(1)
    except ValueError as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()