Files
skill-seekers-reference/src/skill_seekers/cli/enhance_skill.py
2026-01-17 17:48:15 +00:00

489 lines
17 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
SKILL.md Enhancement Script
Uses platform AI APIs to improve SKILL.md by analyzing reference documentation.
Usage:
# Claude (default)
skill-seekers enhance output/react/
skill-seekers enhance output/react/ --api-key sk-ant-...
# Gemini
skill-seekers enhance output/react/ --target gemini --api-key AIzaSy...
# OpenAI
skill-seekers enhance output/react/ --target openai --api-key sk-proj-...
"""
import argparse
import os
import sys
from pathlib import Path
# Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from skill_seekers.cli.constants import API_CONTENT_LIMIT, API_PREVIEW_LIMIT
from skill_seekers.cli.utils import read_reference_files
try:
import anthropic
except ImportError:
print("❌ Error: anthropic package not installed")
print("Install with: pip3 install anthropic")
sys.exit(1)
class SkillEnhancer:
    """Enhance a skill's SKILL.md by sending its reference docs to Claude.

    The enhancer reads the reference files of a skill directory, builds one
    large prompt describing every source, asks the Anthropic Messages API for
    an improved SKILL.md and writes the answer back (keeping a backup of the
    previous file).

    Class attributes (override on a subclass or instance to tune a request):
        MODEL: Model identifier passed to ``messages.create``.
        MAX_TOKENS: ``max_tokens`` passed to ``messages.create``.
        TEMPERATURE: ``temperature`` passed to ``messages.create``.
        PER_FILE_CHAR_LIMIT: Maximum characters of a single reference file
            that are embedded in the prompt before it is truncated.
        FILES_LISTED_PER_SOURCE: Number of files named per source group in
            the prompt's source breakdown.
    """

    MODEL = "claude-sonnet-4-20250514"
    MAX_TOKENS = 4096
    TEMPERATURE = 0.3
    PER_FILE_CHAR_LIMIT = 30000
    FILES_LISTED_PER_SOURCE = 5

    def __init__(self, skill_dir, api_key=None):
        """Resolve paths and credentials and create the Anthropic client.

        Args:
            skill_dir: Path to the skill directory (contains ``SKILL.md`` and
                a ``references`` sub-directory).
            api_key: Explicit API key. When omitted, ``ANTHROPIC_API_KEY`` and
                then ``ANTHROPIC_AUTH_TOKEN`` are read from the environment.

        Raises:
            ValueError: If no API key can be found.
        """
        self.skill_dir = Path(skill_dir)
        self.references_dir = self.skill_dir / "references"
        self.skill_md_path = self.skill_dir / "SKILL.md"

        # Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
        self.api_key = (
            api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        )
        if not self.api_key:
            raise ValueError(
                "No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
                "environment variable or use --api-key argument"
            )

        # Support custom base URL for alternative API endpoints
        base_url = os.environ.get("ANTHROPIC_BASE_URL")
        client_kwargs = {"api_key": self.api_key}
        if base_url:
            client_kwargs["base_url"] = base_url
            print(f" Using custom API base URL: {base_url}")
        self.client = anthropic.Anthropic(**client_kwargs)

    def read_current_skill_md(self):
        """Return the text of the existing SKILL.md, or None if there is none."""
        if not self.skill_md_path.exists():
            return None
        return self.skill_md_path.read_text(encoding="utf-8")

    def enhance_skill_md(self, references, current_skill_md):
        """Ask Claude for an enhanced SKILL.md.

        Args:
            references: Mapping of reference filename to its metadata dict
                (see ``_build_enhancement_prompt`` for the keys that are read).
            current_skill_md: Text of the current SKILL.md, or None.

        Returns:
            The enhanced SKILL.md text, or None when the API call fails or the
            response carries no text. Errors are printed, never raised.
        """
        prompt = self._build_enhancement_prompt(references, current_skill_md)
        print("\n🤖 Asking Claude to enhance SKILL.md...")
        print(f" Input: {len(prompt):,} characters")
        try:
            message = self.client.messages.create(
                model=self.MODEL,
                max_tokens=self.MAX_TOKENS,
                temperature=self.TEMPERATURE,
                messages=[{"role": "user", "content": prompt}],
            )
            # Newer SDK versions may include non-text blocks (e.g. ThinkingBlock),
            # so pick the first block that actually carries non-empty text.
            enhanced_content = next(
                (block.text for block in message.content if getattr(block, "text", None)),
                None,
            )
            if not enhanced_content:
                print("❌ Error: No text content found in API response")
                return None
            # A response cut off by the token limit would be saved as a
            # truncated SKILL.md; make that visible to the user.
            if getattr(message, "stop_reason", None) == "max_tokens":
                print("⚠️ Warning: response hit the max_tokens limit; SKILL.md may be truncated")
            return enhanced_content
        except Exception as e:
            # Best-effort boundary: any failure is reported and signalled via None.
            print(f"❌ Error calling Claude API: {e}")
            return None

    @staticmethod
    def _group_references(references):
        """Group references by ``(source, repo_id)``.

        A falsy ``repo_id`` is normalised to None. Returns ``(groups, keys)``
        where ``groups`` maps each key to a list of ``(filename, metadata)``
        and ``keys`` is the sorted key list. The sort key replaces None with
        an empty string so that a source having entries both with and without
        a repo id can be ordered (None and str are not comparable).
        """
        groups = {}
        for filename, metadata in references.items():
            key = (metadata["source"], metadata.get("repo_id") or None)
            groups.setdefault(key, []).append((filename, metadata))
        ordered_keys = sorted(groups, key=lambda key: (key[0], key[1] or ""))
        return groups, ordered_keys

    def _build_enhancement_prompt(self, references, current_skill_md):
        """Build the prompt for Claude with multi-source awareness.

        Args:
            references: Mapping of filename to a metadata dict. The keys read
                here are ``source``, ``path``, ``confidence``, ``size``,
                ``content`` and the optional ``repo_id``.
            current_skill_md: Text of the current SKILL.md, or None/empty when
                one has to be created from scratch.

        Returns:
            The complete prompt string.
        """
        skill_name = self.skill_dir.name
        sources_found = {metadata["source"] for metadata in references.values()}
        # str() keeps the substring test valid if a path is a Path object.
        has_conflicts = any("conflicts" in str(meta["path"]) for meta in references.values())

        header = f"""You are enhancing a Claude skill's SKILL.md file. This skill is about: {skill_name}
I've scraped documentation from multiple sources and organized it into reference files. Your job is to create an EXCELLENT SKILL.md that synthesizes knowledge from these sources.
SKILL OVERVIEW:
- Name: {skill_name}
- Source Types: {", ".join(sorted(sources_found))}
- Multi-Source: {"Yes" if len(sources_found) > 1 else "No"}
- Conflicts Detected: {"Yes - see conflicts.md in references" if has_conflicts else "No"}
CURRENT SKILL.MD:
{"```markdown" if current_skill_md else "(none - create from scratch)"}
{current_skill_md or "No existing SKILL.md"}
{"```" if current_skill_md else ""}
SOURCE ANALYSIS:
This skill combines knowledge from {len(sources_found)} source type(s):
"""
        # Collect fragments and join once instead of repeated string +=.
        parts = [header]

        by_source, ordered_keys = self._group_references(references)

        # Source breakdown with repo identity
        listed = self.FILES_LISTED_PER_SOURCE
        for source, repo_id in ordered_keys:
            files = by_source[(source, repo_id)]
            label = f"{source.upper()} - {repo_id}" if repo_id else source.upper()
            parts.append(f"\n**{label} ({len(files)} file(s))**\n")
            for filename, metadata in files[:listed]:
                parts.append(
                    f"- {filename} (confidence: {metadata['confidence']}, {metadata['size']:,} chars)\n"
                )
            if len(files) > listed:
                parts.append(f"- ... and {len(files) - listed} more\n")

        parts.append("\n\nREFERENCE DOCUMENTATION:\n")

        # Reference bodies, grouped the same way, each with its metadata line
        limit = self.PER_FILE_CHAR_LIMIT
        for source, repo_id in ordered_keys:
            if repo_id:
                parts.append(f"\n### {source.upper()} SOURCES - {repo_id}\n\n")
            else:
                parts.append(f"\n### {source.upper()} SOURCES\n\n")
            for filename, metadata in by_source[(source, repo_id)]:
                content = metadata["content"]
                if len(content) > limit:
                    content = content[:limit] + "\n\n[Content truncated for size...]"
                parts.append(f"\n#### {filename}\n")
                if repo_id:
                    parts.append(
                        f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
                    )
                else:
                    parts.append(
                        f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
                    )
                parts.append(f"```markdown\n{content}\n```\n")

        parts.append("""
REFERENCE PRIORITY (when sources differ):
1. **Code patterns (codebase_analysis)**: Ground truth - what the code actually does
2. **Official documentation**: Intended API and usage patterns
3. **GitHub issues**: Real-world usage and known problems
4. **PDF documentation**: Additional context and tutorials
MULTI-REPOSITORY HANDLING:
""")

        # Detect multiple repos across all references
        repo_ids = {
            metadata["repo_id"] for metadata in references.values() if metadata.get("repo_id")
        }
        if len(repo_ids) > 1:
            parts.append(f"""
⚠️ MULTIPLE REPOSITORIES DETECTED: {", ".join(sorted(repo_ids))}
This skill combines codebase analysis from {len(repo_ids)} different repositories.
Each repo has its own ARCHITECTURE.md, patterns, examples, and configuration.
When synthesizing:
- Clearly identify which content comes from which repo
- Compare and contrast patterns across repos (e.g., "httpx uses Strategy pattern 50 times, httpcore uses it 32 times")
- Highlight relationships (e.g., "httpx is a client library built on top of httpcore")
- Present examples from BOTH repos to show different use cases
- If repos serve different purposes, explain when to use each
""")
        else:
            parts.append("\nSingle repository - standard synthesis applies.\n")

        parts.append("""
YOUR TASK:
Create an enhanced SKILL.md that synthesizes knowledge from multiple sources:
1. **Multi-Source Synthesis**
- Acknowledge that this skill combines multiple sources
- Highlight agreements between sources (builds confidence)
- Note discrepancies transparently (if present)
- Use source priority when synthesizing conflicting information
2. **Clear "When to Use This Skill" section**
- Be SPECIFIC about trigger conditions
- List concrete use cases
- Include perspective from both docs AND real-world usage (if GitHub/codebase data available)
3. **Excellent Quick Reference section**
- Extract 5-10 of the BEST, most practical code examples
- Prefer examples from HIGH CONFIDENCE sources first
- If code examples exist from codebase analysis, prioritize those (real usage)
- If docs examples exist, include those too (official patterns)
- Choose SHORT, clear examples (5-20 lines max)
- Use proper language tags (cpp, python, javascript, json, etc.)
- Add clear descriptions noting the source (e.g., "From official docs" or "From codebase")
4. **Detailed Reference Files description**
- Explain what's in each reference file
- Note the source type and confidence level
- Help users navigate multi-source documentation
5. **Practical "Working with This Skill" section**
- Clear guidance for beginners, intermediate, and advanced users
- Navigation tips for multi-source references
- How to resolve conflicts if present
6. **Key Concepts section** (if applicable)
- Explain core concepts
- Define important terminology
- Reconcile differences between sources if needed
7. **Conflict Handling** (if conflicts detected)
- Add a "Known Discrepancies" section
- Explain major conflicts transparently
- Provide guidance on which source to trust in each case
8. **Keep the frontmatter** (---\nname: ...\n---) intact
IMPORTANT:
- Extract REAL examples from the reference docs, don't make them up
- Prioritize HIGH CONFIDENCE sources when synthesizing
- Note source attribution when helpful (e.g., "Official docs say X, but codebase shows Y")
- Make discrepancies transparent, not hidden
- Prioritize SHORT, clear examples (5-20 lines max)
- Make it actionable and practical
- Don't be too verbose - be concise but useful
- Maintain the markdown structure for Claude skills
- Keep code examples properly formatted with language tags
OUTPUT:
Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
""")
        return "".join(parts)

    def save_enhanced_skill_md(self, content):
        """Write ``content`` to SKILL.md, backing up any existing file first.

        The previous SKILL.md is moved to ``SKILL.md.backup``. ``Path.replace``
        is used so that a backup left by an earlier run is overwritten on
        every platform (``Path.rename`` fails on Windows if the target exists).
        """
        if self.skill_md_path.exists():
            backup_path = self.skill_md_path.with_suffix(".md.backup")
            self.skill_md_path.replace(backup_path)
            print(f" 💾 Backed up original to: {backup_path.name}")
        self.skill_md_path.write_text(content, encoding="utf-8")
        print(" ✅ Saved enhanced SKILL.md")

    def run(self):
        """Run the full workflow: read references, enhance, save.

        Returns:
            True when an enhanced SKILL.md was written, False when no
            reference files were found or the enhancement failed.
        """
        print(f"\n{'=' * 60}")
        print(f"ENHANCING SKILL: {self.skill_dir.name}")
        print(f"{'=' * 60}\n")

        # Read reference files
        print("📖 Reading reference documentation...")
        references = read_reference_files(
            self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT
        )
        if not references:
            print("❌ No reference files found to analyze")
            return False

        # Summarise what was read
        sources_found = {metadata["source"] for metadata in references.values()}
        print(f" ✓ Read {len(references)} reference files")
        print(f" ✓ Sources: {', '.join(sorted(sources_found))}")
        total_size = sum(meta["size"] for meta in references.values())
        print(f" ✓ Total size: {total_size:,} characters\n")

        # Read current SKILL.md
        current_skill_md = self.read_current_skill_md()
        if current_skill_md:
            print(f" Found existing SKILL.md ({len(current_skill_md)} chars)")
        else:
            print(" No existing SKILL.md, will create new one")

        # Enhance with Claude
        enhanced = self.enhance_skill_md(references, current_skill_md)
        if not enhanced:
            print("❌ Enhancement failed")
            return False
        print(f" ✓ Generated enhanced SKILL.md ({len(enhanced)} chars)\n")

        # Save
        print("💾 Saving enhanced SKILL.md...")
        self.save_enhanced_skill_md(enhanced)
        print("\n✅ Enhancement complete!")
        print("\nNext steps:")
        print(f" 1. Review: {self.skill_md_path}")
        print(
            f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}"
        )
        print(" 3. Package your skill:")
        print(f" skill-seekers package {self.skill_dir}/")
        return True
def main():
    """Command-line entry point for ``skill-seekers enhance``.

    Parses the arguments, validates the skill directory and then either
    prints a dry-run summary or delegates the enhancement to the platform
    adaptor selected with ``--target``. Exits with status 0 on success and
    1 on any failure; returns normally only after a dry run.
    """
    parser = argparse.ArgumentParser(
        description="Enhance SKILL.md using platform AI APIs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Claude (default)
export ANTHROPIC_API_KEY=sk-ant-...
skill-seekers enhance output/react/
# Gemini
export GOOGLE_API_KEY=AIzaSy...
skill-seekers enhance output/react/ --target gemini
# OpenAI
export OPENAI_API_KEY=sk-proj-...
skill-seekers enhance output/react/ --target openai
# With explicit API key
skill-seekers enhance output/react/ --api-key sk-ant-...
# Dry run
skill-seekers enhance output/godot/ --dry-run
""",
    )
    parser.add_argument(
        "skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)"
    )
    parser.add_argument(
        "--api-key", type=str, help="Platform API key (or set environment variable)"
    )
    parser.add_argument(
        "--target",
        choices=["claude", "gemini", "openai"],
        default="claude",
        help="Target LLM platform (default: claude)",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Show what would be done without calling API"
    )
    options = parser.parse_args()

    # The positional argument must name an existing directory.
    skill_dir = Path(options.skill_dir)
    if not skill_dir.exists():
        print(f"❌ Error: Directory not found: {skill_dir}")
        sys.exit(1)
    if not skill_dir.is_dir():
        print(f"❌ Error: Not a directory: {skill_dir}")
        sys.exit(1)

    references_dir = skill_dir / "references"
    skill_md = skill_dir / "SKILL.md"

    # Dry run: describe the inputs and stop before touching any API.
    if options.dry_run:
        print("🔍 DRY RUN MODE")
        print(f" Would enhance: {skill_dir}")
        print(f" References: {references_dir}")
        print(f" SKILL.md: {skill_md}")
        if references_dir.exists():
            markdown_files = list(references_dir.glob("*.md"))
            print(f" Found {len(markdown_files)} reference files:")
            for md_file in markdown_files:
                print(f" - {md_file.name} ({md_file.stat().st_size:,} bytes)")
        print("\nTo actually run enhancement:")
        print(f" skill-seekers enhance {skill_dir}")
        return

    banner = "=" * 60
    try:
        # Imported here so a missing adaptor package is reported below
        # instead of breaking the dry-run path.
        from skill_seekers.cli.adaptors import get_adaptor

        adaptor = get_adaptor(options.target)
        if not adaptor.supports_enhancement():
            print(f"❌ Error: {adaptor.PLATFORM_NAME} does not support AI enhancement")
            print("\nSupported platforms for enhancement:")
            print(" - Claude AI (Anthropic)")
            print(" - Google Gemini")
            print(" - OpenAI ChatGPT")
            sys.exit(1)

        # Command-line key wins; otherwise fall back to the adaptor's env var.
        api_key = options.api_key
        if not api_key:
            env_var = adaptor.get_env_var_name()
            api_key = os.environ.get(env_var, "").strip()
            if not api_key:
                print(f"❌ Error: {env_var} not set")
                print(f"\nSet your API key for {adaptor.PLATFORM_NAME}:")
                print(f" export {env_var}=...")
                print("Or provide it directly:")
                print(f" skill-seekers enhance {skill_dir} --target {options.target} --api-key ...")
                sys.exit(1)

        # Run enhancement using adaptor
        print(f"\n{banner}")
        print(f"ENHANCING SKILL: {skill_dir}")
        print(f"Platform: {adaptor.PLATFORM_NAME}")
        print(f"{banner}\n")
        success = adaptor.enhance(Path(skill_dir), api_key)
        if success:
            print("\n✅ Enhancement complete!")
            print("\nNext steps:")
            print(f" 1. Review: {skill_md}")
            print(
                f" 2. If you don't like it, restore backup: {skill_dir / 'SKILL.md.backup'}"
            )
            print(" 3. Package your skill:")
            print(f" skill-seekers package {skill_dir}/ --target {options.target}")
        sys.exit(0 if success else 1)
    except ImportError as e:
        print(f"❌ Error: {e}")
        print("\nAdaptor system not available. Reinstall skill-seekers.")
        sys.exit(1)
    except ValueError as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary: show the traceback for unexpected failures.
        print(f"❌ Unexpected error: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
if __name__ == "__main__":
main()