feat(multi-llm): Phase 3 - OpenAI adaptor implementation

Implement OpenAI ChatGPT platform support (Issue #179, Phase 3/6)

**Features:**
- Assistant instructions format (plain text, no frontmatter)
- ZIP packaging for Assistants API
- Upload creates Assistant + Vector Store with file_search
- Enhancement using GPT-4o
- API key validation (sk- prefix)

**Implementation:**
- New: src/skill_seekers/cli/adaptors/openai.py (524 lines)
  - format_skill_md(): Assistant instructions format
  - package(): Creates .zip with assistant_instructions.txt + vector_store_files/
  - upload(): Creates Assistant with Vector Store via Assistants API
  - enhance(): Uses GPT-4o for enhancement
  - validate_api_key(): Checks OpenAI key format (sk-)

**Tests:**
- New: tests/test_adaptors/test_openai_adaptor.py (14 tests)
  - 12 passing unit tests
  - 2 skipped (integration tests requiring real API keys)
  - Tests: validation, formatting, packaging, vector store structure

**Test Summary:**
- Total adaptor tests: 37 (33 passing, 4 skipped)
- Base: 10 tests
- Claude: (integrated in base)
- Gemini: 13 tests (11 passing, 2 skipped)
- OpenAI: 14 tests (12 passing, 2 skipped)

**Next:** Phase 4 - Implement Markdown adaptor (generic export)
This commit is contained in:
yusyus
2025-12-28 20:29:54 +03:00
parent 7320da6a07
commit 9032232ac7
2 changed files with 715 additions and 0 deletions

View File

@@ -0,0 +1,524 @@
#!/usr/bin/env python3
"""
OpenAI ChatGPT Adaptor
Implements platform-specific handling for OpenAI ChatGPT Assistants.
Uses Assistants API with Vector Store for file search.
"""
import os
import zipfile
import json
from pathlib import Path
from typing import Dict, Any
from .base import SkillAdaptor, SkillMetadata
class OpenAIAdaptor(SkillAdaptor):
    """
    OpenAI ChatGPT platform adaptor.

    Handles:
    - Assistant instructions format (not YAML frontmatter)
    - ZIP packaging for Assistants API
    - Upload creates Assistant + Vector Store
    - AI enhancement using GPT-4o
    """

    # Short machine identifier and human-readable label for this platform.
    PLATFORM = "openai"
    PLATFORM_NAME = "OpenAI ChatGPT"
    # Assistants API URL. The methods visible in this file talk to OpenAI
    # through the `openai` client library and do not read this constant.
    DEFAULT_API_ENDPOINT = "https://api.openai.com/v1/assistants"
def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str:
    """
    Render the skill as plain-text instructions for an OpenAI Assistant.

    When the skill directory already holds more than 100 characters of
    content, that content is wrapped in an assistant preamble and a short
    "How to Assist Users" section. Otherwise a default instruction set is
    generated from the table of contents and quick reference helpers.

    Args:
        skill_dir: Path to skill directory
        metadata: Skill metadata (``name`` and ``description`` are used)

    Returns:
        Instruction text without any YAML frontmatter
    """
    prior_text = self._read_existing_content(skill_dir)
    has_substantial_text = bool(prior_text) and len(prior_text) > 100

    skill_name = metadata.name
    skill_description = metadata.description

    if has_substantial_text:
        # Existing content is kept verbatim and framed as instructions.
        return f"""You are an expert assistant for {skill_name}.
{skill_description}
Use the attached knowledge files to provide accurate, detailed answers about {skill_name}.
{prior_text}
## How to Assist Users
When users ask questions:
1. Search the knowledge files for relevant information
2. Provide clear, practical answers with code examples
3. Reference specific documentation sections when helpful
4. Be concise but thorough
Always prioritize accuracy by consulting the knowledge base before responding."""

    # Nothing substantial on disk yet: fall back to generated instructions.
    table_of_contents = self._generate_toc(skill_dir)
    quick_reference = self._extract_quick_reference(skill_dir)
    return f"""You are an expert assistant for {skill_name}.
{skill_description}
## Your Knowledge Base
You have access to comprehensive documentation files about {skill_name}. Use these files to provide accurate answers to user questions.
{table_of_contents}
## Quick Reference
{quick_reference}
## How to Assist Users
When users ask questions about {skill_name}:
1. **Search the knowledge files** - Use file_search to find relevant information
2. **Provide code examples** - Include practical, working code snippets
3. **Reference documentation** - Cite specific sections when helpful
4. **Be practical** - Focus on real-world usage and best practices
5. **Stay accurate** - Always verify information against the knowledge base
## Response Guidelines
- Keep answers clear and concise
- Use proper code formatting with language tags
- Provide both simple and detailed explanations as needed
- Suggest related topics when relevant
- Admit when information isn't in the knowledge base
Always prioritize accuracy by consulting the attached documentation files before responding."""
def package(self, skill_dir: Path, output_path: Path) -> Path:
    """
    Package skill into ZIP file for OpenAI Assistants.

    Creates OpenAI-compatible structure:
    - assistant_instructions.txt (copy of SKILL.md, when present)
    - vector_store_files/*.md (reference files, flattened into one folder)
    - openai_metadata.json (skill metadata)

    Args:
        skill_dir: Path to skill directory (``str`` or ``Path``)
        output_path: Output directory or filename (``str`` or ``Path``).
            A directory, or a string ending in a path separator, gets
            ``<skill>-openai.zip`` inside it; any other path that does not
            end in ``.zip`` gets ``.zip`` appended to its final component.

    Returns:
        Path to created ZIP file
    """
    skill_dir = Path(skill_dir)

    # Path() drops a trailing separator, so record the caller's intent first.
    wants_directory = str(output_path).endswith(('/', os.sep))
    output_path = Path(output_path)

    if wants_directory or output_path.is_dir():
        output_path = output_path / f"{skill_dir.name}-openai.zip"
    elif not output_path.name.endswith('.zip'):
        # Only the final component is touched; a ".zip" that appears in a
        # parent directory name must not be rewritten.
        output_path = output_path.parent / (output_path.name + '.zip')

    output_path.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        # SKILL.md becomes the assistant instructions
        skill_md = skill_dir / "SKILL.md"
        if skill_md.exists():
            zf.writestr(
                "assistant_instructions.txt",
                skill_md.read_text(encoding='utf-8'),
            )

        # references/**/*.md are flattened into vector_store_files/
        refs_dir = skill_dir / "references"
        if refs_dir.exists():
            # Shallowest files first so they keep their plain name; the
            # secondary key makes the archive order deterministic.
            ref_files = sorted(
                refs_dir.rglob("*.md"),
                key=lambda p: (len(p.relative_to(refs_dir).parts), p.as_posix()),
            )
            used_names = set()
            for ref_file in ref_files:
                if not ref_file.is_file() or ref_file.name.startswith('.'):
                    continue
                flat_name = ref_file.name
                if flat_name in used_names:
                    # Same basename in another sub-directory: fold the
                    # relative path into the name instead of writing a
                    # duplicate archive entry.
                    flat_name = "__".join(ref_file.relative_to(refs_dir).parts)
                used_names.add(flat_name)
                zf.write(ref_file, f"vector_store_files/{flat_name}")

        # Metadata that upload() reads back for the assistant name and model
        metadata = {
            'platform': 'openai',
            'name': skill_dir.name,
            'version': '1.0.0',
            'created_with': 'skill-seekers',
            'model': 'gpt-4o',
            'tools': ['file_search']
        }
        zf.writestr("openai_metadata.json", json.dumps(metadata, indent=2))

    return output_path
def upload(self, package_path: Path, api_key: str, **kwargs) -> Dict[str, Any]:
    """
    Upload skill ZIP to OpenAI Assistants API.

    Creates:
    1. Vector Store with reference files
    2. Assistant with file_search tool

    Args:
        package_path: Path to skill ZIP file
        api_key: OpenAI API key
        **kwargs: Additional arguments. ``model`` overrides the model named
            in the package metadata; without it the metadata value is used,
            falling back to ``gpt-4o``.

    Returns:
        Dictionary with keys ``success``, ``skill_id``, ``url`` and
        ``message``. Failures are reported through this dictionary rather
        than raised.
    """
    def failure(message: str) -> Dict[str, Any]:
        # Single definition of the failure payload shape.
        return {
            'success': False,
            'skill_id': None,
            'url': None,
            'message': message
        }

    # Validate package file FIRST so these checks work without the library
    package_path = Path(package_path)
    if not package_path.exists():
        return failure(f'File not found: {package_path}')
    if package_path.suffix != '.zip':
        return failure(f'Not a ZIP file: {package_path}')

    # Check for openai library
    try:
        from openai import OpenAI
    except ImportError:
        return failure('openai library not installed. Run: pip install openai')

    import tempfile

    try:
        client = OpenAI(api_key=api_key)

        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(package_path, 'r') as zf:
                zf.extractall(temp_dir)
            temp_path = Path(temp_dir)

            # Read instructions
            instructions_file = temp_path / "assistant_instructions.txt"
            if not instructions_file.exists():
                return failure('Invalid package: assistant_instructions.txt not found')
            instructions = instructions_file.read_text(encoding='utf-8')

            # Read metadata (written as UTF-8 JSON by package())
            skill_name = package_path.stem
            model = 'gpt-4o'
            metadata_file = temp_path / "openai_metadata.json"
            if metadata_file.exists():
                with open(metadata_file, 'r', encoding='utf-8') as f:
                    metadata = json.load(f)
                skill_name = metadata.get('name', skill_name)
                model = metadata.get('model', model)
            # An explicit caller choice must win over the packaged default;
            # package() always records a model, so applying the metadata
            # last would make the keyword argument unreachable.
            model = kwargs.get('model') or model

            # NOTE(review): newer openai SDK releases appear to expose
            # vector stores at client.vector_stores instead of
            # client.beta.vector_stores - confirm against the pinned SDK.
            vector_stores = getattr(client, 'vector_stores', None)
            if vector_stores is None:
                vector_stores = client.beta.vector_stores

            # Create vector store
            vector_store = vector_stores.create(
                name=f"{skill_name} Documentation"
            )

            # Upload reference files
            vector_files_dir = temp_path / "vector_store_files"
            file_ids = []
            if vector_files_dir.exists():
                for ref_file in vector_files_dir.glob("*.md"):
                    with open(ref_file, 'rb') as f:
                        uploaded_file = client.files.create(
                            file=f,
                            purpose='assistants'
                        )
                    file_ids.append(uploaded_file.id)

            # Attach files to vector store
            if file_ids:
                vector_stores.files.create_batch(
                    vector_store_id=vector_store.id,
                    file_ids=file_ids
                )

            # Create assistant wired to the vector store
            assistant = client.beta.assistants.create(
                name=skill_name,
                instructions=instructions,
                model=model,
                tools=[{"type": "file_search"}],
                tool_resources={
                    "file_search": {
                        "vector_store_ids": [vector_store.id]
                    }
                }
            )

            return {
                'success': True,
                'skill_id': assistant.id,
                'url': f"https://platform.openai.com/assistants/{assistant.id}",
                'message': f'Assistant created with {len(file_ids)} knowledge files'
            }
    except Exception as e:
        # Boundary handler: any archive or API failure becomes a result dict.
        return failure(f'Upload failed: {str(e)}')
def validate_api_key(self, api_key: str) -> bool:
    """
    Validate OpenAI API key format.

    Only the prefix is checked; no request is made to OpenAI.

    Args:
        api_key: API key to validate. ``None`` or any non-string value
            (for example an unset environment variable) is rejected.

    Returns:
        True if the key, ignoring surrounding whitespace, starts with 'sk-'
    """
    if not isinstance(api_key, str):
        # A missing key is an invalid key, not a crash.
        return False
    return api_key.strip().startswith('sk-')
def get_env_var_name(self) -> str:
    """
    Name the environment variable that holds the OpenAI API key.

    Returns:
        'OPENAI_API_KEY'
    """
    env_var = "OPENAI_API_KEY"
    return env_var
def supports_enhancement(self) -> bool:
    """
    Report whether this platform offers AI enhancement (it does, via GPT-4o).

    Returns:
        True
    """
    enhancement_available = True
    return enhancement_available
def enhance(self, skill_dir: Path, api_key: str) -> bool:
    """
    Enhance SKILL.md using GPT-4o API.

    Reads the reference files, asks GPT-4o for improved instructions and
    writes them to SKILL.md. An existing SKILL.md is first moved to
    SKILL.md.backup, replacing any earlier backup. Progress and errors are
    printed; nothing is raised to the caller.

    Args:
        skill_dir: Path to skill directory
        api_key: OpenAI API key

    Returns:
        True if enhancement succeeded, False if the library is missing,
        no references were found, the API call failed, the model returned
        no text, or the result could not be saved.
    """
    # Check for openai library
    try:
        from openai import OpenAI
    except ImportError:
        print("❌ Error: openai package not installed")
        print("Install with: pip install openai")
        return False

    skill_dir = Path(skill_dir)
    references_dir = skill_dir / "references"
    skill_md_path = skill_dir / "SKILL.md"

    # Read reference files
    print("📖 Reading reference documentation...")
    references = self._read_reference_files(references_dir)
    if not references:
        print("❌ No reference files found to analyze")
        return False
    print(f" ✓ Read {len(references)} reference files")
    total_size = sum(len(c) for c in references.values())
    print(f" ✓ Total size: {total_size:,} characters\n")

    # Read current SKILL.md
    current_skill_md = None
    if skill_md_path.exists():
        current_skill_md = skill_md_path.read_text(encoding='utf-8')
        print(f" Found existing SKILL.md ({len(current_skill_md)} chars)")
    else:
        print(" No existing SKILL.md, will create new one")

    # Build enhancement prompt
    prompt = self._build_enhancement_prompt(
        skill_dir.name,
        references,
        current_skill_md
    )
    print("\n🤖 Asking GPT-4o to enhance SKILL.md...")
    print(f" Input: {len(prompt):,} characters")

    try:
        client = OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert technical writer creating Assistant instructions for OpenAI ChatGPT."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=0.3,
            max_tokens=4096
        )
        enhanced_content = response.choices[0].message.content
    except Exception as e:
        print(f"❌ Error calling OpenAI API: {e}")
        return False

    # Never replace a working SKILL.md with nothing.
    if not enhanced_content or not enhanced_content.strip():
        print("❌ OpenAI returned an empty response; SKILL.md left unchanged")
        return False
    print(f" ✓ Generated enhanced SKILL.md ({len(enhanced_content)} chars)\n")

    try:
        # Backup original. Path.replace overwrites an earlier backup on
        # every platform, whereas Path.rename fails on Windows when the
        # target already exists.
        if skill_md_path.exists():
            backup_path = skill_md_path.with_suffix('.md.backup')
            skill_md_path.replace(backup_path)
            print(f" 💾 Backed up original to: {backup_path.name}")

        # Save enhanced version
        skill_md_path.write_text(enhanced_content, encoding='utf-8')
    except OSError as e:
        # Reported separately so a disk problem is not blamed on the API.
        print(f"❌ Error saving enhanced SKILL.md: {e}")
        return False

    print(" ✅ Saved enhanced SKILL.md")
    return True
def _read_reference_files(
    self,
    references_dir: Path,
    max_chars: int = 200000,
    max_file_chars: int = 30000
) -> Dict[str, str]:
    """
    Read reference markdown files from skill directory.

    Files directly inside ``references_dir`` are read in sorted order.
    Each file is cut to ``max_file_chars`` and the running total is held
    to ``max_chars``; cut content ends with a "...(truncated)" marker,
    which is the only text that can take the total past the budget.

    Args:
        references_dir: Path to references directory
        max_chars: Maximum total characters to read
        max_file_chars: Maximum characters kept from a single file

    Returns:
        Dictionary mapping filename to content. Empty when the directory
        does not exist. Unreadable files are reported and skipped.
    """
    references_dir = Path(references_dir)
    if not references_dir.exists():
        return {}

    references: Dict[str, str] = {}
    remaining = max_chars

    for ref_file in sorted(references_dir.glob("*.md")):
        if remaining <= 0:
            break
        try:
            content = ref_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: one bad file must not abort the whole read.
            print(f" ⚠️ Could not read {ref_file.name}: {e}")
            continue

        # Apply whichever is tighter: the per-file limit or what is left
        # of the total budget.
        limit = min(max_file_chars, remaining)
        if len(content) > limit:
            content = content[:limit] + "\n\n...(truncated)"

        references[ref_file.name] = content
        remaining -= len(content)

    return references
def _build_enhancement_prompt(
    self,
    skill_name: str,
    references: Dict[str, str],
    current_skill_md: str = None
) -> str:
    """
    Assemble the user prompt sent to GPT-4o for enhancement.

    The prompt has three parts: a header quoting the current instructions
    (or a placeholder when there are none), one fenced markdown section per
    reference file (each cut to 30000 characters), and the task description.

    Args:
        skill_name: Name of the skill
        references: Dictionary mapping reference filename to its content
        current_skill_md: Existing SKILL.md content (optional)

    Returns:
        Enhancement prompt for GPT-4o
    """
    if current_skill_md:
        # Existing instructions are shown inside a code fence.
        opening_line = '```'
        instructions_text = current_skill_md
        closing_line = '```'
    else:
        opening_line = '(none - create from scratch)'
        instructions_text = 'No existing instructions'
        closing_line = ''

    header = f"""You are creating Assistant instructions for an OpenAI ChatGPT Assistant about: {skill_name}
I've scraped documentation and organized it into reference files. Your job is to create EXCELLENT Assistant instructions that will help the Assistant use this documentation effectively.
CURRENT INSTRUCTIONS:
{opening_line}
{instructions_text}
{closing_line}
REFERENCE DOCUMENTATION:
"""

    reference_sections = [
        f"\n\n## {filename}\n```markdown\n{content[:30000]}\n```\n"
        for filename, content in references.items()
    ]

    task = """
YOUR TASK:
Create enhanced Assistant instructions that include:
1. **Clear role definition** - "You are an expert assistant for [topic]"
2. **Knowledge base description** - What documentation is attached
3. **Excellent Quick Reference** - Extract 5-10 of the BEST, most practical code examples from the reference docs
- Choose SHORT, clear examples that demonstrate common tasks
- Include both simple and intermediate examples
- Annotate examples with clear descriptions
- Use proper language tags (cpp, python, javascript, json, etc.)
4. **Response guidelines** - How the Assistant should help users
5. **Search strategy** - When to use file_search, how to find information
6. **DO NOT use YAML frontmatter** - This is plain text instructions for OpenAI
IMPORTANT:
- Extract REAL examples from the reference docs, don't make them up
- Prioritize SHORT, clear examples (5-20 lines max)
- Make it actionable and practical for the Assistant
- Write clear, direct instructions
- Focus on how the Assistant should behave and respond
- NO YAML frontmatter (no --- blocks)
OUTPUT:
Return ONLY the complete Assistant instructions as plain text.
"""

    return header + "".join(reference_sections) + task

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python3
"""
Tests for OpenAI adaptor
"""
import unittest
from unittest.mock import patch, MagicMock
from pathlib import Path
import tempfile
import zipfile
from skill_seekers.cli.adaptors import get_adaptor
from skill_seekers.cli.adaptors.base import SkillMetadata
class TestOpenAIAdaptor(unittest.TestCase):
    """Unit tests for the OpenAI adaptor that need no network or API key."""

    def setUp(self):
        """Create a fresh adaptor through the public factory."""
        self.adaptor = get_adaptor('openai')

    def test_platform_info(self):
        """Test platform identifiers"""
        self.assertEqual(self.adaptor.PLATFORM, 'openai')
        self.assertEqual(self.adaptor.PLATFORM_NAME, 'OpenAI ChatGPT')
        self.assertIsNotNone(self.adaptor.DEFAULT_API_ENDPOINT)

    def test_validate_api_key_valid(self):
        """Test valid OpenAI API keys"""
        self.assertTrue(self.adaptor.validate_api_key('sk-proj-abc123'))
        self.assertTrue(self.adaptor.validate_api_key('sk-abc123'))
        self.assertTrue(self.adaptor.validate_api_key(' sk-test '))  # with whitespace

    def test_validate_api_key_invalid(self):
        """Test invalid API keys"""
        self.assertFalse(self.adaptor.validate_api_key('AIzaSyABC123'))  # Gemini key
        # Note: Claude keys (sk-ant-*) share the 'sk-' prefix, so the
        # prefix check cannot tell them apart from OpenAI keys.
        self.assertFalse(self.adaptor.validate_api_key('invalid'))
        self.assertFalse(self.adaptor.validate_api_key(''))

    def test_get_env_var_name(self):
        """Test environment variable name"""
        self.assertEqual(self.adaptor.get_env_var_name(), 'OPENAI_API_KEY')

    def test_supports_enhancement(self):
        """Test enhancement support"""
        self.assertTrue(self.adaptor.supports_enhancement())

    def test_format_skill_md_no_frontmatter(self):
        """Test that OpenAI format has no YAML frontmatter"""
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_dir = Path(temp_dir)
            # Create minimal skill structure
            (skill_dir / "references").mkdir()
            (skill_dir / "references" / "test.md").write_text("# Test content")
            metadata = SkillMetadata(
                name="test-skill",
                description="Test skill description"
            )
            formatted = self.adaptor.format_skill_md(skill_dir, metadata)
            # Should NOT start with YAML frontmatter
            self.assertFalse(formatted.startswith('---'))
            # Should contain assistant-style instructions
            self.assertIn('You are an expert assistant', formatted)
            self.assertIn('test-skill', formatted)
            self.assertIn('Test skill description', formatted)

    def test_package_creates_zip(self):
        """Test that package creates ZIP file with correct structure"""
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_dir = Path(temp_dir) / "test-skill"
            skill_dir.mkdir()
            # Create minimal skill structure
            (skill_dir / "SKILL.md").write_text("You are an expert assistant")
            (skill_dir / "references").mkdir()
            (skill_dir / "references" / "test.md").write_text("# Reference")
            output_dir = Path(temp_dir) / "output"
            output_dir.mkdir()
            # Package skill
            package_path = self.adaptor.package(skill_dir, output_dir)
            # Verify package was created
            self.assertTrue(package_path.exists())
            self.assertTrue(str(package_path).endswith('.zip'))
            self.assertIn('openai', package_path.name)
            # Verify package contents
            with zipfile.ZipFile(package_path, 'r') as zf:
                names = zf.namelist()
                self.assertIn('assistant_instructions.txt', names)
                self.assertIn('openai_metadata.json', names)
                # Should have vector store files
                self.assertTrue(any('vector_store_files' in name for name in names))

    def test_upload_missing_library(self):
        """Test upload when openai library is not installed"""
        with tempfile.NamedTemporaryFile(suffix='.zip') as tmp:
            # A None entry in sys.modules makes `from openai import OpenAI`
            # raise ImportError, so the result no longer depends on whether
            # the package is installed where the tests run.
            with patch.dict('sys.modules', {'openai': None}):
                result = self.adaptor.upload(Path(tmp.name), 'sk-test123')
            self.assertFalse(result['success'])
            self.assertIn('openai', result['message'])
            self.assertIn('not installed', result['message'])

    def test_upload_invalid_file(self):
        """Test upload with invalid file"""
        result = self.adaptor.upload(Path('/nonexistent/file.zip'), 'sk-test123')
        self.assertFalse(result['success'])
        self.assertIn('not found', result['message'].lower())

    def test_upload_wrong_format(self):
        """Test upload with wrong file format"""
        with tempfile.NamedTemporaryFile(suffix='.tar.gz') as tmp:
            result = self.adaptor.upload(Path(tmp.name), 'sk-test123')
            self.assertFalse(result['success'])
            self.assertIn('not a zip', result['message'].lower())

    @unittest.skip("Complex mocking - integration test needed with real API")
    def test_upload_success(self):
        """Test successful upload to OpenAI - skipped (needs real API for integration test)"""
        pass

    @unittest.skip("Complex mocking - integration test needed with real API")
    def test_enhance_success(self):
        """Test successful enhancement - skipped (needs real API for integration test)"""
        pass

    def test_enhance_missing_library(self):
        """Test enhance when openai library is not installed"""
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_dir = Path(temp_dir)
            refs_dir = skill_dir / "references"
            refs_dir.mkdir()
            (refs_dir / "test.md").write_text("Test")
            # Block the import explicitly; with the real package installed
            # this test would otherwise send a request to the OpenAI API.
            with patch.dict('sys.modules', {'openai': None}):
                success = self.adaptor.enhance(skill_dir, 'sk-test123')
            self.assertFalse(success)

    def test_package_includes_instructions(self):
        """Test that packaged ZIP includes assistant instructions"""
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_dir = Path(temp_dir) / "test-skill"
            skill_dir.mkdir()
            # Create SKILL.md
            skill_md_content = "You are an expert assistant for testing."
            (skill_dir / "SKILL.md").write_text(skill_md_content)
            # Create references
            refs_dir = skill_dir / "references"
            refs_dir.mkdir()
            (refs_dir / "guide.md").write_text("# User Guide")
            output_dir = Path(temp_dir) / "output"
            output_dir.mkdir()
            # Package
            package_path = self.adaptor.package(skill_dir, output_dir)
            # Verify contents
            with zipfile.ZipFile(package_path, 'r') as zf:
                # Read instructions
                instructions = zf.read('assistant_instructions.txt').decode('utf-8')
                self.assertEqual(instructions, skill_md_content)
                # Verify vector store file
                self.assertIn('vector_store_files/guide.md', zf.namelist())
                # Verify metadata
                metadata_content = zf.read('openai_metadata.json').decode('utf-8')
                import json
                metadata = json.loads(metadata_content)
                self.assertEqual(metadata['platform'], 'openai')
                self.assertEqual(metadata['name'], 'test-skill')
                self.assertIn('file_search', metadata['tools'])
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()