Merge fix/issue-219-complete-fix into development
Complete fix for Issue #219 - all three problems resolved. ✅ Problem #1: Large file download via download_url. ✅ Problem #2: CLI enhancement flags working. ✅ Problem #3: Custom API endpoint support. Tests: 40/40 passing (31 unit + 9 E2E). Fixes #219.
This commit is contained in:
@@ -41,15 +41,24 @@ class SkillEnhancer:
|
||||
self.references_dir = self.skill_dir / "references"
|
||||
self.skill_md_path = self.skill_dir / "SKILL.md"
|
||||
|
||||
# Get API key
|
||||
self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY')
|
||||
# Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
|
||||
self.api_key = (api_key or
|
||||
os.environ.get('ANTHROPIC_API_KEY') or
|
||||
os.environ.get('ANTHROPIC_AUTH_TOKEN'))
|
||||
if not self.api_key:
|
||||
raise ValueError(
|
||||
"No API key provided. Set ANTHROPIC_API_KEY environment variable "
|
||||
"or use --api-key argument"
|
||||
"No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
|
||||
"environment variable or use --api-key argument"
|
||||
)
|
||||
|
||||
self.client = anthropic.Anthropic(api_key=self.api_key)
|
||||
# Support custom base URL for alternative API endpoints
|
||||
base_url = os.environ.get('ANTHROPIC_BASE_URL')
|
||||
client_kwargs = {'api_key': self.api_key}
|
||||
if base_url:
|
||||
client_kwargs['base_url'] = base_url
|
||||
print(f"ℹ️ Using custom API base URL: {base_url}")
|
||||
|
||||
self.client = anthropic.Anthropic(**client_kwargs)
|
||||
|
||||
def read_current_skill_md(self):
|
||||
"""Read existing SKILL.md"""
|
||||
@@ -77,7 +86,18 @@ class SkillEnhancer:
|
||||
}]
|
||||
)
|
||||
|
||||
enhanced_content = message.content[0].text
|
||||
# Handle response content - newer SDK versions may include ThinkingBlock
|
||||
# Find the TextBlock containing the actual response
|
||||
enhanced_content = None
|
||||
for block in message.content:
|
||||
if hasattr(block, 'text'):
|
||||
enhanced_content = block.text
|
||||
break
|
||||
|
||||
if not enhanced_content:
|
||||
print("❌ Error: No text content found in API response")
|
||||
return None
|
||||
|
||||
return enhanced_content
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -325,6 +325,78 @@ class GitHubScraper:
|
||||
raise ValueError(f"Repository not found: {self.repo_name}")
|
||||
raise
|
||||
|
||||
def _get_file_content(self, file_path: str) -> Optional[str]:
    """
    Safely get file content, handling symlinks, large files and encoding issues.

    Args:
        file_path: Path to file in repository

    Returns:
        File content as string, or None if the path is missing, is a
        directory, is a symlink that cannot be followed, cannot be
        downloaded, or cannot be decoded.
    """
    try:
        content = self.repo.get_contents(file_path)
        if not content:
            return None

        # get_contents() returns a list of entries for a directory path;
        # there is no single file body to read in that case.
        if isinstance(content, list):
            logger.debug(f"Path {file_path} is a directory, not a file")
            return None

        # Handle symlinks - follow the target to get actual file
        if getattr(content, 'type', None) == 'symlink':
            raw_target = getattr(content, 'target', None)
            target = raw_target.strip() if raw_target else ''
            if not target:
                logger.warning(f"Symlink {file_path} has no target")
                return None

            # A symlink target is relative to the directory that holds the
            # link, while get_contents() expects a path from the repo root.
            # Root-level links (dirname == '') are left unchanged by this.
            if not target.startswith('/'):
                import posixpath
                target = posixpath.normpath(
                    posixpath.join(posixpath.dirname(file_path), target)
                )

            logger.debug(f"File {file_path} is a symlink to {target}, following...")
            try:
                content = self.repo.get_contents(target)
            except GithubException as e:
                logger.warning(f"Failed to follow symlink {file_path} -> {target}: {e}")
                return None

        # Handle large files (encoding="none") - download via URL
        # GitHub API doesn't base64-encode files >1MB
        if hasattr(content, 'encoding') and content.encoding in [None, "none"]:
            download_url = getattr(content, 'download_url', None)
            file_size = getattr(content, 'size', 0)

            if not download_url:
                logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})")
                return None

            logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...")
            try:
                import requests
                response = requests.get(download_url, timeout=30)
                response.raise_for_status()
                return response.text
            except Exception as e:
                # Best effort: a failed download must not abort the scrape.
                logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
                return None

        # Handle regular files - decode content.
        # Read decoded_content once: the property decodes on every access
        # and raises for encodings it does not understand.
        try:
            raw = content.decoded_content
        except (AttributeError, LookupError, AssertionError) as e:
            logger.warning(f"Encoding issue with {file_path}: {e}")
            return None

        if not isinstance(raw, bytes):
            return str(raw)

        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError as e:
            logger.warning(f"Encoding issue with {file_path}: {e}")
            # latin-1 maps every byte value, so this fallback cannot fail.
            return raw.decode('latin-1')

    except GithubException:
        # Missing file / API error: callers treat None as "not found".
        return None
    except Exception as e:
        logger.warning(f"Error reading {file_path}: {e}")
        return None
|
||||
|
||||
def _extract_readme(self):
|
||||
"""C1.2: Extract README.md files."""
|
||||
logger.info("Extracting README...")
|
||||
@@ -334,24 +406,21 @@ class GitHubScraper:
|
||||
'docs/README.md', '.github/README.md']
|
||||
|
||||
for readme_path in readme_files:
|
||||
try:
|
||||
content = self.repo.get_contents(readme_path)
|
||||
if content:
|
||||
self.extracted_data['readme'] = content.decoded_content.decode('utf-8')
|
||||
logger.info(f"README found: {readme_path}")
|
||||
readme_content = self._get_file_content(readme_path)
|
||||
if readme_content:
|
||||
self.extracted_data['readme'] = readme_content
|
||||
logger.info(f"README found: {readme_path}")
|
||||
|
||||
# Update description if not explicitly set in config
|
||||
if 'description' not in self.config:
|
||||
smart_description = extract_description_from_readme(
|
||||
self.extracted_data['readme'],
|
||||
self.repo_name
|
||||
)
|
||||
self.description = smart_description
|
||||
logger.debug(f"Generated description: {self.description}")
|
||||
# Update description if not explicitly set in config
|
||||
if 'description' not in self.config:
|
||||
smart_description = extract_description_from_readme(
|
||||
self.extracted_data['readme'],
|
||||
self.repo_name
|
||||
)
|
||||
self.description = smart_description
|
||||
logger.debug(f"Generated description: {self.description}")
|
||||
|
||||
return
|
||||
except GithubException:
|
||||
continue
|
||||
return
|
||||
|
||||
logger.warning("No README found in repository")
|
||||
|
||||
@@ -666,35 +735,11 @@ class GitHubScraper:
|
||||
'docs/CHANGELOG.md', '.github/CHANGELOG.md']
|
||||
|
||||
for changelog_path in changelog_files:
|
||||
try:
|
||||
content = self.repo.get_contents(changelog_path)
|
||||
if content:
|
||||
# decoded_content is already bytes, decode to string
|
||||
# Handle potential encoding issues gracefully
|
||||
try:
|
||||
if isinstance(content.decoded_content, bytes):
|
||||
changelog_text = content.decoded_content.decode('utf-8')
|
||||
else:
|
||||
# Already a string
|
||||
changelog_text = str(content.decoded_content)
|
||||
except (UnicodeDecodeError, AttributeError, LookupError) as e:
|
||||
# Try alternative encodings or skip this file
|
||||
logger.warning(f"Encoding issue with {changelog_path}: {e}, trying latin-1")
|
||||
try:
|
||||
changelog_text = content.decoded_content.decode('latin-1')
|
||||
except Exception:
|
||||
logger.warning(f"Could not decode {changelog_path}, skipping")
|
||||
continue
|
||||
|
||||
self.extracted_data['changelog'] = changelog_text
|
||||
logger.info(f"CHANGELOG found: {changelog_path}")
|
||||
return
|
||||
except GithubException:
|
||||
continue
|
||||
except Exception as e:
|
||||
# Catch any other errors (like "unsupported encoding: none")
|
||||
logger.warning(f"Error reading {changelog_path}: {e}")
|
||||
continue
|
||||
changelog_content = self._get_file_content(changelog_path)
|
||||
if changelog_content:
|
||||
self.extracted_data['changelog'] = changelog_content
|
||||
logger.info(f"CHANGELOG found: {changelog_path}")
|
||||
return
|
||||
|
||||
logger.warning("No CHANGELOG found in repository")
|
||||
|
||||
|
||||
@@ -99,6 +99,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
|
||||
github_parser.add_argument("--name", help="Skill name")
|
||||
github_parser.add_argument("--description", help="Skill description")
|
||||
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
|
||||
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
|
||||
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
|
||||
|
||||
# === pdf subcommand ===
|
||||
pdf_parser = subparsers.add_parser(
|
||||
@@ -274,6 +277,12 @@ def main(argv: Optional[List[str]] = None) -> int:
|
||||
sys.argv.extend(["--name", args.name])
|
||||
if args.description:
|
||||
sys.argv.extend(["--description", args.description])
|
||||
if args.enhance:
|
||||
sys.argv.append("--enhance")
|
||||
if args.enhance_local:
|
||||
sys.argv.append("--enhance-local")
|
||||
if args.api_key:
|
||||
sys.argv.extend(["--api-key", args.api_key])
|
||||
return github_main() or 0
|
||||
|
||||
elif args.command == "pdf":
|
||||
|
||||
@@ -680,6 +680,288 @@ class TestGitHubToSkillConverter(unittest.TestCase):
|
||||
self.assertTrue((skill_dir / 'references').exists())
|
||||
|
||||
|
||||
class TestSymlinkHandling(unittest.TestCase):
|
||||
"""Test symlink handling (Issue #225)"""
|
||||
|
||||
def setUp(self):
|
||||
if not PYGITHUB_AVAILABLE:
|
||||
self.skipTest("PyGithub not installed")
|
||||
from skill_seekers.cli.github_scraper import GitHubScraper
|
||||
self.GitHubScraper = GitHubScraper
|
||||
|
||||
def test_get_file_content_regular_file(self):
|
||||
"""Test _get_file_content with regular file"""
|
||||
config = {
|
||||
'repo': 'facebook/react',
|
||||
'name': 'react',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock regular file
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'base64'
|
||||
mock_content.decoded_content = b'# React\n\nA JavaScript library'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
result = scraper._get_file_content('README.md')
|
||||
|
||||
self.assertEqual(result, '# React\n\nA JavaScript library')
|
||||
scraper.repo.get_contents.assert_called_once_with('README.md')
|
||||
|
||||
def test_get_file_content_symlink(self):
|
||||
"""Test _get_file_content with symlink file"""
|
||||
config = {
|
||||
'repo': 'vercel/ai',
|
||||
'name': 'ai',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock symlink
|
||||
mock_symlink = Mock()
|
||||
mock_symlink.type = 'symlink'
|
||||
mock_symlink.encoding = None
|
||||
mock_symlink.target = 'packages/ai/README.md'
|
||||
|
||||
# Create mock target file
|
||||
mock_target = Mock()
|
||||
mock_target.type = 'file'
|
||||
mock_target.encoding = 'base64'
|
||||
mock_target.decoded_content = b'# AI SDK\n\nReal content from symlink target'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
|
||||
# First call returns symlink, second call returns target
|
||||
scraper.repo.get_contents.side_effect = [mock_symlink, mock_target]
|
||||
|
||||
result = scraper._get_file_content('README.md')
|
||||
|
||||
self.assertEqual(result, '# AI SDK\n\nReal content from symlink target')
|
||||
# Should have called get_contents twice: once for symlink, once for target
|
||||
self.assertEqual(scraper.repo.get_contents.call_count, 2)
|
||||
scraper.repo.get_contents.assert_any_call('README.md')
|
||||
scraper.repo.get_contents.assert_any_call('packages/ai/README.md')
|
||||
|
||||
def test_get_file_content_broken_symlink(self):
|
||||
"""Test _get_file_content with broken symlink"""
|
||||
config = {
|
||||
'repo': 'test/repo',
|
||||
'name': 'test',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock symlink with broken target
|
||||
mock_symlink = Mock()
|
||||
mock_symlink.type = 'symlink'
|
||||
mock_symlink.encoding = None
|
||||
mock_symlink.target = 'nonexistent/file.md'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
|
||||
# First call returns symlink, second call raises 404
|
||||
scraper.repo.get_contents.side_effect = [
|
||||
mock_symlink,
|
||||
GithubException(404, 'Not found')
|
||||
]
|
||||
|
||||
result = scraper._get_file_content('README.md')
|
||||
|
||||
# Should return None gracefully
|
||||
self.assertIsNone(result)
|
||||
|
||||
def test_get_file_content_symlink_no_target(self):
|
||||
"""Test _get_file_content with symlink that has no target attribute"""
|
||||
config = {
|
||||
'repo': 'test/repo',
|
||||
'name': 'test',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock symlink without target
|
||||
mock_symlink = Mock()
|
||||
mock_symlink.type = 'symlink'
|
||||
mock_symlink.encoding = None
|
||||
mock_symlink.target = None
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_symlink
|
||||
|
||||
result = scraper._get_file_content('README.md')
|
||||
|
||||
# Should return None gracefully
|
||||
self.assertIsNone(result)
|
||||
|
||||
def test_extract_readme_with_symlink(self):
|
||||
"""Test README extraction with symlinked README.md (Integration test for Issue #225)"""
|
||||
config = {
|
||||
'repo': 'vercel/ai',
|
||||
'name': 'ai',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock symlink
|
||||
mock_symlink = Mock()
|
||||
mock_symlink.type = 'symlink'
|
||||
mock_symlink.encoding = None
|
||||
mock_symlink.target = 'packages/ai/README.md'
|
||||
|
||||
# Create mock target file
|
||||
mock_target = Mock()
|
||||
mock_target.type = 'file'
|
||||
mock_target.encoding = 'base64'
|
||||
mock_target.decoded_content = b'# AI SDK\n\nThe AI SDK is a TypeScript toolkit'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.side_effect = [mock_symlink, mock_target]
|
||||
|
||||
scraper._extract_readme()
|
||||
|
||||
# Should successfully extract README content
|
||||
self.assertIn('readme', scraper.extracted_data)
|
||||
self.assertEqual(
|
||||
scraper.extracted_data['readme'],
|
||||
'# AI SDK\n\nThe AI SDK is a TypeScript toolkit'
|
||||
)
|
||||
|
||||
def test_extract_changelog_with_symlink(self):
|
||||
"""Test CHANGELOG extraction with symlinked CHANGELOG.md"""
|
||||
config = {
|
||||
'repo': 'test/repo',
|
||||
'name': 'test',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock symlink
|
||||
mock_symlink = Mock()
|
||||
mock_symlink.type = 'symlink'
|
||||
mock_symlink.encoding = None
|
||||
mock_symlink.target = 'docs/CHANGELOG.md'
|
||||
|
||||
# Create mock target file
|
||||
mock_target = Mock()
|
||||
mock_target.type = 'file'
|
||||
mock_target.encoding = 'base64'
|
||||
mock_target.decoded_content = b'# Changelog\n\n## v1.0.0\n- Initial release'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.side_effect = [mock_symlink, mock_target]
|
||||
|
||||
scraper._extract_changelog()
|
||||
|
||||
# Should successfully extract CHANGELOG content
|
||||
self.assertIn('changelog', scraper.extracted_data)
|
||||
self.assertIn('Initial release', scraper.extracted_data['changelog'])
|
||||
|
||||
def test_get_file_content_encoding_error(self):
|
||||
"""Test _get_file_content handles encoding errors gracefully"""
|
||||
config = {
|
||||
'repo': 'test/repo',
|
||||
'name': 'test',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock file with invalid UTF-8 content
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'base64'
|
||||
# Mock decoded_content that can't be decoded as UTF-8
|
||||
mock_content.decoded_content = b'\xff\xfe Invalid UTF-8'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
# Should try latin-1 fallback
|
||||
result = scraper._get_file_content('README.md')
|
||||
|
||||
# Should not crash (will try latin-1 fallback)
|
||||
self.assertIsNotNone(result)
|
||||
|
||||
def test_get_file_content_large_file(self):
|
||||
"""Test _get_file_content handles large files with encoding='none' (Issue #219)"""
|
||||
config = {
|
||||
'repo': 'ccxt/ccxt',
|
||||
'name': 'ccxt',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock large file (encoding="none")
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'none' # Large files have encoding="none"
|
||||
mock_content.size = 1388271 # 1.4MB CHANGELOG
|
||||
mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
# Mock requests.get
|
||||
with patch('requests.get') as mock_requests:
|
||||
mock_response = Mock()
|
||||
mock_response.text = '# Changelog\n\n## v1.0.0\n- Initial release'
|
||||
mock_response.raise_for_status = Mock()
|
||||
mock_requests.return_value = mock_response
|
||||
|
||||
result = scraper._get_file_content('CHANGELOG.md')
|
||||
|
||||
# Should download via download_url
|
||||
self.assertEqual(result, '# Changelog\n\n## v1.0.0\n- Initial release')
|
||||
mock_requests.assert_called_once_with(
|
||||
'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md',
|
||||
timeout=30
|
||||
)
|
||||
|
||||
def test_extract_changelog_large_file(self):
|
||||
"""Test CHANGELOG extraction with large file (Integration test for Issue #219)"""
|
||||
config = {
|
||||
'repo': 'ccxt/ccxt',
|
||||
'name': 'ccxt',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Create mock large CHANGELOG
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'none'
|
||||
mock_content.size = 1388271
|
||||
mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
# Mock requests.get
|
||||
with patch('requests.get') as mock_requests:
|
||||
mock_response = Mock()
|
||||
mock_response.text = '# CCXT Changelog\n\n## v4.0.0\n- Major update'
|
||||
mock_response.raise_for_status = Mock()
|
||||
mock_requests.return_value = mock_response
|
||||
|
||||
scraper._extract_changelog()
|
||||
|
||||
# Should successfully extract CHANGELOG content
|
||||
self.assertIn('changelog', scraper.extracted_data)
|
||||
self.assertIn('Major update', scraper.extracted_data['changelog'])
|
||||
|
||||
|
||||
class TestErrorHandling(unittest.TestCase):
|
||||
"""Test error handling and edge cases"""
|
||||
|
||||
|
||||
331
tests/test_issue_219_e2e.py
Normal file
331
tests/test_issue_219_e2e.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
End-to-End Tests for Issue #219 - All Three Problems
|
||||
|
||||
Tests verify complete fixes for:
|
||||
1. Large file encoding error (ccxt/ccxt 1.4MB CHANGELOG)
|
||||
2. Missing --enhance-local CLI flag
|
||||
3. Custom API endpoint support (ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN)
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
||||
|
||||
|
||||
class TestIssue219Problem1LargeFiles(unittest.TestCase):
|
||||
"""E2E Test: Problem #1 - Large file download via download_url"""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up test environment"""
|
||||
try:
|
||||
from github import Github, GithubException
|
||||
self.PYGITHUB_AVAILABLE = True
|
||||
except ImportError:
|
||||
self.PYGITHUB_AVAILABLE = False
|
||||
|
||||
if not self.PYGITHUB_AVAILABLE:
|
||||
self.skipTest("PyGithub not installed")
|
||||
|
||||
from skill_seekers.cli.github_scraper import GitHubScraper
|
||||
self.GitHubScraper = GitHubScraper
|
||||
|
||||
def test_large_file_extraction_end_to_end(self):
|
||||
"""E2E: Verify large files (encoding='none') are downloaded via URL"""
|
||||
from github import GithubException
|
||||
|
||||
config = {
|
||||
'repo': 'ccxt/ccxt',
|
||||
'name': 'ccxt',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Mock large CHANGELOG (1.4MB, encoding="none")
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'none' # This is what GitHub API returns for large files
|
||||
mock_content.size = 1388271
|
||||
mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
# Mock requests.get for download
|
||||
with patch('requests.get') as mock_requests:
|
||||
mock_response = Mock()
|
||||
mock_response.text = '# CCXT Changelog\n\n## v4.4.20\n- Bug fixes'
|
||||
mock_response.raise_for_status = Mock()
|
||||
mock_requests.return_value = mock_response
|
||||
|
||||
# Call _extract_changelog (full workflow)
|
||||
scraper._extract_changelog()
|
||||
|
||||
# VERIFY: download_url was called
|
||||
mock_requests.assert_called_once_with(
|
||||
'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md',
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# VERIFY: CHANGELOG was extracted successfully
|
||||
self.assertIn('changelog', scraper.extracted_data)
|
||||
self.assertIn('Bug fixes', scraper.extracted_data['changelog'])
|
||||
self.assertEqual(scraper.extracted_data['changelog'], mock_response.text)
|
||||
|
||||
def test_large_file_fallback_on_error(self):
|
||||
"""E2E: Verify graceful handling if download_url fails"""
|
||||
from github import GithubException
|
||||
|
||||
config = {
|
||||
'repo': 'test/repo',
|
||||
'name': 'test',
|
||||
'github_token': None
|
||||
}
|
||||
|
||||
# Mock large file without download_url
|
||||
mock_content = Mock()
|
||||
mock_content.type = 'file'
|
||||
mock_content.encoding = 'none'
|
||||
mock_content.size = 2000000
|
||||
mock_content.download_url = None # Missing download URL
|
||||
|
||||
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_contents.return_value = mock_content
|
||||
|
||||
# Should return None gracefully
|
||||
result = scraper._get_file_content('CHANGELOG.md')
|
||||
self.assertIsNone(result)
|
||||
|
||||
# Should not crash
|
||||
scraper._extract_changelog()
|
||||
self.assertEqual(scraper.extracted_data['changelog'], '')
|
||||
|
||||
|
||||
class TestIssue219Problem2CLIFlags(unittest.TestCase):
|
||||
"""E2E Test: Problem #2 - CLI flags working through main.py dispatcher"""
|
||||
|
||||
def test_github_command_has_enhancement_flags(self):
|
||||
"""E2E: Verify --enhance-local flag exists in github command help"""
|
||||
result = subprocess.run(
|
||||
['skill-seekers', 'github', '--help'],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
# VERIFY: Command succeeds
|
||||
self.assertEqual(result.returncode, 0, "github --help should succeed")
|
||||
|
||||
# VERIFY: All enhancement flags present
|
||||
self.assertIn('--enhance', result.stdout, "Missing --enhance flag")
|
||||
self.assertIn('--enhance-local', result.stdout, "Missing --enhance-local flag")
|
||||
self.assertIn('--api-key', result.stdout, "Missing --api-key flag")
|
||||
|
||||
def test_github_command_accepts_enhance_local_flag(self):
    """E2E: Verify --enhance-local flag doesn't cause 'unrecognized arguments' error"""
    # argparse rejects unknown flags immediately, before any scraping starts.
    # If the process is still running when the timeout expires, parsing has
    # already succeeded, so a timeout is not a failure for this test.
    try:
        result = subprocess.run(
            ['skill-seekers', 'github', '--repo', 'test/test', '--enhance-local'],
            capture_output=True,
            text=True,
            timeout=5
        )
        stderr = result.stderr
    except subprocess.TimeoutExpired as exc:
        # Output captured before the timeout may be bytes, str or None.
        stderr = exc.stderr or ''
        if isinstance(stderr, bytes):
            stderr = stderr.decode('utf-8', errors='replace')
    except FileNotFoundError:
        self.skipTest("skill-seekers command not installed")

    # VERIFY: No "unrecognized arguments" error
    self.assertNotIn('unrecognized arguments', stderr,
                     "Flag should be recognized by CLI parser")
    self.assertNotIn('--enhance-local', stderr,
                     "Flag should not appear in error message")
|
||||
|
||||
def test_cli_dispatcher_forwards_flags_to_github_scraper(self):
    """E2E: Verify main.py dispatcher forwards flags to github_scraper.py"""
    from skill_seekers.cli import main

    # Simulated CLI invocation
    test_args = [
        'skill-seekers',
        'github',
        '--repo', 'test/test',
        '--name', 'test',
        '--enhance-local'
    ]

    # Snapshot of sys.argv taken at the moment the scraper entry point runs.
    # Checking sys.argv afterwards would prove nothing: test_args itself
    # already contains the flag, and patch() restores sys.argv on exit.
    forwarded_argv = []

    def record_argv(*args, **kwargs):
        forwarded_argv.extend(sys.argv)
        return 0

    with patch('sys.argv', test_args):
        with patch('skill_seekers.cli.github_scraper.main',
                   side_effect=record_argv) as mock_github_main:
            # Call main dispatcher
            with patch('sys.exit'):
                try:
                    main.main()
                except SystemExit:
                    pass

    # VERIFY: github_scraper.main was called
    mock_github_main.assert_called_once()

    # VERIFY: the argv seen by github_scraper contains --enhance-local
    self.assertIn('--enhance-local', forwarded_argv,
                  "Flag should be forwarded to github_scraper")
|
||||
|
||||
|
||||
class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
"""E2E Test: Problem #3 - Custom API endpoint support"""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up test environment"""
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
self.skill_dir = Path(self.temp_dir) / "test_skill"
|
||||
self.skill_dir.mkdir()
|
||||
|
||||
# Create minimal SKILL.md
|
||||
(self.skill_dir / "SKILL.md").write_text("# Test Skill\n", encoding='utf-8')
|
||||
|
||||
# Create references directory
|
||||
refs_dir = self.skill_dir / "references"
|
||||
refs_dir.mkdir()
|
||||
(refs_dir / "index.md").write_text("# Index\n", encoding='utf-8')
|
||||
|
||||
def tearDown(self):
|
||||
"""Clean up test environment"""
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
|
||||
def test_anthropic_base_url_support(self):
|
||||
"""E2E: Verify ANTHROPIC_BASE_URL environment variable is supported"""
|
||||
try:
|
||||
from skill_seekers.cli.enhance_skill import SkillEnhancer
|
||||
except ImportError:
|
||||
self.skipTest("anthropic package not installed")
|
||||
|
||||
# Set custom base URL
|
||||
custom_url = 'http://localhost:3000'
|
||||
|
||||
with patch.dict(os.environ, {
|
||||
'ANTHROPIC_API_KEY': 'test-key-123',
|
||||
'ANTHROPIC_BASE_URL': custom_url
|
||||
}):
|
||||
with patch('skill_seekers.cli.enhance_skill.anthropic.Anthropic') as mock_anthropic:
|
||||
# Create enhancer
|
||||
enhancer = SkillEnhancer(self.skill_dir)
|
||||
|
||||
# VERIFY: Anthropic client called with custom base_url
|
||||
mock_anthropic.assert_called_once()
|
||||
call_kwargs = mock_anthropic.call_args[1]
|
||||
self.assertIn('base_url', call_kwargs, "base_url should be passed")
|
||||
self.assertEqual(call_kwargs['base_url'], custom_url,
|
||||
"base_url should match ANTHROPIC_BASE_URL env var")
|
||||
|
||||
def test_anthropic_auth_token_support(self):
|
||||
"""E2E: Verify ANTHROPIC_AUTH_TOKEN is accepted as alternative to ANTHROPIC_API_KEY"""
|
||||
try:
|
||||
from skill_seekers.cli.enhance_skill import SkillEnhancer
|
||||
except ImportError:
|
||||
self.skipTest("anthropic package not installed")
|
||||
|
||||
custom_token = 'custom-auth-token-456'
|
||||
|
||||
# Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY
|
||||
with patch.dict(os.environ, {
|
||||
'ANTHROPIC_AUTH_TOKEN': custom_token
|
||||
}, clear=True):
|
||||
with patch('skill_seekers.cli.enhance_skill.anthropic.Anthropic') as mock_anthropic:
|
||||
# Create enhancer (should accept ANTHROPIC_AUTH_TOKEN)
|
||||
enhancer = SkillEnhancer(self.skill_dir)
|
||||
|
||||
# VERIFY: api_key set to ANTHROPIC_AUTH_TOKEN value
|
||||
self.assertEqual(enhancer.api_key, custom_token,
|
||||
"Should use ANTHROPIC_AUTH_TOKEN when ANTHROPIC_API_KEY not set")
|
||||
|
||||
# VERIFY: Anthropic client initialized with correct key
|
||||
mock_anthropic.assert_called_once()
|
||||
call_kwargs = mock_anthropic.call_args[1]
|
||||
self.assertEqual(call_kwargs['api_key'], custom_token,
|
||||
"api_key should match ANTHROPIC_AUTH_TOKEN")
|
||||
|
||||
def test_thinking_block_handling(self):
|
||||
"""E2E: Verify ThinkingBlock doesn't cause .text AttributeError"""
|
||||
try:
|
||||
from skill_seekers.cli.enhance_skill import SkillEnhancer
|
||||
except ImportError:
|
||||
self.skipTest("anthropic package not installed")
|
||||
|
||||
with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}):
|
||||
with patch('skill_seekers.cli.enhance_skill.anthropic.Anthropic') as mock_anthropic:
|
||||
enhancer = SkillEnhancer(self.skill_dir)
|
||||
|
||||
# Mock response with ThinkingBlock (newer SDK)
|
||||
# ThinkingBlock has no .text attribute
|
||||
mock_thinking_block = SimpleNamespace(type='thinking')
|
||||
|
||||
# TextBlock has .text attribute
|
||||
mock_text_block = SimpleNamespace(text='# Enhanced SKILL.md\n\nContent here')
|
||||
|
||||
mock_message = Mock()
|
||||
mock_message.content = [mock_thinking_block, mock_text_block]
|
||||
|
||||
mock_client = mock_anthropic.return_value
|
||||
mock_client.messages.create.return_value = mock_message
|
||||
|
||||
# Read references
|
||||
references = {
|
||||
'index.md': '# Index\nTest content'
|
||||
}
|
||||
|
||||
# Call enhance_skill_md (should handle ThinkingBlock gracefully)
|
||||
result = enhancer.enhance_skill_md(references, current_skill_md='# Old')
|
||||
|
||||
# VERIFY: Should find text from TextBlock, ignore ThinkingBlock
|
||||
self.assertIsNotNone(result, "Should return enhanced content")
|
||||
self.assertEqual(result, '# Enhanced SKILL.md\n\nContent here',
|
||||
"Should extract text from TextBlock")
|
||||
|
||||
|
||||
class TestIssue219IntegrationAll(unittest.TestCase):
|
||||
"""E2E Integration: All 3 problems together"""
|
||||
|
||||
def test_all_fixes_work_together(self):
    """E2E: Verify all 3 fixes work in combination"""
    # This test checks that the pieces of the fix are present together:
    # 1. The github subcommand advertises the enhancement flags
    # 2. The scraper module (large-file handling) imports
    # 3. The enhancer module (custom API endpoints) imports
    try:
        result = subprocess.run(
            ['skill-seekers', 'github', '--help'],
            capture_output=True,
            text=True
        )
    except FileNotFoundError:
        self.skipTest("skill-seekers command not installed")

    # A crash could still print partial help, so check the exit status first.
    self.assertEqual(result.returncode, 0, "github --help should succeed")

    # All flags present
    self.assertIn('--enhance', result.stdout)
    self.assertIn('--enhance-local', result.stdout)
    self.assertIn('--api-key', result.stdout)

    # Verify we can import all fixed modules
    try:
        from skill_seekers.cli.github_scraper import GitHubScraper  # noqa: F401
        from skill_seekers.cli.enhance_skill import SkillEnhancer  # noqa: F401
        from skill_seekers.cli import main  # noqa: F401
    except ImportError as e:
        self.fail(f"Module import failed: {e}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Run tests with verbose output
|
||||
unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user