fix: Complete fix for Issue #219 - All three problems resolved
**Problem #1: Large File Encoding Error** ✅ FIXED
- Add large file download support via download_url
- Detect encoding='none' for files >1MB
- Download via GitHub raw URL instead of API
- Handles ccxt/ccxt's 1.4MB CHANGELOG.md successfully

**Problem #2: Missing CLI Enhancement Flags** ✅ FIXED
- Add --enhance, --enhance-local, --api-key to main.py github_parser
- Add flag forwarding in CLI dispatcher
- Fixes 'unrecognized arguments' error
- Users can now use: skill-seekers github --repo owner/repo --enhance-local

**Problem #3: Custom API Endpoint Support** ✅ FIXED
- Support ANTHROPIC_BASE_URL environment variable
- Support ANTHROPIC_AUTH_TOKEN (alternative to ANTHROPIC_API_KEY)
- Fix ThinkingBlock.text error with newer Anthropic SDK
- Find TextBlock in response content array (handles thinking blocks)

**Changes**:
- src/skill_seekers/cli/enhance_skill.py:
  - Support custom base_url parameter
  - Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
  - Iterate through content blocks to find text (handles ThinkingBlock)
- src/skill_seekers/cli/main.py:
  - Add --enhance, --enhance-local, --api-key to github_parser
  - Forward flags to github_scraper.py in dispatcher
- src/skill_seekers/cli/github_scraper.py:
  - Add large file detection (encoding=None/"none")
  - Download via download_url with requests
  - Log file size and download progress
- tests/test_github_scraper.py:
  - Add test_get_file_content_large_file
  - Add test_extract_changelog_large_file
  - All 31 tests passing ✅

**Credits**:
- Thanks to @XGCoder for detailed bug report
- Thanks to @gorquan for local fixes and guidance

Fixes #219

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -892,6 +892,75 @@ class TestSymlinkHandling(unittest.TestCase):
|
||||
# Should not crash (will try latin-1 fallback)
|
||||
self.assertIsNotNone(result)
|
||||
|
||||
def test_get_file_content_large_file(self):
    """_get_file_content returns the downloaded text for an encoding='none' file (Issue #219).

    The mocked repository entry reports encoding 'none' and a ~1.4MB size;
    the test expects the content to be fetched with requests.get from the
    entry's download_url, using a 30 second timeout.
    """
    raw_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
    expected_text = '# Changelog\n\n## v1.0.0\n- Initial release'
    config = {'repo': 'ccxt/ccxt', 'name': 'ccxt', 'github_token': None}

    # Repository entry standing in for a large file: encoding is "none".
    large_file = Mock(
        type='file',
        encoding='none',
        size=1388271,  # 1.4MB CHANGELOG
        download_url=raw_url,
    )

    with patch('skill_seekers.cli.github_scraper.Github'):
        scraper = self.GitHubScraper(config)
        scraper.repo = Mock()
        scraper.repo.get_contents.return_value = large_file

        # Replace the HTTP call so no network access happens.
        with patch('requests.get') as fake_get:
            fake_get.return_value = Mock(
                text=expected_text,
                raise_for_status=Mock(),
            )

            content = scraper._get_file_content('CHANGELOG.md')

            # The body must come from the download_url request.
            self.assertEqual(content, expected_text)
            fake_get.assert_called_once_with(raw_url, timeout=30)
|
||||
|
||||
def test_extract_changelog_large_file(self):
    """_extract_changelog stores the text of an encoding='none' CHANGELOG (Issue #219).

    Runs _extract_changelog against a mocked repository entry with
    encoding 'none' and a patched requests.get, then checks that the
    response text ends up under the 'changelog' key of extracted_data.
    """
    config = {'repo': 'ccxt/ccxt', 'name': 'ccxt', 'github_token': None}

    # Repository entry standing in for a large CHANGELOG file.
    large_changelog = Mock(
        type='file',
        encoding='none',
        size=1388271,
        download_url='https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md',
    )

    with patch('skill_seekers.cli.github_scraper.Github'):
        scraper = self.GitHubScraper(config)
        scraper.repo = Mock()
        scraper.repo.get_contents.return_value = large_changelog

        # Replace the HTTP call so no network access happens.
        with patch('requests.get') as fake_get:
            fake_get.return_value = Mock(
                text='# CCXT Changelog\n\n## v4.0.0\n- Major update',
                raise_for_status=Mock(),
            )

            scraper._extract_changelog()

            # The changelog text must have been captured.
            extracted = scraper.extracted_data
            self.assertIn('changelog', extracted)
            self.assertIn('Major update', extracted['changelog'])
|
||||
|
||||
|
||||
class TestErrorHandling(unittest.TestCase):
|
||||
"""Test error handling and edge cases"""
|
||||
|
||||
Reference in New Issue
Block a user