fix: Complete fix for Issue #219 - All three problems resolved
**Problem #1: Large File Encoding Error** ✅ FIXED
- Add large file download support via download_url
- Detect encoding='none' for files >1MB
- Download via GitHub raw URL instead of API
- Handles ccxt/ccxt's 1.4MB CHANGELOG.md successfully

**Problem #2: Missing CLI Enhancement Flags** ✅ FIXED
- Add --enhance, --enhance-local, --api-key to main.py github_parser
- Add flag forwarding in CLI dispatcher
- Fixes 'unrecognized arguments' error
- Users can now use: skill-seekers github --repo owner/repo --enhance-local

**Problem #3: Custom API Endpoint Support** ✅ FIXED
- Support ANTHROPIC_BASE_URL environment variable
- Support ANTHROPIC_AUTH_TOKEN (alternative to ANTHROPIC_API_KEY)
- Fix ThinkingBlock.text error with newer Anthropic SDK
- Find TextBlock in response content array (handles thinking blocks)

**Changes**:
- src/skill_seekers/cli/enhance_skill.py:
  - Support custom base_url parameter
  - Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
  - Iterate through content blocks to find text (handles ThinkingBlock)
- src/skill_seekers/cli/main.py:
  - Add --enhance, --enhance-local, --api-key to github_parser
  - Forward flags to github_scraper.py in dispatcher
- src/skill_seekers/cli/github_scraper.py:
  - Add large file detection (encoding=None/"none")
  - Download via download_url with requests
  - Log file size and download progress
- tests/test_github_scraper.py:
  - Add test_get_file_content_large_file
  - Add test_extract_changelog_large_file
  - All 31 tests passing ✅

**Credits**:
- Thanks to @XGCoder for detailed bug report
- Thanks to @gorquan for local fixes and guidance

Fixes #219

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -41,15 +41,24 @@ class SkillEnhancer:
|
|||||||
self.references_dir = self.skill_dir / "references"
|
self.references_dir = self.skill_dir / "references"
|
||||||
self.skill_md_path = self.skill_dir / "SKILL.md"
|
self.skill_md_path = self.skill_dir / "SKILL.md"
|
||||||
|
|
||||||
# Get API key
|
# Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
|
||||||
self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY')
|
self.api_key = (api_key or
|
||||||
|
os.environ.get('ANTHROPIC_API_KEY') or
|
||||||
|
os.environ.get('ANTHROPIC_AUTH_TOKEN'))
|
||||||
if not self.api_key:
|
if not self.api_key:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"No API key provided. Set ANTHROPIC_API_KEY environment variable "
|
"No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
|
||||||
"or use --api-key argument"
|
"environment variable or use --api-key argument"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.client = anthropic.Anthropic(api_key=self.api_key)
|
# Support custom base URL for alternative API endpoints
|
||||||
|
base_url = os.environ.get('ANTHROPIC_BASE_URL')
|
||||||
|
client_kwargs = {'api_key': self.api_key}
|
||||||
|
if base_url:
|
||||||
|
client_kwargs['base_url'] = base_url
|
||||||
|
print(f"ℹ️ Using custom API base URL: {base_url}")
|
||||||
|
|
||||||
|
self.client = anthropic.Anthropic(**client_kwargs)
|
||||||
|
|
||||||
def read_current_skill_md(self):
|
def read_current_skill_md(self):
|
||||||
"""Read existing SKILL.md"""
|
"""Read existing SKILL.md"""
|
||||||
@@ -77,7 +86,18 @@ class SkillEnhancer:
|
|||||||
}]
|
}]
|
||||||
)
|
)
|
||||||
|
|
||||||
enhanced_content = message.content[0].text
|
# Handle response content - newer SDK versions may include ThinkingBlock
|
||||||
|
# Find the TextBlock containing the actual response
|
||||||
|
enhanced_content = None
|
||||||
|
for block in message.content:
|
||||||
|
if hasattr(block, 'text'):
|
||||||
|
enhanced_content = block.text
|
||||||
|
break
|
||||||
|
|
||||||
|
if not enhanced_content:
|
||||||
|
print("❌ Error: No text content found in API response")
|
||||||
|
return None
|
||||||
|
|
||||||
return enhanced_content
|
return enhanced_content
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -355,6 +355,26 @@ class GitHubScraper:
|
|||||||
logger.warning(f"Symlink {file_path} has no target")
|
logger.warning(f"Symlink {file_path} has no target")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Handle large files (encoding="none") - download via URL
|
||||||
|
# GitHub API doesn't base64-encode files >1MB
|
||||||
|
if hasattr(content, 'encoding') and content.encoding in [None, "none"]:
|
||||||
|
download_url = getattr(content, 'download_url', None)
|
||||||
|
file_size = getattr(content, 'size', 0)
|
||||||
|
|
||||||
|
if download_url:
|
||||||
|
logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...")
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
response = requests.get(download_url, timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.text
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})")
|
||||||
|
return None
|
||||||
|
|
||||||
# Handle regular files - decode content
|
# Handle regular files - decode content
|
||||||
try:
|
try:
|
||||||
if isinstance(content.decoded_content, bytes):
|
if isinstance(content.decoded_content, bytes):
|
||||||
|
|||||||
@@ -99,6 +99,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
|||||||
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
|
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
|
||||||
github_parser.add_argument("--name", help="Skill name")
|
github_parser.add_argument("--name", help="Skill name")
|
||||||
github_parser.add_argument("--description", help="Skill description")
|
github_parser.add_argument("--description", help="Skill description")
|
||||||
|
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
|
||||||
|
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
|
||||||
|
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
|
||||||
|
|
||||||
# === pdf subcommand ===
|
# === pdf subcommand ===
|
||||||
pdf_parser = subparsers.add_parser(
|
pdf_parser = subparsers.add_parser(
|
||||||
@@ -274,6 +277,12 @@ def main(argv: Optional[List[str]] = None) -> int:
|
|||||||
sys.argv.extend(["--name", args.name])
|
sys.argv.extend(["--name", args.name])
|
||||||
if args.description:
|
if args.description:
|
||||||
sys.argv.extend(["--description", args.description])
|
sys.argv.extend(["--description", args.description])
|
||||||
|
if args.enhance:
|
||||||
|
sys.argv.append("--enhance")
|
||||||
|
if args.enhance_local:
|
||||||
|
sys.argv.append("--enhance-local")
|
||||||
|
if args.api_key:
|
||||||
|
sys.argv.extend(["--api-key", args.api_key])
|
||||||
return github_main() or 0
|
return github_main() or 0
|
||||||
|
|
||||||
elif args.command == "pdf":
|
elif args.command == "pdf":
|
||||||
|
|||||||
@@ -892,6 +892,75 @@ class TestSymlinkHandling(unittest.TestCase):
|
|||||||
# Should not crash (will try latin-1 fallback)
|
# Should not crash (will try latin-1 fallback)
|
||||||
self.assertIsNotNone(result)
|
self.assertIsNotNone(result)
|
||||||
|
|
||||||
|
def test_get_file_content_large_file(self):
|
||||||
|
"""Test _get_file_content handles large files with encoding='none' (Issue #219)"""
|
||||||
|
config = {
|
||||||
|
'repo': 'ccxt/ccxt',
|
||||||
|
'name': 'ccxt',
|
||||||
|
'github_token': None
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create mock large file (encoding="none")
|
||||||
|
mock_content = Mock()
|
||||||
|
mock_content.type = 'file'
|
||||||
|
mock_content.encoding = 'none' # Large files have encoding="none"
|
||||||
|
mock_content.size = 1388271 # 1.4MB CHANGELOG
|
||||||
|
mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
|
||||||
|
|
||||||
|
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||||
|
scraper = self.GitHubScraper(config)
|
||||||
|
scraper.repo = Mock()
|
||||||
|
scraper.repo.get_contents.return_value = mock_content
|
||||||
|
|
||||||
|
# Mock requests.get
|
||||||
|
with patch('requests.get') as mock_requests:
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = '# Changelog\n\n## v1.0.0\n- Initial release'
|
||||||
|
mock_response.raise_for_status = Mock()
|
||||||
|
mock_requests.return_value = mock_response
|
||||||
|
|
||||||
|
result = scraper._get_file_content('CHANGELOG.md')
|
||||||
|
|
||||||
|
# Should download via download_url
|
||||||
|
self.assertEqual(result, '# Changelog\n\n## v1.0.0\n- Initial release')
|
||||||
|
mock_requests.assert_called_once_with(
|
||||||
|
'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md',
|
||||||
|
timeout=30
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_extract_changelog_large_file(self):
|
||||||
|
"""Test CHANGELOG extraction with large file (Integration test for Issue #219)"""
|
||||||
|
config = {
|
||||||
|
'repo': 'ccxt/ccxt',
|
||||||
|
'name': 'ccxt',
|
||||||
|
'github_token': None
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create mock large CHANGELOG
|
||||||
|
mock_content = Mock()
|
||||||
|
mock_content.type = 'file'
|
||||||
|
mock_content.encoding = 'none'
|
||||||
|
mock_content.size = 1388271
|
||||||
|
mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
|
||||||
|
|
||||||
|
with patch('skill_seekers.cli.github_scraper.Github'):
|
||||||
|
scraper = self.GitHubScraper(config)
|
||||||
|
scraper.repo = Mock()
|
||||||
|
scraper.repo.get_contents.return_value = mock_content
|
||||||
|
|
||||||
|
# Mock requests.get
|
||||||
|
with patch('requests.get') as mock_requests:
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.text = '# CCXT Changelog\n\n## v4.0.0\n- Major update'
|
||||||
|
mock_response.raise_for_status = Mock()
|
||||||
|
mock_requests.return_value = mock_response
|
||||||
|
|
||||||
|
scraper._extract_changelog()
|
||||||
|
|
||||||
|
# Should successfully extract CHANGELOG content
|
||||||
|
self.assertIn('changelog', scraper.extracted_data)
|
||||||
|
self.assertIn('Major update', scraper.extracted_data['changelog'])
|
||||||
|
|
||||||
|
|
||||||
class TestErrorHandling(unittest.TestCase):
|
class TestErrorHandling(unittest.TestCase):
|
||||||
"""Test error handling and edge cases"""
|
"""Test error handling and edge cases"""
|
||||||
|
|||||||
Reference in New Issue
Block a user