From f2faebb8d5d17a711bd72a36425a00163cabeb71 Mon Sep 17 00:00:00 2001 From: yusyus Date: Thu, 1 Jan 2026 20:57:03 +0300 Subject: [PATCH] fix: Complete fix for Issue #219 - All three problems resolved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Problem #1: Large File Encoding Error** ✅ FIXED - Add large file download support via download_url - Detect encoding='none' for files >1MB - Download via GitHub raw URL instead of API - Handles ccxt/ccxt's 1.4MB CHANGELOG.md successfully **Problem #2: Missing CLI Enhancement Flags** ✅ FIXED - Add --enhance, --enhance-local, --api-key to main.py github_parser - Add flag forwarding in CLI dispatcher - Fixes 'unrecognized arguments' error - Users can now use: skill-seekers github --repo owner/repo --enhance-local **Problem #3: Custom API Endpoint Support** ✅ FIXED - Support ANTHROPIC_BASE_URL environment variable - Support ANTHROPIC_AUTH_TOKEN (alternative to ANTHROPIC_API_KEY) - Fix ThinkingBlock.text error with newer Anthropic SDK - Find TextBlock in response content array (handles thinking blocks) **Changes**: - src/skill_seekers/cli/enhance_skill.py: - Support custom base_url parameter - Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN - Iterate through content blocks to find text (handles ThinkingBlock) - src/skill_seekers/cli/main.py: - Add --enhance, --enhance-local, --api-key to github_parser - Forward flags to github_scraper.py in dispatcher - src/skill_seekers/cli/github_scraper.py: - Add large file detection (encoding=None/"none") - Download via download_url with requests - Log file size and download progress - tests/test_github_scraper.py: - Add test_get_file_content_large_file - Add test_extract_changelog_large_file - All 31 tests passing ✅ **Credits**: - Thanks to @XGCoder for detailed bug report - Thanks to @gorquan for local fixes and guidance Fixes #219 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- 
src/skill_seekers/cli/enhance_skill.py | 32 +++++++++--- src/skill_seekers/cli/github_scraper.py | 20 +++++++ src/skill_seekers/cli/main.py | 9 ++++ tests/test_github_scraper.py | 69 +++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 6 deletions(-) diff --git a/src/skill_seekers/cli/enhance_skill.py b/src/skill_seekers/cli/enhance_skill.py index f87d0ae..5f1ae3a 100644 --- a/src/skill_seekers/cli/enhance_skill.py +++ b/src/skill_seekers/cli/enhance_skill.py @@ -41,15 +41,24 @@ class SkillEnhancer: self.references_dir = self.skill_dir / "references" self.skill_md_path = self.skill_dir / "SKILL.md" - # Get API key - self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY') + # Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN + self.api_key = (api_key or + os.environ.get('ANTHROPIC_API_KEY') or + os.environ.get('ANTHROPIC_AUTH_TOKEN')) if not self.api_key: raise ValueError( - "No API key provided. Set ANTHROPIC_API_KEY environment variable " - "or use --api-key argument" + "No API key provided. 
Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN " + "environment variable or use --api-key argument" ) - self.client = anthropic.Anthropic(api_key=self.api_key) + # Support custom base URL for alternative API endpoints + base_url = os.environ.get('ANTHROPIC_BASE_URL') + client_kwargs = {'api_key': self.api_key} + if base_url: + client_kwargs['base_url'] = base_url + print(f"ℹ️ Using custom API base URL: {base_url}") + + self.client = anthropic.Anthropic(**client_kwargs) def read_current_skill_md(self): """Read existing SKILL.md""" @@ -77,7 +86,18 @@ class SkillEnhancer: }] ) - enhanced_content = message.content[0].text + # Handle response content - newer SDK versions may include ThinkingBlock + # Find the TextBlock containing the actual response + enhanced_content = None + for block in message.content: + if hasattr(block, 'text'): + enhanced_content = block.text + break + + if not enhanced_content: + print("❌ Error: No text content found in API response") + return None + return enhanced_content except Exception as e: diff --git a/src/skill_seekers/cli/github_scraper.py b/src/skill_seekers/cli/github_scraper.py index 0f77468..c04b5d3 100644 --- a/src/skill_seekers/cli/github_scraper.py +++ b/src/skill_seekers/cli/github_scraper.py @@ -355,6 +355,26 @@ class GitHubScraper: logger.warning(f"Symlink {file_path} has no target") return None + # Handle large files (encoding="none") - download via URL + # GitHub API doesn't base64-encode files >1MB + if hasattr(content, 'encoding') and content.encoding in [None, "none"]: + download_url = getattr(content, 'download_url', None) + file_size = getattr(content, 'size', 0) + + if download_url: + logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...") + try: + import requests + response = requests.get(download_url, timeout=30) + response.raise_for_status() + return response.text + except Exception as e: + logger.warning(f"Failed to download {file_path} from {download_url}: {e}") + return None + 
else: + logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})") + return None + # Handle regular files - decode content try: if isinstance(content.decoded_content, bytes): diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py index ebc920d..bddfe4d 100644 --- a/src/skill_seekers/cli/main.py +++ b/src/skill_seekers/cli/main.py @@ -99,6 +99,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers github_parser.add_argument("--repo", help="GitHub repo (owner/repo)") github_parser.add_argument("--name", help="Skill name") github_parser.add_argument("--description", help="Skill description") + github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)") + github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)") + github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance") # === pdf subcommand === pdf_parser = subparsers.add_parser( @@ -274,6 +277,12 @@ def main(argv: Optional[List[str]] = None) -> int: sys.argv.extend(["--name", args.name]) if args.description: sys.argv.extend(["--description", args.description]) + if args.enhance: + sys.argv.append("--enhance") + if args.enhance_local: + sys.argv.append("--enhance-local") + if args.api_key: + sys.argv.extend(["--api-key", args.api_key]) return github_main() or 0 elif args.command == "pdf": diff --git a/tests/test_github_scraper.py b/tests/test_github_scraper.py index 4fb3512..46cf6d2 100644 --- a/tests/test_github_scraper.py +++ b/tests/test_github_scraper.py @@ -892,6 +892,75 @@ class TestSymlinkHandling(unittest.TestCase): # Should not crash (will try latin-1 fallback) self.assertIsNotNone(result) + def test_get_file_content_large_file(self): + """Test _get_file_content handles large files with encoding='none' (Issue #219)""" + config = { + 'repo': 'ccxt/ccxt', + 'name': 'ccxt', + 'github_token': None + } + + # Create mock large file (encoding="none") 
+ mock_content = Mock() + mock_content.type = 'file' + mock_content.encoding = 'none' # Large files have encoding="none" + mock_content.size = 1388271 # 1.4MB CHANGELOG + mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md' + + with patch('skill_seekers.cli.github_scraper.Github'): + scraper = self.GitHubScraper(config) + scraper.repo = Mock() + scraper.repo.get_contents.return_value = mock_content + + # Mock requests.get + with patch('requests.get') as mock_requests: + mock_response = Mock() + mock_response.text = '# Changelog\n\n## v1.0.0\n- Initial release' + mock_response.raise_for_status = Mock() + mock_requests.return_value = mock_response + + result = scraper._get_file_content('CHANGELOG.md') + + # Should download via download_url + self.assertEqual(result, '# Changelog\n\n## v1.0.0\n- Initial release') + mock_requests.assert_called_once_with( + 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md', + timeout=30 + ) + + def test_extract_changelog_large_file(self): + """Test CHANGELOG extraction with large file (Integration test for Issue #219)""" + config = { + 'repo': 'ccxt/ccxt', + 'name': 'ccxt', + 'github_token': None + } + + # Create mock large CHANGELOG + mock_content = Mock() + mock_content.type = 'file' + mock_content.encoding = 'none' + mock_content.size = 1388271 + mock_content.download_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md' + + with patch('skill_seekers.cli.github_scraper.Github'): + scraper = self.GitHubScraper(config) + scraper.repo = Mock() + scraper.repo.get_contents.return_value = mock_content + + # Mock requests.get + with patch('requests.get') as mock_requests: + mock_response = Mock() + mock_response.text = '# CCXT Changelog\n\n## v4.0.0\n- Major update' + mock_response.raise_for_status = Mock() + mock_requests.return_value = mock_response + + scraper._extract_changelog() + + # Should successfully extract CHANGELOG content + self.assertIn('changelog', 
scraper.extracted_data) + self.assertIn('Major update', scraper.extracted_data['changelog']) + class TestErrorHandling(unittest.TestCase): """Test error handling and edge cases"""