fix: Complete fix for Issue #219 - All three problems resolved

**Problem #1: Large File Encoding Error** ✅ FIXED
- Add large file download support via download_url
- Detect encoding='none' for files >1MB
- Download via GitHub raw URL instead of API
- Handles ccxt/ccxt's 1.4MB CHANGELOG.md successfully

**Problem #2: Missing CLI Enhancement Flags** ✅ FIXED
- Add --enhance, --enhance-local, --api-key to main.py github_parser
- Add flag forwarding in CLI dispatcher
- Fixes 'unrecognized arguments' error
- Users can now use: skill-seekers github --repo owner/repo --enhance-local

**Problem #3: Custom API Endpoint Support** ✅ FIXED
- Support ANTHROPIC_BASE_URL environment variable
- Support ANTHROPIC_AUTH_TOKEN (alternative to ANTHROPIC_API_KEY)
- Fix ThinkingBlock.text error with newer Anthropic SDK
- Find TextBlock in response content array (handles thinking blocks)

**Changes**:
- src/skill_seekers/cli/enhance_skill.py:
  - Support custom base_url parameter
  - Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
  - Iterate through content blocks to find text (handles ThinkingBlock)
- src/skill_seekers/cli/main.py:
  - Add --enhance, --enhance-local, --api-key to github_parser
  - Forward flags to github_scraper.py in dispatcher
- src/skill_seekers/cli/github_scraper.py:
  - Add large file detection (encoding=None/"none")
  - Download via download_url with requests
  - Log file size and download progress
- tests/test_github_scraper.py:
  - Add test_get_file_content_large_file
  - Add test_extract_changelog_large_file
  - All 31 tests passing ✅

**Credits**:
- Thanks to @XGCoder for detailed bug report
- Thanks to @gorquan for local fixes and guidance

Fixes #219

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-01 20:57:03 +03:00
parent 58286f454a
commit f2faebb8d5
4 changed files with 124 additions and 6 deletions
--- a/src/skill_seekers/cli/enhance_skill.py
+++ b/src/skill_seekers/cli/enhance_skill.py
@@ -41,15 +41,24 @@ class SkillEnhancer:
        self.references_dir = self.skill_dir / "references"
        self.skill_md_path = self.skill_dir / "SKILL.md"
-        # Get API key
+        # Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
-        self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY')
+        self.api_key = (api_key or
                       os.environ.get('ANTHROPIC_API_KEY') or
                       os.environ.get('ANTHROPIC_AUTH_TOKEN'))
        if not self.api_key:
            raise ValueError(
-                "No API key provided. Set ANTHROPIC_API_KEY environment variable "
+                "No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
-                "or use --api-key argument"
+                "environment variable or use --api-key argument"
            )
-        self.client = anthropic.Anthropic(api_key=self.api_key)
+        # Support custom base URL for alternative API endpoints
        base_url = os.environ.get('ANTHROPIC_BASE_URL')
        client_kwargs = {'api_key': self.api_key}
        if base_url:
            client_kwargs['base_url'] = base_url
            print(f"ℹ️  Using custom API base URL: {base_url}")
        self.client = anthropic.Anthropic(**client_kwargs)
    def read_current_skill_md(self):
        """Read existing SKILL.md"""
@@ -77,7 +86,18 @@ class SkillEnhancer:
                }]
            )
-            enhanced_content = message.content[0].text
+            # Handle response content - newer SDK versions may include ThinkingBlock
            # Find the TextBlock containing the actual response
            enhanced_content = None
            for block in message.content:
                if hasattr(block, 'text'):
                    enhanced_content = block.text
                    break
            if not enhanced_content:
                print("❌ Error: No text content found in API response")
                return None
            return enhanced_content
        except Exception as e:
--- a/src/skill_seekers/cli/github_scraper.py
+++ b/src/skill_seekers/cli/github_scraper.py
@@ -355,6 +355,26 @@ class GitHubScraper:
                    logger.warning(f"Symlink {file_path} has no target")
                    return None
            # Handle large files (encoding="none") - download via URL
            # GitHub API doesn't base64-encode files >1MB
            if hasattr(content, 'encoding') and content.encoding in [None, "none"]:
                download_url = getattr(content, 'download_url', None)
                file_size = getattr(content, 'size', 0)
                if download_url:
                    logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...")
                    try:
                        import requests
                        response = requests.get(download_url, timeout=30)
                        response.raise_for_status()
                        return response.text
                    except Exception as e:
                        logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
                        return None
                else:
                    logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})")
                    return None
            # Handle regular files - decode content
            try:
                if isinstance(content.decoded_content, bytes):
--- a/src/skill_seekers/cli/main.py
+++ b/src/skill_seekers/cli/main.py
@@ -99,6 +99,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
    github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
    github_parser.add_argument("--name", help="Skill name")
    github_parser.add_argument("--description", help="Skill description")
    github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
    github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
    github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
    # === pdf subcommand ===
    pdf_parser = subparsers.add_parser(
@@ -274,6 +277,12 @@ def main(argv: Optional[List[str]] = None) -> int:
                sys.argv.extend(["--name", args.name])
            if args.description:
                sys.argv.extend(["--description", args.description])
            if args.enhance:
                sys.argv.append("--enhance")
            if args.enhance_local:
                sys.argv.append("--enhance-local")
            if args.api_key:
                sys.argv.extend(["--api-key", args.api_key])
            return github_main() or 0
        elif args.command == "pdf":
--- a/tests/test_github_scraper.py
+++ b/tests/test_github_scraper.py
@@ -892,6 +892,75 @@ class TestSymlinkHandling(unittest.TestCase):
            # Should not crash (will try latin-1 fallback)
            self.assertIsNotNone(result)
    def test_get_file_content_large_file(self):
        """Large files (encoding='none') are fetched through download_url (Issue #219).

        The mocked content object reports encoding 'none' and carries a
        download_url; the test checks that _get_file_content returns the text
        of the HTTP response and that requests.get was called exactly once
        with that URL and a 30-second timeout.
        """
        raw_url = 'https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md'
        expected_text = '# Changelog\n\n## v1.0.0\n- Initial release'
        scraper_config = {'repo': 'ccxt/ccxt', 'name': 'ccxt', 'github_token': None}

        # Stand-in for the content object of a file that is not base64-encoded
        big_file = Mock(
            type='file',
            encoding='none',
            size=1388271,  # 1.4MB CHANGELOG
            download_url=raw_url,
        )

        with patch('skill_seekers.cli.github_scraper.Github'):
            scraper = self.GitHubScraper(scraper_config)
            repo_stub = Mock()
            repo_stub.get_contents.return_value = big_file
            scraper.repo = repo_stub

            # Replace the real HTTP call with a canned response
            fake_response = Mock(text=expected_text, raise_for_status=Mock())
            with patch('requests.get', return_value=fake_response) as http_get:
                fetched = scraper._get_file_content('CHANGELOG.md')

                # Content must come from the download URL, requested once
                self.assertEqual(fetched, expected_text)
                http_get.assert_called_once_with(raw_url, timeout=30)
    def test_extract_changelog_large_file(self):
        """CHANGELOG extraction works when the file is large (Issue #219).

        Runs _extract_changelog against a mocked repository whose content
        object has encoding 'none', with requests.get mocked, and checks that
        the downloaded text ends up under extracted_data['changelog'].
        """
        scraper_config = {'repo': 'ccxt/ccxt', 'name': 'ccxt', 'github_token': None}

        # Stand-in for a large CHANGELOG content object
        big_changelog = Mock(
            type='file',
            encoding='none',
            size=1388271,
            download_url='https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md',
        )

        with patch('skill_seekers.cli.github_scraper.Github'):
            scraper = self.GitHubScraper(scraper_config)
            repo_stub = Mock()
            repo_stub.get_contents.return_value = big_changelog
            scraper.repo = repo_stub

            # Replace the real HTTP call with a canned response
            fake_response = Mock(
                text='# CCXT Changelog\n\n## v4.0.0\n- Major update',
                raise_for_status=Mock(),
            )
            with patch('requests.get', return_value=fake_response):
                scraper._extract_changelog()

                # The downloaded text must be stored as the changelog
                extracted = scraper.extracted_data
                self.assertIn('changelog', extracted)
                self.assertIn('Major update', extracted['changelog'])
 class TestErrorHandling(unittest.TestCase):
    """Test error handling and edge cases"""