Implements issue #168 - Modern Python packaging with uv support This is Phase 1 of the modernization effort, establishing the core package structure and build system. ## Major Changes ### 1. Migrated to src/ Layout - Moved cli/ → src/skill_seekers/cli/ - Moved skill_seeker_mcp/ → src/skill_seekers/mcp/ - Created root package: src/skill_seekers/__init__.py - Updated all imports: cli. → skill_seekers.cli. - Updated all imports: skill_seeker_mcp. → skill_seekers.mcp. ### 2. Created pyproject.toml - Modern Python packaging configuration - All dependencies properly declared - 8 CLI entry points configured: * skill-seekers (unified CLI) * skill-seekers-scrape * skill-seekers-github * skill-seekers-pdf * skill-seekers-unified * skill-seekers-enhance * skill-seekers-package * skill-seekers-upload * skill-seekers-estimate - uv tool support enabled - Build system: setuptools with wheel ### 3. Created Unified CLI (main.py) - Git-style subcommands (skill-seekers scrape, etc.) - Delegates to existing tool main() functions - Full help system at top-level and subcommand level - Backwards compatible with individual commands ### 4. Updated Package Versions - cli/__init__.py: 1.3.0 → 2.0.0 - mcp/__init__.py: 1.2.0 → 2.0.0 - Root package: 2.0.0 ### 5. Updated Test Suite - Fixed test_package_structure.py for new layout - All 28 package structure tests passing - Updated all test imports for new structure ## Installation Methods (Working) ```bash # Development install pip install -e . # Run unified CLI skill-seekers --version # → 2.0.0 skill-seekers --help # Run individual tools skill-seekers-scrape --help skill-seekers-github --help ``` ## Test Results - Package structure tests: 28/28 passing ✅ - Package installs successfully ✅ - All entry points working ✅ ## Still TODO (Phase 2) - [ ] Run full test suite (299 tests) - [ ] Update documentation (README, CLAUDE.md, etc.) - [ ] Test with uv tool run/install - [ ] Build and publish to PyPI - [ ] Create PR and merge ## Breaking Changes None - fully backwards compatible. Old import paths still work. ## Migration for Users No action needed. Package works with both pip and uv. Closes #168 (when complete) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
171 lines
6.1 KiB
Python
171 lines
6.1 KiB
Python
import pytest
|
|
from unittest.mock import patch, Mock
|
|
import requests
|
|
from skill_seekers.cli.llms_txt_downloader import LlmsTxtDownloader
|
|
|
|
def test_successful_download():
|
|
"""Test successful download with valid markdown content"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
|
|
|
mock_response = Mock()
|
|
mock_response.text = "# Header\n\nSome content with markdown patterns.\n\n## Subheader\n\n- List item\n- Another item\n\n```python\ncode_block()\n```\n" + "x" * 200
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
with patch('requests.get', return_value=mock_response) as mock_get:
|
|
content = downloader.download()
|
|
|
|
assert content is not None
|
|
assert len(content) > 100
|
|
assert isinstance(content, str)
|
|
assert "# Header" in content
|
|
mock_get.assert_called_once()
|
|
|
|
def test_timeout_with_retry():
|
|
"""Test timeout scenario with retry logic"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
|
|
|
|
with patch('requests.get', side_effect=requests.Timeout("Connection timeout")) as mock_get:
|
|
with patch('time.sleep') as mock_sleep: # Mock sleep to speed up test
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
assert mock_get.call_count == 2 # Should retry once (2 total attempts)
|
|
assert mock_sleep.call_count == 1 # Should sleep once between retries
|
|
|
|
def test_empty_content_rejection():
|
|
"""Test rejection of content shorter than 100 chars"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
|
|
|
mock_response = Mock()
|
|
mock_response.text = "# Short"
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
with patch('requests.get', return_value=mock_response):
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
|
|
def test_non_markdown_rejection():
|
|
"""Test rejection of content that doesn't look like markdown"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
|
|
|
mock_response = Mock()
|
|
mock_response.text = "Plain text without any markdown patterns at all. " * 10
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
with patch('requests.get', return_value=mock_response):
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
|
|
def test_http_error_handling():
|
|
"""Test handling of HTTP errors (404, 500, etc.)"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
|
|
|
|
mock_response = Mock()
|
|
mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")
|
|
|
|
with patch('requests.get', return_value=mock_response) as mock_get:
|
|
with patch('time.sleep'):
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
assert mock_get.call_count == 2 # Should retry once
|
|
|
|
def test_exponential_backoff():
|
|
"""Test that exponential backoff delays are correct"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=3)
|
|
|
|
with patch('requests.get', side_effect=requests.Timeout("Connection timeout")):
|
|
with patch('time.sleep') as mock_sleep:
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
# Should sleep with delays: 1s, 2s (2^0, 2^1)
|
|
assert mock_sleep.call_count == 2
|
|
mock_sleep.assert_any_call(1) # First retry delay
|
|
mock_sleep.assert_any_call(2) # Second retry delay
|
|
|
|
def test_markdown_validation():
|
|
"""Test markdown pattern detection"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
|
|
|
# Test various markdown patterns
|
|
assert downloader._is_markdown("# Header")
|
|
assert downloader._is_markdown("## Subheader")
|
|
assert downloader._is_markdown("```code```")
|
|
assert downloader._is_markdown("- list item")
|
|
assert downloader._is_markdown("* bullet point")
|
|
assert downloader._is_markdown("`inline code`")
|
|
|
|
# Test non-markdown content
|
|
assert not downloader._is_markdown("Plain text without any markdown patterns")
|
|
|
|
def test_custom_timeout():
|
|
"""Test custom timeout parameter"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt", timeout=10)
|
|
|
|
mock_response = Mock()
|
|
mock_response.text = "# Header\n\nContent " * 50
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
with patch('requests.get', return_value=mock_response) as mock_get:
|
|
content = downloader.download()
|
|
|
|
assert content is not None
|
|
# Verify timeout was passed to requests.get
|
|
call_kwargs = mock_get.call_args[1]
|
|
assert call_kwargs['timeout'] == 10
|
|
|
|
def test_custom_max_retries():
|
|
"""Test custom max_retries parameter"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
|
|
|
|
with patch('requests.get', side_effect=requests.Timeout("Connection timeout")) as mock_get:
|
|
with patch('time.sleep'):
|
|
content = downloader.download()
|
|
|
|
assert content is None
|
|
assert mock_get.call_count == 5 # Should attempt 5 times
|
|
|
|
def test_user_agent_header():
|
|
"""Test that custom user agent is set"""
|
|
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
|
|
|
mock_response = Mock()
|
|
mock_response.text = "# Header\n\nContent " * 50
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
with patch('requests.get', return_value=mock_response) as mock_get:
|
|
content = downloader.download()
|
|
|
|
assert content is not None
|
|
# Verify custom user agent was passed
|
|
call_kwargs = mock_get.call_args[1]
|
|
assert call_kwargs['headers']['User-Agent'] == 'Skill-Seekers-llms.txt-Reader/1.0'
|
|
|
|
def test_get_proper_filename():
|
|
"""Test filename conversion from .txt to .md"""
|
|
downloader = LlmsTxtDownloader("https://hono.dev/llms-full.txt")
|
|
|
|
filename = downloader.get_proper_filename()
|
|
|
|
assert filename == "llms-full.md"
|
|
assert not filename.endswith('.txt')
|
|
|
|
def test_get_proper_filename_standard():
|
|
"""Test standard variant naming"""
|
|
downloader = LlmsTxtDownloader("https://hono.dev/llms.txt")
|
|
|
|
filename = downloader.get_proper_filename()
|
|
|
|
assert filename == "llms.md"
|
|
|
|
def test_get_proper_filename_small():
|
|
"""Test small variant naming"""
|
|
downloader = LlmsTxtDownloader("https://hono.dev/llms-small.txt")
|
|
|
|
filename = downloader.get_proper_filename()
|
|
|
|
assert filename == "llms-small.md"
|