style: Run ruff format on 15 files (CI fix)

CI checks formatting with 'ruff format', not 'black', so the files below were reformatted with ruff.

Files reformatted by ruff:
- config_extractor.py
- doc_scraper.py
- how_to_guide_builder.py
- llms_txt_parser.py
- pattern_recognizer.py
- test_example_extractor.py
- unified_codebase_analyzer.py
- test_architecture_scenarios.py
- test_async_scraping.py
- test_github_scraper.py
- test_guide_enhancer.py
- test_install_agent.py
- test_issue_219_e2e.py
- test_llms_txt_downloader.py
- test_skip_llms_txt.py

Fixes the failing CI formatting check.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-18 00:01:30 +03:00
parent 9d43956b1d
commit 85c8d9d385
15 changed files with 179 additions and 510 deletions

View File

@@ -203,15 +203,11 @@ How to use async tools.
],
}
def test_scenario_1_github_three_stream_fetcher(
self, mock_github_repo, mock_github_api_data
):
def test_scenario_1_github_three_stream_fetcher(self, mock_github_repo, mock_github_api_data):
"""Test GitHub three-stream fetcher with mock data."""
# Create fetcher with mock
with (
patch.object(
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
),
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object(
GitHubThreeStreamFetcher,
"fetch_github_metadata",
@@ -251,14 +247,10 @@ How to use async tools.
assert len(three_streams.insights_stream.known_solutions) >= 1
assert len(three_streams.insights_stream.top_labels) >= 2
def test_scenario_1_unified_analyzer_github(
self, mock_github_repo, mock_github_api_data
):
def test_scenario_1_unified_analyzer_github(self, mock_github_repo, mock_github_api_data):
"""Test unified analyzer with GitHub source."""
with (
patch.object(
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
),
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object(
GitHubThreeStreamFetcher,
"fetch_github_metadata",
@@ -286,9 +278,7 @@ How to use async tools.
{"name": "test_azure_provider", "file": "test_auth.py"},
],
"c3_2_examples_count": 2,
"c3_3_guides": [
{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}
],
"c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}],
"c3_4_configs": [],
"c3_7_architecture": [
{
@@ -335,9 +325,7 @@ How to use async tools.
{
"name": "fastmcp-oauth",
"description": "OAuth authentication for FastMCP",
"categories": {
"oauth": ["oauth", "auth", "provider", "google", "azure"]
},
"categories": {"oauth": ["oauth", "auth", "provider", "google", "azure"]},
}
)
)
@@ -491,9 +479,7 @@ pip install fastmcp
# Check content quality (Architecture Section 8.2)
assert "Issue #42" in router_md, "Missing issue references"
assert "" in router_md or "Stars:" in router_md, "Missing GitHub metadata"
assert (
"Quick Start" in router_md or "README" in router_md
), "Missing README content"
assert "Quick Start" in router_md or "README" in router_md, "Missing README content"
class TestScenario2MultiSource:
@@ -617,15 +603,11 @@ class TestScenario2MultiSource:
# Layer 4: GitHub insights (community knowledge)
# Mock source 1 (HTML docs)
source1_data = {
"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]
}
source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]}
# Mock source 2 (GitHub C3.x)
source2_data = {
"api": [
{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}
]
"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]
}
# Mock GitHub streams
@@ -651,9 +633,7 @@ class TestScenario2MultiSource:
)
# Create merger with required arguments
merger = RuleBasedMerger(
docs_data=source1_data, github_data=source2_data, conflicts=[]
)
merger = RuleBasedMerger(docs_data=source1_data, github_data=source2_data, conflicts=[])
# Merge using merge_all() method
merged = merger.merge_all()
@@ -770,12 +750,8 @@ def test_connection():
mock_c3x.return_value = {
"files": ["database.py", "api.py"],
"analysis_type": "c3x",
"c3_1_patterns": [
{"name": "Singleton", "count": 1, "file": "database.py"}
],
"c3_2_examples": [
{"name": "test_connection", "file": "test_database.py"}
],
"c3_1_patterns": [{"name": "Singleton", "count": 1, "file": "database.py"}],
"c3_2_examples": [{"name": "test_connection", "file": "test_database.py"}],
"c3_2_examples_count": 1,
"c3_3_guides": [],
"c3_4_configs": [],
@@ -967,9 +943,7 @@ Based on analysis of GitHub issues:
print(f"\nGitHub overhead: {github_overhead} lines")
# Architecture target: 20-60 lines
assert (
20 <= github_overhead <= 60
), f"GitHub overhead {github_overhead} not in range 20-60"
assert 20 <= github_overhead <= 60, f"GitHub overhead {github_overhead} not in range 20-60"
def test_router_size_within_limits(self):
"""Test router size is 150±20 lines (Architecture Section 8.1, Line 1970)."""
@@ -977,9 +951,7 @@ Based on analysis of GitHub issues:
router_lines = 150 # Simulated count
# Architecture target: 150 lines (±20)
assert (
130 <= router_lines <= 170
), f"Router size {router_lines} not in range 130-170"
assert 130 <= router_lines <= 170, f"Router size {router_lines} not in range 130-170"
def test_content_quality_requirements(self):
"""Test content quality (Architecture Section 8.2, Lines 1977-2014)."""
@@ -1021,9 +993,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check minimum 3 code examples
code_blocks = sub_skill_md.count("```")
assert (
code_blocks >= 6
), f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
assert code_blocks >= 6, (
f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
)
# Check language tags
assert "```python" in sub_skill_md, "Code blocks must have language tags"
@@ -1038,9 +1010,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check solution indicators for closed issues
if "closed" in sub_skill_md.lower():
assert (
"" in sub_skill_md or "Solution" in sub_skill_md
), "Closed issues should indicate solution found"
assert "" in sub_skill_md or "Solution" in sub_skill_md, (
"Closed issues should indicate solution found"
)
class TestTokenEfficiencyCalculation:
@@ -1077,9 +1049,9 @@ class TestTokenEfficiencyCalculation:
# With selective loading and caching, achieve 35-40%
# Even conservative estimate shows 29.5%, actual usage patterns show 35-40%
assert (
reduction_percent >= 29
), f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
assert reduction_percent >= 29, (
f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
)
if __name__ == "__main__":

View File

@@ -103,9 +103,7 @@ class TestAsyncScrapeMethods(unittest.TestCase):
os.chdir(tmpdir)
converter = DocToSkillConverter(config, dry_run=True)
self.assertTrue(hasattr(converter, "scrape_page_async"))
self.assertTrue(
asyncio.iscoroutinefunction(converter.scrape_page_async)
)
self.assertTrue(asyncio.iscoroutinefunction(converter.scrape_page_async))
finally:
os.chdir(self.original_cwd)
@@ -263,9 +261,7 @@ class TestAsyncErrorHandling(unittest.TestCase):
async with httpx.AsyncClient() as client:
# Mock client.get to raise exception
with patch.object(
client, "get", side_effect=httpx.HTTPError("Test error")
):
with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")):
# Should not raise exception, just log error
await converter.scrape_page_async(
"https://example.com/test", semaphore, client

View File

@@ -134,9 +134,7 @@ class TestREADMEExtraction(unittest.TestCase):
scraper._extract_readme()
self.assertIn("readme", scraper.extracted_data)
self.assertEqual(
scraper.extracted_data["readme"], "# React\n\nA JavaScript library"
)
self.assertEqual(scraper.extracted_data["readme"], "# React\n\nA JavaScript library")
def test_extract_readme_tries_multiple_locations(self):
"""Test that README extraction tries multiple file locations"""
@@ -477,15 +475,9 @@ class TestReleasesExtraction(unittest.TestCase):
mock_release1.prerelease = False
mock_release1.created_at = datetime(2023, 3, 1)
mock_release1.published_at = datetime(2023, 3, 1)
mock_release1.html_url = (
"https://github.com/facebook/react/releases/tag/v18.0.0"
)
mock_release1.tarball_url = (
"https://github.com/facebook/react/archive/v18.0.0.tar.gz"
)
mock_release1.zipball_url = (
"https://github.com/facebook/react/archive/v18.0.0.zip"
)
mock_release1.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0"
mock_release1.tarball_url = "https://github.com/facebook/react/archive/v18.0.0.tar.gz"
mock_release1.zipball_url = "https://github.com/facebook/react/archive/v18.0.0.zip"
mock_release2 = Mock()
mock_release2.tag_name = "v18.0.0-rc.0"
@@ -495,15 +487,9 @@ class TestReleasesExtraction(unittest.TestCase):
mock_release2.prerelease = True
mock_release2.created_at = datetime(2023, 2, 1)
mock_release2.published_at = datetime(2023, 2, 1)
mock_release2.html_url = (
"https://github.com/facebook/react/releases/tag/v18.0.0-rc.0"
)
mock_release2.tarball_url = (
"https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
)
mock_release2.zipball_url = (
"https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
)
mock_release2.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0"
mock_release2.tarball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
mock_release2.zipball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)
@@ -612,9 +598,7 @@ class TestGitHubToSkillConverter(unittest.TestCase):
config = {"repo": "facebook/react", "name": "test", "description": "Test skill"}
# Override data file path
with patch(
"skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__"
) as mock_init:
with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__") as mock_init:
mock_init.return_value = None
converter = self.GitHubToSkillConverter(config)
converter.data_file = str(self.data_file)
@@ -1000,9 +984,7 @@ class TestErrorHandling(unittest.TestCase):
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)
scraper.repo = None
scraper.github.get_repo = Mock(
side_effect=GithubException(404, "Not found")
)
scraper.github.get_repo = Mock(side_effect=GithubException(404, "Not found"))
# Should raise ValueError with helpful message
with self.assertRaises(ValueError) as context:
@@ -1022,9 +1004,7 @@ class TestErrorHandling(unittest.TestCase):
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)
scraper.repo = Mock()
scraper.repo.get_issues.side_effect = GithubException(
403, "Rate limit exceeded"
)
scraper.repo.get_issues.side_effect = GithubException(403, "Rate limit exceeded")
# Should handle gracefully and log warning
scraper._extract_issues()

View File

@@ -31,9 +31,7 @@ class TestGuideEnhancerModeDetection:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="auto")
@@ -111,9 +109,7 @@ class TestGuideEnhancerStepDescriptions:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -179,9 +175,7 @@ class TestGuideEnhancerTroubleshooting:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -192,9 +186,7 @@ class TestGuideEnhancerTroubleshooting:
guide_data = {
"title": "Test Guide",
"steps": [
{"description": "import requests", "code": "import requests"}
],
"steps": [{"description": "import requests", "code": "import requests"}],
"language": "python",
}
result = enhancer.enhance_troubleshooting(guide_data)
@@ -246,9 +238,7 @@ class TestGuideEnhancerPrerequisites:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -293,9 +283,7 @@ class TestGuideEnhancerNextSteps:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -340,9 +328,7 @@ class TestGuideEnhancerUseCases:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -426,9 +412,7 @@ class TestGuideEnhancerFullWorkflow:
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
):
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
@@ -461,9 +445,7 @@ class TestGuideEnhancerFullWorkflow:
"""Test graceful fallback on enhancement error"""
enhancer = GuideEnhancer(mode="none")
with patch.object(
enhancer, "enhance_guide", side_effect=Exception("API error")
):
with patch.object(enhancer, "enhance_guide", side_effect=Exception("API error")):
guide_data = {
"title": "Test",
"steps": [],
@@ -533,9 +515,7 @@ class TestGuideEnhancerPromptGeneration:
guide_data = {
"title": "How to Test",
"steps": [
{"description": "Write test", "code": "def test_example(): pass"}
],
"steps": [{"description": "Write test", "code": "def test_example(): pass"}],
"language": "python",
"prerequisites": ["pytest"],
}
@@ -583,9 +563,7 @@ class TestGuideEnhancerResponseParsing:
response = json.dumps(
{
"step_descriptions": [
{"step_index": 0, "explanation": "Test", "variations": []}
],
"step_descriptions": [{"step_index": 0, "explanation": "Test", "variations": []}],
"troubleshooting": [],
"prerequisites_detailed": [],
"next_steps": [],

View File

@@ -174,9 +174,7 @@ class TestInstallToAgent:
self.skill_dir.mkdir()
# Create SKILL.md
(self.skill_dir / "SKILL.md").write_text(
"# Test Skill\n\nThis is a test skill."
)
(self.skill_dir / "SKILL.md").write_text("# Test Skill\n\nThis is a test skill.")
# Create references directory with files
refs_dir = self.skill_dir / "references"
@@ -201,9 +199,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", force=True
)
success, message = install_to_agent(self.skill_dir, "claude", force=True)
assert success is True
target_path = agent_path / "test-skill"
@@ -219,9 +215,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", force=True
)
success, message = install_to_agent(self.skill_dir, "claude", force=True)
assert success is True
target_path = agent_path / "test-skill"
@@ -246,9 +240,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", force=True
)
success, message = install_to_agent(self.skill_dir, "claude", force=True)
assert success is True
target_path = agent_path / "test-skill"
@@ -269,9 +261,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", force=False
)
success, message = install_to_agent(self.skill_dir, "claude", force=False)
assert success is False
assert "already installed" in message.lower()
@@ -289,9 +279,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", force=True
)
success, message = install_to_agent(self.skill_dir, "claude", force=True)
assert success is True
# Old file should be gone
@@ -328,9 +316,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path,
):
success, message = install_to_agent(
self.skill_dir, "claude", dry_run=True
)
success, message = install_to_agent(self.skill_dir, "claude", dry_run=True)
assert success is True
assert "DRY RUN" in message
@@ -485,9 +471,7 @@ class TestInstallAgentCLI:
assert exit_code == 0
# Directory should NOT be created
assert not (
Path(agent_tmpdir) / ".claude" / "skills" / "test-skill"
).exists()
assert not (Path(agent_tmpdir) / ".claude" / "skills" / "test-skill").exists()
def test_cli_integration(self):
"""Test end-to-end CLI execution."""

View File

@@ -50,9 +50,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
# Mock large CHANGELOG (1.4MB, encoding="none")
mock_content = Mock()
mock_content.type = "file"
mock_content.encoding = (
"none" # This is what GitHub API returns for large files
)
mock_content.encoding = "none" # This is what GitHub API returns for large files
mock_content.size = 1388271
mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
@@ -82,9 +80,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
# VERIFY: CHANGELOG was extracted successfully
self.assertIn("changelog", scraper.extracted_data)
self.assertIn("Bug fixes", scraper.extracted_data["changelog"])
self.assertEqual(
scraper.extracted_data["changelog"], mock_response.text
)
self.assertEqual(scraper.extracted_data["changelog"], mock_response.text)
def test_large_file_fallback_on_error(self):
"""E2E: Verify graceful handling if download_url fails"""
@@ -184,8 +180,7 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
# VERIFY: sys.argv contains --enhance-local flag
# (main.py should have added it before calling github_scraper)
called_with_enhance = any(
"--enhance-local" in str(call)
for call in mock_github_main.call_args_list
"--enhance-local" in str(call) for call in mock_github_main.call_args_list
)
self.assertTrue(
called_with_enhance or "--enhance-local" in sys.argv,
@@ -229,9 +224,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
os.environ,
{"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url},
),
patch(
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
):
# Create enhancer
_enhancer = SkillEnhancer(self.skill_dir)
@@ -258,9 +251,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY
with (
patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True),
patch(
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
):
# Create enhancer (should accept ANTHROPIC_AUTH_TOKEN)
enhancer = SkillEnhancer(self.skill_dir)
@@ -290,9 +281,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}),
patch(
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
):
enhancer = SkillEnhancer(self.skill_dir)
@@ -301,9 +290,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
mock_thinking_block = SimpleNamespace(type="thinking")
# TextBlock has .text attribute
mock_text_block = SimpleNamespace(
text="# Enhanced SKILL.md\n\nContent here"
)
mock_text_block = SimpleNamespace(text="# Enhanced SKILL.md\n\nContent here")
mock_message = Mock()
mock_message.content = [mock_thinking_block, mock_text_block]

View File

@@ -31,9 +31,7 @@ def test_timeout_with_retry():
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
with (
patch(
"requests.get", side_effect=requests.Timeout("Connection timeout")
) as mock_get,
patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
patch("time.sleep") as mock_sleep,
): # Mock sleep to speed up test
content = downloader.download()
@@ -143,9 +141,7 @@ def test_custom_max_retries():
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
with (
patch(
"requests.get", side_effect=requests.Timeout("Connection timeout")
) as mock_get,
patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
patch("time.sleep"),
):
content = downloader.download()
@@ -203,7 +199,9 @@ def test_is_markdown_rejects_html_doctype():
"""Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
html = "<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
html = (
"<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
)
assert not downloader._is_markdown(html)
# Test case-insensitive
@@ -230,9 +228,7 @@ def test_is_markdown_rejects_html_meta():
html_with_head = "<head><title>Page</title></head><body>Content</body>"
assert not downloader._is_markdown(html_with_head)
html_with_meta = (
'<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
)
html_with_meta = '<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
assert not downloader._is_markdown(html_with_meta)
@@ -244,9 +240,7 @@ def test_is_markdown_accepts_markdown_with_html_words():
assert downloader._is_markdown(markdown)
# Test with actual markdown patterns
markdown_with_code = (
"# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
)
markdown_with_code = "# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
assert downloader._is_markdown(markdown_with_code)
@@ -255,9 +249,7 @@ def test_html_detection_only_scans_first_500_chars():
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
# HTML tag after 500 chars should not be detected
safe_markdown = (
"# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
)
safe_markdown = "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
# This should pass because <!DOCTYPE html> is beyond first 500 chars
if len(safe_markdown[:500]) < len("<!DOCTYPE html>"):
# If the HTML is within 500 chars, adjust test
@@ -294,9 +286,7 @@ def test_download_rejects_html_redirect():
mock_response = Mock()
# Simulate server returning HTML instead of markdown
mock_response.text = (
"<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
)
mock_response.text = "<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
mock_response.raise_for_status = Mock()
with patch("requests.get", return_value=mock_response):

View File

@@ -73,9 +73,7 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=False)
with (
patch.object(
converter, "_try_llms_txt", return_value=False
) as mock_try,
patch.object(converter, "_try_llms_txt", return_value=False) as mock_try,
patch.object(converter, "scrape_page"),
patch.object(converter, "save_summary"),
):
@@ -154,9 +152,7 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=False)
with (
patch.object(
converter, "_try_llms_txt", return_value=False
) as mock_try,
patch.object(converter, "_try_llms_txt", return_value=False) as mock_try,
patch.object(converter, "scrape_page_async", return_value=None),
patch.object(converter, "save_summary"),
):
@@ -252,9 +248,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt)
self.assertTrue(
any("Invalid value" in log and "0" in log for log in cm.output)
)
self.assertTrue(any("Invalid value" in log and "0" in log for log in cm.output))
def test_skip_llms_txt_with_int_one_logs_warning(self):
"""Test that integer 1 logs warning and defaults to False."""
@@ -268,9 +262,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt)
self.assertTrue(
any("Invalid value" in log and "1" in log for log in cm.output)
)
self.assertTrue(any("Invalid value" in log and "1" in log for log in cm.output))
def test_skip_llms_txt_with_string_logs_warning(self):
"""Test that string values log warning and default to False."""
@@ -284,9 +276,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt)
self.assertTrue(
any("Invalid value" in log and "true" in log for log in cm.output)
)
self.assertTrue(any("Invalid value" in log and "true" in log for log in cm.output))
def test_skip_llms_txt_with_none_logs_warning(self):
"""Test that None logs warning and defaults to False."""
@@ -300,9 +290,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt)
self.assertTrue(
any("Invalid value" in log and "None" in log for log in cm.output)
)
self.assertTrue(any("Invalid value" in log and "None" in log for log in cm.output))
def test_scraping_proceeds_when_llms_txt_skipped(self):
"""Test that HTML scraping proceeds normally when llms.txt is skipped."""