fix: unified scraper temp config uses unified format for doc_scraper (#317)
The unified scraper's _scrape_documentation() was creating temp configs in the flat/legacy format (no "sources" key), causing doc_scraper's ConfigValidator to reject them. Wrap the temp config in the unified format with a "sources" array. Also remove dead code branches and fix a pre-existing test that did not clear GITHUB_TOKEN from the environment.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -224,7 +224,11 @@ class TestScrapeDocumentation:
|
||||
mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
|
||||
scraper._scrape_documentation(source)
|
||||
|
||||
assert any("llms_txt_url" in c for c in written_configs)
|
||||
assert any(
|
||||
"llms_txt_url" in s
|
||||
for c in written_configs
|
||||
for s in c.get("sources", [c])
|
||||
)
|
||||
|
||||
def test_start_urls_forwarded_to_doc_config(self, tmp_path):
|
||||
"""start_urls from source is forwarded to the temporary doc config."""
|
||||
@@ -247,7 +251,11 @@ class TestScrapeDocumentation:
|
||||
mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
|
||||
scraper._scrape_documentation(source)
|
||||
|
||||
assert any("start_urls" in c for c in written_configs)
|
||||
assert any(
|
||||
"start_urls" in s
|
||||
for c in written_configs
|
||||
for s in c.get("sources", [c])
|
||||
)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
|
||||
Reference in New Issue
Block a user