fix: unified scraper temp config uses unified format for doc_scraper (#317)

The unified scraper's _scrape_documentation() was creating temp configs
in the flat/legacy format (no "sources" key), causing doc_scraper's
ConfigValidator to reject them. Wrap the temp config in the unified format
with a "sources" array. Also remove dead code branches and fix a
pre-existing test that did not clear GITHUB_TOKEN from the environment,
where it takes priority over the token supplied in the config.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-03-20 22:35:12 +03:00
parent 4f87de6b56
commit f6131c6798
4 changed files with 29 additions and 15 deletions

View File

@@ -68,7 +68,12 @@ class TestGitHubScraperInitialization(unittest.TestCase):
"github_token": "test_token_123",
}
with patch("skill_seekers.cli.github_scraper.Github") as mock_github:
# Clear GITHUB_TOKEN env var so config token is used (env takes priority)
env = {k: v for k, v in os.environ.items() if k != "GITHUB_TOKEN"}
with (
patch.dict(os.environ, env, clear=True),
patch("skill_seekers.cli.github_scraper.Github") as mock_github,
):
_scraper = self.GitHubScraper(config)
mock_github.assert_called_once_with("test_token_123")

View File

@@ -224,7 +224,11 @@ class TestScrapeDocumentation:
mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
scraper._scrape_documentation(source)
assert any("llms_txt_url" in c for c in written_configs)
assert any(
"llms_txt_url" in s
for c in written_configs
for s in c.get("sources", [c])
)
def test_start_urls_forwarded_to_doc_config(self, tmp_path):
"""start_urls from source is forwarded to the temporary doc config."""
@@ -247,7 +251,11 @@ class TestScrapeDocumentation:
mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
scraper._scrape_documentation(source)
assert any("start_urls" in c for c in written_configs)
assert any(
"start_urls" in s
for c in written_configs
for s in c.get("sources", [c])
)
# ===========================================================================