fix(#300): centralize selector fallback, fix dry-run link discovery, and smart --config routing
- Add FALLBACK_MAIN_SELECTORS constant and _find_main_content() helper to eliminate 3 duplicated fallback loops in doc_scraper.py
- Move link extraction before early return in extract_content() so links are always discovered from the full page, not just main content
- Fix single-threaded dry-run to extract links from soup (full page) instead of main element only — fixes reactflow.dev finding only 1 page
- Add link extraction to async dry-run path (was completely missing)
- Remove main_content from get_configuration() defaults so fallback logic kicks in instead of a broad CSS comma selector matching body
- Smart create --config routing: peek at JSON to determine unified (sources array → unified_scraper) vs simple (base_url → doc_scraper)
- Update docs/user-guide/02-scraping.md and docs/reference/CONFIG_FORMAT.md to use unified config format (legacy format rejected since v2.11.0)
- Fix test_auto_fetch_enabled and test_mcp_validate_legacy_config

Closes #300

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -265,16 +265,16 @@ class TestResolveConfigPath:
|
||||
@patch("skill_seekers.cli.config_fetcher.fetch_config_from_api")
|
||||
def test_auto_fetch_enabled(self, mock_fetch, tmp_path):
|
||||
"""Test that auto-fetch runs when enabled."""
|
||||
# Mock fetch to return a path
|
||||
mock_config = tmp_path / "configs" / "react.json"
|
||||
# Use a name that does NOT exist locally (react.json exists in configs/)
|
||||
mock_config = tmp_path / "configs" / "obscure_framework.json"
|
||||
mock_config.parent.mkdir(exist_ok=True)
|
||||
mock_config.write_text('{"name": "react"}')
|
||||
mock_config.write_text('{"name": "obscure_framework"}')
|
||||
mock_fetch.return_value = mock_config
|
||||
|
||||
result = resolve_config_path("react.json", auto_fetch=True)
|
||||
result = resolve_config_path("obscure_framework.json", auto_fetch=True)
|
||||
|
||||
# Verify fetch was called
|
||||
mock_fetch.assert_called_once_with("react", destination="configs")
|
||||
mock_fetch.assert_called_once_with("obscure_framework", destination="configs")
|
||||
assert result is not None
|
||||
assert result.exists()
|
||||
|
||||
|
||||
@@ -67,22 +67,30 @@ async def test_mcp_validate_legacy_config():
|
||||
"""Test that MCP can validate legacy configs"""
|
||||
print("\n✓ Testing MCP validate_config_tool with legacy config...")
|
||||
|
||||
# Use existing legacy config
|
||||
config_path = "configs/react.json"
|
||||
# Create a truly legacy config (no "sources" key — just base_url + selectors)
|
||||
legacy_config = {
|
||||
"name": "test-legacy",
|
||||
"base_url": "https://example.com/",
|
||||
"selectors": {"main_content": "main", "title": "h1", "code_blocks": "pre code"},
|
||||
"url_patterns": {"include": [], "exclude": []},
|
||||
"rate_limit": 0.5,
|
||||
}
|
||||
|
||||
if not Path(config_path).exists():
|
||||
print(f" ⚠️ Skipping: {config_path} not found")
|
||||
return
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
||||
json.dump(legacy_config, f)
|
||||
config_path = f.name
|
||||
|
||||
args = {"config_path": config_path}
|
||||
result = await validate_config_tool(args)
|
||||
try:
|
||||
args = {"config_path": config_path}
|
||||
result = await validate_config_tool(args)
|
||||
|
||||
# Check result
|
||||
text = result[0].text
|
||||
assert "✅" in text, f"Expected success, got: {text}"
|
||||
assert "Legacy" in text, f"Expected legacy format detected, got: {text}"
|
||||
# Legacy configs are rejected since v2.11.0 — validator should detect the format
|
||||
text = result[0].text
|
||||
assert "LEGACY" in text.upper(), f"Expected legacy format detected, got: {text}"
|
||||
|
||||
print(" ✅ MCP correctly validates legacy config")
|
||||
print(" ✅ MCP correctly detects legacy config format")
|
||||
finally:
|
||||
os.unlink(config_path)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not MCP_AVAILABLE, reason="MCP package not installed")
|
||||
|
||||
Reference in New Issue
Block a user