fix: Auto-fetch preset configs from API when not found locally

Fixes #264

Users reported that preset configs (react.json, godot.json, etc.) were not
found after installing via pip/uv, causing immediate failure on first use.

Solution: Instead of bundling configs in the package, the CLI now automatically
fetches missing configs from the SkillSeekersWeb.com API.

Changes:
- Created config_fetcher.py with smart config resolution:
  1. Check local path (backward compatible)
  2. Check with configs/ prefix
  3. Auto-fetch from SkillSeekersWeb.com API (new!)
- Updated doc_scraper.py to use ConfigValidator (supports unified configs)
- Added 15 comprehensive tests for auto-fetch functionality

User Experience:
- Zero configuration needed - presets work immediately after install
- Better error messages showing available configs from API
- Downloaded configs are cached locally for future use
- Fully backward compatible with existing local configs

Testing:
- 15 new unit tests (all passing)
- 2 integration tests with real API
- Full test suite: 1387 tests passing
- No breaking changes

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-27 21:41:20 +03:00
parent 6aec0e3688
commit 746e335fae
3 changed files with 572 additions and 21 deletions

View File

@@ -0,0 +1,184 @@
"""
Config fetcher for CLI - synchronous wrapper around API fetch.
Provides automatic config downloading from SkillSeekersWeb.com API
when local config files are not found.
"""
import json
import logging
from pathlib import Path
from typing import Optional
import httpx
logger = logging.getLogger(__name__)
API_BASE_URL = "https://api.skillseekersweb.com"
def fetch_config_from_api(
    config_name: str, destination: str = "configs", timeout: float = 30.0
) -> Optional[Path]:
    """
    Fetch a config file from the SkillSeekersWeb.com API and save it locally.

    The exchange has two steps: the config's detail record is requested first,
    then the config itself is downloaded from the ``download_url`` advertised
    in that record.

    Args:
        config_name: Name of config to download (e.g., 'react', 'godot').
            A trailing '.json' and a leading 'configs/' are stripped, so
            'configs/react.json' is treated as 'react'. The remaining name
            must be a single path component.
        destination: Directory to save config file (default: 'configs').
            Created if it does not exist.
        timeout: Request timeout in seconds (default: 30.0)

    Returns:
        Path to downloaded config file, or None if the name is unusable, the
        config is unknown to the API, or any network / parsing / file-system
        step fails. Failures are logged and never raised.

    Example:
        >>> config_path = fetch_config_from_api('react')
        >>> if config_path:
        ...     print(f"Downloaded to {config_path}")
    """
    # Normalize config name (remove .json suffix, then 'configs/' prefix)
    config_name = config_name.removesuffix(".json").removeprefix("configs/")

    # The name is interpolated into both the request URL and the destination
    # file path; refuse anything that is empty or could point outside
    # `destination` instead of making pointless requests with it.
    if (
        not config_name
        or config_name in {".", ".."}
        or "/" in config_name
        or "\\" in config_name
    ):
        logger.warning("⚠️ Invalid config name: %r", config_name)
        return None

    try:
        with httpx.Client(timeout=timeout) as client:
            # Get config details first
            detail_url = f"{API_BASE_URL}/api/configs/{config_name}"
            logger.info("🔍 Checking API for config: %s", config_name)
            detail_response = client.get(detail_url)
            if detail_response.status_code == 404:
                logger.warning("⚠️ Config '%s' not found on API", config_name)
                return None
            detail_response.raise_for_status()
            config_info = detail_response.json()

            # Download the actual config file using download_url from API response
            download_url = config_info.get("download_url")
            if not download_url:
                logger.error(
                    "❌ Config '%s' has no download_url. Contact support.", config_name
                )
                return None
            logger.info("📥 Downloading config from API...")
            download_response = client.get(download_url)
            download_response.raise_for_status()
            config_data = download_response.json()

            # A config is a JSON object; caching anything else would leave an
            # unusable file that later runs pick up as a "local" config.
            if not isinstance(config_data, dict):
                logger.warning(
                    "⚠️ Unexpected config payload for '%s': expected a JSON object",
                    config_name,
                )
                return None

            # Save to destination. Write to a sibling temp file and rename it
            # into place so an interrupted write never leaves a truncated
            # <name>.json behind in the local cache.
            dest_path = Path(destination)
            dest_path.mkdir(parents=True, exist_ok=True)
            config_file = dest_path / f"{config_name}.json"
            temp_file = dest_path / f"{config_name}.json.tmp"
            try:
                with open(temp_file, "w", encoding="utf-8") as f:
                    json.dump(config_data, f, indent=2)
                temp_file.replace(config_file)
            finally:
                # No-op after a successful rename; removes a partial file otherwise.
                temp_file.unlink(missing_ok=True)

            logger.info("✅ Config downloaded successfully: %s", config_file)
            logger.info(" Category: %s", config_info.get("category", "uncategorized"))
            logger.info(" Type: %s", config_info.get("type", "unknown"))
            return config_file
    except httpx.HTTPError as e:
        logger.warning("⚠️ HTTP Error fetching config: %s", e)
        return None
    except json.JSONDecodeError as e:
        logger.warning("⚠️ Invalid JSON response from API: %s", e)
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on None, not exceptions.
        logger.warning("⚠️ Error fetching config: %s", e)
        return None
def list_available_configs(category: Optional[str] = None, timeout: float = 30.0) -> list[str]:
    """
    Ask the API which configs exist and return their names.

    Args:
        category: Restrict the listing to this category (optional)
        timeout: Request timeout in seconds (default: 30.0)

    Returns:
        Names of the configs reported by the API. Entries without a name are
        dropped. Any failure (network, HTTP status, unexpected payload) yields
        an empty list instead of an exception.

    Example:
        >>> configs = list_available_configs()
        >>> print(f"Available: {', '.join(configs)}")
    """
    try:
        # Only send the filter when the caller actually asked for one.
        query = {"category": category} if category else {}
        with httpx.Client(timeout=timeout) as session:
            reply = session.get(f"{API_BASE_URL}/api/configs", params=query)
            reply.raise_for_status()
            payload = reply.json()
            names = []
            for entry in payload.get("configs", []):
                entry_name = entry.get("name")
                if entry_name:
                    names.append(entry_name)
            return names
    except Exception:
        # Best-effort helper: callers treat "no list" and "API down" the same.
        return []
def resolve_config_path(config_path: str, auto_fetch: bool = True) -> Optional[Path]:
    """
    Resolve config path with automatic API fallback.

    Tries to find config in this order:
        1. Exact path as provided
        2. With 'configs/' prefix added
        3. Steps 1-2 again with '.json' appended (only when the given value
           does not already end in '.json'), so a bare name such as 'react'
           finds a previously downloaded 'configs/react.json'
        4. Fetch from API (if auto_fetch=True)

    Only regular files count as a match; a directory that happens to share
    the config's name is ignored.

    Args:
        config_path: Config file path or name
        auto_fetch: Automatically fetch from API if not found locally (default: True)

    Returns:
        Absolute path to config file, or None if not found

    Example:
        >>> path = resolve_config_path('react.json')
        >>> if path:
        ...     with open(path) as f:
        ...         config = json.load(f)
    """
    # Spellings to look for locally: the value as given, then with '.json'
    # appended when the caller passed a bare name.
    spellings = [config_path]
    if not config_path.endswith(".json"):
        spellings.append(f"{config_path}.json")

    for spelling in spellings:
        # 1. Try exact path
        exact_path = Path(spelling)
        if exact_path.is_file():
            return exact_path.resolve()
        # 2. Try with configs/ prefix
        if not spelling.startswith("configs/"):
            with_prefix = Path("configs") / spelling
            if with_prefix.is_file():
                return with_prefix.resolve()

    # 3. Try API fetch (if enabled)
    if auto_fetch:
        # Extract config name (remove .json, remove configs/ prefix)
        config_name = config_path.removesuffix(".json").removeprefix("configs/")
        logger.info(
            "\n💡 Config not found locally, attempting to fetch from SkillSeekersWeb.com API..."
        )
        fetched_path = fetch_config_from_api(config_name, destination="configs")
        if fetched_path and fetched_path.exists():
            return fetched_path.resolve()
    return None

View File

@@ -30,6 +30,8 @@ from bs4 import BeautifulSoup
# Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from skill_seekers.cli.config_fetcher import list_available_configs, resolve_config_path
from skill_seekers.cli.config_validator import ConfigValidator
from skill_seekers.cli.constants import (
CONTENT_PREVIEW_LENGTH,
DEFAULT_ASYNC_MODE,
@@ -1751,6 +1753,8 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
def load_config(config_path: str) -> dict[str, Any]:
"""Load and validate configuration from JSON file.
Automatically fetches configs from SkillSeekersWeb.com API if not found locally.
Args:
config_path (str): Path to JSON configuration file
@@ -1765,36 +1769,56 @@ def load_config(config_path: str) -> dict[str, Any]:
>>> print(config['name'])
'react'
"""
# Try to resolve config path (with auto-fetch from API)
resolved_path = resolve_config_path(config_path, auto_fetch=True)
if resolved_path is None:
# Config not found locally and fetch failed
available = list_available_configs()
logger.error("❌ Error: Config file not found: %s", config_path)
logger.error(" Tried:")
logger.error(" 1. Local path: %s", config_path)
logger.error(" 2. With prefix: configs/%s", config_path)
logger.error(" 3. SkillSeekersWeb.com API")
logger.error("")
if available:
logger.error(" 📋 Available configs from API (%d total):", len(available))
for cfg in available[:10]: # Show first 10
logger.error("%s", cfg)
if len(available) > 10:
logger.error(" ... and %d more", len(available) - 10)
logger.error("")
logger.error(" 💡 Use any config name: skill-seekers scrape --config <name>.json")
logger.error(" 🌐 Browse all: https://skillseekersweb.com/")
else:
logger.error(" ⚠️ Could not connect to API to list available configs")
logger.error(" 🌐 Visit: https://skillseekersweb.com/ for available configs")
sys.exit(1)
# Load the resolved config file
try:
with open(config_path, encoding="utf-8") as f:
with open(resolved_path, encoding="utf-8") as f:
config = json.load(f)
except json.JSONDecodeError as e:
logger.error("❌ Error: Invalid JSON in config file: %s", config_path)
logger.error("❌ Error: Invalid JSON in config file: %s", resolved_path)
logger.error(" Details: %s", e)
logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno)
sys.exit(1)
except FileNotFoundError:
logger.error("❌ Error: Config file not found: %s", config_path)
logger.error(" Suggestion: Create a config file or use an existing one from configs/")
logger.error(" Available configs: react.json, vue.json, django.json, godot.json")
sys.exit(1)
# Validate config
errors, warnings = validate_config(config)
# Validate config using ConfigValidator (supports both unified and legacy formats)
try:
validator = ConfigValidator(config)
validator.validate()
# Show warnings (non-blocking)
if warnings:
logger.warning("⚠️ Configuration warnings in %s:", config_path)
for warning in warnings:
logger.warning(" - %s", warning)
logger.info("")
# Show errors (blocking)
if errors:
# Log config type
if validator.is_unified:
logger.debug("✓ Unified config format detected")
else:
logger.debug("✓ Legacy config format detected")
except ValueError as e:
logger.error("❌ Configuration validation errors in %s:", config_path)
for error in errors:
logger.error(" - %s", error)
logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples")
logger.error(" %s", str(e))
logger.error("\n Suggestion: Fix the above errors or check https://skillseekersweb.com/ for examples")
sys.exit(1)
return config

View File

@@ -0,0 +1,343 @@
"""Tests for config_fetcher module - automatic API config downloading."""
import json
from pathlib import Path
from unittest.mock import Mock, patch
import httpx
import pytest
from skill_seekers.cli.config_fetcher import (
fetch_config_from_api,
list_available_configs,
resolve_config_path,
)
class TestFetchConfigFromApi:
    """Exercise fetch_config_from_api against a mocked httpx client."""

    CLIENT_TARGET = "skill_seekers.cli.config_fetcher.httpx.Client"

    @staticmethod
    def _install_client(client_factory):
        """Return the mock that ``with httpx.Client(...) as client`` yields."""
        fake_client = Mock()
        client_factory.return_value.__enter__.return_value = fake_client
        return fake_client

    @staticmethod
    def _json_reply(payload, status_code=None):
        """Build a mock response whose ``.json()`` returns *payload*."""
        reply = Mock()
        if status_code is not None:
            reply.status_code = status_code
        reply.json.return_value = payload
        reply.raise_for_status = Mock()
        return reply

    @staticmethod
    def _route(detail_reply, download_reply):
        """Side effect for ``client.get``: download URLs get the download reply."""

        def pick(url, *args, **kwargs):
            return download_reply if "download" in url else detail_reply

        return pick

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_successful_fetch(self, mock_client_class, tmp_path):
        """A found config is downloaded and written to the destination."""
        fake_client = self._install_client(mock_client_class)
        detail_reply = self._json_reply(
            {
                "name": "react",
                "download_url": "https://api.skillseekersweb.com/api/configs/react/download",
                "category": "web-frameworks",
                "type": "unified",
            },
            status_code=200,
        )
        download_reply = self._json_reply(
            {
                "name": "react",
                "description": "React documentation skill",
                "base_url": "https://react.dev/",
            }
        )
        fake_client.get.side_effect = self._route(detail_reply, download_reply)

        saved = fetch_config_from_api("react", destination=str(tmp_path))

        assert saved is not None
        assert saved.exists()
        assert saved.name == "react.json"
        # The file on disk must hold the downloaded payload.
        with open(saved) as handle:
            stored = json.load(handle)
        assert stored["name"] == "react"
        assert "description" in stored

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_config_not_found(self, mock_client_class):
        """A 404 on the detail lookup yields None."""
        fake_client = self._install_client(mock_client_class)
        missing_reply = Mock()
        missing_reply.status_code = 404
        fake_client.get.return_value = missing_reply

        assert fetch_config_from_api("nonexistent") is None

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_no_download_url(self, mock_client_class):
        """A detail record without download_url yields None."""
        fake_client = self._install_client(mock_client_class)
        fake_client.get.return_value = self._json_reply({"name": "test"}, status_code=200)

        assert fetch_config_from_api("test") is None

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_http_error(self, mock_client_class):
        """Transport-level failures are swallowed and reported as None."""
        fake_client = self._install_client(mock_client_class)
        fake_client.get.side_effect = httpx.HTTPError("Connection failed")

        assert fetch_config_from_api("react") is None

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_json_decode_error(self, mock_client_class):
        """An undecodable detail response yields None."""
        fake_client = self._install_client(mock_client_class)
        broken_reply = Mock()
        broken_reply.status_code = 200
        broken_reply.json.side_effect = json.JSONDecodeError("Invalid", "", 0)
        broken_reply.raise_for_status = Mock()
        fake_client.get.return_value = broken_reply

        assert fetch_config_from_api("react") is None

    def test_normalize_config_name(self, tmp_path):
        """A '.json' suffix and a 'configs/' prefix are stripped from the name."""
        with patch(self.CLIENT_TARGET) as mock_client_class:
            fake_client = self._install_client(mock_client_class)
            detail_reply = self._json_reply(
                {"download_url": "https://api.example.com/download"},
                status_code=200,
            )
            download_reply = self._json_reply({"name": "test"})
            fake_client.get.side_effect = self._route(detail_reply, download_reply)
            target_dir = str(tmp_path)

            # Name given with the .json extension
            from_suffixed = fetch_config_from_api("test.json", destination=target_dir)
            assert from_suffixed is not None
            assert from_suffixed.name == "test.json"

            # Name given with the configs/ prefix
            from_prefixed = fetch_config_from_api("configs/test", destination=target_dir)
            assert from_prefixed is not None
class TestListAvailableConfigs:
    """Exercise list_available_configs against a mocked httpx client."""

    @staticmethod
    def _client_returning(client_factory, payload):
        """Wire the patched Client so every GET answers with *payload* as JSON."""
        fake_client = Mock()
        client_factory.return_value.__enter__.return_value = fake_client
        reply = Mock()
        reply.json.return_value = payload
        reply.raise_for_status = Mock()
        fake_client.get.return_value = reply
        return fake_client

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_successful_list(self, mock_client_class):
        """Every named entry in the API listing is returned."""
        listing = {
            "configs": [{"name": label} for label in ("react", "vue", "godot")],
            "total": 3,
        }
        self._client_returning(mock_client_class, listing)

        names = list_available_configs()

        assert len(names) == 3
        for expected in ("react", "vue", "godot"):
            assert expected in names

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_category_filter(self, mock_client_class):
        """The category argument is forwarded as a query parameter."""
        listing = {"configs": [{"name": "react"}, {"name": "vue"}], "total": 2}
        fake_client = self._client_returning(mock_client_class, listing)

        names = list_available_configs(category="web-frameworks")

        assert len(names) == 2
        # Exactly one request, carrying the category filter
        fake_client.get.assert_called_once()
        sent_kwargs = fake_client.get.call_args.kwargs
        assert "params" in sent_kwargs
        assert sent_kwargs["params"]["category"] == "web-frameworks"

    @patch("skill_seekers.cli.config_fetcher.httpx.Client")
    def test_api_error(self, mock_client_class):
        """A failing request degrades to an empty list."""
        fake_client = Mock()
        mock_client_class.return_value.__enter__.return_value = fake_client
        fake_client.get.side_effect = httpx.HTTPError("Connection failed")

        assert list_available_configs() == []
class TestResolveConfigPath:
    """Tests for resolve_config_path function.

    resolve_config_path looks up relative paths against the current working
    directory, so every test that passes a relative name first moves into an
    isolated temporary directory via ``monkeypatch.chdir`` (restored
    automatically by pytest). Without that, a real ``configs/react.json`` in
    the directory pytest is launched from would change the outcome.
    """

    def test_exact_path_exists(self, tmp_path):
        """Test resolution when exact path exists."""
        # Absolute path: independent of the working directory
        config_file = tmp_path / "test.json"
        config_file.write_text('{"name": "test"}')

        result = resolve_config_path(str(config_file), auto_fetch=False)

        assert result is not None
        assert result.exists()
        assert result.name == "test.json"

    def test_with_configs_prefix(self, tmp_path, monkeypatch):
        """Test resolution with configs/ prefix."""
        # Create configs directory and file
        configs_dir = tmp_path / "configs"
        configs_dir.mkdir()
        config_file = configs_dir / "test.json"
        config_file.write_text('{"name": "test"}')
        # Change to tmp_path for relative path testing
        monkeypatch.chdir(tmp_path)

        result = resolve_config_path("test.json", auto_fetch=False)

        assert result is not None
        assert result.exists()
        assert result.name == "test.json"

    def test_auto_fetch_disabled(self, tmp_path, monkeypatch):
        """Test that auto-fetch doesn't run when disabled."""
        # Empty working directory: nothing can be found locally
        monkeypatch.chdir(tmp_path)

        result = resolve_config_path("nonexistent.json", auto_fetch=False)

        assert result is None

    @patch("skill_seekers.cli.config_fetcher.fetch_config_from_api")
    def test_auto_fetch_enabled(self, mock_fetch, tmp_path, monkeypatch):
        """Test that auto-fetch runs when enabled."""
        # Work in an empty directory so the local lookups miss and the API
        # fallback is actually reached.
        workdir = tmp_path / "cwd"
        workdir.mkdir()
        monkeypatch.chdir(workdir)
        # Mock fetch to return a path that lives outside the working
        # directory's own configs/ lookup location.
        mock_config = tmp_path / "downloaded" / "react.json"
        mock_config.parent.mkdir(exist_ok=True)
        mock_config.write_text('{"name": "react"}')
        mock_fetch.return_value = mock_config

        result = resolve_config_path("react.json", auto_fetch=True)

        # Verify fetch was called
        mock_fetch.assert_called_once_with("react", destination="configs")
        assert result is not None
        assert result.exists()

    @patch("skill_seekers.cli.config_fetcher.fetch_config_from_api")
    def test_auto_fetch_failed(self, mock_fetch, tmp_path, monkeypatch):
        """Test handling when auto-fetch fails."""
        monkeypatch.chdir(tmp_path)
        # Mock fetch to return None (failed)
        mock_fetch.return_value = None

        result = resolve_config_path("nonexistent.json", auto_fetch=True)

        assert result is None

    def test_config_name_normalization(self, tmp_path, monkeypatch):
        """Test various config name formats."""
        configs_dir = tmp_path / "configs"
        configs_dir.mkdir()
        config_file = configs_dir / "react.json"
        config_file.write_text('{"name": "react"}')
        monkeypatch.chdir(tmp_path)

        # All of these should resolve to the same file
        test_cases = ["react.json", "configs/react.json"]
        for config_name in test_cases:
            result = resolve_config_path(config_name, auto_fetch=False)
            assert result is not None, f"Failed for {config_name}"
            assert result.exists()
            assert result.name == "react.json"
@pytest.mark.integration
class TestConfigFetcherIntegration:
    """Integration tests that hit real API (marked as integration).

    When the API cannot be reached the tests are reported as skipped rather
    than silently passing without checking anything.
    """

    def test_fetch_real_config(self, tmp_path):
        """Test fetching a real config from API."""
        destination = str(tmp_path)

        result = fetch_config_from_api("godot", destination=destination, timeout=10.0)

        if result is None:
            # fetch_config_from_api returns None for every failure mode, so an
            # outage cannot be told apart from a missing config here.
            pytest.skip("Could not fetch 'godot' from the SkillSeekersWeb.com API")
        assert result.exists()
        assert result.name == "godot.json"
        with open(result, encoding="utf-8") as f:
            config = json.load(f)
        assert config["name"] == "godot"
        assert "description" in config

    def test_list_real_configs(self):
        """Test listing real configs from API."""
        result = list_available_configs(timeout=10.0)

        if not result:
            # An empty list is what the helper returns when the API is unreachable.
            pytest.skip("SkillSeekersWeb.com API returned no configs (unreachable?)")
        assert isinstance(result, list)
        assert all(isinstance(cfg, str) for cfg in result)