fix: Auto-fetch preset configs from API when not found locally

Fixes #264

Users reported that preset configs (react.json, godot.json, etc.) were not
found after installing via pip/uv, causing immediate failure on first use.

Solution: Instead of bundling configs in the package, the CLI now automatically
fetches missing configs from the SkillSeekersWeb.com API.

Changes:
- Created config_fetcher.py with smart config resolution:
  1. Check local path (backward compatible)
  2. Check with configs/ prefix
  3. Auto-fetch from SkillSeekersWeb.com API (new!)
- Updated doc_scraper.py to use ConfigValidator (supports unified configs)
- Added 15 comprehensive tests for auto-fetch functionality

User Experience:
- Zero configuration needed - presets work immediately after install
- Better error messages showing available configs from API
- Downloaded configs are cached locally for future use
- Fully backward compatible with existing local configs

Testing:
- 15 new unit tests (all passing)
- 2 integration tests with real API
- Full test suite: 1387 tests passing
- No breaking changes

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-27 21:41:20 +03:00
parent 6aec0e3688
commit 746e335fae
3 changed files with 572 additions and 21 deletions

View File

@@ -0,0 +1,184 @@
"""
Config fetcher for CLI - synchronous wrapper around API fetch.
Provides automatic config downloading from SkillSeekersWeb.com API
when local config files are not found.
"""
import json
import logging
from pathlib import Path
from typing import Optional
import httpx
logger = logging.getLogger(__name__)
API_BASE_URL = "https://api.skillseekersweb.com"
def fetch_config_from_api(
    config_name: str, destination: str = "configs", timeout: float = 30.0
) -> Optional[Path]:
    """
    Fetch a config file from the SkillSeekersWeb.com API.

    The config's detail record is requested first; the file itself is then
    downloaded from the ``download_url`` in that record and saved as
    ``<destination>/<config_name>.json``.

    Args:
        config_name: Name of config to download (e.g., 'react', 'godot').
            A trailing '.json' and a leading 'configs/' are stripped.
        destination: Directory to save config file (default: 'configs')
        timeout: Request timeout in seconds (default: 30.0)

    Returns:
        Path to downloaded config file, or None if fetch failed. Failures
        are logged and never raised.

    Example:
        >>> config_path = fetch_config_from_api('react')
        >>> if config_path:
        ...     print(f"Downloaded to {config_path}")
    """
    # Normalize config name (remove .json suffix, then configs/ prefix)
    config_name = config_name.removesuffix(".json").removeprefix("configs/")

    # The name becomes both a URL segment and a file name, so it must not be
    # empty or carry path components that could escape `destination`.
    if not _is_safe_config_name(config_name):
        logger.warning("⚠️ Invalid config name: %r", config_name)
        return None

    try:
        with httpx.Client(timeout=timeout) as client:
            # Get config details first
            detail_url = f"{API_BASE_URL}/api/configs/{config_name}"
            logger.info("🔍 Checking API for config: %s", config_name)
            detail_response = client.get(detail_url)

            if detail_response.status_code == 404:
                logger.warning("⚠️ Config '%s' not found on API", config_name)
                return None
            detail_response.raise_for_status()
            config_info = detail_response.json()

            # Download the actual config file using download_url from API response
            download_url = config_info.get("download_url")
            if not download_url:
                logger.error(
                    "❌ Config '%s' has no download_url. Contact support.", config_name
                )
                return None

            logger.info("📥 Downloading config from API...")
            download_response = client.get(download_url)
            download_response.raise_for_status()
            config_data = download_response.json()

        # A config must be a JSON object; caching anything else would make
        # every later local lookup resolve to an unusable file.
        if not isinstance(config_data, dict):
            logger.warning(
                "⚠️ Invalid JSON response from API: expected an object, got %s",
                type(config_data).__name__,
            )
            return None

        # Save to destination
        dest_path = Path(destination)
        dest_path.mkdir(parents=True, exist_ok=True)
        config_file = dest_path / f"{config_name}.json"
        _write_json_atomically(config_file, config_data)

        logger.info("✅ Config downloaded successfully: %s", config_file)
        logger.info(" Category: %s", config_info.get("category", "uncategorized"))
        logger.info(" Type: %s", config_info.get("type", "unknown"))
        return config_file
    except httpx.HTTPError as e:
        logger.warning("⚠️ HTTP Error fetching config: %s", e)
        return None
    except json.JSONDecodeError as e:
        logger.warning("⚠️ Invalid JSON response from API: %s", e)
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on None, not on exceptions.
        logger.warning("⚠️ Error fetching config: %s", e)
        return None


def _is_safe_config_name(config_name: str) -> bool:
    """Return True if *config_name* is a non-empty name without path components."""
    if not config_name or config_name in {".", ".."}:
        return False
    return "/" not in config_name and "\\" not in config_name


def _write_json_atomically(target: Path, payload: dict) -> None:
    """Write *payload* as JSON to *target* via a temp file, so a failed write never leaves a truncated config."""
    scratch = target.with_name(f"{target.name}.tmp")
    try:
        with open(scratch, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
        scratch.replace(target)
    finally:
        # After a successful replace the temp file is already gone.
        scratch.unlink(missing_ok=True)
def list_available_configs(category: Optional[str] = None, timeout: float = 30.0) -> list[str]:
    """
    List all available configs from the API.

    This is a best-effort helper: any failure (network error, bad status,
    unexpected response shape) yields an empty list instead of an exception.
    The failure reason is logged at debug level.

    Args:
        category: Filter by category (optional)
        timeout: Request timeout in seconds (default: 30.0)

    Returns:
        List of available config names; entries without a name are skipped.

    Example:
        >>> configs = list_available_configs()
        >>> print(f"Available: {', '.join(configs)}")
    """
    # Only send the filter when one was requested.
    params = {"category": category} if category else {}
    try:
        with httpx.Client(timeout=timeout) as client:
            response = client.get(f"{API_BASE_URL}/api/configs", params=params)
            response.raise_for_status()
            data = response.json()
        return [
            name
            for cfg in data.get("configs", [])
            if isinstance(cfg, dict) and (name := cfg.get("name"))
        ]
    except Exception as e:
        # Keep the "never raises" contract, but leave a trace of why the list is empty.
        logger.debug("Could not list configs from API: %s", e)
        return []
def resolve_config_path(config_path: str, auto_fetch: bool = True) -> Optional[Path]:
    """
    Resolve config path with automatic API fallback.

    Tries to find config in this order:
    1. Exact path as provided
    2. With 'configs/' prefix added
    3. Steps 1-2 again with '.json' appended (when the name lacks it), so a
       bare preset name such as 'react' finds a previously downloaded
       'configs/react.json' instead of hitting the API again
    4. Fetch from API (if auto_fetch=True)

    Only regular files are accepted; a directory that happens to share the
    config's name is ignored.

    Args:
        config_path: Config file path or name
        auto_fetch: Automatically fetch from API if not found locally (default: True)

    Returns:
        Absolute path to config file, or None if not found

    Example:
        >>> path = resolve_config_path('react.json')
        >>> if path:
        ...     with open(path) as f:
        ...         config = json.load(f)
    """
    # Names to look up locally: the input as given, then with '.json' added.
    lookups = [config_path]
    if not config_path.endswith(".json"):
        lookups.append(f"{config_path}.json")

    for name in lookups:
        candidates = [Path(name)]
        if not name.startswith("configs/"):
            candidates.append(Path("configs") / name)
        for candidate in candidates:
            # is_file() rather than exists(): a directory cannot be opened as a config.
            if candidate.is_file():
                return candidate.resolve()

    # Try API fetch (if enabled)
    if auto_fetch:
        # Extract config name (remove .json, remove configs/ prefix)
        config_name = config_path.removesuffix(".json").removeprefix("configs/")
        logger.info(
            "\n💡 Config not found locally, attempting to fetch from SkillSeekersWeb.com API..."
        )
        fetched_path = fetch_config_from_api(config_name, destination="configs")
        if fetched_path and fetched_path.exists():
            return fetched_path.resolve()

    return None

View File

@@ -30,6 +30,8 @@ from bs4 import BeautifulSoup
# Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from skill_seekers.cli.config_fetcher import list_available_configs, resolve_config_path
from skill_seekers.cli.config_validator import ConfigValidator
from skill_seekers.cli.constants import (
CONTENT_PREVIEW_LENGTH,
DEFAULT_ASYNC_MODE,
@@ -1751,6 +1753,8 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
def load_config(config_path: str) -> dict[str, Any]:
"""Load and validate configuration from JSON file.
Automatically fetches configs from SkillSeekersWeb.com API if not found locally.
Args:
config_path (str): Path to JSON configuration file
@@ -1765,36 +1769,56 @@ def load_config(config_path: str) -> dict[str, Any]:
>>> print(config['name'])
'react'
"""
# Try to resolve config path (with auto-fetch from API)
resolved_path = resolve_config_path(config_path, auto_fetch=True)
if resolved_path is None:
# Config not found locally and fetch failed
available = list_available_configs()
logger.error("❌ Error: Config file not found: %s", config_path)
logger.error(" Tried:")
logger.error(" 1. Local path: %s", config_path)
logger.error(" 2. With prefix: configs/%s", config_path)
logger.error(" 3. SkillSeekersWeb.com API")
logger.error("")
if available:
logger.error(" 📋 Available configs from API (%d total):", len(available))
for cfg in available[:10]: # Show first 10
logger.error("%s", cfg)
if len(available) > 10:
logger.error(" ... and %d more", len(available) - 10)
logger.error("")
logger.error(" 💡 Use any config name: skill-seekers scrape --config <name>.json")
logger.error(" 🌐 Browse all: https://skillseekersweb.com/")
else:
logger.error(" ⚠️ Could not connect to API to list available configs")
logger.error(" 🌐 Visit: https://skillseekersweb.com/ for available configs")
sys.exit(1)
# Load the resolved config file
try:
with open(config_path, encoding="utf-8") as f:
with open(resolved_path, encoding="utf-8") as f:
config = json.load(f)
except json.JSONDecodeError as e:
logger.error("❌ Error: Invalid JSON in config file: %s", config_path)
logger.error("❌ Error: Invalid JSON in config file: %s", resolved_path)
logger.error(" Details: %s", e)
logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno)
sys.exit(1)
except FileNotFoundError:
logger.error("❌ Error: Config file not found: %s", config_path)
logger.error(" Suggestion: Create a config file or use an existing one from configs/")
logger.error(" Available configs: react.json, vue.json, django.json, godot.json")
sys.exit(1)
# Validate config
errors, warnings = validate_config(config)
# Validate config using ConfigValidator (supports both unified and legacy formats)
try:
validator = ConfigValidator(config)
validator.validate()
# Show warnings (non-blocking)
if warnings:
logger.warning("⚠️ Configuration warnings in %s:", config_path)
for warning in warnings:
logger.warning(" - %s", warning)
logger.info("")
# Show errors (blocking)
if errors:
# Log config type
if validator.is_unified:
logger.debug("✓ Unified config format detected")
else:
logger.debug("✓ Legacy config format detected")
except ValueError as e:
logger.error("❌ Configuration validation errors in %s:", config_path)
for error in errors:
logger.error(" - %s", error)
logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples")
logger.error(" %s", str(e))
logger.error("\n Suggestion: Fix the above errors or check https://skillseekersweb.com/ for examples")
sys.exit(1)
return config