Files
skill-seekers-reference/src/skill_seekers/mcp/tools/source_tools.py
yusyus f1d97facbc fix: Use download_url from API response instead of constructing URL
CRITICAL BUG FIX - Resolves 404 errors when fetching configs from API

Root Cause:
The code was constructing download URLs manually:
  download_url = f"{API_BASE_URL}/api/download/{config_name}.json"

This fails because the API provides download_url in the response, which
may differ from the constructed path (e.g., CDN URLs, version-specific paths).

Solution:
Changed both MCP server implementations to use download_url from API:
  download_url = config_info.get("download_url")

Added validation check for missing download_url field.

Files Modified:
- src/skill_seekers/mcp/tools/source_tools.py (FastMCP server, lines 285-297)
- src/skill_seekers/mcp/server_legacy.py (Legacy server, lines 1483-1494)

Bug Report:
User reported: skill-seekers install --config godot --unlimited
- API check: /api/configs/godot → 200 OK 
- Download: /api/download/godot.json → 404 Not Found 

After Fix:
- Uses download_url from API response → Works correctly 

Testing:
- All 15 source tools tests pass (test_mcp_fastmcp.py::TestSourceTools)
- All 8 fetch_config tests pass
- test_fetch_config_download_api: PASSED
- test_fetch_config_from_source: PASSED

Impact:
- Fixes config downloads from official API (skillseekersweb.com)
- Fixes config downloads from private Git repositories
- Prevents all future 404 errors from URL construction mismatch
- No breaking changes - fully backward compatible

Related Issue: Bug reported by user when testing Godot skill

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 22:25:35 +03:00

829 lines
29 KiB
Python

"""
Source management tools for MCP server.
This module contains tools for managing config sources:
- fetch_config: Fetch configs from API, git URL, or named sources
- submit_config: Submit configs to the community repository
- add_config_source: Register a git repository as a config source
- list_config_sources: List all registered config sources
- remove_config_source: Remove a registered config source
"""
import json
import os
import re
from pathlib import Path
# MCP types (imported conditionally)
try:
    from mcp.types import TextContent
except ImportError:
    # MCP is not installed: expose a minimal stand-in so the tool functions
    # (and their tests) can still build responses.
    MCP_AVAILABLE = False

    class TextContent:
        """Minimal substitute for mcp.types.TextContent used when MCP is missing."""

        def __init__(self, type: str, text: str):
            # Parameter names mirror the keyword arguments used by callers
            # in this module (type=..., text=...).
            self.type, self.text = type, text
else:
    # The real MCP type was imported successfully.
    MCP_AVAILABLE = True
import httpx
# Characters that would let a config name address something other than a
# single file inside the destination directory.
_UNSAFE_CONFIG_NAME_CHARS = ("/", "\\", "\x00")


def _unsafe_config_name_error(config_name):
    """Return an error response if *config_name* is unsafe as a file name, else None.

    The name is interpolated into the output path, the temporary cache key and
    the API URL, so path separators (and the special names "." / "..") are
    rejected before any of those are built.
    """
    name = str(config_name)
    if name in (".", "..") or any(ch in name for ch in _UNSAFE_CONFIG_NAME_CHARS):
        return [
            TextContent(
                type="text",
                text=f"❌ Error: Invalid config_name '{config_name}': must not contain path separators",
            )
        ]
    return None


def _write_config_file(config_data, destination, config_name):
    """Write *config_data* as JSON to <destination>/<config_name>.json and return the path.

    The destination directory is created when missing.
    """
    dest_path = Path(destination)
    dest_path.mkdir(parents=True, exist_ok=True)
    config_file = dest_path / f"{config_name}.json"
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(config_data, f, indent=2)
    return config_file


def _format_config_listing(data, api_base_url):
    """Render the payload of the API config-list endpoint as display text.

    Configs are grouped by their "category" field and categories are printed
    in sorted order. Fields that are missing or null fall back to defaults so
    a sparse API entry cannot break the listing.
    """
    configs = data.get("configs", [])
    total = data.get("total", 0)
    filters = data.get("filters")

    lines = [f"📋 Available Configs ({total} total)\n"]
    if filters:
        lines.append(f"🔍 Filters: {filters}\n")
    lines.append("\n")

    # Group by category
    by_category = {}
    for config in configs:
        cat = config.get("category") or "uncategorized"
        by_category.setdefault(str(cat), []).append(config)

    for cat, cat_configs in sorted(by_category.items()):
        lines.append(f"\n**{cat.upper()}** ({len(cat_configs)} configs):\n")
        for cfg in cat_configs:
            name = cfg.get("name")
            description = cfg.get("description") or ""
            config_type = cfg.get("type", "unknown")
            tags = ", ".join(str(tag) for tag in (cfg.get("tags") or [])[:3])
            # Descriptions are truncated to 60 characters, with an ellipsis
            # only when something was actually cut off.
            ellipsis = "..." if len(description) > 60 else ""
            lines.append(f"{name} [{config_type}] - {description[:60]}{ellipsis}\n")
            if tags:
                lines.append(f" Tags: {tags}\n")

    lines.append("\n💡 To download a config, use: fetch_config with config_name='<name>'\n")
    lines.append(f"📚 API Docs: {api_base_url}/docs\n")
    return "".join(lines)


async def fetch_config_tool(args: dict) -> list[TextContent]:
    """
    Fetch config from API, git URL, or named source.

    Supports three modes, checked in this order:
    1. Named source from registry (``source`` given; highest priority)
    2. Direct git URL (``git_url`` given)
    3. API (default, backward compatible)

    In every mode the fetched config is written as JSON to
    ``<destination>/<config_name>.json``. ``config_name`` must not contain
    path separators, because it is used as a file name.

    Args:
        args: Dictionary containing:
            - config_name: Name of config to download (optional for API list mode)
            - destination: Directory to save config file (default: "configs")
            - list_available: List all available configs from API (default: false)
            - category: Filter configs by category when listing (optional)
            - git_url: Git repository URL (enables git mode)
            - source: Named source from registry (enables named source mode)
            - branch: Git branch to use (default: "main")
            - token: Authentication token for private repos (optional)
            - refresh: Force refresh cached git repository (default: false)

    Returns:
        List of TextContent with fetch results or config list. Failures are
        reported as an error message in the returned TextContent rather than
        raised.
    """
    from skill_seekers.mcp.git_repo import GitConfigRepo
    from skill_seekers.mcp.source_manager import SourceManager

    config_name = args.get("config_name")
    destination = args.get("destination", "configs")
    list_available = args.get("list_available", False)
    category = args.get("category")

    # Git mode parameters
    source_name = args.get("source")
    git_url = args.get("git_url")
    branch = args.get("branch", "main")
    token = args.get("token")
    force_refresh = args.get("refresh", False)

    try:
        # MODE 1: Named Source (highest priority)
        if source_name:
            if not config_name:
                return [
                    TextContent(
                        type="text",
                        text="❌ Error: config_name is required when using source parameter",
                    )
                ]
            name_error = _unsafe_config_name_error(config_name)
            if name_error:
                return name_error

            # Get source from registry
            source_manager = SourceManager()
            try:
                source = source_manager.get_source(source_name)
            except KeyError as e:
                return [TextContent(type="text", text=f"{str(e)}")]

            # The registry entry overrides any git_url/branch passed in args.
            git_url = source["git_url"]
            branch = source.get("branch", branch)
            token_env = source.get("token_env")

            # Get token from environment if not provided
            if not token and token_env:
                token = os.environ.get(token_env)

            # Clone/pull repository
            git_repo = GitConfigRepo()
            try:
                repo_path = git_repo.clone_or_pull(
                    source_name=source_name,
                    git_url=git_url,
                    branch=branch,
                    token=token,
                    force_refresh=force_refresh,
                )
            except Exception as e:
                return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]

            # Load config from repository
            try:
                config_data = git_repo.get_config(repo_path, config_name)
            except (FileNotFoundError, ValueError) as e:
                return [TextContent(type="text", text=f"{str(e)}")]

            # Save to destination
            config_file = _write_config_file(config_data, destination, config_name)

            result = f"""✅ Config fetched from git source successfully!
📦 Config: {config_name}
📂 Saved to: {config_file}
🔗 Source: {source_name}
🌿 Branch: {branch}
📁 Repository: {git_url}
🔄 Refreshed: {"Yes (forced)" if force_refresh else "No (used cache)"}
Next steps:
1. Review config: cat {config_file}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool
💡 Manage sources: Use add_config_source, list_config_sources, remove_config_source tools
"""
            return [TextContent(type="text", text=result)]

        # MODE 2: Direct Git URL
        elif git_url:
            if not config_name:
                return [
                    TextContent(
                        type="text",
                        text="❌ Error: config_name is required when using git_url parameter",
                    )
                ]
            name_error = _unsafe_config_name_error(config_name)
            if name_error:
                return name_error

            # Clone/pull repository
            git_repo = GitConfigRepo()
            # NOTE(review): the temporary source name is derived from the config
            # name only, so two different git URLs fetched with the same
            # config_name share it — confirm how clone_or_pull keys its cache.
            source_name_temp = f"temp_{config_name}"
            try:
                repo_path = git_repo.clone_or_pull(
                    source_name=source_name_temp,
                    git_url=git_url,
                    branch=branch,
                    token=token,
                    force_refresh=force_refresh,
                )
            except ValueError as e:
                return [TextContent(type="text", text=f"❌ Invalid git URL: {str(e)}")]
            except Exception as e:
                return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]

            # Load config from repository
            try:
                config_data = git_repo.get_config(repo_path, config_name)
            except (FileNotFoundError, ValueError) as e:
                return [TextContent(type="text", text=f"{str(e)}")]

            # Save to destination
            config_file = _write_config_file(config_data, destination, config_name)

            result = f"""✅ Config fetched from git URL successfully!
📦 Config: {config_name}
📂 Saved to: {config_file}
📁 Repository: {git_url}
🌿 Branch: {branch}
🔄 Refreshed: {"Yes (forced)" if force_refresh else "No (used cache)"}
Next steps:
1. Review config: cat {config_file}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool
💡 Register this source: Use add_config_source to save for future use
"""
            return [TextContent(type="text", text=result)]

        # MODE 3: API (existing, backward compatible)
        else:
            API_BASE_URL = "https://api.skillseekersweb.com"

            async with httpx.AsyncClient(timeout=30.0) as client:
                # List available configs if requested or no config_name provided
                if list_available or not config_name:
                    # Build API URL with optional category filter
                    list_url = f"{API_BASE_URL}/api/configs"
                    params = {"category": category} if category else {}

                    response = await client.get(list_url, params=params)
                    response.raise_for_status()
                    listing = _format_config_listing(response.json(), API_BASE_URL)
                    return [TextContent(type="text", text=listing)]

                # Download specific config. config_name is guaranteed non-empty
                # here because the listing branch above handles the empty case.
                name_error = _unsafe_config_name_error(config_name)
                if name_error:
                    return name_error

                # Get config details first
                detail_url = f"{API_BASE_URL}/api/configs/{config_name}"
                detail_response = await client.get(detail_url)
                if detail_response.status_code == 404:
                    return [
                        TextContent(
                            type="text",
                            text=f"❌ Config '{config_name}' not found. Use list_available=true to see available configs.",
                        )
                    ]
                detail_response.raise_for_status()
                config_info = detail_response.json()

                # Download the actual config file using the download_url from API response
                download_url = config_info.get("download_url")
                if not download_url:
                    return [
                        TextContent(
                            type="text",
                            text=f"❌ Config '{config_name}' has no download_url. Contact support.",
                        )
                    ]
                download_response = await client.get(download_url)
                download_response.raise_for_status()
                config_data = download_response.json()

                # Save to destination
                config_file = _write_config_file(config_data, destination, config_name)

                # A null "tags" value from the API is treated as no tags.
                tags_text = ", ".join(str(tag) for tag in (config_info.get("tags") or []))

                # Build result message
                result = f"""✅ Config downloaded successfully!
📦 Config: {config_name}
📂 Saved to: {config_file}
📊 Category: {config_info.get("category", "uncategorized")}
🏷️ Tags: {tags_text}
📄 Type: {config_info.get("type", "unknown")}
📝 Description: {config_info.get("description", "No description")}
🔗 Source: {config_info.get("primary_source", "N/A")}
📏 Max pages: {config_info.get("max_pages", "N/A")}
📦 File size: {config_info.get("file_size", "N/A")} bytes
🕒 Last updated: {config_info.get("last_updated", "N/A")}
Next steps:
1. Review config: cat {config_file}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool
💡 More configs: Use list_available=true to see all available configs
"""
                return [TextContent(type="text", text=result)]

    except httpx.HTTPError as e:
        return [
            TextContent(
                type="text",
                text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
            )
        ]
    except json.JSONDecodeError as e:
        return [
            TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
        ]
    except Exception as e:
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
# Keyword groups used to guess a category for legacy configs from their name.
# Order matters: the first group with a matching keyword wins.
_LEGACY_CATEGORY_KEYWORDS = (
    ("web-frameworks", ("react", "vue", "django", "laravel", "fastapi", "astro", "hono")),
    ("game-engines", ("godot", "unity", "unreal")),
    ("devops", ("kubernetes", "ansible", "docker")),
    ("css-frameworks", ("tailwind", "bootstrap", "bulma")),
)

_HTTP_URL_PREFIXES = ("http://", "https://")


def _check_submission_format(config_data, config_name, is_unified):
    """Raise ValueError if the config name or a documentation base_url is malformed."""
    # fullmatch (not match + "$") so a trailing newline cannot slip through;
    # a non-string name is reported as a validation error as well.
    if not isinstance(config_name, str) or not re.fullmatch(r"[a-zA-Z0-9_-]+", config_name):
        raise ValueError(
            f"Invalid name format: '{config_name}'\nNames must contain only alphanumeric characters, hyphens, and underscores"
        )

    if not is_unified:
        # Legacy config - check base_url
        base_url = config_data.get("base_url", "")
        if base_url and not base_url.startswith(_HTTP_URL_PREFIXES):
            raise ValueError(
                f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
            )
        return

    # Unified config - check URLs in documentation sources
    for idx, source in enumerate(config_data.get("sources", [])):
        if source.get("type") != "documentation":
            continue
        source_url = source.get("base_url", "")
        if source_url and not source_url.startswith(_HTTP_URL_PREFIXES):
            raise ValueError(
                f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
            )


def _detect_submission_category(config_data, config_name, is_unified):
    """Pick the category label for a submission from its format and name."""
    if is_unified:
        # Any unified config with more than one source counts as multi-source.
        source_count = len(config_data.get("sources", []))
        return "multi-source" if source_count > 1 else "unified"

    # For legacy configs, use name-based detection
    name_lower = config_name.lower()
    for category, keywords in _LEGACY_CATEGORY_KEYWORDS:
        if any(keyword in name_lower for keyword in keywords):
            return category
    return "other"


def _collect_submission_warnings(config_data, is_unified):
    """Return non-fatal warnings about missing or unlimited max_pages settings."""
    warnings = []
    if not is_unified:
        # Legacy config warnings
        if "max_pages" not in config_data:
            warnings.append("⚠️ No max_pages set - will use default (100)")
        elif config_data.get("max_pages") in (None, -1):
            warnings.append(
                "⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
            )
        return warnings

    # Unified config warnings: one entry per affected documentation source
    for src in config_data.get("sources", []):
        if src.get("type") != "documentation":
            continue
        if "max_pages" not in src:
            warnings.append(
                "⚠️ No max_pages set for documentation source - will use default (100)"
            )
        elif src.get("max_pages") in (None, -1):
            warnings.append("⚠️ Unlimited scraping enabled for documentation source")
    return warnings


async def submit_config_tool(args: dict) -> list[TextContent]:
    """
    Submit a custom config to skill-seekers-configs repository via GitHub issue.

    Validates the config (both legacy and unified formats) and creates a GitHub
    issue for community review. Validation runs before the GitHub token is
    checked, so config problems are reported even without a token.

    Args:
        args: Dictionary containing:
            - config_path: Path to config JSON file (optional)
            - config_json: Config JSON as string (optional, alternative to config_path)
            - testing_notes: Notes about testing (optional)
            - github_token: GitHub personal access token (optional, can use GITHUB_TOKEN env var)

    Returns:
        List of TextContent with submission results. Failures are reported as
        an error message in the returned TextContent rather than raised.
    """
    try:
        from github import Github, GithubException
    except ImportError:
        return [
            TextContent(
                type="text",
                text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
            )
        ]

    # Import config validator
    try:
        import sys

        cli_dir = str(Path(__file__).parent.parent.parent / "cli")
        # Register the CLI directory once; inserting on every call would make
        # sys.path grow without bound in a long-running server.
        if cli_dir not in sys.path:
            sys.path.insert(0, cli_dir)
        from config_validator import ConfigValidator
    except ImportError:
        ConfigValidator = None

    config_path = args.get("config_path")
    config_json_str = args.get("config_json")
    testing_notes = args.get("testing_notes", "")
    github_token = args.get("github_token") or os.environ.get("GITHUB_TOKEN")

    try:
        # Load config data
        if config_path:
            config_file = Path(config_path)
            if not config_file.exists():
                return [
                    TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
                ]
            with open(config_file, encoding="utf-8") as f:
                config_data = json.load(f)
            config_json_str = json.dumps(config_data, indent=2)
            # A file without a "name" field falls back to the file's stem.
            default_name = config_file.stem
        elif config_json_str:
            try:
                config_data = json.loads(config_json_str)
            except json.JSONDecodeError as e:
                return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
            default_name = "unnamed"
        else:
            return [
                TextContent(
                    type="text", text="❌ Error: Must provide either config_path or config_json"
                )
            ]

        # Everything below reads fields with .get(), so the top-level JSON
        # value has to be an object.
        if not isinstance(config_data, dict):
            return [TextContent(type="text", text="❌ Error: Config must be a JSON object")]

        # Use ConfigValidator for comprehensive validation
        if ConfigValidator is None:
            return [
                TextContent(
                    type="text",
                    text="❌ Error: ConfigValidator not available. Please ensure config_validator.py is in the CLI directory.",
                )
            ]

        try:
            validator = ConfigValidator(config_data)
            validator.validate()

            # Get format info
            is_unified = validator.is_unified
            config_name = config_data.get("name", default_name)

            # Additional format validation (ConfigValidator only checks structure)
            _check_submission_format(config_data, config_name, is_unified)
        except ValueError as validation_error:
            # Provide detailed validation feedback
            error_msg = f"""❌ Config validation failed:
{str(validation_error)}
Please fix these issues and try again.
💡 Validation help:
- Names: alphanumeric, hyphens, underscores only (e.g., "my-framework", "react_docs")
- URLs: must start with http:// or https://
- Selectors: should be a dict with keys like 'main_content', 'title', 'code_blocks'
- Rate limit: non-negative number (default: 0.5)
- Max pages: positive integer or -1 for unlimited
📚 Example configs: https://github.com/yusufkaraaslan/skill-seekers-configs/tree/main/official
"""
            return [TextContent(type="text", text=error_msg)]

        # Detect category based on config format and content
        category = _detect_submission_category(config_data, config_name, is_unified)

        # Collect validation warnings
        warnings = _collect_submission_warnings(config_data, is_unified)

        # Check for GitHub token
        if not github_token:
            return [
                TextContent(
                    type="text",
                    text="❌ Error: GitHub token required.\n\nProvide github_token parameter or set GITHUB_TOKEN environment variable.\n\nCreate token at: https://github.com/settings/tokens",
                )
            ]

        # Create GitHub issue
        try:
            gh = Github(github_token)
            repo = gh.get_repo("yusufkaraaslan/skill-seekers-configs")

            # The warnings section collapses to two empty lines when there
            # are no warnings.
            warnings_heading = "### Validation Warnings" if warnings else ""
            warnings_list = "\n".join(f"- {w}" for w in warnings)

            # Build issue body
            issue_body = f"""## Config Submission
### Framework/Tool Name
{config_name}
### Category
{category}
### Config Format
{"Unified (multi-source)" if is_unified else "Legacy (single-source)"}
### Configuration JSON
```json
{config_json_str}
```
### Testing Results
{testing_notes if testing_notes else "Not provided"}
### Documentation URL
{config_data.get("base_url") if not is_unified else "See sources in config"}
{warnings_heading}
{warnings_list}
---
### Checklist
- [x] Config validated with ConfigValidator
- [ ] Test scraping completed
- [ ] Added to appropriate category
- [ ] API updated
"""

            # Create issue
            issue = repo.create_issue(
                title=f"[CONFIG] {config_name}",
                body=issue_body,
                labels=["config-submission", "needs-review"],
            )

            result = f"""✅ Config submitted successfully!
📝 Issue created: {issue.html_url}
🏷️ Issue #{issue.number}
📦 Config: {config_name}
📊 Category: {category}
🏷️ Labels: config-submission, needs-review
What happens next:
1. Maintainers will review your config
2. They'll test it with the actual documentation
3. If approved, it will be added to official/{category}/
4. The API will auto-update and your config becomes available!
💡 Track your submission: {issue.html_url}
📚 All configs: https://github.com/yusufkaraaslan/skill-seekers-configs
"""
            return [TextContent(type="text", text=result)]

        except GithubException as e:
            return [
                TextContent(
                    type="text",
                    text=f"❌ GitHub Error: {str(e)}\n\nCheck your token permissions (needs 'repo' or 'public_repo' scope).",
                )
            ]

    except Exception as e:
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
async def add_config_source_tool(args: dict) -> list[TextContent]:
    """
    Register a git repository as a config source.

    Allows fetching configs from private/team repos. Use this to set up named
    sources that can be referenced by fetch_config.

    Args:
        args: Dictionary containing:
            - name: Source identifier (required)
            - git_url: Git repository URL (required)
            - source_type: Source type (default: "github")
            - token_env: Environment variable name for auth token (optional)
            - branch: Git branch to use (default: "main")
            - priority: Source priority (default: 100, lower = higher priority)
            - enabled: Whether source is enabled (default: true)

    Returns:
        List of TextContent with registration results. A ValueError raised
        while adding the source is reported as a validation error; any other
        exception is reported as a generic error.
    """
    from skill_seekers.mcp.source_manager import SourceManager

    name = args.get("name")
    git_url = args.get("git_url")
    source_type = args.get("source_type", "github")
    token_env = args.get("token_env")
    branch = args.get("branch", "main")
    priority = args.get("priority", 100)
    enabled = args.get("enabled", True)

    try:
        # Validate required parameters
        if not name:
            return [TextContent(type="text", text="❌ Error: 'name' parameter is required")]
        if not git_url:
            return [TextContent(type="text", text="❌ Error: 'git_url' parameter is required")]

        # Add source
        source_manager = SourceManager()
        source = source_manager.add_source(
            name=name,
            git_url=git_url,
            source_type=source_type,
            token_env=token_env,
            branch=branch,
            priority=priority,
            enabled=enabled,
        )

        # Check if this is an update: the record carries an updated_at that
        # differs from added_at.
        is_update = "updated_at" in source and source["added_at"] != source["updated_at"]

        # `or` rather than a .get() default: the default would not apply when
        # the key is present with a None value, and the hint would then tell
        # the user to set a variable literally called "None".
        token_env_hint = source.get("token_env") or "GIT_TOKEN"

        result = f"""✅ Config source {"updated" if is_update else "registered"} successfully!
📛 Name: {source["name"]}
📁 Repository: {source["git_url"]}
🔖 Type: {source["type"]}
🌿 Branch: {source["branch"]}
🔑 Token env: {source.get("token_env", "None")}
⚡ Priority: {source["priority"]} (lower = higher priority)
✓ Enabled: {source["enabled"]}
🕒 Added: {source["added_at"][:19]}
Usage:
# Fetch config from this source
fetch_config(source="{source["name"]}", config_name="your-config")
# List all sources
list_config_sources()
# Remove this source
remove_config_source(name="{source["name"]}")
💡 Make sure to set {token_env_hint} environment variable for private repos
"""
        return [TextContent(type="text", text=result)]

    except ValueError as e:
        return [TextContent(type="text", text=f"❌ Validation Error: {str(e)}")]
    except Exception as e:
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
async def list_config_sources_tool(args: dict) -> list[TextContent]:
    """
    List all registered config sources.

    Shows git repositories that have been registered with add_config_source.
    Each entry is prefixed with a status marker: "✓" for enabled sources and
    "✗" for disabled ones (a source without an "enabled" field counts as
    enabled).

    Args:
        args: Dictionary containing:
            - enabled_only: Only show enabled sources (default: false)

    Returns:
        List of TextContent with source list, or usage help when no sources
        are registered. Exceptions are reported as an error message.
    """
    from skill_seekers.mcp.source_manager import SourceManager

    enabled_only = args.get("enabled_only", False)

    try:
        source_manager = SourceManager()
        sources = source_manager.list_sources(enabled_only=enabled_only)

        if not sources:
            result = """📋 No config sources registered
To add a source:
add_config_source(
name="team",
git_url="https://github.com/myorg/configs.git"
)
💡 Once added, use: fetch_config(source="team", config_name="...")
"""
            return [TextContent(type="text", text=result)]

        # Format sources list
        header = f"📋 Config Sources ({len(sources)} total"
        if enabled_only:
            header += ", enabled only"
        parts = [header + ")\n\n"]

        for source in sources:
            # Distinct markers so enabled and disabled sources can be told apart.
            status_icon = "✓" if source.get("enabled", True) else "✗"
            parts.append(f"{status_icon} **{source['name']}**\n")
            parts.append(f" 📁 {source['git_url']}\n")
            parts.append(f" 🔖 Type: {source['type']} | 🌿 Branch: {source['branch']}\n")
            parts.append(
                f" 🔑 Token: {source.get('token_env', 'None')} | ⚡ Priority: {source['priority']}\n"
            )
            parts.append(f" 🕒 Added: {source['added_at'][:19]}\n")
            parts.append("\n")

        parts.append("""Usage:
# Fetch config from a source
fetch_config(source="SOURCE_NAME", config_name="CONFIG_NAME")
# Add new source
add_config_source(name="...", git_url="...")
# Remove source
remove_config_source(name="SOURCE_NAME")
""")
        return [TextContent(type="text", text="".join(parts))]

    except Exception as e:
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
async def remove_config_source_tool(args: dict) -> list[TextContent]:
    """
    Unregister a previously added config source.

    Only the registry entry is removed; the success message tells the user
    that cached git repository data is left on disk.

    Args:
        args: Dictionary containing:
            - name: Source identifier to remove (required)

    Returns:
        List of TextContent describing the outcome: success, "not found"
        (with the names of the sources that do exist), or an error message.
    """
    from skill_seekers.mcp.source_manager import SourceManager

    def reply(message: str) -> list[TextContent]:
        # Every outcome is a single text item.
        return [TextContent(type="text", text=message)]

    name = args.get("name")

    try:
        # The identifier is mandatory.
        if not name:
            return reply("❌ Error: 'name' parameter is required")

        manager = SourceManager()

        if not manager.remove_source(name):
            # Unknown source: show what is registered instead.
            known_names = [entry["name"] for entry in manager.list_sources()]
            return reply(f"""❌ Source '{name}' not found
Available sources: {", ".join(known_names) if known_names else "none"}
To see all sources:
list_config_sources()
""")

        return reply(f"""✅ Config source removed successfully!
📛 Removed: {name}
⚠️ Note: Cached git repository data is NOT deleted
To free up disk space, manually delete: ~/.skill-seekers/cache/{name}/
Next steps:
# List remaining sources
list_config_sources()
# Add a different source
add_config_source(name="...", git_url="...")
""")
    except Exception as e:
        return reply(f"❌ Error: {str(e)}")