Files
skill-seekers-reference/src/skill_seekers/mcp/tools/config_tools.py
Pablo Estevez 5ed767ff9a run ruff
2026-01-17 17:29:21 +00:00

248 lines
8.7 KiB
Python

"""
Config management tools for Skill Seeker MCP Server.
This module provides tools for generating, listing, and validating configuration files
for documentation scraping.
"""
import json
import sys
from pathlib import Path
try:
    from mcp.types import TextContent
except ImportError:
    # MCP is optional (e.g. in tests): provide a minimal stand-in that
    # accepts the same two keyword arguments the tools in this module pass.

    class TextContent:
        """Minimal substitute for ``mcp.types.TextContent`` when MCP is not installed.

        Stores the content kind and its text payload as plain attributes.
        """

        def __init__(self, type: str, text: str):
            # ``type`` shadows the builtin on purpose: callers pass it by
            # keyword (``type="text"``), so the parameter name is part of
            # the interface.
            self.type, self.text = type, text
# Location of the CLI helper modules: the ``cli`` directory found three
# ``parent`` hops up from this file.
CLI_DIR = Path(__file__).parent.parent.parent.joinpath("cli")

# Put the CLI directory at the front of the import path so that its modules
# (such as ``config_validator``) can be imported by their bare names.
sys.path.insert(0, str(CLI_DIR))

try:
    from config_validator import ConfigValidator
except ImportError:
    # Graceful degradation: leave a None placeholder when the validator
    # module cannot be imported.
    ConfigValidator = None
async def generate_config(args: dict) -> list[TextContent]:
    """
    Generate a config file for documentation scraping.

    Builds a JSON config from the supplied arguments plus default selectors,
    empty URL patterns and empty categories, then writes it to
    ``configs/<name>.json`` relative to the current working directory
    (creating the ``configs`` directory if needed). The config can be further
    customized after creation.

    Args:
        args: Dictionary containing:
            - name (str): Skill name (lowercase, alphanumeric, hyphens, underscores).
              Used as the config file name, so it must not contain path separators.
            - url (str): Base documentation URL (must include http:// or https://)
            - description (str): Description of when to use this skill
            - max_pages (int, optional): Maximum pages to scrape (default: 100, use -1 for unlimited)
            - unlimited (bool, optional): Remove all limits - scrape all pages (default: False).
              Overrides max_pages.
            - rate_limit (float, optional): Delay between requests in seconds (default: 0.5)

    Returns:
        List[TextContent]: Success message with config path and next steps, or an
        error message when the name cannot be used as a file name.

    Raises:
        KeyError: If ``name``, ``url`` or ``description`` is missing from ``args``.
    """
    name = args["name"]
    url = args["url"]
    description = args["description"]
    max_pages = args.get("max_pages", 100)
    unlimited = args.get("unlimited", False)
    rate_limit = args.get("rate_limit", 0.5)

    # The name is interpolated into the output path below. Refuse anything
    # path-like so a caller-supplied value such as "../../x" cannot make the
    # tool write outside the configs directory.
    stem = str(name)
    if stem in {"", ".", ".."} or "/" in stem or "\\" in stem:
        return [
            TextContent(
                type="text",
                text=(
                    f"❌ Error: Invalid config name {name!r}: "
                    "must be a plain file name without path separators"
                ),
            )
        ]

    # Unlimited mode is stored as a null max_pages in the config.
    if unlimited or max_pages == -1:
        max_pages = None
        limit_msg = "unlimited (no page limit)"
    else:
        limit_msg = str(max_pages)

    config = {
        "name": name,
        "description": description,
        "base_url": url,
        "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
        "url_patterns": {"include": [], "exclude": []},
        "categories": {},
        "rate_limit": rate_limit,
        "max_pages": max_pages,
    }

    # Save to configs directory. The encoding is explicit so the file does
    # not depend on the platform's locale default.
    config_path = Path("configs") / f"{name}.json"
    config_path.parent.mkdir(exist_ok=True)
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    result = f"""✅ Config created: {config_path}
Configuration:
Name: {name}
URL: {url}
Max pages: {limit_msg}
Rate limit: {rate_limit}s
Next steps:
1. Review/edit config: cat {config_path}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool
Note: Default selectors may need adjustment for your documentation site.
"""
    return [TextContent(type="text", text=result)]
async def list_configs(args: dict) -> list[TextContent]:
    """
    List all available preset configurations.

    Scans the ``configs`` directory (relative to the current working
    directory) for ``*.json`` files and reports, for each one in sorted order,
    its file name plus the ``name``, ``base_url`` and ``description`` fields.
    A file that cannot be read or parsed is listed with the error instead of
    aborting the whole listing.

    Args:
        args: Dictionary (empty, no parameters required)

    Returns:
        List[TextContent]: Formatted list of available configs with details, or a
        short message if the directory or config files are missing.
    """
    configs_dir = Path("configs")
    if not configs_dir.exists():
        return [TextContent(type="text", text="No configs directory found")]

    config_files = sorted(configs_dir.glob("*.json"))
    if not config_files:
        return [TextContent(type="text", text="No config files found")]

    sections = ["📋 Available Configs:\n\n"]
    for config_file in config_files:
        try:
            # JSON is UTF-8 by convention; do not rely on the locale default.
            with open(config_file, encoding="utf-8") as f:
                config = json.load(f)
            if not isinstance(config, dict):
                # Valid JSON, but not a config object (e.g. a list or string).
                raise ValueError("top-level JSON value is not an object")

            name = config.get("name", config_file.stem)
            desc = config.get("description", "No description")
            url = config.get("base_url", "")
            sections.append(
                f"{config_file.name}\n"
                f" Name: {name}\n"
                f" URL: {url}\n"
                f" Description: {desc}\n\n"
            )
        except Exception as e:
            # Deliberately broad: one unreadable file must not hide the rest.
            sections.append(f"{config_file.name} - Error reading: {e}\n\n")

    return [TextContent(type="text", text="".join(sections))]
def _format_validator_report(validator) -> str:
    """Build the success report for a validator returned by ``config_validator``.

    NOTE(review): relies on the validator exposing ``is_unified``, ``config``
    and ``needs_api_merge()``, as used below — confirm against config_validator.
    """
    parts = ["✅ Config is valid!\n\n"]
    config = validator.config

    if validator.is_unified:
        sources = config.get("sources", [])
        parts.append("📦 Format: Unified (multi-source)\n")
        parts.append(f" Name: {config['name']}\n")
        parts.append(f" Sources: {len(sources)}\n")

        # One short section per source, with the fields relevant to its type.
        for i, source in enumerate(sources, 1):
            source_type = source["type"]
            parts.append(f"\n Source {i}: {source_type}\n")
            if source_type == "documentation":
                parts.append(f" URL: {source.get('base_url', 'N/A')}\n")
                parts.append(f" Max pages: {source.get('max_pages', 'Not set')}\n")
            elif source_type == "github":
                parts.append(f" Repo: {source.get('repo', 'N/A')}\n")
                parts.append(f" Code depth: {source.get('code_analysis_depth', 'surface')}\n")
            elif source_type == "pdf":
                parts.append(f" Path: {source.get('path', 'N/A')}\n")

        # Show merge settings if applicable
        if validator.needs_api_merge():
            merge_mode = config.get("merge_mode", "rule-based")
            parts.append(f"\n Merge mode: {merge_mode}\n")
            parts.append(" API merging: Required (docs + code sources)\n")
    else:
        parts.append("📦 Format: Legacy (single source)\n")
        parts.append(f" Name: {config['name']}\n")
        parts.append(f" Base URL: {config.get('base_url', 'N/A')}\n")
        parts.append(f" Max pages: {config.get('max_pages', 'Not set')}\n")
        parts.append(f" Rate limit: {config.get('rate_limit', 'Not set')}s\n")

    return "".join(parts)


def _legacy_validation_report(config_path: str) -> str:
    """Validate ``config_path`` with ``doc_scraper.validate_config`` and format the outcome."""
    from doc_scraper import validate_config as validate_legacy_config

    # JSON is UTF-8 by convention; do not rely on the locale default.
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    # Validate config - returns (errors, warnings) tuple
    errors, warnings = validate_legacy_config(config)

    if errors:
        parts = ["❌ Config validation failed:\n\n"]
        parts.extend(f"{error}\n" for error in errors)
    else:
        parts = [
            "✅ Config is valid!\n\n",
            "📦 Format: Legacy (single source)\n",
            f" Name: {config['name']}\n",
            f" Base URL: {config['base_url']}\n",
            f" Max pages: {config.get('max_pages', 'Not set')}\n",
            f" Rate limit: {config.get('rate_limit', 'Not set')}s\n",
        ]

    if warnings:
        parts.append("\n⚠️ Warnings:\n")
        parts.extend(f"{warning}\n" for warning in warnings)

    return "".join(parts)


async def validate_config(args: dict) -> list[TextContent]:
    """
    Validate a config file for errors.

    Tries the ``config_validator`` module first, which handles both legacy
    (single-source) and unified (multi-source) formats. If that raises
    ImportError, falls back to ``doc_scraper.validate_config``, which returns
    an ``(errors, warnings)`` tuple for a loaded config dict. Any other
    exception raised during validation is reported as an error message rather
    than propagated.

    Args:
        args: Dictionary containing:
            - config_path (str): Path to config JSON file to validate

    Returns:
        List[TextContent]: Validation results with format details and any
        errors/warnings, or error message.

    Raises:
        KeyError: If ``config_path`` is missing from ``args``.
    """
    config_path = args["config_path"]

    # Make the CLI modules importable by bare name. Only insert when absent:
    # inserting unconditionally on every call would grow sys.path forever.
    cli_dir = str(CLI_DIR)
    if cli_dir not in sys.path:
        sys.path.insert(0, cli_dir)

    try:
        # Check if file exists
        if not Path(config_path).exists():
            return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]

        # Try unified config validator first. The import is aliased so it
        # does not shadow this function's own name.
        try:
            from config_validator import validate_config as run_unified_validation

            validator = run_unified_validation(config_path)
            report = _format_validator_report(validator)
        except ImportError:
            # Fall back to legacy validation
            report = _legacy_validation_report(config_path)

        return [TextContent(type="text", text=report)]
    except Exception as e:
        # Tool boundary: surface every failure to the caller as text.
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]