"""
|
|
Config management tools for Skill Seeker MCP Server.
|
|
|
|
This module provides tools for generating, listing, and validating configuration files
|
|
for documentation scraping.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from mcp.types import TextContent
|
|
except ImportError:
|
|
# Graceful degradation: Create a simple fallback class for testing
|
|
class TextContent:
|
|
"""Fallback TextContent for when MCP is not installed"""
|
|
|
|
def __init__(self, type: str, text: str):
|
|
self.type = type
|
|
self.text = text
|
|
|
|
|
|
# Path to CLI tools
|
|
CLI_DIR = Path(__file__).parent.parent.parent / "cli"
|
|
|
|
# Import config validator for validation
|
|
sys.path.insert(0, str(CLI_DIR))
|
|
try:
|
|
from config_validator import ConfigValidator
|
|
except ImportError:
|
|
ConfigValidator = None # Graceful degradation if not available
|
|
|
|
|
|
async def generate_config(args: dict) -> list[TextContent]:
    """
    Generate a config file for documentation scraping.

    Creates a JSON config for any documentation website with default selectors and
    sensible defaults. The config can be further customized after creation.

    Args:
        args: Dictionary containing:
            - name (str): Skill name (lowercase, alphanumeric, hyphens, underscores)
            - url (str): Base documentation URL (must include http:// or https://)
            - description (str): Description of when to use this skill
            - max_pages (int, optional): Maximum pages to scrape (default: 100; use -1 for unlimited)
            - unlimited (bool, optional): Scrape all pages with no limit (default: False). Overrides max_pages.
            - rate_limit (float, optional): Delay between requests in seconds (default: 0.5)

    Returns:
        List[TextContent]: Success message with config path and next steps, or an error message.
    """
    name = args["name"]
    url = args["url"]
    description = args["description"]
    max_pages = args.get("max_pages", 100)
    unlimited = args.get("unlimited", False)
    rate_limit = args.get("rate_limit", 0.5)

    # Handle unlimited mode
    if unlimited or max_pages == -1:
        max_pages = None
        limit_msg = "unlimited (no page limit)"
    else:
        limit_msg = str(max_pages)

    # Create config
    config = {
        "name": name,
        "description": description,
        "base_url": url,
        "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
        "url_patterns": {"include": [], "exclude": []},
        "categories": {},
        "rate_limit": rate_limit,
        "max_pages": max_pages,
    }

    # Save to configs directory
    config_path = Path("configs") / f"{name}.json"
    config_path.parent.mkdir(exist_ok=True)

    with open(config_path, "w") as f:
        json.dump(config, f, indent=2)

    result = f"""✅ Config created: {config_path}

Configuration:
  Name: {name}
  URL: {url}
  Max pages: {limit_msg}
  Rate limit: {rate_limit}s

Next steps:
1. Review/edit config: cat {config_path}
2. Estimate pages: Use estimate_pages tool
3. Scrape docs: Use scrape_docs tool

Note: Default selectors may need adjustment for your documentation site.
"""

    return [TextContent(type="text", text=result)]
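
# Example (illustrative; the skill name, URL, and description below are hypothetical):
#
#     await generate_config({
#         "name": "fastapi",
#         "url": "https://fastapi.tiangolo.com",
#         "description": "Use when building FastAPI applications",
#         "max_pages": 200,
#     })
#
# writes configs/fastapi.json with the default selectors above and returns a summary
# message; pass "unlimited": True (or "max_pages": -1) to drop the page limit.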


async def list_configs(args: dict) -> list[TextContent]:
    """
    List all available preset configurations.

    Scans the configs directory and lists all available config files with their
    basic information (name, URL, description).

    Args:
        args: Dictionary (empty, no parameters required)

    Returns:
        List[TextContent]: Formatted list of available configs with details, or error if no configs found.
    """
    configs_dir = Path("configs")

    if not configs_dir.exists():
        return [TextContent(type="text", text="No configs directory found")]

    configs = list(configs_dir.glob("*.json"))

    if not configs:
        return [TextContent(type="text", text="No config files found")]

    result = "📋 Available Configs:\n\n"

    for config_file in sorted(configs):
        try:
            with open(config_file) as f:
                config = json.load(f)
            name = config.get("name", config_file.stem)
            desc = config.get("description", "No description")
            url = config.get("base_url", "")

            result += f" • {config_file.name}\n"
            result += f"   Name: {name}\n"
            result += f"   URL: {url}\n"
            result += f"   Description: {desc}\n\n"
        except Exception as e:
            result += f" • {config_file.name} - Error reading: {e}\n\n"

    return [TextContent(type="text", text=result)]
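
# Example (illustrative):
#
#     [content] = await list_configs({})
#     print(content.text)  # one bullet per configs/*.json file, with name, URL, description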


async def validate_config(args: dict) -> list[TextContent]:
    """
    Validate a config file for errors.

    Validates both legacy (single-source) and unified (multi-source) config formats.
    Checks for required fields, valid URLs, and proper structure, and provides detailed
    feedback on any issues found.

    Args:
        args: Dictionary containing:
            - config_path (str): Path to config JSON file to validate

    Returns:
        List[TextContent]: Validation results with format details and any errors/warnings, or an error message.
    """
    config_path = args["config_path"]

    # Import validation classes
    sys.path.insert(0, str(CLI_DIR))

    try:
        # Check if file exists
        if not Path(config_path).exists():
            return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]

        # Try unified config validator first
        try:
            from config_validator import validate_config

            validator = validate_config(config_path)

            result = "✅ Config is valid!\n\n"

            # Show format
            if validator.is_unified:
                result += "📦 Format: Unified (multi-source)\n"
                result += f"  Name: {validator.config['name']}\n"
                result += f"  Sources: {len(validator.config.get('sources', []))}\n"

                # Show sources
                for i, source in enumerate(validator.config.get("sources", []), 1):
                    result += f"\n  Source {i}: {source['type']}\n"
                    if source["type"] == "documentation":
                        result += f"    URL: {source.get('base_url', 'N/A')}\n"
                        result += f"    Max pages: {source.get('max_pages', 'Not set')}\n"
                    elif source["type"] == "github":
                        result += f"    Repo: {source.get('repo', 'N/A')}\n"
                        result += f"    Code depth: {source.get('code_analysis_depth', 'surface')}\n"
                    elif source["type"] == "pdf":
                        result += f"    Path: {source.get('path', 'N/A')}\n"

                # Show merge settings if applicable
                if validator.needs_api_merge():
                    merge_mode = validator.config.get("merge_mode", "rule-based")
                    result += f"\n  Merge mode: {merge_mode}\n"
                    result += "  API merging: Required (docs + code sources)\n"

            else:
                result += "📦 Format: Legacy (single source)\n"
                result += f"  Name: {validator.config['name']}\n"
                result += f"  Base URL: {validator.config.get('base_url', 'N/A')}\n"
                result += f"  Max pages: {validator.config.get('max_pages', 'Not set')}\n"
                result += f"  Rate limit: {validator.config.get('rate_limit', 'Not set')}s\n"

            return [TextContent(type="text", text=result)]

        except ImportError:
            # Fall back to legacy validation
            import json

            from doc_scraper import validate_config

            with open(config_path) as f:
                config = json.load(f)

            # Validate config - returns (errors, warnings) tuple
            errors, warnings = validate_config(config)

            if errors:
                result = "❌ Config validation failed:\n\n"
                for error in errors:
                    result += f" • {error}\n"
            else:
                result = "✅ Config is valid!\n\n"
                result += "📦 Format: Legacy (single source)\n"
                result += f"  Name: {config['name']}\n"
                result += f"  Base URL: {config['base_url']}\n"
                result += f"  Max pages: {config.get('max_pages', 'Not set')}\n"
                result += f"  Rate limit: {config.get('rate_limit', 'Not set')}s\n"

            if warnings:
                result += "\n⚠️ Warnings:\n"
                for warning in warnings:
                    result += f" • {warning}\n"

            return [TextContent(type="text", text=result)]

    except Exception as e:
        return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
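

# For reference, a hypothetical unified (multi-source) config; field names come from the
# checks above, but all values are invented placeholders:
#
#     {
#       "name": "my-skill",
#       "sources": [
#         {"type": "documentation", "base_url": "https://docs.example.com", "max_pages": 200},
#         {"type": "github", "repo": "owner/repo", "code_analysis_depth": "surface"}
#       ],
#       "merge_mode": "rule-based"
#     }

# Minimal local smoke test (a sketch, not part of the MCP tool surface); the config path
# below is a placeholder — point it at any existing config file.
if __name__ == "__main__":
    import asyncio

    for content in asyncio.run(validate_config({"config_path": "configs/example.json"})):
        print(content.text)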