Files
skill-seekers-reference/src/skill_seekers/mcp/tools/sync_config_tools.py
yusyus 83b9a695ba feat: add sync-config command to detect and update config start_urls (#306)
## Summary

Add `skill-seekers sync-config` subcommand that crawls a docs site's navigation,
diffs discovered URLs against a config's start_urls, and optionally writes the
updated list back with --apply.

- BFS link discovery with configurable depth (default 2), max-pages, rate-limit
- Respects url_patterns.include/exclude from config
- Supports optional nav_seed_urls config field
- Handles both unified (sources array) and legacy flat config formats
- MCP tool sync_config included
- 57 tests (39 unit + 18 E2E with local HTTP server)
- Fixed CI: renamed summary job to "Tests" to match branch protection rule

Closes #306
2026-03-15 02:16:32 +03:00

86 lines
3.0 KiB
Python

"""Sync-config MCP tool for Skill Seekers MCP Server.
Provides the ``sync_config`` tool that diffs a config's start_urls against
the live docs site and optionally applies the update.
"""
try:
    from mcp.types import TextContent
except ImportError:
    # The MCP SDK is an optional dependency. Without it, a minimal stand-in
    # exposing the same two keyword fields keeps this module importable.

    class TextContent:
        """Minimal stand-in used when ``mcp.types`` cannot be imported.

        Stores the ``type`` and ``text`` values it is constructed with as
        plain instance attributes.
        """

        def __init__(self, type: str, text: str):
            # ``type`` shadows the builtin on purpose: callers in this module
            # construct the object with ``type=...`` as a keyword argument.
            self.type, self.text = type, text
async def sync_config_tool(args: dict) -> list[TextContent]:
    """Diff a config's ``start_urls`` against the live docs site and report.

    Delegates the actual work to ``skill_seekers.cli.sync_config.sync_config``
    and renders the dictionary it returns as a human-readable text report.

    Args:
        args: Dictionary containing:
            - config_path (str): Path to the config JSON file. Required.
            - apply (bool, optional): Write changes back (default: False).
            - depth (int, optional): BFS crawl depth (default: 2).
            - max_pages (int, optional): Max URLs to discover (default: 500).
            - rate_limit (float, optional): Seconds between requests; passed
              through as ``None`` when absent.
            - source_index (int, optional): Documentation source index
              (default: 0).

    Returns:
        A single-element list holding a ``TextContent`` with either the
        added/removed URL report or an ``Error: ...`` message.
    """

    def _reply(message: str) -> list[TextContent]:
        # Every exit path returns exactly one text item.
        return [TextContent(type="text", text=message)]

    config_path = args.get("config_path", "")
    if not config_path:
        return _reply("Error: config_path is required")

    try:
        # Imported lazily, inside the try, so that an import failure is
        # reported to the caller as a regular tool error.
        from skill_seekers.cli.sync_config import sync_config

        # NOTE(review): this is a plain (non-awaited) call, so it runs on the
        # event-loop thread for its whole duration — confirm that is intended.
        result = sync_config(
            config_path=config_path,
            apply=args.get("apply", False),
            depth=args.get("depth", 2),
            max_pages=args.get("max_pages", 500),
            rate_limit=args.get("rate_limit"),
            source_index=args.get("source_index", 0),
        )
    except FileNotFoundError:
        return _reply(f"Error: Config file not found: {config_path}")
    except Exception as exc:
        return _reply(f"Error syncing config: {exc}")

    if result.get("error"):
        return _reply(f"Error: {result['error']}")

    added = result["added"]
    removed = result["removed"]
    report: list[str] = []

    if added:
        report.append(f"New pages ({len(added)}):")
        report.extend(f" + {url}" for url in added)
    if removed:
        report.append(f"Removed pages ({len(removed)}):")
        report.extend(f" - {url}" for url in removed)

    if added or removed:
        report.append(
            f"\nSummary: {len(added)} new, {len(removed)} removed "
            f"(discovered {result['total_discovered']}, "
            f"configured {result['total_configured']})"
        )
        # Say whether the config was rewritten, or how to request that.
        report.append(
            f"Updated {config_path}"
            if result["applied"]
            else f"Run with apply=true to update {config_path}"
        )
    else:
        report.append("Config is up to date. No changes detected.")

    return _reply("\n".join(report))