Files
skill-seekers-reference/src/skill_seekers/mcp/tools/splitting_tools.py
yusyus 2ec2840396 fix: Add TextContent fallback class for test compatibility
- Replace TextContent = None with proper fallback class in all MCP tool modules
- Fixes TypeError when MCP library is not fully initialized in test environment
- Ensures all 700 tests pass (was 699 passing, 1 failing)
- Affected files:
  * packaging_tools.py
  * config_tools.py
  * scraping_tools.py
  * source_tools.py
  * splitting_tools.py

The fallback class maintains the same interface as mcp.types.TextContent,
allowing tests to run successfully even when the MCP library import fails.

Test results: 700 passed, 157 skipped, 2 warnings
2025-12-28 21:40:31 +03:00

206 lines
6.8 KiB
Python

"""
Splitting tools for Skill Seeker MCP Server.
This module provides tools for splitting large documentation configs into multiple
focused skills and generating router/hub skills for managing split documentation.
"""
import glob
import sys
from pathlib import Path
from typing import Any, List
try:
    from mcp.types import TextContent
except ImportError:
    # Graceful degradation: keep the module importable (e.g. in test
    # environments) when the MCP library is missing or half-initialized.
    class TextContent:  # type: ignore[no-redef]
        """Minimal stand-in mirroring the mcp.types.TextContent interface."""

        def __init__(self, type: str, text: str) -> None:
            # Same two attributes the real model exposes.
            self.type = type
            self.text = text
# Directory holding the CLI scripts this module shells out to
# (split_config.py, generate_router.py), resolved relative to this file:
# .../skill_seekers/mcp/tools/ -> .../skill_seekers/cli
CLI_DIR = Path(__file__).parent.parent.parent / "cli"
# Import subprocess helper from parent module
# We'll use a local import to avoid circular dependencies
def run_subprocess_with_streaming(cmd, timeout=None):
    """
    Run a subprocess and collect its output line by line as it is produced.

    This solves the blocking issue where long-running processes (like
    scraping) would cause MCP to appear frozen: output is streamed as it
    comes instead of only being available at process exit.

    Args:
        cmd: Argument list for the subprocess (passed to subprocess.Popen,
            shell=False).
        timeout: Optional wall-clock limit in seconds. When exceeded the
            process is killed and a note is appended to stderr.

    Returns:
        tuple: (stdout, stderr, returncode). On any failure to launch or
        stream, returns ("", error-message, 1) rather than raising — this
        best-effort contract keeps the MCP handler from crashing.
    """
    import subprocess
    import time

    # select() only works on pipes on POSIX; hoisted out of the loop so we
    # don't re-import on every iteration.
    try:
        import select
    except ImportError:
        select = None

    try:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,  # line buffered
            universal_newlines=True,
        )
        stdout_lines = []
        stderr_lines = []
        start_time = time.time()

        # Read output line by line as it arrives.
        while True:
            # Enforce the optional wall-clock timeout.
            if timeout and (time.time() - start_time) > timeout:
                process.kill()
                stderr_lines.append(f"\n⚠️ Process killed after {timeout}s timeout")
                break
            # Stop polling once the process has exited.
            if process.poll() is not None:
                break
            if select is not None:
                try:
                    readable, _, _ = select.select(
                        [process.stdout, process.stderr], [], [], 0.1
                    )
                    if process.stdout in readable:
                        line = process.stdout.readline()
                        if line:
                            stdout_lines.append(line)
                    if process.stderr in readable:
                        line = process.stderr.readline()
                        if line:
                            stderr_lines.append(line)
                except (OSError, ValueError):
                    # Windows raises OSError for select() on pipes; fall back
                    # to a short sleep and let communicate() drain the rest.
                    time.sleep(0.1)
            else:
                # No select module at all: just wait for the poll loop.
                time.sleep(0.1)

        # Drain whatever output remains after exit/kill.
        remaining_stdout, remaining_stderr = process.communicate()
        if remaining_stdout:
            stdout_lines.append(remaining_stdout)
        if remaining_stderr:
            stderr_lines.append(remaining_stderr)

        return "".join(stdout_lines), "".join(stderr_lines), process.returncode
    except Exception as e:
        # Deliberate best-effort: surface the failure as data, never raise.
        return "", f"Error running subprocess: {str(e)}", 1
async def split_config(args: dict) -> List[TextContent]:
    """
    Split large configs into multiple focused skills.

    Handles both documentation and unified (multi-source) configs: doc
    configs can be split by categories, size, or into router skills; unified
    configs are split per source type (documentation, github, pdf). Intended
    for large documentation sites (10K+ pages) whose single config would
    otherwise be unwieldy.

    Args:
        args: Dictionary containing:
            - config_path (str): Path to config JSON file
              (e.g., configs/godot.json or configs/react_unified.json)
            - strategy (str, optional): auto, none, source, category, router,
              or size (default: auto). 'source' applies to unified configs only.
            - target_pages (int, optional): Target pages per skill for doc
              configs (default: 5000)
            - dry_run (bool, optional): Preview without saving files
              (default: False)

    Returns:
        List[TextContent]: Split results showing created configs and
        recommendations, or an error message if the split failed.
    """
    cfg_path = args["config_path"]
    chosen_strategy = args.get("strategy", "auto")
    pages_per_skill = args.get("target_pages", 5000)

    # Delegate the actual work to the split_config.py CLI tool.
    command = [
        sys.executable,
        str(CLI_DIR / "split_config.py"),
        cfg_path,
        "--strategy", chosen_strategy,
        "--target-pages", str(pages_per_skill),
    ]
    if args.get("dry_run", False):
        command.append("--dry-run")

    timeout = 300  # 5-minute ceiling for config splitting
    header = (
        "✂️ Splitting configuration...\n"
        f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
    )

    stdout, stderr, returncode = run_subprocess_with_streaming(command, timeout=timeout)
    combined = header + stdout
    if returncode != 0:
        return [TextContent(type="text", text=f"{combined}\n\n❌ Error:\n{stderr}")]
    return [TextContent(type="text", text=combined)]
async def generate_router(args: dict) -> List[TextContent]:
    """
    Generate a router/hub skill for split documentation.

    Builds an intelligent routing skill that directs user queries to the
    appropriate sub-skill based on content categories, so users can navigate
    between the configs produced by a split.

    Args:
        args: Dictionary containing:
            - config_pattern (str): Glob pattern matching sub-skill configs
              (e.g., 'configs/godot-*.json')
            - router_name (str, optional): Router skill name; inferred from
              the configs when omitted.

    Returns:
        List[TextContent]: Router skill creation results with usage
        instructions, or an error message if generation failed.
    """
    config_pattern = args["config_pattern"]

    # Expand the glob before shelling out; bail early on an empty match.
    matched_configs = glob.glob(config_pattern)
    if not matched_configs:
        return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")]

    # Delegate to the generate_router.py CLI tool.
    command = [sys.executable, str(CLI_DIR / "generate_router.py"), *matched_configs]
    requested_name = args.get("router_name")
    if requested_name:
        command += ["--name", requested_name]

    timeout = 300  # 5-minute ceiling for router generation
    header = (
        "🧭 Generating router skill...\n"
        f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
    )

    stdout, stderr, returncode = run_subprocess_with_streaming(command, timeout=timeout)
    combined = header + stdout
    if returncode != 0:
        return [TextContent(type="text", text=f"{combined}\n\n❌ Error:\n{stderr}")]
    return [TextContent(type="text", text=combined)]