@safe_tool_decorator(
    description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation."
)
async def scrape_codebase(
    directory: str,
    output: str = "output/codebase/",
    depth: str = "deep",
    languages: str = "",
    file_patterns: str = "",
    build_api_reference: bool = False,
) -> str:
    """
    Run the codebase analysis tool and return its textual result.

    Packs the keyword arguments into the dictionary passed to
    ``scrape_codebase_impl`` and unwraps the first item of its response.

    Args:
        directory: Directory to analyze (required)
        output: Output directory for results (default: output/codebase/)
        depth: Analysis depth - surface, deep, full (default: deep)
        languages: Comma-separated languages to analyze (e.g., "Python,JavaScript,C++")
        file_patterns: Comma-separated file patterns (e.g., "*.py,src/**/*.js")
        build_api_reference: Generate API reference markdown (default: false)

    Returns:
        The ``text`` attribute of the first result item when the
        implementation returns a non-empty list (or ``str()`` of that item
        if it has no ``text`` attribute); otherwise ``str()`` of whatever
        the implementation returned.
    """
    tool_args = {
        "directory": directory,
        "output": output,
        "depth": depth,
        "languages": languages,
        "file_patterns": file_patterns,
        "build_api_reference": build_api_reference,
    }
    response = await scrape_codebase_impl(tool_args)

    # Anything other than a non-empty list is stringified as-is.
    if not isinstance(response, list) or not response:
        return str(response)

    # Only the first item of the response is surfaced to the caller.
    first_item = response[0]
    if hasattr(first_item, "text"):
        return first_item.text
    return str(first_item)
async def scrape_codebase_tool(args: dict) -> List[TextContent]:
    """
    Analyze a local codebase by running the codebase scraper CLI module.

    Builds a ``<python> -m skill_seekers.cli.codebase_scraper`` command from
    ``args``, runs it through ``run_subprocess_with_streaming`` with a
    10-minute timeout, and returns a progress header followed by the
    captured stdout. On a non-zero exit code the captured stderr is
    appended under an error heading.

    Args:
        args: Dictionary containing:
            - directory (str): Directory to analyze
            - output (str, optional): Output directory for results (default: output/codebase/)
            - depth (str, optional): Analysis depth - surface, deep, full (default: deep)
            - languages (str, optional): Comma-separated languages (e.g., "Python,JavaScript,C++")
            - file_patterns (str, optional): Comma-separated file patterns (e.g., "*.py,src/**/*.js")
            - build_api_reference (bool, optional): Generate API reference markdown (default: False)

    Returns:
        List[TextContent]: A single-item list holding the tool output, or an
        error message when ``directory`` is missing or empty.

    Example:
        await scrape_codebase_tool({
            "directory": "/path/to/repo",
            "depth": "deep",
            "build_api_reference": True,
        })
    """
    directory = args.get("directory")
    if not directory:
        return [TextContent(type="text", text="❌ Error: directory parameter is required")]

    # Run the scraper with the same interpreter that is running this code.
    cmd = [
        sys.executable,
        "-m",
        "skill_seekers.cli.codebase_scraper",
        "--directory",
        directory,
    ]

    # Options that take a value are forwarded only when the value is truthy,
    # so an explicitly empty string suppresses the flag.
    value_options = (
        ("--output", args.get("output", "output/codebase/")),
        ("--depth", args.get("depth", "deep")),
        ("--languages", args.get("languages", "")),
        ("--file-patterns", args.get("file_patterns", "")),
    )
    for flag, value in value_options:
        if value:
            cmd.extend([flag, value])

    if args.get("build_api_reference", False):
        cmd.append("--build-api-reference")

    timeout = 600  # 10 minutes for codebase analysis

    progress_msg = (
        "🔍 Analyzing local codebase...\n"
        f"📁 Directory: {directory}\n"
        f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
    )

    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
    output_text = progress_msg + stdout

    if returncode == 0:
        return [TextContent(type="text", text=output_text)]
    return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]