feat(C2.8): Add scrape_codebase MCP tool for local codebase analysis
- Add scrape_codebase_tool() to scraping_tools.py (67 lines)
- Register tool in MCP server with @safe_tool_decorator
- Add tool to FastMCP server imports and exports
- Add 2 comprehensive tests for basic and advanced usage
- Update MCP server tool count from 17 to 18 tools
- Tool supports directory analysis with configurable depth
- Features: language filtering, file patterns, API reference generation

Closes #70 - C2.8 MCP Tool Integration complete

Related:
- Builds on C2.7 (codebase_scraper.py CLI tool)
- Uses existing code_analyzer.py infrastructure
- Follows same pattern as scrape_github and scrape_pdf tools

Test coverage:
- test_scrape_codebase_basic: Basic codebase analysis
- test_scrape_codebase_with_options: Advanced options testing
This commit is contained in:
@@ -3,16 +3,16 @@
|
||||
Skill Seeker MCP Server (FastMCP Implementation)
|
||||
|
||||
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
|
||||
Provides 17 tools for generating Claude AI skills from documentation.
|
||||
Provides 18 tools for generating Claude AI skills from documentation.
|
||||
|
||||
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
|
||||
All tool implementations are delegated to modular tool files in tools/ directory.
|
||||
|
||||
**Architecture:**
|
||||
- FastMCP server with decorator-based tool registration
|
||||
- 17 tools organized into 5 categories:
|
||||
- 18 tools organized into 5 categories:
|
||||
* Config tools (3): generate_config, list_configs, validate_config
|
||||
* Scraping tools (4): estimate_pages, scrape_docs, scrape_github, scrape_pdf
|
||||
* Scraping tools (5): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_codebase
|
||||
* Packaging tools (3): package_skill, upload_skill, install_skill
|
||||
* Splitting tools (2): split_config, generate_router
|
||||
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
|
||||
@@ -81,6 +81,7 @@ try:
|
||||
scrape_docs_impl,
|
||||
scrape_github_impl,
|
||||
scrape_pdf_impl,
|
||||
scrape_codebase_impl,
|
||||
# Packaging tools
|
||||
package_skill_impl,
|
||||
upload_skill_impl,
|
||||
@@ -108,6 +109,7 @@ except ImportError:
|
||||
scrape_docs_impl,
|
||||
scrape_github_impl,
|
||||
scrape_pdf_impl,
|
||||
scrape_codebase_impl,
|
||||
package_skill_impl,
|
||||
upload_skill_impl,
|
||||
enhance_skill_impl,
|
||||
@@ -393,6 +395,46 @@ async def scrape_pdf(
|
||||
return str(result)
|
||||
|
||||
|
||||
@safe_tool_decorator(
    description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation."
)
async def scrape_codebase(
    directory: str,
    output: str = "output/codebase/",
    depth: str = "deep",
    languages: str = "",
    file_patterns: str = "",
    build_api_reference: bool = False,
) -> str:
    """
    Analyze local codebase and extract code knowledge.

    Args:
        directory: Directory to analyze (required)
        output: Output directory for results (default: output/codebase/)
        depth: Analysis depth - surface, deep, full (default: deep)
        languages: Comma-separated languages to analyze (e.g., "Python,JavaScript,C++")
        file_patterns: Comma-separated file patterns (e.g., "*.py,src/**/*.js")
        build_api_reference: Generate API reference markdown (default: false)

    Returns:
        Codebase analysis results with file paths.
    """
    # Package the keyword arguments into the dict shape the impl expects.
    payload = {
        "directory": directory,
        "output": output,
        "depth": depth,
        "languages": languages,
        "file_patterns": file_patterns,
        "build_api_reference": build_api_reference,
    }

    outcome = await scrape_codebase_impl(payload)

    # The impl returns a list of TextContent-like items; surface the first
    # item's text when available, otherwise stringify whatever came back.
    if not (isinstance(outcome, list) and outcome):
        return str(outcome)
    head = outcome[0]
    return head.text if hasattr(head, "text") else str(head)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PACKAGING TOOLS (3 tools)
|
||||
# ============================================================================
|
||||
|
||||
@@ -24,6 +24,7 @@ from .scraping_tools import (
|
||||
scrape_docs_tool as scrape_docs_impl,
|
||||
scrape_github_tool as scrape_github_impl,
|
||||
scrape_pdf_tool as scrape_pdf_impl,
|
||||
scrape_codebase_tool as scrape_codebase_impl,
|
||||
)
|
||||
|
||||
from .packaging_tools import (
|
||||
@@ -56,6 +57,7 @@ __all__ = [
|
||||
"scrape_docs_impl",
|
||||
"scrape_github_impl",
|
||||
"scrape_pdf_impl",
|
||||
"scrape_codebase_impl",
|
||||
# Packaging tools
|
||||
"package_skill_impl",
|
||||
"upload_skill_impl",
|
||||
|
||||
@@ -6,6 +6,7 @@ This module contains all scraping-related MCP tool implementations:
|
||||
- scrape_docs_tool: Scrape documentation (legacy or unified)
|
||||
- scrape_github_tool: Scrape GitHub repositories
|
||||
- scrape_pdf_tool: Scrape PDF documentation
|
||||
- scrape_codebase_tool: Analyze local codebase and extract code knowledge
|
||||
|
||||
Extracted from server.py for better modularity and organization.
|
||||
"""
|
||||
@@ -430,3 +431,70 @@ async def scrape_github_tool(args: dict) -> List[TextContent]:
|
||||
return [TextContent(type="text", text=output)]
|
||||
else:
|
||||
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
|
||||
|
||||
|
||||
async def scrape_codebase_tool(args: dict) -> List[TextContent]:
    """
    Analyze local codebase and extract code knowledge.

    Walks directory tree, analyzes code files, extracts signatures,
    docstrings, and optionally generates API reference documentation.

    Args:
        args: Dictionary containing:
            - directory (str): Directory to analyze
            - output (str, optional): Output directory for results (default: output/codebase/)
            - depth (str, optional): Analysis depth - surface, deep, full (default: deep)
            - languages (str, optional): Comma-separated languages (e.g., "Python,JavaScript,C++")
            - file_patterns (str, optional): Comma-separated file patterns (e.g., "*.py,src/**/*.js")
            - build_api_reference (bool, optional): Generate API reference markdown (default: False)

    Returns:
        List[TextContent]: Tool execution results

    Example:
        scrape_codebase(
            directory="/path/to/repo",
            depth="deep",
            build_api_reference=True
        )
    """
    # The only mandatory argument; bail out early with a user-facing error.
    directory = args.get("directory")
    if not directory:
        return [TextContent(type="text", text="❌ Error: directory parameter is required")]

    # Delegate the actual analysis to the CLI scraper in a subprocess.
    cmd = [sys.executable, "-m", "skill_seekers.cli.codebase_scraper", "--directory", directory]

    # Optional flags are appended only when they carry a non-empty value,
    # in the same order the CLI documents them.
    valued_options = (
        ("--output", args.get("output", "output/codebase/")),
        ("--depth", args.get("depth", "deep")),
        ("--languages", args.get("languages", "")),
        ("--file-patterns", args.get("file_patterns", "")),
    )
    for flag, value in valued_options:
        if value:
            cmd.extend([flag, value])
    if args.get("build_api_reference", False):
        cmd.append("--build-api-reference")

    timeout = 600  # 10 minutes for codebase analysis

    progress_msg = (
        "🔍 Analyzing local codebase...\n"
        + f"📁 Directory: {directory}\n"
        + f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
    )

    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)

    output_text = progress_msg + stdout

    # Non-zero exit: append captured stderr so the caller sees the failure.
    if returncode != 0:
        return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]
    return [TextContent(type="text", text=output_text)]
|
||||
|
||||
@@ -429,6 +429,38 @@ class TestScrapingTools:
|
||||
|
||||
assert isinstance(result, str)
|
||||
|
||||
async def test_scrape_codebase_basic(self, temp_dirs):
    """Test basic codebase scraping."""
    # Lay down a minimal one-file project to point the tool at.
    project_root = temp_dirs["output"] / "test_codebase"
    project_root.mkdir()
    (project_root / "test.py").write_text("def hello(): pass")

    analysis = await server_fastmcp.scrape_codebase(
        directory=str(project_root),
        output=str(temp_dirs["output"] / "codebase_analysis")
    )

    assert isinstance(analysis, str)
|
||||
|
||||
async def test_scrape_codebase_with_options(self, temp_dirs):
    """Test codebase scraping with various options."""
    # Two-language fixture so the language/pattern filters have something to match.
    project_root = temp_dirs["output"] / "test_codebase2"
    project_root.mkdir()
    (project_root / "main.py").write_text("class Foo: pass")
    (project_root / "utils.js").write_text("function bar() {}")

    analysis = await server_fastmcp.scrape_codebase(
        directory=str(project_root),
        depth="deep",
        languages="Python,JavaScript",
        file_patterns="*.py,*.js",
        build_api_reference=True
    )

    assert isinstance(analysis, str)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PACKAGING TOOLS TESTS (3 tools)
|
||||
|
||||
Reference in New Issue
Block a user