feat(C2.8): Add scrape_codebase MCP tool for local codebase analysis
- Add scrape_codebase_tool() to scraping_tools.py (67 lines) - Register tool in MCP server with @safe_tool_decorator - Add tool to FastMCP server imports and exports - Add 2 comprehensive tests for basic and advanced usage - Update MCP server tool count from 17 to 18 tools - Tool supports directory analysis with configurable depth - Features: language filtering, file patterns, API reference generation Closes #70 - C2.8 MCP Tool Integration complete Related: - Builds on C2.7 (codebase_scraper.py CLI tool) - Uses existing code_analyzer.py infrastructure - Follows same pattern as scrape_github and scrape_pdf tools Test coverage: - test_scrape_codebase_basic: Basic codebase analysis - test_scrape_codebase_with_options: Advanced options testing
This commit is contained in:
@@ -3,16 +3,16 @@
|
|||||||
Skill Seeker MCP Server (FastMCP Implementation)
|
Skill Seeker MCP Server (FastMCP Implementation)
|
||||||
|
|
||||||
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
|
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
|
||||||
Provides 17 tools for generating Claude AI skills from documentation.
|
Provides 18 tools for generating Claude AI skills from documentation.
|
||||||
|
|
||||||
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
|
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
|
||||||
All tool implementations are delegated to modular tool files in tools/ directory.
|
All tool implementations are delegated to modular tool files in tools/ directory.
|
||||||
|
|
||||||
**Architecture:**
|
**Architecture:**
|
||||||
- FastMCP server with decorator-based tool registration
|
- FastMCP server with decorator-based tool registration
|
||||||
- 17 tools organized into 5 categories:
|
- 18 tools organized into 5 categories:
|
||||||
* Config tools (3): generate_config, list_configs, validate_config
|
* Config tools (3): generate_config, list_configs, validate_config
|
||||||
* Scraping tools (4): estimate_pages, scrape_docs, scrape_github, scrape_pdf
|
* Scraping tools (5): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_codebase
|
||||||
* Packaging tools (3): package_skill, upload_skill, install_skill
|
* Packaging tools (3): package_skill, upload_skill, install_skill
|
||||||
* Splitting tools (2): split_config, generate_router
|
* Splitting tools (2): split_config, generate_router
|
||||||
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
|
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
|
||||||
@@ -81,6 +81,7 @@ try:
|
|||||||
scrape_docs_impl,
|
scrape_docs_impl,
|
||||||
scrape_github_impl,
|
scrape_github_impl,
|
||||||
scrape_pdf_impl,
|
scrape_pdf_impl,
|
||||||
|
scrape_codebase_impl,
|
||||||
# Packaging tools
|
# Packaging tools
|
||||||
package_skill_impl,
|
package_skill_impl,
|
||||||
upload_skill_impl,
|
upload_skill_impl,
|
||||||
@@ -108,6 +109,7 @@ except ImportError:
|
|||||||
scrape_docs_impl,
|
scrape_docs_impl,
|
||||||
scrape_github_impl,
|
scrape_github_impl,
|
||||||
scrape_pdf_impl,
|
scrape_pdf_impl,
|
||||||
|
scrape_codebase_impl,
|
||||||
package_skill_impl,
|
package_skill_impl,
|
||||||
upload_skill_impl,
|
upload_skill_impl,
|
||||||
enhance_skill_impl,
|
enhance_skill_impl,
|
||||||
@@ -393,6 +395,46 @@ async def scrape_pdf(
|
|||||||
return str(result)
|
return str(result)
|
||||||
|
|
||||||
|
|
||||||
|
@safe_tool_decorator(
|
||||||
|
description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation."
|
||||||
|
)
|
||||||
|
async def scrape_codebase(
|
||||||
|
directory: str,
|
||||||
|
output: str = "output/codebase/",
|
||||||
|
depth: str = "deep",
|
||||||
|
languages: str = "",
|
||||||
|
file_patterns: str = "",
|
||||||
|
build_api_reference: bool = False,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Analyze local codebase and extract code knowledge.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
directory: Directory to analyze (required)
|
||||||
|
output: Output directory for results (default: output/codebase/)
|
||||||
|
depth: Analysis depth - surface, deep, full (default: deep)
|
||||||
|
languages: Comma-separated languages to analyze (e.g., "Python,JavaScript,C++")
|
||||||
|
file_patterns: Comma-separated file patterns (e.g., "*.py,src/**/*.js")
|
||||||
|
build_api_reference: Generate API reference markdown (default: false)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Codebase analysis results with file paths.
|
||||||
|
"""
|
||||||
|
args = {
|
||||||
|
"directory": directory,
|
||||||
|
"output": output,
|
||||||
|
"depth": depth,
|
||||||
|
"languages": languages,
|
||||||
|
"file_patterns": file_patterns,
|
||||||
|
"build_api_reference": build_api_reference,
|
||||||
|
}
|
||||||
|
|
||||||
|
result = await scrape_codebase_impl(args)
|
||||||
|
if isinstance(result, list) and result:
|
||||||
|
return result[0].text if hasattr(result[0], "text") else str(result[0])
|
||||||
|
return str(result)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# PACKAGING TOOLS (3 tools)
|
# PACKAGING TOOLS (3 tools)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from .scraping_tools import (
|
|||||||
scrape_docs_tool as scrape_docs_impl,
|
scrape_docs_tool as scrape_docs_impl,
|
||||||
scrape_github_tool as scrape_github_impl,
|
scrape_github_tool as scrape_github_impl,
|
||||||
scrape_pdf_tool as scrape_pdf_impl,
|
scrape_pdf_tool as scrape_pdf_impl,
|
||||||
|
scrape_codebase_tool as scrape_codebase_impl,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .packaging_tools import (
|
from .packaging_tools import (
|
||||||
@@ -56,6 +57,7 @@ __all__ = [
|
|||||||
"scrape_docs_impl",
|
"scrape_docs_impl",
|
||||||
"scrape_github_impl",
|
"scrape_github_impl",
|
||||||
"scrape_pdf_impl",
|
"scrape_pdf_impl",
|
||||||
|
"scrape_codebase_impl",
|
||||||
# Packaging tools
|
# Packaging tools
|
||||||
"package_skill_impl",
|
"package_skill_impl",
|
||||||
"upload_skill_impl",
|
"upload_skill_impl",
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ This module contains all scraping-related MCP tool implementations:
|
|||||||
- scrape_docs_tool: Scrape documentation (legacy or unified)
|
- scrape_docs_tool: Scrape documentation (legacy or unified)
|
||||||
- scrape_github_tool: Scrape GitHub repositories
|
- scrape_github_tool: Scrape GitHub repositories
|
||||||
- scrape_pdf_tool: Scrape PDF documentation
|
- scrape_pdf_tool: Scrape PDF documentation
|
||||||
|
- scrape_codebase_tool: Analyze local codebase and extract code knowledge
|
||||||
|
|
||||||
Extracted from server.py for better modularity and organization.
|
Extracted from server.py for better modularity and organization.
|
||||||
"""
|
"""
|
||||||
@@ -430,3 +431,70 @@ async def scrape_github_tool(args: dict) -> List[TextContent]:
|
|||||||
return [TextContent(type="text", text=output)]
|
return [TextContent(type="text", text=output)]
|
||||||
else:
|
else:
|
||||||
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
|
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
|
||||||
|
|
||||||
|
|
||||||
|
async def scrape_codebase_tool(args: dict) -> List[TextContent]:
|
||||||
|
"""
|
||||||
|
Analyze local codebase and extract code knowledge.
|
||||||
|
|
||||||
|
Walks directory tree, analyzes code files, extracts signatures,
|
||||||
|
docstrings, and optionally generates API reference documentation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
args: Dictionary containing:
|
||||||
|
- directory (str): Directory to analyze
|
||||||
|
- output (str, optional): Output directory for results (default: output/codebase/)
|
||||||
|
- depth (str, optional): Analysis depth - surface, deep, full (default: deep)
|
||||||
|
- languages (str, optional): Comma-separated languages (e.g., "Python,JavaScript,C++")
|
||||||
|
- file_patterns (str, optional): Comma-separated file patterns (e.g., "*.py,src/**/*.js")
|
||||||
|
- build_api_reference (bool, optional): Generate API reference markdown (default: False)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[TextContent]: Tool execution results
|
||||||
|
|
||||||
|
Example:
|
||||||
|
scrape_codebase(
|
||||||
|
directory="/path/to/repo",
|
||||||
|
depth="deep",
|
||||||
|
build_api_reference=True
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
directory = args.get("directory")
|
||||||
|
if not directory:
|
||||||
|
return [TextContent(type="text", text="❌ Error: directory parameter is required")]
|
||||||
|
|
||||||
|
output = args.get("output", "output/codebase/")
|
||||||
|
depth = args.get("depth", "deep")
|
||||||
|
languages = args.get("languages", "")
|
||||||
|
file_patterns = args.get("file_patterns", "")
|
||||||
|
build_api_reference = args.get("build_api_reference", False)
|
||||||
|
|
||||||
|
# Build command
|
||||||
|
cmd = [sys.executable, "-m", "skill_seekers.cli.codebase_scraper"]
|
||||||
|
cmd.extend(["--directory", directory])
|
||||||
|
|
||||||
|
if output:
|
||||||
|
cmd.extend(["--output", output])
|
||||||
|
if depth:
|
||||||
|
cmd.extend(["--depth", depth])
|
||||||
|
if languages:
|
||||||
|
cmd.extend(["--languages", languages])
|
||||||
|
if file_patterns:
|
||||||
|
cmd.extend(["--file-patterns", file_patterns])
|
||||||
|
if build_api_reference:
|
||||||
|
cmd.append("--build-api-reference")
|
||||||
|
|
||||||
|
timeout = 600 # 10 minutes for codebase analysis
|
||||||
|
|
||||||
|
progress_msg = "🔍 Analyzing local codebase...\n"
|
||||||
|
progress_msg += f"📁 Directory: {directory}\n"
|
||||||
|
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
|
||||||
|
|
||||||
|
stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
|
||||||
|
|
||||||
|
output_text = progress_msg + stdout
|
||||||
|
|
||||||
|
if returncode == 0:
|
||||||
|
return [TextContent(type="text", text=output_text)]
|
||||||
|
else:
|
||||||
|
return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]
|
||||||
|
|||||||
@@ -429,6 +429,38 @@ class TestScrapingTools:
|
|||||||
|
|
||||||
assert isinstance(result, str)
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
async def test_scrape_codebase_basic(self, temp_dirs):
|
||||||
|
"""Test basic codebase scraping."""
|
||||||
|
# Create a dummy source directory
|
||||||
|
src_dir = temp_dirs["output"] / "test_codebase"
|
||||||
|
src_dir.mkdir()
|
||||||
|
(src_dir / "test.py").write_text("def hello(): pass")
|
||||||
|
|
||||||
|
result = await server_fastmcp.scrape_codebase(
|
||||||
|
directory=str(src_dir),
|
||||||
|
output=str(temp_dirs["output"] / "codebase_analysis")
|
||||||
|
)
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
async def test_scrape_codebase_with_options(self, temp_dirs):
|
||||||
|
"""Test codebase scraping with various options."""
|
||||||
|
# Create a dummy source directory
|
||||||
|
src_dir = temp_dirs["output"] / "test_codebase2"
|
||||||
|
src_dir.mkdir()
|
||||||
|
(src_dir / "main.py").write_text("class Foo: pass")
|
||||||
|
(src_dir / "utils.js").write_text("function bar() {}")
|
||||||
|
|
||||||
|
result = await server_fastmcp.scrape_codebase(
|
||||||
|
directory=str(src_dir),
|
||||||
|
depth="deep",
|
||||||
|
languages="Python,JavaScript",
|
||||||
|
file_patterns="*.py,*.js",
|
||||||
|
build_api_reference=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# PACKAGING TOOLS TESTS (3 tools)
|
# PACKAGING TOOLS TESTS (3 tools)
|
||||||
|
|||||||
Reference in New Issue
Block a user