feat(C2.8): Add scrape_codebase MCP tool for local codebase analysis

- Add scrape_codebase_tool() to scraping_tools.py (67 lines)
- Register tool in MCP server with @safe_tool_decorator
- Add tool to FastMCP server imports and exports
- Add 2 comprehensive tests for basic and advanced usage
- Update MCP server tool count from 17 to 18 tools
- Tool supports directory analysis with configurable depth
- Features: language filtering, file patterns, API reference generation

Closes #70 - C2.8 MCP Tool Integration complete

Related:
- Builds on C2.7 (codebase_scraper.py CLI tool)
- Uses existing code_analyzer.py infrastructure
- Follows same pattern as scrape_github and scrape_pdf tools

Test coverage:
- test_scrape_codebase_basic: Basic codebase analysis
- test_scrape_codebase_with_options: Advanced options testing
This commit is contained in:
yusyus
2026-01-01 23:18:04 +03:00
parent ae96526d4b
commit a99f71e714
4 changed files with 147 additions and 3 deletions

View File

@@ -3,16 +3,16 @@
Skill Seeker MCP Server (FastMCP Implementation)
Modern, decorator-based MCP server using FastMCP for simplified tool registration.
Provides 17 tools for generating Claude AI skills from documentation.
Provides 18 tools for generating Claude AI skills from documentation.
This is a streamlined alternative to server.py (2200 lines → 708 lines, 68% reduction).
All tool implementations are delegated to modular tool files in tools/ directory.
**Architecture:**
- FastMCP server with decorator-based tool registration
- 17 tools organized into 5 categories:
- 18 tools organized into 5 categories:
* Config tools (3): generate_config, list_configs, validate_config
* Scraping tools (4): estimate_pages, scrape_docs, scrape_github, scrape_pdf
* Scraping tools (5): estimate_pages, scrape_docs, scrape_github, scrape_pdf, scrape_codebase
* Packaging tools (3): package_skill, upload_skill, install_skill
* Splitting tools (2): split_config, generate_router
* Source tools (5): fetch_config, submit_config, add_config_source, list_config_sources, remove_config_source
@@ -81,6 +81,7 @@ try:
scrape_docs_impl,
scrape_github_impl,
scrape_pdf_impl,
scrape_codebase_impl,
# Packaging tools
package_skill_impl,
upload_skill_impl,
@@ -108,6 +109,7 @@ except ImportError:
scrape_docs_impl,
scrape_github_impl,
scrape_pdf_impl,
scrape_codebase_impl,
package_skill_impl,
upload_skill_impl,
enhance_skill_impl,
@@ -393,6 +395,46 @@ async def scrape_pdf(
return str(result)
@safe_tool_decorator(
    description="Analyze local codebase and extract code knowledge. Walks directory tree, analyzes code files, extracts signatures, docstrings, and optionally generates API reference documentation."
)
async def scrape_codebase(
    directory: str,
    output: str = "output/codebase/",
    depth: str = "deep",
    languages: str = "",
    file_patterns: str = "",
    build_api_reference: bool = False,
) -> str:
    """
    Analyze local codebase and extract code knowledge.

    Thin FastMCP wrapper: packs the keyword arguments into the dict
    expected by the modular implementation and flattens its response
    into a plain string.

    Args:
        directory: Directory to analyze (required)
        output: Output directory for results (default: output/codebase/)
        depth: Analysis depth - surface, deep, full (default: deep)
        languages: Comma-separated languages to analyze (e.g., "Python,JavaScript,C++")
        file_patterns: Comma-separated file patterns (e.g., "*.py,src/**/*.js")
        build_api_reference: Generate API reference markdown (default: false)

    Returns:
        Codebase analysis results with file paths.
    """
    payload = dict(
        directory=directory,
        output=output,
        depth=depth,
        languages=languages,
        file_patterns=file_patterns,
        build_api_reference=build_api_reference,
    )
    response = await scrape_codebase_impl(payload)
    # Implementations return a list of content items; unwrap the first one.
    if isinstance(response, list) and response:
        first = response[0]
        return first.text if hasattr(first, "text") else str(first)
    return str(response)
# ============================================================================
# PACKAGING TOOLS (3 tools)
# ============================================================================

View File

@@ -24,6 +24,7 @@ from .scraping_tools import (
scrape_docs_tool as scrape_docs_impl,
scrape_github_tool as scrape_github_impl,
scrape_pdf_tool as scrape_pdf_impl,
scrape_codebase_tool as scrape_codebase_impl,
)
from .packaging_tools import (
@@ -56,6 +57,7 @@ __all__ = [
"scrape_docs_impl",
"scrape_github_impl",
"scrape_pdf_impl",
"scrape_codebase_impl",
# Packaging tools
"package_skill_impl",
"upload_skill_impl",

View File

@@ -6,6 +6,7 @@ This module contains all scraping-related MCP tool implementations:
- scrape_docs_tool: Scrape documentation (legacy or unified)
- scrape_github_tool: Scrape GitHub repositories
- scrape_pdf_tool: Scrape PDF documentation
- scrape_codebase_tool: Analyze local codebase and extract code knowledge
Extracted from server.py for better modularity and organization.
"""
@@ -430,3 +431,70 @@ async def scrape_github_tool(args: dict) -> List[TextContent]:
return [TextContent(type="text", text=output)]
else:
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
async def scrape_codebase_tool(args: dict) -> List[TextContent]:
    """
    Analyze local codebase and extract code knowledge.

    Walks directory tree, analyzes code files, extracts signatures,
    docstrings, and optionally generates API reference documentation.
    Delegates the actual work to the codebase_scraper CLI module,
    executed as a subprocess with streamed output.

    Args:
        args: Dictionary containing:
            - directory (str): Directory to analyze
            - output (str, optional): Output directory for results (default: output/codebase/)
            - depth (str, optional): Analysis depth - surface, deep, full (default: deep)
            - languages (str, optional): Comma-separated languages (e.g., "Python,JavaScript,C++")
            - file_patterns (str, optional): Comma-separated file patterns (e.g., "*.py,src/**/*.js")
            - build_api_reference (bool, optional): Generate API reference markdown (default: False)

    Returns:
        List[TextContent]: Tool execution results

    Example:
        scrape_codebase(
            directory="/path/to/repo",
            depth="deep",
            build_api_reference=True
        )
    """
    target_dir = args.get("directory")
    if not target_dir:
        return [TextContent(type="text", text="❌ Error: directory parameter is required")]

    # Assemble the CLI invocation; valued flags are appended only when non-empty.
    cmd = [sys.executable, "-m", "skill_seekers.cli.codebase_scraper", "--directory", target_dir]
    for flag, value in (
        ("--output", args.get("output", "output/codebase/")),
        ("--depth", args.get("depth", "deep")),
        ("--languages", args.get("languages", "")),
        ("--file-patterns", args.get("file_patterns", "")),
    ):
        if value:
            cmd.extend([flag, value])
    if args.get("build_api_reference", False):
        cmd.append("--build-api-reference")

    timeout = 600  # 10 minutes for codebase analysis

    progress_msg = (
        "🔍 Analyzing local codebase...\n"
        f"📁 Directory: {target_dir}\n"
        f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
    )

    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)
    output_text = progress_msg + stdout

    if returncode != 0:
        return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]
    return [TextContent(type="text", text=output_text)]

View File

@@ -429,6 +429,38 @@ class TestScrapingTools:
assert isinstance(result, str)
async def test_scrape_codebase_basic(self, temp_dirs):
    """Test basic codebase scraping."""
    # Set up a throwaway project containing a single Python file.
    project_root = temp_dirs["output"] / "test_codebase"
    project_root.mkdir()
    (project_root / "test.py").write_text("def hello(): pass")

    result = await server_fastmcp.scrape_codebase(
        directory=str(project_root),
        output=str(temp_dirs["output"] / "codebase_analysis"),
    )

    assert isinstance(result, str)
async def test_scrape_codebase_with_options(self, temp_dirs):
    """Test codebase scraping with various options."""
    # Mixed-language fixture: one Python and one JavaScript file.
    project_root = temp_dirs["output"] / "test_codebase2"
    project_root.mkdir()
    (project_root / "main.py").write_text("class Foo: pass")
    (project_root / "utils.js").write_text("function bar() {}")

    result = await server_fastmcp.scrape_codebase(
        directory=str(project_root),
        depth="deep",
        languages="Python,JavaScript",
        file_patterns="*.py,*.js",
        build_api_reference=True,
    )

    assert isinstance(result, str)
# ============================================================================
# PACKAGING TOOLS TESTS (3 tools)