feat(C2.8): Add scrape_codebase MCP tool for local codebase analysis

- Add scrape_codebase_tool() to scraping_tools.py (67 lines)
- Register tool in MCP server with @safe_tool_decorator
- Add tool to FastMCP server imports and exports
- Add 2 comprehensive tests for basic and advanced usage
- Update MCP server tool count from 17 to 18 tools
- Tool supports directory analysis with configurable depth
- Features: language filtering, file patterns, API reference generation

Closes #70 - C2.8 MCP Tool Integration complete

Related:
- Builds on C2.7 (codebase_scraper.py CLI tool)
- Uses existing code_analyzer.py infrastructure
- Follows same pattern as scrape_github and scrape_pdf tools

Test coverage:
- test_scrape_codebase_basic: Basic codebase analysis
- test_scrape_codebase_with_options: Advanced options testing
This commit is contained in:
yusyus
2026-01-01 23:18:04 +03:00
parent ae96526d4b
commit a99f71e714
4 changed files with 147 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ from .scraping_tools import (
scrape_docs_tool as scrape_docs_impl,
scrape_github_tool as scrape_github_impl,
scrape_pdf_tool as scrape_pdf_impl,
scrape_codebase_tool as scrape_codebase_impl,
)
from .packaging_tools import (
@@ -56,6 +57,7 @@ __all__ = [
"scrape_docs_impl",
"scrape_github_impl",
"scrape_pdf_impl",
"scrape_codebase_impl",
# Packaging tools
"package_skill_impl",
"upload_skill_impl",

View File

@@ -6,6 +6,7 @@ This module contains all scraping-related MCP tool implementations:
- scrape_docs_tool: Scrape documentation (legacy or unified)
- scrape_github_tool: Scrape GitHub repositories
- scrape_pdf_tool: Scrape PDF documentation
- scrape_codebase_tool: Analyze local codebase and extract code knowledge
Extracted from server.py for better modularity and organization.
"""
@@ -430,3 +431,70 @@ async def scrape_github_tool(args: dict) -> List[TextContent]:
return [TextContent(type="text", text=output)]
else:
return [TextContent(type="text", text=f"{output}\n\n❌ Error:\n{stderr}")]
async def scrape_codebase_tool(args: dict) -> List[TextContent]:
    """
    Analyze local codebase and extract code knowledge.

    Builds a ``python -m skill_seekers.cli.codebase_scraper`` command line from
    ``args``, runs it through ``run_subprocess_with_streaming`` and returns the
    captured output as a single text item.

    Args:
        args: Dictionary containing:
            - directory (str): Directory to analyze (required)
            - output (str, optional): Output directory for results (default: output/codebase/)
            - depth (str, optional): Analysis depth - surface, deep, full (default: deep)
            - languages (str | list, optional): Comma-separated languages
              (e.g., "Python,JavaScript,C++"); a list/tuple is joined with commas
            - file_patterns (str | list, optional): Comma-separated file patterns
              (e.g., "*.py,src/**/*.js"); a list/tuple is joined with commas
            - build_api_reference (bool, optional): Generate API reference markdown (default: False)

    Returns:
        List[TextContent]: One text item holding a progress header followed by
        the subprocess stdout. When the subprocess exits non-zero, the stderr
        text is appended under an error marker.

    Example:
        scrape_codebase(
            directory="/path/to/repo",
            depth="deep",
            build_api_reference=True
        )
    """

    def _as_cli_value(value) -> str:
        """Convert one argument into a single argv string ("" when unset)."""
        if not value:
            return ""
        if isinstance(value, (list, tuple)):
            # Clients may send a JSON array; the CLI flags take comma-separated text.
            return ",".join(str(item) for item in value)
        return str(value)

    directory = args.get("directory")
    if not directory:
        return [TextContent(type="text", text="❌ Error: directory parameter is required")]

    # Every argv element must be a string, otherwise spawning the process
    # fails with a TypeError instead of a readable tool error.
    directory = str(directory)
    output = _as_cli_value(args.get("output", "output/codebase/"))
    depth = _as_cli_value(args.get("depth", "deep"))
    languages = _as_cli_value(args.get("languages", ""))
    file_patterns = _as_cli_value(args.get("file_patterns", ""))
    build_api_reference = args.get("build_api_reference", False)

    # Build command; optional flags are only passed when they carry a value.
    cmd = [sys.executable, "-m", "skill_seekers.cli.codebase_scraper"]
    cmd.extend(["--directory", directory])
    optional_flags = (
        ("--output", output),
        ("--depth", depth),
        ("--languages", languages),
        ("--file-patterns", file_patterns),
    )
    for flag, value in optional_flags:
        if value:
            cmd.extend([flag, value])
    if build_api_reference:
        cmd.append("--build-api-reference")

    timeout = 600  # 10 minutes for codebase analysis

    progress_msg = "🔍 Analyzing local codebase...\n"
    progress_msg += f"📁 Directory: {directory}\n"
    progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"

    # NOTE(review): this helper is called without `await`, so it presumably
    # blocks this coroutine until the subprocess finishes — confirm.
    stdout, stderr, returncode = run_subprocess_with_streaming(cmd, timeout=timeout)

    output_text = progress_msg + stdout
    if returncode == 0:
        return [TextContent(type="text", text=output_text)]
    return [TextContent(type="text", text=f"{output_text}\n\n❌ Error:\n{stderr}")]