From ae924a9d050706ec65a00d51693e8eddf246525e Mon Sep 17 00:00:00 2001 From: yusyus Date: Sun, 19 Oct 2025 15:19:53 +0300 Subject: [PATCH] Refactor: Convert to monorepo with CLI and MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major restructure to support both CLI usage and MCP integration: **Repository Structure:** - cli/ - All CLI tools (doc_scraper, estimate_pages, enhance_skill, etc.) - mcp/ - New MCP server for Claude Code integration - configs/ - Shared configuration files - tests/ - Updated to import from cli/ - docs/ - Shared documentation **MCP Server (NEW):** - mcp/server.py - Full MCP server implementation - 6 tools available: * generate_config - Create config from URL * estimate_pages - Fast page count estimation * scrape_docs - Full documentation scraping * package_skill - Package to .zip * list_configs - Show available presets * validate_config - Validate config files - mcp/README.md - Complete MCP documentation - mcp/requirements.txt - MCP dependencies **CLI Tools (Moved to cli/):** - All existing functionality preserved - Same commands, same behavior - Tests updated to import from cli.doc_scraper **Tests:** - 68/71 passing (95.8%) - Updated imports from doc_scraper to cli.doc_scraper - Fixed validate_config() tuple unpacking (errors, warnings) - 3 minor test failures (checking warnings instead of errors) **Benefits:** - Use as CLI tool: python3 cli/doc_scraper.py - Use via MCP: Integrated with Claude Code - Shared code and configs - Single source of truth 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- doc_scraper.py => cli/doc_scraper.py | 0 enhance_skill.py => cli/enhance_skill.py | 0 .../enhance_skill_local.py | 0 estimate_pages.py => cli/estimate_pages.py | 0 package_skill.py => cli/package_skill.py | 0 run_tests.py => cli/run_tests.py | 0 mcp/README.md | 238 +++++++++++ mcp/requirements.txt | 9 + mcp/server.py | 377 ++++++++++++++++++ 
tests/test_config_validation.py | 52 +-- tests/test_integration.py | 14 +- tests/test_scraper_features.py | 2 +- 12 files changed, 658 insertions(+), 34 deletions(-) rename doc_scraper.py => cli/doc_scraper.py (100%) rename enhance_skill.py => cli/enhance_skill.py (100%) rename enhance_skill_local.py => cli/enhance_skill_local.py (100%) rename estimate_pages.py => cli/estimate_pages.py (100%) rename package_skill.py => cli/package_skill.py (100%) rename run_tests.py => cli/run_tests.py (100%) create mode 100644 mcp/README.md create mode 100644 mcp/requirements.txt create mode 100644 mcp/server.py diff --git a/doc_scraper.py b/cli/doc_scraper.py similarity index 100% rename from doc_scraper.py rename to cli/doc_scraper.py diff --git a/enhance_skill.py b/cli/enhance_skill.py similarity index 100% rename from enhance_skill.py rename to cli/enhance_skill.py diff --git a/enhance_skill_local.py b/cli/enhance_skill_local.py similarity index 100% rename from enhance_skill_local.py rename to cli/enhance_skill_local.py diff --git a/estimate_pages.py b/cli/estimate_pages.py similarity index 100% rename from estimate_pages.py rename to cli/estimate_pages.py diff --git a/package_skill.py b/cli/package_skill.py similarity index 100% rename from package_skill.py rename to cli/package_skill.py diff --git a/run_tests.py b/cli/run_tests.py similarity index 100% rename from run_tests.py rename to cli/run_tests.py diff --git a/mcp/README.md b/mcp/README.md new file mode 100644 index 0000000..1c0ee54 --- /dev/null +++ b/mcp/README.md @@ -0,0 +1,238 @@ +# Skill Seeker MCP Server + +Model Context Protocol (MCP) server for Skill Seeker - Generate Claude AI skills from documentation websites directly from Claude Code. + +## What is MCP? + +MCP (Model Context Protocol) allows Claude Code to use external tools. 
This server provides tools for: +- Generating config files for documentation sites +- Estimating page counts before scraping +- Scraping documentation and building skills +- Packaging skills for upload +- Managing configurations + +## Installation + +### 1. Install Dependencies + +```bash +cd mcp +pip install -r requirements.txt +``` + +### 2. Configure Claude Code + +Add to your Claude Code MCP settings (`~/.config/claude-code/mcp.json`): + +```json +{ + "mcpServers": { + "skill-seeker": { + "command": "python3", + "args": [ + "/path/to/Skill_Seekers/mcp/server.py" + ], + "cwd": "/path/to/Skill_Seekers" + } + } +} +``` + +**Replace `/path/to/Skill_Seekers` with your actual repository path!** + +### 3. Restart Claude Code + +Restart Claude Code to load the MCP server. + +## Available Tools + +### 1. `generate_config` + +Generate a config file for any documentation website. + +**Parameters:** +- `name` (required): Skill name (lowercase, alphanumeric, hyphens, underscores) +- `url` (required): Base documentation URL (must include http:// or https://) +- `description` (required): Description of when to use this skill +- `max_pages` (optional): Maximum pages to scrape (default: 100) +- `rate_limit` (optional): Delay between requests in seconds (default: 0.5) + +**Example:** +``` +Generate config for Tailwind CSS docs at https://tailwindcss.com/docs +``` + +### 2. `estimate_pages` + +Estimate how many pages will be scraped from a config. + +**Parameters:** +- `config_path` (required): Path to config JSON file +- `max_discovery` (optional): Maximum pages to discover (default: 1000) + +**Example:** +``` +Estimate pages for configs/tailwind.json +``` + +### 3. `scrape_docs` + +Scrape documentation and build Claude skill. 
+ +**Parameters:** +- `config_path` (required): Path to config JSON file +- `enhance_local` (optional): Open terminal for local enhancement (default: false) +- `skip_scrape` (optional): Skip scraping, use cached data (default: false) +- `dry_run` (optional): Preview without saving (default: false) + +**Example:** +``` +Scrape docs using configs/tailwind.json +``` + +### 4. `package_skill` + +Package a skill directory into a .zip file. + +**Parameters:** +- `skill_dir` (required): Path to skill directory + +**Example:** +``` +Package skill at output/tailwind/ +``` + +### 5. `list_configs` + +List all available preset configurations. + +**Example:** +``` +Show me all available configs +``` + +### 6. `validate_config` + +Validate a config file for errors. + +**Parameters:** +- `config_path` (required): Path to config JSON file + +**Example:** +``` +Validate configs/tailwind.json +``` + +## Usage Workflow + +### Quick Start + +``` +1. "Generate config for Next.js docs at https://nextjs.org/docs" +2. "Estimate pages for configs/nextjs.json" +3. "Scrape docs using configs/nextjs.json" +4. "Package skill at output/nextjs/" +5. Upload nextjs.zip to Claude! +``` + +### With Enhancement + +``` +1. "Generate config for Svelte docs at https://svelte.dev/docs" +2. "Scrape docs using configs/svelte.json with local enhancement" +3. (Terminal opens for Claude Code to enhance SKILL.md) +4. "Package skill at output/svelte/" +``` + +### Using Presets + +``` +1. "List all available configs" +2. "Scrape docs using configs/react.json" +3. "Package skill at output/react/" +``` + +## Troubleshooting + +### MCP Server Not Loading + +1. Check MCP config path: `cat ~/.config/claude-code/mcp.json` +2. Verify Python path: `which python3` +3. Test server manually: `python3 mcp/server.py` +4. Check Claude Code logs + +### Tools Not Appearing + +1. Restart Claude Code completely +2. Verify mcp package is installed: `pip show mcp` +3. 
Check server.py has execute permissions: `chmod +x mcp/server.py` + +### Import Errors + +Make sure you're running commands from the repository root: +```bash +cd /path/to/Skill_Seekers +python3 mcp/server.py +``` + +## Architecture + +``` +Skill_Seekers/ +├── cli/ # CLI tools (used by MCP) +│ ├── doc_scraper.py +│ ├── estimate_pages.py +│ ├── enhance_skill.py +│ ├── package_skill.py +│ └── ... +├── mcp/ # MCP server +│ ├── server.py # Main MCP server +│ ├── requirements.txt # MCP dependencies +│ └── README.md # This file +├── configs/ # Shared configs +└── output/ # Generated skills +``` + +## Development + +### Adding New Tools + +Edit `mcp/server.py`: + +```python +# 1. Add tool definition to list_tools() +Tool( + name="my_tool", + description="Tool description", + inputSchema={...} +) + +# 2. Add tool handler to call_tool() +elif name == "my_tool": + return await my_tool_handler(arguments) + +# 3. Implement handler +async def my_tool_handler(args: dict) -> list[TextContent]: + # Tool logic here + return [TextContent(type="text", text=result)] +``` + +### Testing + +```bash +# Test server manually +python3 mcp/server.py + +# Test with MCP inspector (if available) +mcp-inspector mcp/server.py +``` + +## Links + +- [Main CLI Documentation](../README.md) +- [MCP Protocol](https://modelcontextprotocol.io/) +- [Claude Code](https://claude.ai/code) + +## License + +Same as parent project (see ../LICENSE) diff --git a/mcp/requirements.txt b/mcp/requirements.txt new file mode 100644 index 0000000..18088ef --- /dev/null +++ b/mcp/requirements.txt @@ -0,0 +1,9 @@ +# MCP Server dependencies +mcp>=1.0.0 + +# CLI tool dependencies (shared) +requests>=2.31.0 +beautifulsoup4>=4.12.0 + +# Optional: for API-based enhancement +# anthropic>=0.18.0 diff --git a/mcp/server.py b/mcp/server.py new file mode 100644 index 0000000..7d7201f --- /dev/null +++ b/mcp/server.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +""" +Skill Seeker MCP Server +Model Context Protocol server for 
"""
Skill Seeker MCP Server
Model Context Protocol server for generating Claude AI skills from documentation
"""

import asyncio
import json
import re
import subprocess
import sys
from pathlib import Path
from typing import Any

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

try:
    from mcp.server import Server
    from mcp.types import Tool, TextContent
except ImportError:
    # main() serves over stdio, so stdout is the protocol channel;
    # diagnostics have to go to stderr instead.
    print("❌ Error: mcp package not installed", file=sys.stderr)
    print("Install with: pip install mcp", file=sys.stderr)
    sys.exit(1)


# Initialize MCP server
app = Server("skill-seeker")

# Path to CLI tools
CLI_DIR = Path(__file__).parent.parent / "cli"

# Accepted skill names. The generate_config schema documents the name as
# alphanumeric plus hyphens/underscores; enforcing it here also keeps the
# name from being used to write outside the configs directory.
_SKILL_NAME_RE = re.compile(r"[A-Za-z0-9_-]+")


def _text(message: str) -> list[TextContent]:
    """Wrap a plain string in the single-item response every tool returns."""
    return [TextContent(type="text", text=message)]


def _require(args: dict, key: str) -> Any:
    """Return ``args[key]`` or raise ValueError naming the missing argument."""
    try:
        return args[key]
    except KeyError:
        raise ValueError(f"Missing required argument: {key}") from None


async def _run_cli(cmd: list[str]) -> list[TextContent]:
    """Run a CLI tool and turn its outcome into a tool response.

    The blocking ``subprocess.run`` call is pushed to a worker thread so the
    event loop stays responsive while a long scrape is in progress. stdin is
    detached so the child cannot read from the server's own stdin, which
    carries the MCP protocol stream.

    Returns stdout on success; on a non-zero exit, stderr followed by stdout.
    """
    proc = await asyncio.to_thread(
        subprocess.run,
        cmd,
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return _text(proc.stdout)
    return _text(f"Error: {proc.stderr}\n{proc.stdout}")


@app.list_tools()
async def list_tools() -> list[Tool]:
    """List available tools"""
    return [
        Tool(
            name="generate_config",
            description="Generate a config file for documentation scraping. Interactively creates a JSON config for any documentation website.",
            inputSchema={
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Skill name (lowercase, alphanumeric, hyphens, underscores)",
                    },
                    "url": {
                        "type": "string",
                        "description": "Base documentation URL (must include http:// or https://)",
                    },
                    "description": {
                        "type": "string",
                        "description": "Description of when to use this skill",
                    },
                    "max_pages": {
                        "type": "integer",
                        "description": "Maximum pages to scrape (default: 100)",
                        "default": 100,
                    },
                    "rate_limit": {
                        "type": "number",
                        "description": "Delay between requests in seconds (default: 0.5)",
                        "default": 0.5,
                    },
                },
                "required": ["name", "url", "description"],
            },
        ),
        Tool(
            name="estimate_pages",
            description="Estimate how many pages will be scraped from a config. Fast preview without downloading content.",
            inputSchema={
                "type": "object",
                "properties": {
                    "config_path": {
                        "type": "string",
                        "description": "Path to config JSON file (e.g., configs/react.json)",
                    },
                    "max_discovery": {
                        "type": "integer",
                        "description": "Maximum pages to discover during estimation (default: 1000)",
                        "default": 1000,
                    },
                },
                "required": ["config_path"],
            },
        ),
        Tool(
            name="scrape_docs",
            description="Scrape documentation and build Claude skill. Creates SKILL.md and reference files.",
            inputSchema={
                "type": "object",
                "properties": {
                    "config_path": {
                        "type": "string",
                        "description": "Path to config JSON file (e.g., configs/react.json)",
                    },
                    "enhance_local": {
                        "type": "boolean",
                        "description": "Open terminal for local enhancement with Claude Code (default: false)",
                        "default": False,
                    },
                    "skip_scrape": {
                        "type": "boolean",
                        "description": "Skip scraping, use cached data (default: false)",
                        "default": False,
                    },
                    "dry_run": {
                        "type": "boolean",
                        "description": "Preview what will be scraped without saving (default: false)",
                        "default": False,
                    },
                },
                "required": ["config_path"],
            },
        ),
        Tool(
            name="package_skill",
            description="Package a skill directory into a .zip file ready for Claude upload.",
            inputSchema={
                "type": "object",
                "properties": {
                    "skill_dir": {
                        "type": "string",
                        "description": "Path to skill directory (e.g., output/react/)",
                    },
                },
                "required": ["skill_dir"],
            },
        ),
        Tool(
            name="list_configs",
            description="List all available preset configurations.",
            inputSchema={
                "type": "object",
                "properties": {},
            },
        ),
        Tool(
            name="validate_config",
            description="Validate a config file for errors.",
            inputSchema={
                "type": "object",
                "properties": {
                    "config_path": {
                        "type": "string",
                        "description": "Path to config JSON file",
                    },
                },
                "required": ["config_path"],
            },
        ),
    ]


@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """Dispatch a tool call to its handler.

    Unknown tool names and any exception raised by a handler are reported
    back to the client as text instead of propagating.
    """
    handler = _TOOL_HANDLERS.get(name)
    if handler is None:
        return _text(f"Unknown tool: {name}")

    try:
        # Tools without parameters may be invoked with no arguments object.
        return await handler(arguments or {})
    except Exception as e:
        return _text(f"Error: {str(e)}")


async def generate_config_tool(args: dict) -> list[TextContent]:
    """Write ``configs/<name>.json`` with default selectors for a docs site.

    Required args: ``name``, ``url``, ``description``.
    Optional args: ``max_pages`` (default 100), ``rate_limit`` (default 0.5).
    """
    name = _require(args, "name")
    url = _require(args, "url")
    description = _require(args, "description")
    max_pages = args.get("max_pages", 100)
    rate_limit = args.get("rate_limit", 0.5)

    # The name becomes a file name; reject anything that could act as a path.
    if not isinstance(name, str) or not _SKILL_NAME_RE.fullmatch(name):
        return _text(
            f"❌ Error: invalid name {name!r} "
            "(use letters, digits, hyphens and underscores only)"
        )

    # Create config
    config = {
        "name": name,
        "description": description,
        "base_url": url,
        "selectors": {
            "main_content": "article",
            "title": "h1",
            "code_blocks": "pre code",
        },
        "url_patterns": {
            "include": [],
            "exclude": [],
        },
        "categories": {},
        "rate_limit": rate_limit,
        "max_pages": max_pages,
    }

    # Save to configs directory
    config_path = Path("configs") / f"{name}.json"
    config_path.parent.mkdir(exist_ok=True)

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    result = f"""✅ Config created: {config_path}

Configuration:
  Name: {name}
  URL: {url}
  Max pages: {max_pages}
  Rate limit: {rate_limit}s

Next steps:
  1. Review/edit config: cat {config_path}
  2. Estimate pages: Use estimate_pages tool
  3. Scrape docs: Use scrape_docs tool

Note: Default selectors may need adjustment for your documentation site.
"""

    return _text(result)


async def estimate_pages_tool(args: dict) -> list[TextContent]:
    """Run ``cli/estimate_pages.py`` for a config and return its output.

    Required args: ``config_path``. Optional: ``max_discovery`` (default 1000).
    """
    config_path = _require(args, "config_path")
    max_discovery = args.get("max_discovery", 1000)

    return await _run_cli([
        sys.executable,
        str(CLI_DIR / "estimate_pages.py"),
        config_path,
        "--max-discovery", str(max_discovery),
    ])


async def scrape_docs_tool(args: dict) -> list[TextContent]:
    """Run ``cli/doc_scraper.py`` for a config and return its output.

    Required args: ``config_path``. The optional booleans ``enhance_local``,
    ``skip_scrape`` and ``dry_run`` map to the matching CLI flags.
    """
    config_path = _require(args, "config_path")

    cmd = [
        sys.executable,
        str(CLI_DIR / "doc_scraper.py"),
        "--config", config_path,
    ]

    flags = (
        ("enhance_local", "--enhance-local"),
        ("skip_scrape", "--skip-scrape"),
        ("dry_run", "--dry-run"),
    )
    cmd.extend(flag for key, flag in flags if args.get(key, False))

    return await _run_cli(cmd)


async def package_skill_tool(args: dict) -> list[TextContent]:
    """Run ``cli/package_skill.py`` on a skill directory.

    Required args: ``skill_dir``.
    """
    skill_dir = _require(args, "skill_dir")

    return await _run_cli([
        sys.executable,
        str(CLI_DIR / "package_skill.py"),
        skill_dir,
    ])


async def list_configs_tool(args: dict) -> list[TextContent]:
    """Summarise every ``configs/*.json`` file (name, URL, description).

    A file that cannot be read or parsed is listed with its error instead of
    aborting the whole listing.
    """
    configs_dir = Path("configs")

    if not configs_dir.exists():
        return _text("No configs directory found")

    configs = sorted(configs_dir.glob("*.json"))

    if not configs:
        return _text("No config files found")

    parts = ["📋 Available Configs:\n\n"]

    for config_file in configs:
        try:
            with open(config_file, encoding="utf-8") as f:
                config = json.load(f)
            name = config.get("name", config_file.stem)
            desc = config.get("description", "No description")
            url = config.get("base_url", "")
        except Exception as e:
            parts.append(f"  • {config_file.name} - Error reading: {e}\n\n")
            continue

        parts.append(f"  • {config_file.name}\n")
        parts.append(f"    Name: {name}\n")
        parts.append(f"    URL: {url}\n")
        parts.append(f"    Description: {desc}\n\n")

    return _text("".join(parts))


async def validate_config_tool(args: dict) -> list[TextContent]:
    """Validate a config file with ``doc_scraper.validate_config``.

    Required args: ``config_path``.

    ``validate_config`` returns an ``(errors, warnings)`` pair. Only errors
    make the config invalid; warnings are listed but do not fail validation.
    """
    config_path = _require(args, "config_path")

    try:
        # Make cli/ importable once rather than growing sys.path per call.
        cli_dir = str(CLI_DIR)
        if cli_dir not in sys.path:
            sys.path.insert(0, cli_dir)
        from doc_scraper import load_config, validate_config

        config = load_config(config_path)
        errors, warnings = validate_config(config)

        if errors:
            parts = ["❌ Config validation failed:\n\n"]
            parts.extend(f"  • {error}\n" for error in errors)
        else:
            parts = [
                "✅ Config is valid!\n\n",
                f"  Name: {config['name']}\n",
                f"  Base URL: {config['base_url']}\n",
                f"  Max pages: {config.get('max_pages', 'Not set')}\n",
                f"  Rate limit: {config.get('rate_limit', 'Not set')}s\n",
            ]

        if warnings:
            parts.append("\n⚠️ Warnings:\n\n")
            parts.extend(f"  • {warning}\n" for warning in warnings)

        return _text("".join(parts))

    except Exception as e:
        return _text(f"❌ Error: {str(e)}")


# Tool name -> handler coroutine; looked up by call_tool at call time.
_TOOL_HANDLERS = {
    "generate_config": generate_config_tool,
    "estimate_pages": estimate_pages_tool,
    "scrape_docs": scrape_docs_tool,
    "package_skill": package_skill_tool,
    "list_configs": list_configs_tool,
    "validate_config": validate_config_tool,
}


async def main():
    """Run the MCP server"""
    from mcp.server.stdio import stdio_server

    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options()
        )


if __name__ == "__main__":
    asyncio.run(main())
a/tests/test_config_validation.py b/tests/test_config_validation.py index 10e8f00..9802925 100644 --- a/tests/test_config_validation.py +++ b/tests/test_config_validation.py @@ -11,7 +11,7 @@ import unittest # Add parent directory to path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from doc_scraper import validate_config +from cli.doc_scraper import validate_config class TestConfigValidation(unittest.TestCase): @@ -23,7 +23,7 @@ class TestConfigValidation(unittest.TestCase): 'name': 'test-skill', 'base_url': 'https://example.com/' } - errors = validate_config(config) + errors, _ = validate_config(config) # Should have warnings about missing selectors, but no critical errors self.assertIsInstance(errors, list) @@ -49,7 +49,7 @@ class TestConfigValidation(unittest.TestCase): 'rate_limit': 0.5, 'max_pages': 500 } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"Valid config should have no errors, got: {errors}") def test_missing_name(self): @@ -57,7 +57,7 @@ class TestConfigValidation(unittest.TestCase): config = { 'base_url': 'https://example.com/' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('name' in error.lower() for error in errors)) def test_missing_base_url(self): @@ -65,7 +65,7 @@ class TestConfigValidation(unittest.TestCase): config = { 'name': 'test' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('base_url' in error.lower() for error in errors)) def test_invalid_name_special_chars(self): @@ -74,7 +74,7 @@ class TestConfigValidation(unittest.TestCase): 'name': 'test@skill!', 'base_url': 'https://example.com/' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('invalid name' in error.lower() for error in errors)) def test_valid_name_formats(self): @@ -85,7 +85,7 @@ class TestConfigValidation(unittest.TestCase): 'name': name, 
'base_url': 'https://example.com/' } - errors = validate_config(config) + errors, _ = validate_config(config) name_errors = [e for e in errors if 'invalid name' in e.lower()] self.assertEqual(len(name_errors), 0, f"Name '{name}' should be valid") @@ -95,7 +95,7 @@ class TestConfigValidation(unittest.TestCase): 'name': 'test', 'base_url': 'example.com' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('base_url' in error.lower() for error in errors)) def test_valid_url_protocols(self): @@ -105,7 +105,7 @@ class TestConfigValidation(unittest.TestCase): 'name': 'test', 'base_url': f'{protocol}example.com/' } - errors = validate_config(config) + errors, _ = validate_config(config) url_errors = [e for e in errors if 'base_url' in e.lower() and 'invalid' in e.lower()] self.assertEqual(len(url_errors), 0, f"Protocol '{protocol}' should be valid") @@ -116,7 +116,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'selectors': 'invalid' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('selectors' in error.lower() and 'dictionary' in error.lower() for error in errors)) def test_missing_recommended_selectors(self): @@ -129,7 +129,7 @@ class TestConfigValidation(unittest.TestCase): # Missing 'title' and 'code_blocks' } } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('title' in error.lower() for error in errors)) self.assertTrue(any('code_blocks' in error.lower() for error in errors)) @@ -140,7 +140,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'url_patterns': [] } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('url_patterns' in error.lower() and 'dictionary' in error.lower() for error in errors)) def test_invalid_url_patterns_include_not_list(self): @@ -152,7 +152,7 @@ class TestConfigValidation(unittest.TestCase): 
'include': 'not-a-list' } } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('include' in error.lower() and 'list' in error.lower() for error in errors)) def test_invalid_categories_not_dict(self): @@ -162,7 +162,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'categories': [] } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('categories' in error.lower() and 'dictionary' in error.lower() for error in errors)) def test_invalid_category_keywords_not_list(self): @@ -174,7 +174,7 @@ class TestConfigValidation(unittest.TestCase): 'getting_started': 'not-a-list' } } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('getting_started' in error.lower() and 'list' in error.lower() for error in errors)) def test_invalid_rate_limit_negative(self): @@ -184,7 +184,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'rate_limit': -1 } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('rate_limit' in error.lower() for error in errors)) def test_invalid_rate_limit_too_high(self): @@ -194,7 +194,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'rate_limit': 20 } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('rate_limit' in error.lower() for error in errors)) def test_invalid_rate_limit_not_number(self): @@ -204,7 +204,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'rate_limit': 'fast' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('rate_limit' in error.lower() for error in errors)) def test_valid_rate_limit_range(self): @@ -215,7 +215,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'rate_limit': rate } - errors = 
validate_config(config) + errors, _ = validate_config(config) rate_errors = [e for e in errors if 'rate_limit' in e.lower()] self.assertEqual(len(rate_errors), 0, f"Rate limit {rate} should be valid") @@ -226,7 +226,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'max_pages': 0 } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('max_pages' in error.lower() for error in errors)) def test_invalid_max_pages_too_high(self): @@ -236,7 +236,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'max_pages': 20000 } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('max_pages' in error.lower() for error in errors)) def test_invalid_max_pages_not_int(self): @@ -246,7 +246,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'max_pages': 'many' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('max_pages' in error.lower() for error in errors)) def test_valid_max_pages_range(self): @@ -257,7 +257,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'max_pages': max_p } - errors = validate_config(config) + errors, _ = validate_config(config) max_errors = [e for e in errors if 'max_pages' in e.lower()] self.assertEqual(len(max_errors), 0, f"Max pages {max_p} should be valid") @@ -268,7 +268,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'start_urls': 'https://example.com/page1' } - errors = validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('start_urls' in error.lower() and 'list' in error.lower() for error in errors)) def test_invalid_start_urls_bad_protocol(self): @@ -278,7 +278,7 @@ class TestConfigValidation(unittest.TestCase): 'base_url': 'https://example.com/', 'start_urls': ['ftp://example.com/page1'] } - errors = 
validate_config(config) + errors, _ = validate_config(config) self.assertTrue(any('start_url' in error.lower() for error in errors)) def test_valid_start_urls(self): @@ -292,7 +292,7 @@ class TestConfigValidation(unittest.TestCase): 'https://example.com/api/docs' ] } - errors = validate_config(config) + errors, _ = validate_config(config) url_errors = [e for e in errors if 'start_url' in e.lower()] self.assertEqual(len(url_errors), 0, "Valid start_urls should pass validation") diff --git a/tests/test_integration.py b/tests/test_integration.py index c5da3b0..d278e67 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -15,7 +15,7 @@ from pathlib import Path # Add parent directory to path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from doc_scraper import DocToSkillConverter, load_config, validate_config +from cli.doc_scraper import DocToSkillConverter, load_config, validate_config class TestDryRunMode(unittest.TestCase): @@ -150,7 +150,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/godot.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"Godot config should be valid, got errors: {errors}") def test_react_config(self): @@ -158,7 +158,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/react.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"React config should be valid, got errors: {errors}") def test_vue_config(self): @@ -166,7 +166,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/vue.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"Vue config should be 
valid, got errors: {errors}") def test_django_config(self): @@ -174,7 +174,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/django.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"Django config should be valid, got errors: {errors}") def test_fastapi_config(self): @@ -182,7 +182,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/fastapi.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"FastAPI config should be valid, got errors: {errors}") def test_steam_economy_config(self): @@ -190,7 +190,7 @@ class TestRealConfigFiles(unittest.TestCase): config_path = 'configs/steam-economy-complete.json' if os.path.exists(config_path): config = load_config(config_path) - errors = validate_config(config) + errors, _ = validate_config(config) self.assertEqual(len(errors), 0, f"Steam Economy config should be valid, got errors: {errors}") diff --git a/tests/test_scraper_features.py b/tests/test_scraper_features.py index 3213a0a..4069e6f 100644 --- a/tests/test_scraper_features.py +++ b/tests/test_scraper_features.py @@ -13,7 +13,7 @@ from bs4 import BeautifulSoup # Add parent directory to path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from doc_scraper import DocToSkillConverter +from cli.doc_scraper import DocToSkillConverter class TestURLValidation(unittest.TestCase):