From 4f87de6b5657ad50bf627b383404008113217971 Mon Sep 17 00:00:00 2001 From: yusyus Date: Fri, 20 Mar 2026 22:12:23 +0300 Subject: [PATCH] fix: improve MiniMax adaptor from PR #318 review (#319) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add MiniMax AI as LLM platform adaptor Original implementation by octo-patch in PR #318. This commit includes comprehensive improvements and documentation. Code Improvements: - Fix API key validation to properly check JWT format (eyJ prefix) - Add specific exception handling for timeout and connection errors - Remove unused variable in upload method Dependencies: - Add MiniMax to [all-llms] extra group in pyproject.toml Tests: - Remove duplicate setUp method in integration test class - Add 5 new test methods: * test_package_excludes_backup_files * test_upload_success_mocked (with OpenAI mocking) * test_upload_network_error * test_upload_connection_error * test_validate_api_key_jwt_format - Update test_validate_api_key_valid to use JWT format keys - Fix test assertions for error message matching Documentation: - Create comprehensive MINIMAX_INTEGRATION.md guide (380+ lines) - Update MULTI_LLM_SUPPORT.md with MiniMax platform entry - Update 01-installation.md extras table - Update INTEGRATIONS.md AI platforms table - Update AGENTS.md adaptor import pattern example - Fix README.md platform count from 4 to 5 All tests pass (33 passed, 3 skipped) Lint checks pass Co-authored-by: octo-patch * fix: improve MiniMax adaptor β€” typed exceptions, key validation, tests, docs - Remove invalid "minimax" self-reference from all-llms dependency group - Use typed OpenAI exceptions (APITimeoutError, APIConnectionError) instead of string-matching on generic Exception - Replace incorrect JWT assumption in validate_api_key with length check - Use DEFAULT_API_ENDPOINT constant instead of hardcoded URLs (3 sites) - Add Path() cast for output_path before .is_dir() call - Add sys.modules mock to 
test_enhance_missing_library - Add mocked test_enhance_success with backup/content verification - Update test assertions for new exception types and key validation - Add MiniMax to __init__.py docstrings (module, get_adaptor, list_platforms) - Add MiniMax sections to MULTI_LLM_SUPPORT.md (install, format, API key, workflow example, export-to-all) Follows up on PR #318 by @octo-patch (feat: add MiniMax AI as LLM platform adaptor). Co-Authored-By: Octopus Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: octo-patch Co-authored-by: Claude Opus 4.6 (1M context) --- AGENTS.md | 4 +- CLAUDE.md | 2511 ++----------------- README.md | 27 +- docs/getting-started/01-installation.md | 3 + docs/integrations/INTEGRATIONS.md | 3 +- docs/integrations/MINIMAX_INTEGRATION.md | 391 +++ docs/integrations/MULTI_LLM_SUPPORT.md | 49 +- pyproject.toml | 5 + src/skill_seekers/cli/adaptors/__init__.py | 14 +- src/skill_seekers/cli/adaptors/minimax.py | 503 ++++ tests/test_adaptors/test_minimax_adaptor.py | 517 ++++ uv.lock | 8 +- 12 files changed, 1676 insertions(+), 2359 deletions(-) create mode 100644 docs/integrations/MINIMAX_INTEGRATION.md create mode 100644 src/skill_seekers/cli/adaptors/minimax.py create mode 100644 tests/test_adaptors/test_minimax_adaptor.py diff --git a/AGENTS.md b/AGENTS.md index 1afdc9a..9f69468 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -51,7 +51,7 @@ mypy src/skill_seekers --show-error-codes --pretty **Pytest config** (from pyproject.toml): `addopts = "-v --tb=short --strict-markers"`, `asyncio_mode = "auto"`, `asyncio_default_fixture_loop_scope = "function"`. **Test markers:** `slow`, `integration`, `e2e`, `venv`, `bootstrap`, `benchmark`, `asyncio`. **Async tests:** use `@pytest.mark.asyncio`; asyncio_mode is `auto` so the decorator is often implicit. -**Test count:** 120 test files (107 in `tests/`, 13 in `tests/test_adaptors/`). +**Test count:** 123 test files (107 in `tests/`, 16 in `tests/test_adaptors/`). 
## Code Style @@ -69,8 +69,10 @@ mypy src/skill_seekers --show-error-codes --pretty ```python try: from .claude import ClaudeAdaptor + from .minimax import MiniMaxAdaptor except ImportError: ClaudeAdaptor = None + MiniMaxAdaptor = None ``` ### Naming Conventions diff --git a/CLAUDE.md b/CLAUDE.md index 3615cf0..185526c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,2389 +2,218 @@ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. -## 🎯 Project Overview +## Project Overview -**Skill Seekers** is the **universal documentation preprocessor** for AI systems. It transforms documentation websites, GitHub repositories, PDFs, and EPUBs into production-ready formats for **16+ platforms**: RAG pipelines (LangChain, LlamaIndex, Haystack), vector databases (Pinecone, Chroma, Weaviate, FAISS, Qdrant), AI coding assistants (Cursor, Windsurf, Cline, Continue.dev), and LLM platforms (Claude, Gemini, OpenAI). +**Skill Seekers** converts documentation from 17 source types into production-ready formats for 16+ AI platforms (LLM platforms, RAG frameworks, vector databases, AI coding assistants). Published on PyPI as `skill-seekers`. -**Current Version:** v3.1.3 -**Python Version:** 3.10+ required -**Status:** Production-ready, published on PyPI -**Website:** https://skillseekersweb.com/ - Browse configs, share, and access documentation +**Version:** 3.3.0 | **Python:** 3.10+ | **Website:** https://skillseekersweb.com/ -## πŸ“š Table of Contents - -- [First Time Here?](#-first-time-here) - Start here! -- [Quick Commands](#-quick-command-reference-most-used) - Common workflows -- [Architecture](#️-architecture) - How it works -- [Development](#️-development-commands) - Building & testing -- [Testing](#-testing-guidelines) - Test strategy -- [Debugging](#-debugging-tips) - Troubleshooting -- [Contributing](#-where-to-make-changes) - How to add features - -## πŸ‘‹ First Time Here? 
- -**Complete this 3-minute setup to start contributing:** +## Essential Commands ```bash -# 1. Install package in editable mode (REQUIRED for development) +# REQUIRED before running tests or CLI (src/ layout) pip install -e . -# 2. Verify installation -python -c "import skill_seekers; print(skill_seekers.__version__)" # Should print: 3.1.0-dev +# Run all tests (NEVER skip - all must pass before commits) +pytest tests/ -v -# 3. Run a quick test -pytest tests/test_scraper_features.py::test_detect_language -v +# Fast iteration (skip slow MCP tests ~20min) +pytest tests/ --ignore=tests/test_mcp_fastmcp.py --ignore=tests/test_mcp_server.py --ignore=tests/test_install_skill_e2e.py -q -# 4. You're ready! Pick a task from the roadmap: -# https://github.com/users/yusufkaraaslan/projects/2 +# Single test +pytest tests/test_scraper_features.py::test_detect_language -vv -s + +# Code quality (must pass before push - matches CI) +uvx ruff check src/ tests/ +uvx ruff format --check src/ tests/ +mypy src/skill_seekers # continue-on-error in CI + +# Auto-fix lint/format issues +uvx ruff check --fix --unsafe-fixes src/ tests/ +uvx ruff format src/ tests/ + +# Build & publish +uv build +uv publish ``` -**Quick Navigation:** -- Building/Testing β†’ [Development Commands](#️-development-commands) -- Architecture β†’ [Core Design Pattern](#️-architecture) -- Common Issues β†’ [Common Pitfalls](#-common-pitfalls--solutions) -- Contributing β†’ See `CONTRIBUTING.md` +## CI Matrix -## ⚑ Quick Command Reference (Most Used) +Runs on push/PR to `main` or `development`. Lint job (Python 3.12, Ubuntu) + Test job (Ubuntu + macOS, Python 3.10/3.11/3.12, excludes macOS+3.10). Both must pass for merge. -**First time setup:** -```bash -pip install -e . 
# REQUIRED before running tests or CLI +## Git Workflow + +- **Main branch:** `main` (requires tests + 1 review) +- **Development branch:** `development` (default PR target, requires tests) +- **Feature branches:** `feature/{task-id}-{description}` from `development` +- PRs always target `development`, never `main` directly + +## Architecture + +### CLI: Git-style dispatcher + +Entry point `src/skill_seekers/cli/main.py` maps subcommands to modules. The `create` command auto-detects source type and is the recommended entry point for users. + +``` +skill-seekers create # Auto-detect: URL, owner/repo, ./path, file.pdf, etc. +skill-seekers [options] # Direct: scrape, github, pdf, word, epub, video, jupyter, html, openapi, asciidoc, pptx, rss, manpage, confluence, notion, chat +skill-seekers package # Package for platform (--target claude/gemini/openai/markdown, --format langchain/llama-index/haystack/chroma/faiss/weaviate/qdrant) ``` -**Running tests (NEVER skip - user requirement):** -```bash -pytest tests/ -v # All tests -pytest tests/test_scraper_features.py -v # Single file -pytest tests/ --cov=src/skill_seekers --cov-report=html # With coverage -``` +### Data Flow (5 phases) -**Code quality checks (matches CI):** -```bash -ruff check src/ tests/ # Lint -ruff format src/ tests/ # Format -mypy src/skill_seekers # Type check -``` +1. **Scrape** - Source-specific scraper extracts content to `output/{name}_data/pages/*.json` +2. **Build** - `build_skill()` categorizes pages, extracts patterns, generates `output/{name}/SKILL.md` +3. **Enhance** (optional) - LLM rewrites SKILL.md (`--enhance-level 0-3`, auto-detects API vs LOCAL mode) +4. **Package** - Platform adaptor formats output (`.zip`, `.tar.gz`, JSON, vector index) +5. 
**Upload** (optional) - Platform API upload -**Common workflows:** -```bash -# NEW unified create command (auto-detects source type) -skill-seekers create https://docs.react.dev/ -p quick -skill-seekers create facebook/react -p standard -skill-seekers create ./my-project -p comprehensive -skill-seekers create tutorial.pdf - -# Legacy commands (still supported) -skill-seekers scrape --config configs/react.json -skill-seekers github --repo facebook/react -skill-seekers analyze --directory . --comprehensive - -# Package for LLM platforms -skill-seekers package output/react/ --target claude -skill-seekers package output/react/ --target gemini -``` - -**RAG Pipeline workflows:** -```bash -# LangChain Documents -skill-seekers package output/react/ --format langchain - -# LlamaIndex TextNodes -skill-seekers package output/react/ --format llama-index - -# Haystack Documents -skill-seekers package output/react/ --format haystack - -# ChromaDB direct upload -skill-seekers package output/react/ --format chroma --upload - -# FAISS export -skill-seekers package output/react/ --format faiss - -# Weaviate/Qdrant upload (requires API keys) -skill-seekers package output/react/ --format weaviate --upload -skill-seekers package output/react/ --format qdrant --upload -``` - -**AI Coding Assistant workflows:** -```bash -# Cursor IDE -skill-seekers package output/react/ --target claude -cp output/react-claude/SKILL.md .cursorrules - -# Windsurf -cp output/react-claude/SKILL.md .windsurf/rules/react.md - -# Cline (VS Code) -cp output/react-claude/SKILL.md .clinerules - -# Continue.dev (universal IDE) -python examples/continue-dev-universal/context_server.py -# Configure in ~/.continue/config.json -``` - -**Cloud Storage:** -```bash -# Upload to S3 -skill-seekers cloud upload --provider s3 --bucket my-skills output/react.zip - -# Upload to GCS -skill-seekers cloud upload --provider gcs --bucket my-skills output/react.zip - -# Upload to Azure -skill-seekers cloud upload --provider azure 
--container my-skills output/react.zip -``` - -## πŸ—οΈ Architecture - -### Core Design Pattern: Platform Adaptors - -The codebase uses the **Strategy Pattern** with a factory method to support **16 platforms** across 4 categories: +### Platform Adaptor Pattern (Strategy + Factory) ``` src/skill_seekers/cli/adaptors/ -β”œβ”€β”€ __init__.py # Factory: get_adaptor(target/format) -β”œβ”€β”€ base.py # Abstract base class -# LLM Platforms (3) -β”œβ”€β”€ claude.py # Claude AI (ZIP + YAML) -β”œβ”€β”€ gemini.py # Google Gemini (tar.gz) -β”œβ”€β”€ openai.py # OpenAI ChatGPT (ZIP + Vector Store) -# RAG Frameworks (3) -β”œβ”€β”€ langchain.py # LangChain Documents -β”œβ”€β”€ llama_index.py # LlamaIndex TextNodes -β”œβ”€β”€ haystack.py # Haystack Documents -# Vector Databases (5) -β”œβ”€β”€ chroma.py # ChromaDB -β”œβ”€β”€ faiss_helpers.py # FAISS -β”œβ”€β”€ qdrant.py # Qdrant -β”œβ”€β”€ weaviate.py # Weaviate -# AI Coding Assistants (4 - via Claude format + config files) -# - Cursor, Windsurf, Cline, Continue.dev -# Generic (1) -β”œβ”€β”€ markdown.py # Generic Markdown (ZIP) -└── streaming_adaptor.py # Streaming data ingest +β”œβ”€β”€ __init__.py # Factory: get_adaptor(target=..., format=...) 
+β”œβ”€β”€ base_adaptor.py # Abstract base: package(), upload(), enhance(), export() +β”œβ”€β”€ claude_adaptor.py # --target claude +β”œβ”€β”€ gemini_adaptor.py # --target gemini +β”œβ”€β”€ openai_adaptor.py # --target openai +β”œβ”€β”€ markdown_adaptor.py # --target markdown +β”œβ”€β”€ langchain.py # --format langchain +β”œβ”€β”€ llama_index.py # --format llama-index +β”œβ”€β”€ haystack.py # --format haystack +β”œβ”€β”€ chroma.py # --format chroma +β”œβ”€β”€ faiss_helpers.py # --format faiss +β”œβ”€β”€ qdrant.py # --format qdrant +β”œβ”€β”€ weaviate.py # --format weaviate +└── streaming_adaptor.py # --format streaming ``` -**Key Methods:** -- `package(skill_dir, output_path)` - Platform-specific packaging -- `upload(package_path, api_key)` - Platform-specific upload (where applicable) -- `enhance(skill_dir, mode)` - AI enhancement with platform-specific models -- `export(skill_dir, format)` - Export to RAG/vector DB formats +`--target` = LLM platforms, `--format` = RAG/vector DBs. -### Data Flow (5 Phases) +### 17 Source Type Scrapers -1. **Scrape Phase** (`doc_scraper.py:scrape_all()`) - - BFS traversal from base_url - - Output: `output/{name}_data/pages/*.json` +Each in `src/skill_seekers/cli/{type}_scraper.py` with a `main()` entry point. The `create_command.py` uses `source_detector.py` to auto-route. New scrapers added in v3.2.0+: jupyter, html, openapi, asciidoc, pptx, rss, manpage, confluence, notion, chat. -2. **Build Phase** (`doc_scraper.py:build_skill()`) - - Load pages β†’ Categorize β†’ Extract patterns - - Output: `output/{name}/SKILL.md` + `references/*.md` - -3. **Enhancement Phase** (optional, `enhance_skill_local.py`) - - LLM analyzes references β†’ Rewrites SKILL.md - - Platform-specific models (Sonnet 4, Gemini 2.0, GPT-4o) - -4. **Package Phase** (`package_skill.py` β†’ adaptor) - - Platform adaptor packages in appropriate format - - Output: `.zip` or `.tar.gz` - -5. 
**Upload Phase** (optional, `upload_skill.py` β†’ adaptor) - - Upload via platform API - -### File Structure (src/ layout) - Key Files Only +### CLI Argument System ``` -src/skill_seekers/ -β”œβ”€β”€ cli/ # All CLI commands -β”‚ β”œβ”€β”€ main.py # ⭐ Git-style CLI dispatcher -β”‚ β”œβ”€β”€ doc_scraper.py # ⭐ Main scraper (~790 lines) -β”‚ β”‚ β”œβ”€β”€ scrape_all() # BFS traversal engine -β”‚ β”‚ β”œβ”€β”€ smart_categorize() # Category detection -β”‚ β”‚ └── build_skill() # SKILL.md generation -β”‚ β”œβ”€β”€ github_scraper.py # GitHub repo analysis -β”‚ β”œβ”€β”€ codebase_scraper.py # ⭐ Local analysis (C2.x+C3.x) -β”‚ β”œβ”€β”€ package_skill.py # Platform packaging -β”‚ β”œβ”€β”€ unified_scraper.py # Multi-source scraping -β”‚ β”œβ”€β”€ unified_codebase_analyzer.py # Three-stream GitHub+local analyzer -β”‚ β”œβ”€β”€ enhance_skill_local.py # AI enhancement (LOCAL mode) -β”‚ β”œβ”€β”€ enhance_status.py # Enhancement status monitoring -β”‚ β”œβ”€β”€ upload_skill.py # Upload to platforms -β”‚ β”œβ”€β”€ install_skill.py # Complete workflow automation -β”‚ β”œβ”€β”€ install_agent.py # Install to AI agent directories -β”‚ β”œβ”€β”€ pattern_recognizer.py # C3.1 Design pattern detection -β”‚ β”œβ”€β”€ test_example_extractor.py # C3.2 Test example extraction -β”‚ β”œβ”€β”€ how_to_guide_builder.py # C3.3 How-to guide generation -β”‚ β”œβ”€β”€ config_extractor.py # C3.4 Configuration extraction -β”‚ β”œβ”€β”€ generate_router.py # C3.5 Router skill generation -β”‚ β”œβ”€β”€ code_analyzer.py # Multi-language code analysis -β”‚ β”œβ”€β”€ api_reference_builder.py # API documentation builder -β”‚ β”œβ”€β”€ dependency_analyzer.py # Dependency graph analysis -β”‚ β”œβ”€β”€ signal_flow_analyzer.py # C3.10 Signal flow analysis (Godot) -β”‚ β”œβ”€β”€ pdf_scraper.py # PDF extraction -β”‚ β”œβ”€β”€ epub_scraper.py # EPUB extraction -β”‚ └── adaptors/ # ⭐ Platform adaptor pattern -β”‚ β”œβ”€β”€ __init__.py # Factory: get_adaptor() -β”‚ β”œβ”€β”€ base_adaptor.py # Abstract base -β”‚ 
β”œβ”€β”€ claude_adaptor.py # Claude AI -β”‚ β”œβ”€β”€ gemini_adaptor.py # Google Gemini -β”‚ β”œβ”€β”€ openai_adaptor.py # OpenAI ChatGPT -β”‚ β”œβ”€β”€ markdown_adaptor.py # Generic Markdown -β”‚ β”œβ”€β”€ langchain.py # LangChain RAG -β”‚ β”œβ”€β”€ llama_index.py # LlamaIndex RAG -β”‚ β”œβ”€β”€ haystack.py # Haystack RAG -β”‚ β”œβ”€β”€ chroma.py # ChromaDB -β”‚ β”œβ”€β”€ faiss_helpers.py # FAISS -β”‚ β”œβ”€β”€ qdrant.py # Qdrant -β”‚ β”œβ”€β”€ weaviate.py # Weaviate -β”‚ └── streaming_adaptor.py # Streaming data ingest -└── mcp/ # MCP server (26 tools) - β”œβ”€β”€ server_fastmcp.py # FastMCP server - └── tools/ # Tool implementations +src/skill_seekers/cli/ +β”œβ”€β”€ parsers/ # Subcommand parser registration +β”‚ └── create_parser.py # Progressive help disclosure (--help-web, --help-github, etc.) +β”œβ”€β”€ arguments/ # Argument definitions +β”‚ β”œβ”€β”€ common.py # add_all_standard_arguments() - shared across all scrapers +β”‚ └── create.py # UNIVERSAL_ARGUMENTS, WEB_ARGUMENTS, GITHUB_ARGUMENTS, etc. +└── source_detector.py # Auto-detect source type from input string ``` -**Most Modified Files (when contributing):** -- Platform adaptors: `src/skill_seekers/cli/adaptors/{platform}.py` -- Tests: `tests/test_{feature}.py` -- Configs: `configs/{framework}.json` +### C3.x Codebase Analysis Pipeline -## πŸ› οΈ Development Commands +Local codebase analysis features, all opt-out (`--skip-*` flags): +- C3.1 `pattern_recognizer.py` - Design pattern detection (10 GoF patterns, 9 languages) +- C3.2 `test_example_extractor.py` - Usage examples from tests +- C3.3 `how_to_guide_builder.py` - AI-enhanced educational guides +- C3.4 `config_extractor.py` - Configuration pattern extraction +- C3.5 `generate_router.py` - Architecture overview generation +- C3.10 `signal_flow_analyzer.py` - Godot signal flow analysis -### Setup +### MCP Server + +`src/skill_seekers/mcp/server_fastmcp.py` - 26+ tools via FastMCP. Transport: stdio (Claude Code) or HTTP (Cursor/Windsurf). 
Optional dependency: `pip install -e ".[mcp]"` + +### Enhancement Modes + +- **API mode** (if `ANTHROPIC_API_KEY` set): Direct Claude API calls +- **LOCAL mode** (fallback): Uses Claude Code CLI (free with Max plan) +- Control: `--enhance-level 0` (off) / `1` (SKILL.md only) / `2` (default, balanced) / `3` (full) + +## Key Implementation Details + +### Smart Categorization (`doc_scraper.py:smart_categorize()`) + +Scores pages against category keywords: 3 points for URL match, 2 for title, 1 for content. Threshold of 2+ required. Falls back to "other". + +### Content Extraction (`doc_scraper.py`) + +`FALLBACK_MAIN_SELECTORS` constant + `_find_main_content()` helper handle CSS selector fallback. Links are extracted from the full page before early return (not just main content). `body` is deliberately excluded from fallbacks. + +### Three-Stream GitHub Architecture (`unified_codebase_analyzer.py`) + +Stream 1: Code Analysis (AST, patterns, tests, guides). Stream 2: Documentation (README, docs/, wiki). Stream 3: Community (issues, PRs, metadata). Depth control: `basic` (1-2 min) or `c3x` (20-60 min). + +## Testing + +### Test markers (pytest.ini) ```bash -# Install in editable mode (required before tests due to src/ layout) -pip install -e . - -# Install with all platform dependencies -pip install -e ".[all-llms]" - -# Install specific platforms -pip install -e ".[gemini]" # Google Gemini -pip install -e ".[openai]" # OpenAI ChatGPT +pytest tests/ -v # Default: fast tests only +pytest tests/ -v -m slow # Include slow tests (>5s) +pytest tests/ -v -m integration # External services required +pytest tests/ -v -m e2e # Resource-intensive +pytest tests/ -v -m "not slow and not integration" # Fastest subset ``` -### Running Tests +### Known legitimate skips (~11) -**CRITICAL: Never skip tests** - User requires all tests to pass before commits. 
+- 2: chromadb incompatible with Python 3.14 (pydantic v1) +- 2: weaviate-client not installed +- 2: Qdrant not running (requires docker) +- 2: langchain/llama_index not installed +- 3: GITHUB_TOKEN not set + +### sys.modules gotcha + +`test_swift_detection.py` deletes `skill_seekers.cli` modules from `sys.modules`. It must save and restore both `sys.modules` entries AND parent package attributes (`setattr`). See the test file for the pattern. + +## Dependencies + +Core deps include `langchain`, `llama-index`, `anthropic`, `httpx`, `PyMuPDF`, `pydantic`. Platform-specific deps are optional: ```bash -# All tests (must run pip install -e . first!) -pytest tests/ -v - -# Specific test file -pytest tests/test_scraper_features.py -v - -# Multi-platform tests -pytest tests/test_install_multiplatform.py -v - -# With coverage -pytest tests/ --cov=src/skill_seekers --cov-report=term --cov-report=html - -# Single test -pytest tests/test_scraper_features.py::test_detect_language -v - -# MCP server tests -pytest tests/test_mcp_fastmcp.py -v +pip install -e ".[mcp]" # MCP server +pip install -e ".[gemini]" # Google Gemini +pip install -e ".[openai]" # OpenAI +pip install -e ".[docx]" # Word documents +pip install -e ".[epub]" # EPUB books +pip install -e ".[video]" # Video (lightweight) +pip install -e ".[video-full]"# Video (Whisper + visual) +pip install -e ".[jupyter]" # Jupyter notebooks +pip install -e ".[pptx]" # PowerPoint +pip install -e ".[rss]" # RSS/Atom feeds +pip install -e ".[confluence]"# Confluence wiki +pip install -e ".[notion]" # Notion pages +pip install -e ".[chroma]" # ChromaDB +pip install -e ".[all]" # Everything (except video-full) ``` -**Test Architecture:** -- 46 test files covering all features -- CI Matrix: Ubuntu + macOS, Python 3.10-3.13 -- **2,540 tests passing** (current), up from 700+ in v2.x -- Must run `pip install -e .` before tests (src/ layout requirement) -- Tests include create command integration tests, CLI refactor E2E tests +Dev 
dependencies use PEP 735 `[dependency-groups]` in pyproject.toml. -### Building & Publishing +## Environment Variables ```bash -# Build package (using uv - recommended) -uv build - -# Or using build -python -m build - -# Publish to PyPI -uv publish - -# Or using twine -python -m twine upload dist/* +ANTHROPIC_API_KEY=sk-ant-... # Claude AI (or compatible endpoint) +ANTHROPIC_BASE_URL=https://... # Optional: Claude-compatible API endpoint +GOOGLE_API_KEY=AIza... # Google Gemini (optional) +OPENAI_API_KEY=sk-... # OpenAI (optional) +GITHUB_TOKEN=ghp_... # Higher GitHub rate limits ``` -### Testing CLI Commands - -```bash -# Test configuration wizard (NEW: v2.7.0) -skill-seekers config --show # Show current configuration -skill-seekers config --github # GitHub token setup -skill-seekers config --test # Test connections - -# Test resume functionality (NEW: v2.7.0) -skill-seekers resume --list # List resumable jobs -skill-seekers resume --clean # Clean up old jobs - -# Test GitHub scraping with profiles (NEW: v2.7.0) -skill-seekers github --repo facebook/react --profile personal # Use specific profile -skill-seekers github --repo owner/repo --non-interactive # CI/CD mode - -# Test scraping (dry run) -skill-seekers scrape --config configs/react.json --dry-run - -# Test codebase analysis (C2.x features) -skill-seekers analyze --directory . 
--output output/codebase/ - -# Test pattern detection (C3.1) -skill-seekers patterns --file src/skill_seekers/cli/code_analyzer.py - -# Test how-to guide generation (C3.3) -skill-seekers how-to-guides output/test_examples.json --output output/guides/ - -# Test enhancement status monitoring -skill-seekers enhance-status output/react/ --watch - -# Video setup (auto-detect GPU and install deps) -skill-seekers video --setup - -# Test multi-platform packaging -skill-seekers package output/react/ --target gemini --dry-run - -# Test MCP server (stdio mode) -python -m skill_seekers.mcp.server_fastmcp - -# Test MCP server (HTTP mode) -python -m skill_seekers.mcp.server_fastmcp --transport http --port 8765 -``` - -### New v3.0.0 CLI Commands - -```bash -# Setup wizard (interactive configuration) -skill-seekers-setup - -# Cloud storage operations -skill-seekers cloud upload --provider s3 --bucket my-bucket output/react.zip -skill-seekers cloud download --provider gcs --bucket my-bucket react.zip -skill-seekers cloud list --provider azure --container my-container - -# Embedding server (for RAG pipelines) -skill-seekers embed --port 8080 --model sentence-transformers - -# Sync & incremental updates -skill-seekers sync --source https://docs.react.dev/ --target output/react/ -skill-seekers update --skill output/react/ --check-changes - -# Quality metrics & benchmarking -skill-seekers quality --skill output/react/ --report -skill-seekers benchmark --config configs/react.json --compare-versions - -# Multilingual support -skill-seekers multilang --detect output/react/ -skill-seekers multilang --translate output/react/ --target zh-CN - -# Streaming data ingest -skill-seekers stream --source docs/ --target output/streaming/ -``` - -## πŸ”§ Key Implementation Details - -### CLI Architecture (Git-style) - -**Entry point:** `src/skill_seekers/cli/main.py` - -The unified CLI modifies `sys.argv` and calls existing `main()` functions to maintain backward compatibility: - -```python -# 
Example: skill-seekers scrape --config react.json -# Transforms to: doc_scraper.main() with modified sys.argv -``` - -**Subcommands:** create, scrape, github, pdf, epub, unified, codebase, enhance, enhance-status, package, upload, estimate, install, install-agent, patterns, how-to-guides - -### NEW: Unified `create` Command - -**The recommended way to create skills** - Auto-detects source type and provides progressive help disclosure: - -```bash -# Auto-detection examples -skill-seekers create https://docs.react.dev/ # β†’ Web scraping -skill-seekers create facebook/react # β†’ GitHub analysis -skill-seekers create ./my-project # β†’ Local codebase -skill-seekers create tutorial.pdf # β†’ PDF extraction -skill-seekers create book.epub # β†’ EPUB extraction -skill-seekers create configs/react.json # β†’ Multi-source - -# Progressive help system -skill-seekers create --help # Shows universal args only (13 flags) -skill-seekers create --help-web # Shows web-specific options -skill-seekers create --help-github # Shows GitHub-specific options -skill-seekers create --help-local # Shows local analysis options -skill-seekers create --help-pdf # Shows PDF extraction options -skill-seekers create --help-epub # Shows EPUB extraction options -skill-seekers create --help-advanced # Shows advanced/rare options -skill-seekers create --help-all # Shows all 120+ flags - -# Universal flags work for ALL sources -skill-seekers create -p quick # Preset (-p shortcut) -skill-seekers create --enhance-level 2 # AI enhancement (0-3) -skill-seekers create --chunk-for-rag # RAG chunking -skill-seekers create --dry-run # Preview -``` - -**Key improvements:** -- **Single command** replaces scrape/github/analyze for most use cases -- **Smart detection** - No need to specify source type -- **Progressive disclosure** - Default help shows 13 flags, detailed help available -- **-p shortcut** - Quick preset selection (`-p quick|standard|comprehensive`) -- **Universal features** - RAG chunking, 
dry-run, presets work everywhere - -**Recent Additions:** -- `create` - **NEW:** Unified command with auto-detection and progressive help -- `codebase` - Local codebase analysis without GitHub API (C2.x + C3.x features) -- `enhance-status` - Monitor background/daemon enhancement processes -- `patterns` - Detect design patterns in code (C3.1) -- `how-to-guides` - Generate educational guides from tests (C3.3) - -### Platform Adaptor Usage - -```python -from skill_seekers.cli.adaptors import get_adaptor - -# Get platform-specific adaptor -adaptor = get_adaptor('gemini') # or 'claude', 'openai', 'markdown' - -# Package skill -adaptor.package(skill_dir='output/react/', output_path='output/') - -# Upload to platform -adaptor.upload( - package_path='output/react-gemini.tar.gz', - api_key=os.getenv('GOOGLE_API_KEY') -) - -# AI enhancement -adaptor.enhance(skill_dir='output/react/', mode='api') -``` - -### C3.x Codebase Analysis Features - -The project has comprehensive codebase analysis capabilities (C3.1-C3.8): - -**C3.1 Design Pattern Detection** (`pattern_recognizer.py`): -- Detects 10 common patterns: Singleton, Factory, Observer, Strategy, Decorator, Builder, Adapter, Command, Template Method, Chain of Responsibility -- Supports 9 languages: Python, JavaScript, TypeScript, C++, C, C#, Go, Rust, Java -- Three detection levels: surface (fast), deep (balanced), full (thorough) -- 87% precision, 80% recall on real-world projects - -**C3.2 Test Example Extraction** (`test_example_extractor.py`): -- Extracts real usage examples from test files -- Categories: instantiation, method_call, config, setup, workflow -- AST-based for Python, regex-based for 8 other languages -- Quality filtering with confidence scoring - -**C3.3 How-To Guide Generation** (`how_to_guide_builder.py`): -- Transforms test workflows into educational guides -- 5 AI enhancements: step descriptions, troubleshooting, prerequisites, next steps, use cases -- Dual-mode AI: API (fast) or LOCAL (free with Claude 
Code Max) -- 4 grouping strategies: AI tutorial group, file path, test name, complexity - -**C3.4 Configuration Pattern Extraction** (`config_extractor.py`): -- Extracts configuration patterns from codebases -- Identifies config files, env vars, CLI arguments -- AI enhancement for better organization - -**C3.5 Architectural Overview** (`generate_router.py`): -- Generates comprehensive ARCHITECTURE.md files -- Router skill generation for large documentation -- Quality improvements: 6.5/10 β†’ 8.5/10 (+31%) -- Integrates GitHub metadata, issues, labels - -**C3.6 AI Enhancement** (Claude API integration): -- Enhances C3.1-C3.5 with AI-powered insights -- Pattern explanations and improvement suggestions -- Test example context and best practices -- Guide enhancement with troubleshooting and prerequisites - -**C3.7 Architectural Pattern Detection** (`architectural_pattern_detector.py`): -- Detects 8 architectural patterns (MVC, MVVM, MVP, Repository, etc.) -- Framework detection (Django, Flask, Spring, React, Angular, etc.) -- Multi-file analysis with directory structure patterns -- Evidence-based detection with confidence scoring - -**C3.8 Standalone Codebase Scraper** (`codebase_scraper.py`): -```bash -# Quick analysis (1-2 min, basic features only) -skill-seekers analyze --directory /path/to/repo --quick - -# Comprehensive analysis (20-60 min, all features + AI) -skill-seekers analyze --directory . --comprehensive - -# With AI enhancement (auto-detects API or LOCAL) -skill-seekers analyze --directory . --enhance - -# Granular AI enhancement control (NEW) -skill-seekers analyze --directory . --enhance-level 1 # SKILL.md only -skill-seekers analyze --directory . --enhance-level 2 # + Architecture + Config + Docs -skill-seekers analyze --directory . --enhance-level 3 # Full enhancement (all features) - -# Disable specific features -skill-seekers analyze --directory . 
--skip-patterns --skip-how-to-guides -``` - -- Generates 300+ line standalone SKILL.md files from codebases -- All C3.x features integrated (patterns, tests, guides, config, architecture, docs) -- Complete codebase analysis without documentation scraping -- **NEW**: Granular AI enhancement control with `--enhance-level` (0-3) - -**C3.9 Project Documentation Extraction** (`codebase_scraper.py`): -- Extracts and categorizes all markdown files from the project -- Auto-detects categories: overview, architecture, guides, workflows, features, etc. -- Integrates documentation into SKILL.md with summaries -- AI enhancement (level 2+) adds topic extraction and cross-references -- Controlled by depth: surface=raw copy, deep=parse+summarize, full=AI-enhanced -- Default ON, use `--skip-docs` to disable - -**C3.10 Signal Flow Analysis for Godot Projects** (`signal_flow_analyzer.py`): -- Complete signal flow analysis system for event-driven Godot architectures -- Signal declaration extraction (detects `signal` keyword declarations) -- Connection mapping (tracks `.connect()` calls with targets and methods) -- Emission tracking (finds `.emit()` and `emit_signal()` calls) -- Real-world metrics: 208 signals, 634 connections, 298 emissions in test project -- Signal density metrics (signals per file) -- Event chain detection (signals triggering other signals) -- Signal pattern detection: - - **EventBus Pattern** (0.90 confidence): Centralized signal hub in autoload - - **Observer Pattern** (0.85 confidence): Multi-observer signals (3+ listeners) - - **Event Chains** (0.80 confidence): Cascading signal propagation -- Signal-based how-to guides (C3.10.1): - - AI-generated step-by-step usage guides (Connect β†’ Emit β†’ Handle) - - Real code examples from project - - Common usage locations - - Parameter documentation -- Outputs: `signal_flow.json`, `signal_flow.mmd` (Mermaid diagram), `signal_reference.md`, `signal_how_to_guides.md` -- Comprehensive Godot 4.x support: - - GDScript (.gd), 
Scene files (.tscn), Resources (.tres), Shaders (.gdshader) - - GDScript test extraction (GUT, gdUnit4, WAT frameworks) - - 396 test cases extracted in test project - - Framework detection (Unity, Unreal, Godot) - -**Key Architecture Decision (BREAKING in v2.5.2):** -- Changed from opt-in (`--build-*`) to opt-out (`--skip-*`) flags -- All analysis features now ON by default for maximum value -- Backward compatibility warnings for deprecated flags - -### Smart Categorization Algorithm - -Located in `doc_scraper.py:smart_categorize()`: -- Scores pages against category keywords -- 3 points for URL match, 2 for title, 1 for content -- Threshold of 2+ for categorization -- Auto-infers categories from URL segments if none provided -- Falls back to "other" category - -### Language Detection - -Located in `doc_scraper.py:detect_language()`: -1. CSS class attributes (`language-*`, `lang-*`) -2. Heuristics (keywords like `def`, `const`, `func`) - -### Configuration File Structure - -Configs (`configs/*.json`) define scraping behavior: - -```json -{ - "name": "framework-name", - "description": "When to use this skill", - "base_url": "https://docs.example.com/", - "selectors": { - "main_content": "article", // CSS selector - "title": "h1", - "code_blocks": "pre code" - }, - "url_patterns": { - "include": ["/docs"], - "exclude": ["/blog"] - }, - "categories": { - "getting_started": ["intro", "quickstart"], - "api": ["api", "reference"] - }, - "rate_limit": 0.5, - "max_pages": 500 -} -``` - -## πŸ§ͺ Testing Guidelines - -### Test Coverage Requirements - -- Core features: 100% coverage required -- Platform adaptors: Each platform has dedicated tests -- MCP tools: All 18 tools must be tested -- Integration tests: End-to-end workflows - -### Test Markers (from pytest.ini_options) - -The project uses pytest markers to categorize tests: - -```bash -# Run only fast unit tests (default) -pytest tests/ -v - -# Include slow tests (>5 seconds) -pytest tests/ -v -m slow - -# Run 
integration tests (requires external services) -pytest tests/ -v -m integration - -# Run end-to-end tests (resource-intensive, creates files) -pytest tests/ -v -m e2e - -# Run tests requiring virtual environment setup -pytest tests/ -v -m venv - -# Run bootstrap feature tests -pytest tests/ -v -m bootstrap - -# Skip slow and integration tests (fastest) -pytest tests/ -v -m "not slow and not integration" -``` - -### Test Execution Strategy - -**By default, only fast tests run**. Use markers to control test execution: - -```bash -# Default: Only fast tests (skip slow/integration/e2e) -pytest tests/ -v - -# Include slow tests (>5 seconds) -pytest tests/ -v -m slow - -# Include integration tests (requires external services) -pytest tests/ -v -m integration - -# Include resource-intensive e2e tests (creates files) -pytest tests/ -v -m e2e - -# Run ONLY fast tests (explicit) -pytest tests/ -v -m "not slow and not integration and not e2e" - -# Run everything (CI does this) -pytest tests/ -v -m "" -``` - -**When to use which:** -- **Local development:** Default (fast tests only) - `pytest tests/ -v` -- **Pre-commit:** Fast tests - `pytest tests/ -v` -- **Before PR:** Include slow + integration - `pytest tests/ -v -m "not e2e"` -- **CI validation:** All tests run automatically - -### Key Test Files - -- `test_scraper_features.py` - Core scraping functionality -- `test_mcp_server.py` - MCP integration (18 tools) -- `test_mcp_fastmcp.py` - FastMCP framework -- `test_unified.py` - Multi-source scraping -- `test_github_scraper.py` - GitHub analysis -- `test_pdf_scraper.py` - PDF extraction -- `test_epub_scraper.py` - EPUB extraction -- `test_install_multiplatform.py` - Multi-platform packaging -- `test_integration.py` - End-to-end workflows -- `test_install_skill.py` - One-command install -- `test_install_agent.py` - AI agent installation -- `conftest.py` - Test configuration (checks package installation) - -## 🌐 Environment Variables - -```bash -# Claude AI / Compatible APIs 
-# Option 1: Official Anthropic API (default) -export ANTHROPIC_API_KEY=sk-ant-... - -# Option 2: GLM-4.7 Claude-compatible API (or any compatible endpoint) -export ANTHROPIC_API_KEY=your-api-key -export ANTHROPIC_BASE_URL=https://glm-4-7-endpoint.com/v1 - -# Google Gemini (optional) -export GOOGLE_API_KEY=AIza... - -# OpenAI ChatGPT (optional) -export OPENAI_API_KEY=sk-... - -# GitHub (for higher rate limits) -export GITHUB_TOKEN=ghp_... - -# Private config repositories (optional) -export GITLAB_TOKEN=glpat-... -export GITEA_TOKEN=... -export BITBUCKET_TOKEN=... -``` - -**All AI enhancement features respect these settings**: -- `enhance_skill.py` - API mode SKILL.md enhancement -- `ai_enhancer.py` - C3.1/C3.2 pattern and test example enhancement -- `guide_enhancer.py` - C3.3 guide enhancement -- `config_enhancer.py` - C3.4 configuration enhancement -- `adaptors/claude.py` - Claude platform adaptor enhancement - -**Note**: Setting `ANTHROPIC_BASE_URL` allows you to use any Claude-compatible API endpoint, such as GLM-4.7 (ζ™Ίθ°± AI). 
- -## πŸ“¦ Package Structure (pyproject.toml) - -### Entry Points - -```toml -[project.scripts] -# Main unified CLI -skill-seekers = "skill_seekers.cli.main:main" - -# Individual tool entry points (Core) -skill-seekers-config = "skill_seekers.cli.config_command:main" # v2.7.0 Configuration wizard -skill-seekers-resume = "skill_seekers.cli.resume_command:main" # v2.7.0 Resume interrupted jobs -skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main" -skill-seekers-github = "skill_seekers.cli.github_scraper:main" -skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main" -skill-seekers-epub = "skill_seekers.cli.epub_scraper:main" -skill-seekers-unified = "skill_seekers.cli.unified_scraper:main" -skill-seekers-codebase = "skill_seekers.cli.codebase_scraper:main" # C2.x Local codebase analysis -skill-seekers-enhance = "skill_seekers.cli.enhance_skill_local:main" -skill-seekers-enhance-status = "skill_seekers.cli.enhance_status:main" # Status monitoring -skill-seekers-package = "skill_seekers.cli.package_skill:main" -skill-seekers-upload = "skill_seekers.cli.upload_skill:main" -skill-seekers-estimate = "skill_seekers.cli.estimate_pages:main" -skill-seekers-install = "skill_seekers.cli.install_skill:main" -skill-seekers-install-agent = "skill_seekers.cli.install_agent:main" -skill-seekers-patterns = "skill_seekers.cli.pattern_recognizer:main" # C3.1 Pattern detection -skill-seekers-how-to-guides = "skill_seekers.cli.how_to_guide_builder:main" # C3.3 Guide generation -skill-seekers-workflows = "skill_seekers.cli.workflows_command:main" # NEW: Workflow preset management -skill-seekers-video = "skill_seekers.cli.video_scraper:main" # Video scraping pipeline (use --setup to install deps) - -# New v3.0.0 Entry Points -skill-seekers-setup = "skill_seekers.cli.setup_wizard:main" # NEW: v3.0.0 Setup wizard -skill-seekers-cloud = "skill_seekers.cli.cloud_storage_cli:main" # NEW: v3.0.0 Cloud storage -skill-seekers-embed = "skill_seekers.embedding.server:main" # NEW: v3.0.0 
Embedding server -skill-seekers-sync = "skill_seekers.cli.sync_cli:main" # NEW: v3.0.0 Sync & monitoring -skill-seekers-benchmark = "skill_seekers.cli.benchmark_cli:main" # NEW: v3.0.0 Benchmarking -skill-seekers-stream = "skill_seekers.cli.streaming_ingest:main" # NEW: v3.0.0 Streaming ingest -skill-seekers-update = "skill_seekers.cli.incremental_updater:main" # NEW: v3.0.0 Incremental updates -skill-seekers-multilang = "skill_seekers.cli.multilang_support:main" # NEW: v3.0.0 Multilingual -skill-seekers-quality = "skill_seekers.cli.quality_metrics:main" # NEW: v3.0.0 Quality metrics -``` - -### Optional Dependencies - -**Project uses PEP 735 `[dependency-groups]` (Python 3.13+)**: -- Replaces deprecated `tool.uv.dev-dependencies` -- Dev dependencies: `[dependency-groups] dev = [...]` in pyproject.toml -- Install with: `pip install -e .` (installs only core deps) -- Install dev deps: See CI workflow or manually install pytest, ruff, mypy - -**Note on video dependencies:** `easyocr` and GPU-specific PyTorch builds are **not** included in the `video-full` optional dependency group. They are installed at runtime by `skill-seekers video --setup`, which auto-detects the GPU (CUDA/ROCm/MPS/CPU) and installs the correct builds. - -```toml -[project.optional-dependencies] -gemini = ["google-generativeai>=0.8.0"] -openai = ["openai>=1.0.0"] -all-llms = ["google-generativeai>=0.8.0", "openai>=1.0.0"] - -[dependency-groups] # PEP 735 (replaces tool.uv.dev-dependencies) -dev = [ - "pytest>=8.4.2", - "pytest-asyncio>=0.24.0", - "pytest-cov>=7.0.0", - "coverage>=7.11.0", -] -``` - -## 🚨 Critical Development Notes - -### Must Run Before Tests - -```bash -# REQUIRED: Install package before running tests -pip install -e . - -# Why: src/ layout requires package installation -# Without this, imports will fail -``` - -### Never Skip Tests - -Per user instructions in `~/.claude/CLAUDE.md`: -- "never skip any test. 
always make sure all test pass" -- All 2,540 tests must pass before commits -- Run full test suite: `pytest tests/ -v` -- New tests added for create command and CLI refactor work - -### Platform-Specific Dependencies - -Platform dependencies are optional (install only what you need): - -```bash -# Install specific platform support -pip install -e ".[gemini]" # Google Gemini -pip install -e ".[openai]" # OpenAI ChatGPT -pip install -e ".[chroma]" # ChromaDB -pip install -e ".[weaviate]" # Weaviate -pip install -e ".[s3]" # AWS S3 -pip install -e ".[gcs]" # Google Cloud Storage -pip install -e ".[azure]" # Azure Blob Storage -pip install -e ".[mcp]" # MCP integration -pip install -e ".[all]" # Everything (16 platforms + cloud + embedding) - -# Or install from PyPI: -pip install skill-seekers[gemini] # Google Gemini support -pip install skill-seekers[openai] # OpenAI ChatGPT support -pip install skill-seekers[all-llms] # All LLM platforms -pip install skill-seekers[chroma] # ChromaDB support -pip install skill-seekers[weaviate] # Weaviate support -pip install skill-seekers[s3] # AWS S3 support -pip install skill-seekers[all] # All optional dependencies -``` - -### AI Enhancement Modes - -AI enhancement transforms basic skills (2-3/10) into production-ready skills (8-9/10). Two modes available: - -**API Mode** (default if ANTHROPIC_API_KEY is set): -- Direct Claude API calls (fast, efficient) -- Cost: ~$0.15-$0.30 per skill -- Perfect for CI/CD automation -- Requires: `export ANTHROPIC_API_KEY=sk-ant-...` - -**LOCAL Mode** (fallback if no API key): -- Uses Claude Code CLI (your existing Max plan) -- Free! 
No API charges -- 4 execution modes: - - Headless (default): Foreground, waits for completion - - Background (`--background`): Returns immediately - - Daemon (`--daemon`): Fully detached with nohup - - Terminal (`--interactive-enhancement`): Opens new terminal (macOS) -- Status monitoring: `skill-seekers enhance-status output/react/ --watch` -- Timeout configuration: `--timeout 300` (seconds) - -### Enhancement Flag Consolidation (Phase 1) - -**IMPORTANT CHANGE:** Three enhancement flags have been unified into a single granular control: - -**Old flags (deprecated):** -- `--enhance` - Enable AI enhancement -- `--enhance-local` - Use LOCAL mode (Claude Code) -- `--api-key KEY` - Anthropic API key - -**New unified flag:** -- `--enhance-level LEVEL` - Granular AI enhancement control (0-3, default: 2) - - `0` - Disabled, no AI enhancement - - `1` - SKILL.md only (core documentation) - - `2` - + Architecture + Config + Docs (default, balanced) - - `3` - Full enhancement (all features, comprehensive) - -**Auto-detection:** Mode (API vs LOCAL) is auto-detected: -- If `ANTHROPIC_API_KEY` is set β†’ API mode -- Otherwise β†’ LOCAL mode (Claude Code Max) - -**Examples:** -```bash -# Auto-detect mode, default enhancement level (2) -skill-seekers create https://docs.react.dev/ - -# Disable enhancement -skill-seekers create facebook/react --enhance-level 0 - -# SKILL.md only (fast) -skill-seekers create ./my-project --enhance-level 1 - -# Full enhancement (comprehensive) -skill-seekers create tutorial.pdf --enhance-level 3 - -# Force LOCAL mode with specific level -skill-seekers enhance output/react/ --mode LOCAL --enhance-level 2 - -# Background with status monitoring -skill-seekers enhance output/react/ --background -skill-seekers enhance-status output/react/ --watch -``` - -**Migration:** Old flags still work with deprecation warnings, will be removed in v4.0.0. - -See `docs/ENHANCEMENT_MODES.md` for detailed documentation. 
- -### Git Workflow - -**Git Workflow Notes:** -- Main branch: `main` -- Development branch: `development` -- Always create feature branches from `development` -- Branch naming: `feature/{task-id}-{description}` or `feature/{category}` - -**To see current status:** `git status` - -### CI/CD Pipeline - -The project has GitHub Actions workflows in `.github/workflows/`: - -**tests.yml** - Runs on every push and PR to `main` or `development`: - -1. **Lint Job** (Python 3.12, Ubuntu): - - `ruff check src/ tests/` - Code linting with GitHub annotations - - `ruff format --check src/ tests/` - Format validation - - `mypy src/skill_seekers` - Type checking (continue-on-error) - -2. **Test Job** (Matrix): - - **OS:** Ubuntu + macOS - - **Python:** 3.10, 3.11, 3.12 - - **Exclusions:** macOS + Python 3.10 (speed optimization) - - **Steps:** - - Install dependencies + `pip install -e .` - - Run CLI tests (scraper, config, integration) - - Run MCP server tests - - Generate coverage report β†’ Upload to Codecov - -3. **Summary Job** - Single status check for branch protection - - Ensures both lint and test jobs succeed - - Provides single "All Checks Complete" status - -**release.yml** - Triggers on version tags (e.g., `v2.9.0`): -- Builds package with `uv build` -- Publishes to PyPI with `uv publish` -- Creates GitHub release - -**Local Pre-Commit Validation** - -Run the same checks as CI before pushing: - -```bash -# 1. Code quality (matches lint job) - WITH AUTO-FIX -uvx ruff check --fix --unsafe-fixes src/ tests/ # Auto-fix issues -uvx ruff format src/ tests/ # Auto-format -uvx ruff check src/ tests/ # Verify clean -uvx ruff format --check src/ tests/ # Verify formatted -mypy src/skill_seekers - -# 2. Tests (matches test job) -pip install -e . -pytest tests/ -v --cov=src/skill_seekers --cov-report=term - -# 3. If all pass, you're good to push! 
-git add -A # Stage any auto-fixes -git commit --amend --no-edit # Add fixes to commit (or new commit) -git push origin feature/my-feature -``` - -**Branch Protection Rules:** -- **main:** Requires tests + 1 review, only maintainers merge -- **development:** Requires tests to pass, default target for PRs - -**Common CI Failure Patterns and Fixes** - -If CI fails after your changes, follow this debugging checklist: - -```bash -# 1. Fix linting errors automatically -uvx ruff check --fix --unsafe-fixes src/ tests/ - -# 2. Fix formatting issues -uvx ruff format src/ tests/ - -# 3. Check for remaining issues -uvx ruff check src/ tests/ -uvx ruff format --check src/ tests/ - -# 4. Verify tests pass locally -pip install -e . -pytest tests/ -v - -# 5. Push fixes -git add -A -git commit -m "fix: resolve CI linting/formatting issues" -git push -``` - -**Critical dependency patterns to check:** -- **MCP version mismatch**: Ensure `requirements.txt` and `pyproject.toml` have matching MCP versions -- **Missing module-level imports**: If a tool file imports a module at top level (e.g., `import yaml`), that module MUST be in core dependencies -- **Try/except ImportError**: Silent failures in try/except blocks can hide missing dependencies - -**Timing-sensitive tests:** -- Benchmark tests may fail on slower CI runners (macOS) -- If a test times out or exceeds threshold only in CI, consider relaxing the threshold -- Local passing doesn't guarantee CI passing for performance tests - -## 🚨 Common Pitfalls & Solutions - -### 1. Import Errors -**Problem:** `ModuleNotFoundError: No module named 'skill_seekers'` - -**Solution:** Must install package first due to src/ layout -```bash -pip install -e . -``` - -**Why:** The src/ layout prevents imports from repo root. Package must be installed. - -### 2. Tests Fail with "No module named..." 
-**Problem:** Package not installed in test environment - -**Solution:** CI runs `pip install -e .` before tests - do the same locally -```bash -pip install -e . -pytest tests/ -v -``` - -### 3. Platform-Specific Dependencies Not Found -**Problem:** `ModuleNotFoundError: No module named 'google.generativeai'` - -**Solution:** Install platform-specific dependencies -```bash -pip install -e ".[gemini]" # For Gemini -pip install -e ".[openai]" # For OpenAI -pip install -e ".[all-llms]" # For all platforms -``` - -### 4. Git Branch Confusion -**Problem:** PR targets `main` instead of `development` - -**Solution:** Always create PRs targeting `development` branch -```bash -git checkout development -git pull upstream development -git checkout -b feature/my-feature -# ... make changes ... -git push origin feature/my-feature -# Create PR: feature/my-feature β†’ development -``` - -**Important:** See `CONTRIBUTING.md` for complete branch workflow. - -### 5. Tests Pass Locally But Fail in CI -**Problem:** Different Python version or missing dependency - -**Solution:** Test with multiple Python versions locally -```bash -# CI tests: Python 3.10, 3.11, 3.12 on Ubuntu + macOS -# Use pyenv or docker to test locally: -pyenv install 3.10.13 3.11.7 3.12.1 - -pyenv local 3.10.13 -pip install -e . && pytest tests/ -v - -pyenv local 3.11.7 -pip install -e . && pytest tests/ -v - -pyenv local 3.12.1 -pip install -e . && pytest tests/ -v -``` - -### 6. Enhancement Not Working -**Problem:** AI enhancement fails or hangs - -**Solutions:** -```bash -# Check if API key is set -echo $ANTHROPIC_API_KEY - -# Try LOCAL mode instead (uses Claude Code Max, no API key needed) -skill-seekers enhance output/react/ --mode LOCAL - -# Monitor enhancement status for background jobs -skill-seekers enhance-status output/react/ --watch -``` - -### 7. 
Rate Limit Errors from GitHub -**Problem:** `403 Forbidden` from GitHub API - -**Solutions:** -```bash -# Check current rate limit -curl -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/rate_limit - -# Configure multiple GitHub profiles (recommended) -skill-seekers config --github - -# Use specific profile -skill-seekers github --repo owner/repo --profile work - -# Test all configured tokens -skill-seekers config --test -``` - -### 8. Confused About Command Options -**Problem:** "Too many flags!" or "Which flags work with which sources?" - -**Solution:** Use the progressive disclosure help system in the `create` command: -```bash -# Start with universal options (13 flags) -skill-seekers create --help - -# Need web scraping options? -skill-seekers create --help-web - -# GitHub-specific flags? -skill-seekers create --help-github - -# See ALL options (120+ flags)? -skill-seekers create --help-all - -# Quick preset shortcut -skill-seekers create -p quick -skill-seekers create -p standard -skill-seekers create -p comprehensive -``` - -**Why:** The create command shows only relevant flags by default to reduce cognitive load. - -**Legacy commands** (scrape, github, analyze) show all flags in one help screen - use them if you prefer that style. - -### 9. CI Passes Locally But Fails in GitHub Actions -**Problem:** Ruff check/format or tests pass locally but fail in CI - -**Common causes:** -1. **Dependency version mismatch** - `requirements.txt` vs `pyproject.toml` conflicts - ```bash - # Check both files have matching versions for core deps - grep "mcp" requirements.txt pyproject.toml - grep "PyYAML" requirements.txt pyproject.toml - ``` - -2. **Module imported but not declared** - File imports module at top level but it's not in dependencies - ```bash - # Search for imports that might not be in dependencies - grep -r "^import yaml" src/ - grep -r "^from yaml" src/ - # Ensure PyYAML is in pyproject.toml core dependencies - ``` - -3. 
**Ruff version differences** - Local ruff vs CI ruff may have different rules - ```bash - # Use uvx to match CI's ruff version - uvx ruff check src/ tests/ - uvx ruff format src/ tests/ - ``` - -**Solution:** -```bash -# Run CI validation commands exactly as CI does -pip install -e . # Fresh install -uvx ruff check src/ tests/ # Use uvx, not local ruff -uvx ruff format --check src/ tests/ -pytest tests/ -v -``` - -## πŸ”Œ MCP Integration - -### MCP Server (26 Tools) - -**Transport modes:** -- stdio: Claude Code, VS Code + Cline -- HTTP: Cursor, Windsurf, IntelliJ IDEA - -**Core Tools (9):** -1. `list_configs` - List preset configurations -2. `generate_config` - Generate config from docs URL -3. `validate_config` - Validate config structure -4. `estimate_pages` - Estimate page count -5. `scrape_docs` - Scrape documentation -6. `package_skill` - Package to format (supports `--format` and `--target`) -7. `upload_skill` - Upload to platform (supports `--target`) -8. `enhance_skill` - AI enhancement with platform support -9. `install_skill` - Complete workflow automation - -**Extended Tools (10):** -10. `scrape_github` - GitHub repository analysis -11. `scrape_pdf` - PDF extraction -12. `unified_scrape` - Multi-source scraping -13. `merge_sources` - Merge docs + code -14. `detect_conflicts` - Find discrepancies -15. `add_config_source` - Register git repos -16. `fetch_config` - Fetch configs from git -17. `list_config_sources` - List registered sources -18. `remove_config_source` - Remove config source -19. `split_config` - Split large configs - -**NEW Vector DB Tools (4):** -20. `export_to_chroma` - Export to ChromaDB -21. `export_to_weaviate` - Export to Weaviate -22. `export_to_faiss` - Export to FAISS -23. `export_to_qdrant` - Export to Qdrant - -**NEW Cloud Tools (3):** -24. `cloud_upload` - Upload to S3/GCS/Azure -25. `cloud_download` - Download from cloud storage -26. 
`cloud_list` - List files in cloud storage - -### Starting MCP Server - -```bash -# stdio mode (Claude Code, VS Code + Cline) -python -m skill_seekers.mcp.server_fastmcp - -# HTTP mode (Cursor, Windsurf, IntelliJ) -python -m skill_seekers.mcp.server_fastmcp --transport http --port 8765 -``` - -## πŸ€– RAG Framework & Vector Database Integrations (**NEW - v3.0.0**) - -Skill Seekers is now the **universal preprocessor for RAG pipelines**. Export documentation to any RAG framework or vector database with a single command. - -### RAG Frameworks - -**LangChain Documents:** -```bash -# Export to LangChain Document format -skill-seekers package output/django --format langchain - -# Output: output/django-langchain.json -# Format: Array of LangChain Document objects -# - page_content: Full text content -# - metadata: {source, category, type, url} - -# Use in LangChain: -from langchain.document_loaders import JSONLoader -loader = JSONLoader("output/django-langchain.json") -documents = loader.load() -``` - -**LlamaIndex TextNodes:** -```bash -# Export to LlamaIndex TextNode format -skill-seekers package output/django --format llama-index - -# Output: output/django-llama-index.json -# Format: Array of LlamaIndex TextNode objects -# - text: Content -# - id_: Unique identifier -# - metadata: {source, category, type} -# - relationships: Document relationships - -# Use in LlamaIndex: -from llama_index import StorageContext, load_index_from_storage -from llama_index.schema import TextNode -nodes = [TextNode.from_dict(n) for n in json.load(open("output/django-llama-index.json"))] -``` - -**Haystack Documents:** -```bash -# Export to Haystack Document format -skill-seekers package output/django --format haystack - -# Output: output/django-haystack.json -# Format: Haystack Document objects for pipelines -# Perfect for: Question answering, search, RAG pipelines -``` - -### Vector Databases - -**ChromaDB (Direct Integration):** -```bash -# Export and optionally upload to ChromaDB 
-skill-seekers package output/django --format chroma - -# Output: output/django-chroma/ (ChromaDB collection) -# With direct upload (requires chromadb running): -skill-seekers package output/django --format chroma --upload - -# Configuration via environment: -export CHROMA_HOST=localhost -export CHROMA_PORT=8000 -``` - -**FAISS (Facebook AI Similarity Search):** -```bash -# Export to FAISS index format -skill-seekers package output/django --format faiss - -# Output: -# - output/django-faiss.index (FAISS index) -# - output/django-faiss-metadata.json (Document metadata) - -# Use with FAISS: -import faiss -index = faiss.read_index("output/django-faiss.index") -``` - -**Weaviate:** -```bash -# Export and upload to Weaviate -skill-seekers package output/django --format weaviate --upload - -# Requires environment variables: -export WEAVIATE_URL=http://localhost:8080 -export WEAVIATE_API_KEY=your-api-key - -# Creates class "DjangoDoc" with schema -``` - -**Qdrant:** -```bash -# Export and upload to Qdrant -skill-seekers package output/django --format qdrant --upload - -# Requires environment variables: -export QDRANT_URL=http://localhost:6333 -export QDRANT_API_KEY=your-api-key - -# Creates collection "django_docs" -``` - -**Pinecone (via Markdown):** -```bash -# Pinecone uses the markdown format -skill-seekers package output/django --target markdown - -# Then use Pinecone's Python client for upsert -# See: docs/integrations/PINECONE.md -``` - -### Complete RAG Pipeline Example - -```bash -# 1. Scrape documentation -skill-seekers scrape --config configs/django.json - -# 2. Export to your RAG stack -skill-seekers package output/django --format langchain # For LangChain -skill-seekers package output/django --format llama-index # For LlamaIndex -skill-seekers package output/django --format chroma --upload # Direct to ChromaDB - -# 3. 
Use in your application -# See examples/: -# - examples/langchain-rag-pipeline/ -# - examples/llama-index-query-engine/ -# - examples/pinecone-upsert/ -``` - -**Integration Hub:** [docs/integrations/RAG_PIPELINES.md](docs/integrations/RAG_PIPELINES.md) - -## πŸ› οΈ AI Coding Assistant Integrations (**NEW - v3.0.0**) - -Transform any framework documentation into persistent expert context for 4+ AI coding assistants. Your IDE's AI now "knows" your frameworks without manual prompting. - -### Cursor IDE - -**Setup:** -```bash -# 1. Generate skill -skill-seekers scrape --config configs/react.json -skill-seekers package output/react/ --target claude - -# 2. Install to Cursor -cp output/react-claude/SKILL.md .cursorrules - -# 3. Restart Cursor -# AI now has React expertise! -``` - -**Benefits:** -- βœ… AI suggests React-specific patterns -- βœ… No manual "use React hooks" prompts needed -- βœ… Consistent team patterns -- βœ… Works for ANY framework - -**Guide:** [docs/integrations/CURSOR.md](docs/integrations/CURSOR.md) -**Example:** [examples/cursor-react-skill/](examples/cursor-react-skill/) - -### Windsurf - -**Setup:** -```bash -# 1. Generate skill -skill-seekers scrape --config configs/django.json -skill-seekers package output/django/ --target claude - -# 2. Install to Windsurf -mkdir -p .windsurf/rules -cp output/django-claude/SKILL.md .windsurf/rules/django.md - -# 3. Restart Windsurf -# AI now knows Django patterns! -``` - -**Benefits:** -- βœ… Flow-based coding with framework knowledge -- βœ… IDE-native AI assistance -- βœ… Persistent context across sessions - -**Guide:** [docs/integrations/WINDSURF.md](docs/integrations/WINDSURF.md) -**Example:** [examples/windsurf-fastapi-context/](examples/windsurf-fastapi-context/) - -### Cline (VS Code Extension) - -**Setup:** -```bash -# 1. Generate skill -skill-seekers scrape --config configs/fastapi.json -skill-seekers package output/fastapi/ --target claude - -# 2. 
Install to Cline -cp output/fastapi-claude/SKILL.md .clinerules - -# 3. Reload VS Code -# Cline now has FastAPI expertise! -``` - -**Benefits:** -- βœ… Agentic code generation in VS Code -- βœ… Cursor Composer equivalent for VS Code -- βœ… System prompts + MCP integration - -**Guide:** [docs/integrations/CLINE.md](docs/integrations/CLINE.md) -**Example:** [examples/cline-django-assistant/](examples/cline-django-assistant/) - -### Continue.dev (Universal IDE) - -**Setup:** -```bash -# 1. Generate skill -skill-seekers scrape --config configs/react.json -skill-seekers package output/react/ --target claude - -# 2. Start context server -cd examples/continue-dev-universal/ -python context_server.py --port 8765 - -# 3. Configure in ~/.continue/config.json -{ - "contextProviders": [ - { - "name": "http", - "params": { - "url": "http://localhost:8765/context", - "title": "React Documentation" - } - } - ] -} - -# 4. Works in ALL IDEs! -# VS Code, JetBrains, Vim, Emacs... -``` - -**Benefits:** -- βœ… IDE-agnostic (works in VS Code, IntelliJ, Vim, Emacs) -- βœ… Custom LLM providers supported -- βœ… HTTP-based context serving -- βœ… Team consistency across mixed IDE environments - -**Guide:** [docs/integrations/CONTINUE_DEV.md](docs/integrations/CONTINUE_DEV.md) -**Example:** [examples/continue-dev-universal/](examples/continue-dev-universal/) - -### Multi-IDE Team Setup - -For teams using different IDEs (VS Code, IntelliJ, Vim): - -```bash -# Use Continue.dev as universal context provider -skill-seekers scrape --config configs/react.json -python context_server.py --host 0.0.0.0 --port 8765 - -# ALL team members configure Continue.dev -# Result: Identical AI suggestions across all IDEs! -``` - -**Integration Hub:** [docs/integrations/INTEGRATIONS.md](docs/integrations/INTEGRATIONS.md) - -## ☁️ Cloud Storage Integration (**NEW - v3.0.0**) - -Upload skills directly to cloud storage for team sharing and CI/CD pipelines. 
- -### Supported Providers - -**AWS S3:** -```bash -# Upload skill -skill-seekers cloud upload --provider s3 --bucket my-skills output/react.zip - -# Download skill -skill-seekers cloud download --provider s3 --bucket my-skills react.zip - -# List skills -skill-seekers cloud list --provider s3 --bucket my-skills - -# Environment variables: -export AWS_ACCESS_KEY_ID=your-key -export AWS_SECRET_ACCESS_KEY=your-secret -export AWS_REGION=us-east-1 -``` - -**Google Cloud Storage:** -```bash -# Upload skill -skill-seekers cloud upload --provider gcs --bucket my-skills output/react.zip - -# Download skill -skill-seekers cloud download --provider gcs --bucket my-skills react.zip - -# List skills -skill-seekers cloud list --provider gcs --bucket my-skills - -# Environment variables: -export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json -``` - -**Azure Blob Storage:** -```bash -# Upload skill -skill-seekers cloud upload --provider azure --container my-skills output/react.zip - -# Download skill -skill-seekers cloud download --provider azure --container my-skills react.zip - -# List skills -skill-seekers cloud list --provider azure --container my-skills - -# Environment variables: -export AZURE_STORAGE_CONNECTION_STRING=your-connection-string -``` - -### CI/CD Integration - -```yaml -# GitHub Actions example -- name: Upload skill to S3 - run: | - skill-seekers scrape --config configs/react.json - skill-seekers package output/react/ - skill-seekers cloud upload --provider s3 --bucket ci-skills output/react.zip - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} -``` - -**Guide:** [docs/integrations/CLOUD_STORAGE.md](docs/integrations/CLOUD_STORAGE.md) - -## πŸ“‹ Common Workflows - -### Adding a New Platform - -1. Create adaptor in `src/skill_seekers/cli/adaptors/{platform}_adaptor.py` -2. Inherit from `BaseAdaptor` -3. Implement `package()`, `upload()`, `enhance()` methods -4. 
Add to factory in `adaptors/__init__.py` -5. Add optional dependency to `pyproject.toml` -6. Add tests in `tests/test_install_multiplatform.py` - -### Adding a New Feature - -1. Implement in appropriate CLI module -2. Add entry point to `pyproject.toml` if needed -3. Add tests in `tests/test_{feature}.py` -4. Run full test suite: `pytest tests/ -v` -5. Update CHANGELOG.md -6. Commit only when all tests pass - -### Debugging Common Issues - -**Import Errors:** -```bash -# Always ensure package is installed first -pip install -e . - -# Verify installation -python -c "import skill_seekers; print(skill_seekers.__version__)" -``` - -**Rate Limit Issues:** -```bash -# Check current GitHub rate limit status -curl -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/rate_limit - -# Configure multiple GitHub profiles -skill-seekers config --github - -# Test your tokens -skill-seekers config --test -``` - -**Enhancement Not Working:** -```bash -# Check if API key is set -echo $ANTHROPIC_API_KEY - -# Try LOCAL mode instead (uses Claude Code Max) -skill-seekers enhance output/react/ --mode LOCAL - -# Monitor enhancement status -skill-seekers enhance-status output/react/ --watch -``` - -**Test Failures:** -```bash -# Run specific failing test with verbose output -pytest tests/test_file.py::test_name -vv - -# Run with print statements visible -pytest tests/test_file.py -s - -# Run with coverage to see what's not tested -pytest tests/test_file.py --cov=src/skill_seekers --cov-report=term-missing - -# Run only unit tests (skip slow integration tests) -pytest tests/ -v -m "not slow and not integration" -``` - -**Config Issues:** -```bash -# Validate config structure -skill-seekers-validate configs/myconfig.json - -# Show current configuration -skill-seekers config --show - -# Estimate pages before scraping -skill-seekers estimate configs/myconfig.json -``` - -## 🎯 Where to Make Changes - -This section helps you quickly locate the right files when implementing common 
changes. - -### Adding a New CLI Command - -**Files to modify:** -1. **Create command file:** `src/skill_seekers/cli/my_command.py` - ```python - def main(): - """Entry point for my-command.""" - # Implementation - ``` - -2. **Add entry point:** `pyproject.toml` - ```toml - [project.scripts] - skill-seekers-my-command = "skill_seekers.cli.my_command:main" - ``` - -3. **Update unified CLI:** `src/skill_seekers/cli/main.py` - - Add subcommand handler to dispatcher - -4. **Add tests:** `tests/test_my_command.py` - - Test main functionality - - Test CLI argument parsing - - Test error cases - -5. **Update docs:** `CHANGELOG.md` + `README.md` (if user-facing) - -### Adding a New Platform Adaptor - -**Files to modify:** -1. **Create adaptor:** `src/skill_seekers/cli/adaptors/my_platform_adaptor.py` - ```python - from .base import BaseAdaptor - - class MyPlatformAdaptor(BaseAdaptor): - def package(self, skill_dir, output_path, **kwargs): - # Platform-specific packaging - pass - - def upload(self, package_path, api_key=None, **kwargs): - # Platform-specific upload (optional for some platforms) - pass - - def export(self, skill_dir, format, **kwargs): - # For RAG/vector DB adaptors: export to specific format - pass - ``` - -2. **Register in factory:** `src/skill_seekers/cli/adaptors/__init__.py` - ```python - def get_adaptor(target=None, format=None): - # For LLM platforms (--target flag) - target_adaptors = { - 'claude': ClaudeAdaptor, - 'gemini': GeminiAdaptor, - 'openai': OpenAIAdaptor, - 'markdown': MarkdownAdaptor, - 'myplatform': MyPlatformAdaptor, # ADD THIS - } - - # For RAG/vector DBs (--format flag) - format_adaptors = { - 'langchain': LangChainAdaptor, - 'llama-index': LlamaIndexAdaptor, - 'chroma': ChromaAdaptor, - # ... etc - } - ``` - -3. **Add optional dependency:** `pyproject.toml` - ```toml - [project.optional-dependencies] - myplatform = ["myplatform-sdk>=1.0.0"] - ``` - -4. 
**Add tests:** `tests/test_adaptors/test_my_platform_adaptor.py` - - Test export format - - Test upload (if applicable) - - Test with real data - -5. **Update documentation:** - - README.md - Platform comparison table - - docs/integrations/MY_PLATFORM.md - Integration guide - - examples/my-platform-example/ - Working example - -### Adding a New Config Preset - -**Files to modify:** -1. **Create config:** `configs/my_framework.json` - ```json - { - "name": "my_framework", - "base_url": "https://docs.myframework.com/", - "selectors": {...}, - "categories": {...} - } - ``` - -2. **Test locally:** - ```bash - # Estimate first - skill-seekers estimate configs/my_framework.json - - # Test scrape (small sample) - skill-seekers scrape --config configs/my_framework.json --max-pages 50 - ``` - -3. **Add to README:** Update presets table in `README.md` - -4. **Submit to website:** (Optional) Submit to SkillSeekersWeb.com - -### Modifying Core Scraping Logic - -**Key files by feature:** - -| Feature | File | Size | Notes | -|---------|------|------|-------| -| Doc scraping | `src/skill_seekers/cli/doc_scraper.py` | ~90KB | Main scraper, BFS traversal | -| GitHub scraping | `src/skill_seekers/cli/github_scraper.py` | ~56KB | Repo analysis + metadata | -| GitHub API | `src/skill_seekers/cli/github_fetcher.py` | ~17KB | Rate limit handling | -| PDF extraction | `src/skill_seekers/cli/pdf_scraper.py` | Medium | PyMuPDF + OCR | -| EPUB extraction | `src/skill_seekers/cli/epub_scraper.py` | Medium | ebooklib + BeautifulSoup | -| Code analysis | `src/skill_seekers/cli/code_analyzer.py` | ~65KB | Multi-language AST parsing | -| Pattern detection | `src/skill_seekers/cli/pattern_recognizer.py` | Medium | C3.1 - 10 GoF patterns | -| Test extraction | `src/skill_seekers/cli/test_example_extractor.py` | Medium | C3.2 - 5 categories | -| Guide generation | `src/skill_seekers/cli/how_to_guide_builder.py` | ~45KB | C3.3 - AI-enhanced guides | -| Config extraction | 
`src/skill_seekers/cli/config_extractor.py` | ~32KB | C3.4 - 9 formats | -| Router generation | `src/skill_seekers/cli/generate_router.py` | ~43KB | C3.5 - Architecture docs | -| Signal flow | `src/skill_seekers/cli/signal_flow_analyzer.py` | Medium | C3.10 - Godot-specific | - -**Always add tests when modifying core logic!** - -### Modifying the Unified Create Command - -**The create command uses a modular argument system:** - -**Files involved:** -1. **Parser:** `src/skill_seekers/cli/parsers/create_parser.py` - - Defines help text and formatter - - Registers help mode flags (`--help-web`, `--help-github`, etc.) - - Uses custom `NoWrapFormatter` for better help display - -2. **Arguments:** `src/skill_seekers/cli/arguments/create.py` - - Three tiers of arguments: - - `UNIVERSAL_ARGUMENTS` (13 flags) - Work for all sources - - Source-specific dicts (`WEB_ARGUMENTS`, `GITHUB_ARGUMENTS`, `EPUB_ARGUMENTS`, etc.) - - `ADVANCED_ARGUMENTS` - Rare/advanced options - - `add_create_arguments(parser, mode)` - Multi-mode argument addition - -3. **Source Detection:** `src/skill_seekers/cli/source_detector.py` (if implemented) - - Auto-detect source type from input - - Pattern matching (URLs, GitHub repos, file extensions) - -4. **Main Logic:** `src/skill_seekers/cli/create_command.py` (if implemented) - - Route to appropriate scraper based on detected type - - Argument validation and compatibility checking - -**When adding new arguments:** -- Universal args β†’ `UNIVERSAL_ARGUMENTS` in `arguments/create.py` -- Source-specific β†’ Appropriate dict (`WEB_ARGUMENTS`, etc.) -- Always update help text and add tests - -**Example: Adding a new universal flag:** -```python -# In arguments/create.py -UNIVERSAL_ARGUMENTS = { - # ... existing args ... - "my_flag": { - "flags": ("--my-flag", "-m"), - "kwargs": { - "action": "store_true", - "help": "Description of my flag", - }, - }, -} -``` - -### Adding MCP Tools - -**Files to modify:** -1. 
**Add tool function:** `src/skill_seekers/mcp/tools/{category}_tools.py` - -2. **Register tool:** `src/skill_seekers/mcp/server.py` - ```python - @mcp.tool() - def my_new_tool(param: str) -> str: - """Tool description.""" - # Implementation - ``` - -3. **Add tests:** `tests/test_mcp_fastmcp.py` - -4. **Update count:** README.md (currently 18 tools) - -## πŸ“ Key Files Quick Reference - -| Task | File(s) | What to Modify | -|------|---------|----------------| -| Add new CLI command | `src/skill_seekers/cli/my_cmd.py`
`pyproject.toml` | Create `main()` function
Add entry point | -| Add platform adaptor | `src/skill_seekers/cli/adaptors/my_platform.py`
`adaptors/__init__.py` | Inherit `BaseAdaptor`
Register in factory | -| Fix scraping logic | `src/skill_seekers/cli/doc_scraper.py` | `scrape_all()`, `extract_content()` | -| Add MCP tool | `src/skill_seekers/mcp/server_fastmcp.py` | Add `@mcp.tool()` function | -| Fix tests | `tests/test_{feature}.py` | Add/modify test functions | -| Add config preset | `configs/{framework}.json` | Create JSON config | -| Update CI | `.github/workflows/tests.yml` | Modify workflow steps | - -## πŸ“š Key Code Locations - -**Documentation Scraper** (`src/skill_seekers/cli/doc_scraper.py`): -- `FALLBACK_MAIN_SELECTORS` - Shared fallback CSS selectors for finding main content (no `body`) -- `_find_main_content()` - Centralized selector fallback: config selector β†’ fallback list -- `is_valid_url()` - URL validation -- `extract_content()` - Content extraction (links extracted from full page before early return) -- `detect_language()` - Code language detection -- `extract_patterns()` - Pattern extraction -- `smart_categorize()` - Smart categorization -- `infer_categories()` - Category inference -- `generate_quick_reference()` - Quick reference generation -- `create_enhanced_skill_md()` - SKILL.md generation -- `scrape_all()` - Main scraping loop (dry-run extracts links from full page) -- `main()` - Entry point - -**Codebase Analysis** (`src/skill_seekers/cli/`): -- `codebase_scraper.py` - Main CLI for local codebase analysis -- `code_analyzer.py` - Multi-language AST parsing (9 languages) -- `api_reference_builder.py` - API documentation generation -- `dependency_analyzer.py` - NetworkX-based dependency graphs -- `pattern_recognizer.py` - C3.1 design pattern detection -- `test_example_extractor.py` - C3.2 test example extraction -- `how_to_guide_builder.py` - C3.3 guide generation -- `config_extractor.py` - C3.4 configuration extraction -- `generate_router.py` - C3.5 router skill generation -- `signal_flow_analyzer.py` - C3.10 signal flow analysis (Godot projects) -- `unified_codebase_analyzer.py` - Three-stream GitHub+local 
analyzer - -**AI Enhancement** (`src/skill_seekers/cli/`): -- `enhance_skill_local.py` - LOCAL mode enhancement (4 execution modes) -- `enhance_skill.py` - API mode enhancement -- `enhance_status.py` - Status monitoring for background processes -- `ai_enhancer.py` - Shared AI enhancement logic -- `guide_enhancer.py` - C3.3 guide AI enhancement -- `config_enhancer.py` - C3.4 config AI enhancement - -**Platform Adaptors** (`src/skill_seekers/cli/adaptors/`): -- `__init__.py` - Factory function -- `base_adaptor.py` - Abstract base class -- `claude_adaptor.py` - Claude AI implementation -- `gemini_adaptor.py` - Google Gemini implementation -- `openai_adaptor.py` - OpenAI ChatGPT implementation -- `markdown_adaptor.py` - Generic Markdown implementation - -**MCP Server** (`src/skill_seekers/mcp/`): -- `server.py` - FastMCP-based server -- `tools/` - 18 MCP tool implementations - -**Configuration & Rate Limit Management** (NEW: v2.7.0 - `src/skill_seekers/cli/`): -- `config_manager.py` - Multi-token configuration system (~490 lines) - - `ConfigManager` class - Singleton pattern for global config access - - `add_github_profile()` - Add GitHub profile with token and strategy - - `get_github_token()` - Smart fallback chain (CLI β†’ Env β†’ Config β†’ Prompt) - - `get_next_profile()` - Profile switching for rate limit handling - - `save_progress()` / `load_progress()` - Job resumption support - - `cleanup_old_progress()` - Auto-cleanup of old jobs (7 days default) -- `config_command.py` - Interactive configuration wizard (~400 lines) - - `main_menu()` - 7-option main menu with navigation - - `github_token_menu()` - GitHub profile management - - `add_github_profile()` - Guided token setup with browser integration - - `api_keys_menu()` - API key configuration for Claude/Gemini/OpenAI - - `test_connections()` - Connection testing for tokens and API keys -- `rate_limit_handler.py` - Smart rate limit detection and handling (~450 lines) - - `RateLimitHandler` class - Strategy 
pattern for rate limit handling - - `check_upfront()` - Upfront rate limit check before starting - - `check_response()` - Real-time detection from API responses - - `handle_rate_limit()` - Execute strategy (prompt/wait/switch/fail) - - `try_switch_profile()` - Automatic profile switching - - `wait_for_reset()` - Countdown timer with live progress - - `show_countdown_timer()` - Live terminal countdown display -- `resume_command.py` - Resume interrupted scraping jobs (~150 lines) - - `list_resumable_jobs()` - Display all jobs with progress details - - `resume_job()` - Resume from saved checkpoint - - `clean_old_jobs()` - Cleanup old progress files - -**GitHub Integration** (Modified for v2.7.0 - `src/skill_seekers/cli/`): -- `github_fetcher.py` - Integrated rate limit handler - - Constructor now accepts `interactive` and `profile_name` parameters - - `fetch()` - Added upfront rate limit check - - All API calls check responses for rate limits - - Raises `RateLimitError` when rate limit cannot be handled -- `github_scraper.py` - Added CLI flags - - `--non-interactive` flag for CI/CD mode (fail fast) - - `--profile` flag to select GitHub profile from config - - Config supports `interactive` and `github_profile` keys - -**RAG & Vector Database Adaptors** (NEW: v3.0.0 - `src/skill_seekers/cli/adaptors/`): -- `langchain.py` - LangChain Documents export (~250 lines) - - Exports to LangChain Document format - - Preserves metadata (source, category, type, url) - - Smart chunking with overlap -- `llama_index.py` - LlamaIndex TextNodes export (~280 lines) - - Exports to TextNode format with unique IDs - - Relationship mapping between documents - - Metadata preservation -- `haystack.py` - Haystack Documents export (~230 lines) - - Pipeline-ready document format - - Supports embeddings and filters -- `chroma.py` - ChromaDB integration (~350 lines) - - Direct collection creation - - Batch upsert with embeddings - - Query interface -- `weaviate.py` - Weaviate vector search (~320 
lines) - - Schema creation with auto-detection - - Batch import with error handling -- `faiss_helpers.py` - FAISS index generation (~280 lines) - - Index building with metadata - - Search utilities -- `qdrant.py` - Qdrant vector database (~300 lines) - - Collection management - - Payload indexing -- `streaming_adaptor.py` - Streaming data ingest (~200 lines) - - Real-time data processing - - Incremental updates - -**Cloud Storage & Infrastructure** (NEW: v3.0.0 - `src/skill_seekers/cli/`): -- `cloud_storage_cli.py` - S3/GCS/Azure upload/download (~450 lines) - - Multi-provider abstraction - - Parallel uploads for large files - - Retry logic with exponential backoff -- `embedding_pipeline.py` - Embedding generation for vectors (~320 lines) - - Sentence-transformers integration - - Batch processing - - Multiple embedding models -- `sync_cli.py` - Continuous sync & monitoring (~380 lines) - - File watching for changes - - Automatic re-scraping - - Smart diff detection -- `incremental_updater.py` - Smart incremental updates (~350 lines) - - Change detection algorithms - - Partial skill updates - - Version tracking -- `streaming_ingest.py` - Real-time data streaming (~290 lines) - - Stream processing pipelines - - WebSocket support -- `benchmark_cli.py` - Performance benchmarking (~280 lines) - - Scraping performance tests - - Comparison reports - - CI/CD integration -- `quality_metrics.py` - Quality analysis & reporting (~340 lines) - - Completeness scoring - - Link checking - - Content quality metrics -- `multilang_support.py` - Internationalization support (~260 lines) - - Language detection - - Translation integration - - Multi-locale skills -- `setup_wizard.py` - Interactive setup wizard (~220 lines) - - Configuration management - - Profile creation - - First-time setup - -**Video Scraper** (`src/skill_seekers/cli/`): -- `video_scraper.py` - Main video scraping pipeline CLI -- `video_setup.py` - GPU auto-detection, PyTorch installation, visual dependency setup 
(~835 lines) - - Detects CUDA/ROCm/MPS/CPU and installs matching PyTorch build - - Installs `easyocr` and other visual processing deps at runtime via `--setup` - - Run `skill-seekers video --setup` before first use - -## 🎯 Project-Specific Best Practices - -1. **Prefer the unified `create` command** - Use `skill-seekers create ` over legacy commands for consistency -2. **Always use platform adaptors** - Never hardcode platform-specific logic -3. **Test all platforms** - Changes must work for all 16 platforms (was 4 in v2.x) -4. **Maintain backward compatibility** - Legacy commands (scrape, github, analyze) must still work -5. **Document API changes** - Update CHANGELOG.md for every release -6. **Keep dependencies optional** - Platform-specific deps are optional (RAG, cloud, etc.) -7. **Use src/ layout** - Proper package structure with `pip install -e .` -8. **Run tests before commits** - Per user instructions, never skip tests (1,765+ tests must pass) -9. **RAG-first mindset** - v3.0.0 is the universal preprocessor for AI systems -10. **Export format clarity** - Use `--format` for RAG/vector DBs, `--target` for LLM platforms -11. **Test with real integrations** - Verify exports work with actual LangChain, ChromaDB, etc. -12. 
**Progressive disclosure** - When adding flags, categorize as universal/source-specific/advanced - -## πŸ› Debugging Tips - -### Enable Verbose Logging - -```bash -# Set environment variable for debug output -export SKILL_SEEKERS_DEBUG=1 -skill-seekers scrape --config configs/react.json -``` - -### Test Single Function/Module - -Run Python modules directly for debugging: -```bash -# Run modules with --help to see options -python -m skill_seekers.cli.doc_scraper --help -python -m skill_seekers.cli.github_scraper --repo facebook/react --dry-run -python -m skill_seekers.cli.package_skill --help - -# Test MCP server directly -python -m skill_seekers.mcp.server_fastmcp -``` - -### Use pytest with Debugging - -```bash -# Drop into debugger on failure -pytest tests/test_scraper_features.py --pdb - -# Show print statements (normally suppressed) -pytest tests/test_scraper_features.py -s - -# Verbose test output (shows full diff, more details) -pytest tests/test_scraper_features.py -vv - -# Run only failed tests from last run -pytest tests/ --lf - -# Run until first failure (stop immediately) -pytest tests/ -x - -# Show local variables on failure -pytest tests/ -l -``` - -### Debug Specific Test - -```bash -# Run single test with full output -pytest tests/test_scraper_features.py::test_detect_language -vv -s - -# With debugger -pytest tests/test_scraper_features.py::test_detect_language --pdb -``` - -### Check Package Installation - -```bash -# Verify package is installed -pip list | grep skill-seekers - -# Check installation mode (should show editable location) -pip show skill-seekers - -# Verify imports work -python -c "import skill_seekers; print(skill_seekers.__version__)" - -# Check CLI entry points -which skill-seekers -skill-seekers --version -``` - -### Common Error Messages & Solutions - -**"ModuleNotFoundError: No module named 'skill_seekers'"** -β†’ **Solution:** `pip install -e .` -β†’ **Why:** src/ layout requires package installation - -**"403 Forbidden" from 
GitHub API** -β†’ **Solution:** Rate limit hit, set `GITHUB_TOKEN` or use `skill-seekers config --github` -β†’ **Check limit:** `curl -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/rate_limit` - -**"SKILL.md enhancement failed"** -β†’ **Solution:** Check if `ANTHROPIC_API_KEY` is set, or use `--mode LOCAL` -β†’ **Monitor:** `skill-seekers enhance-status output/react/ --watch` - -**"No such file or directory: 'configs/myconfig.json'"** -β†’ **Solution:** Config path resolution order: - 1. Exact path as provided - 2. `./configs/` (current directory) - 3. `~/.config/skill-seekers/configs/` (user config) - 4. SkillSeekersWeb.com API (presets) - -**"pytest: command not found"** -β†’ **Solution:** Install dev dependencies -```bash -pip install pytest pytest-asyncio pytest-cov coverage -# Or: pip install -e ".[dev]" (if available) -``` - -**"ruff: command not found"** -β†’ **Solution:** Install ruff -```bash -pip install ruff -# Or use uvx: uvx ruff check src/ -``` - -### Debugging Scraping Issues - -**No content extracted?** -```python -# Test selectors in Python -from bs4 import BeautifulSoup -import requests - -url = "https://docs.example.com/page" -soup = BeautifulSoup(requests.get(url).content, 'html.parser') - -# Try different selectors -print(soup.select_one('article')) -print(soup.select_one('main')) -print(soup.select_one('div[role="main"]')) -print(soup.select_one('.documentation-content')) -``` - -**Categories not working?** -- Check `categories` in config has correct keywords -- Run with `--dry-run` to see categorization without scraping -- Enable debug mode: `export SKILL_SEEKERS_DEBUG=1` - -### Profiling Performance - -```bash -# Profile scraping performance -python -m cProfile -o profile.stats -m skill_seekers.cli.doc_scraper --config configs/react.json --max-pages 10 - -# Analyze profile -python -m pstats profile.stats -# In pstats shell: -# > sort cumtime -# > stats 20 -``` - -## πŸ“– Additional Documentation - -**Official Website:** -- 
[SkillSeekersWeb.com](https://skillseekersweb.com/) - Browse 24+ preset configs, share configs, complete documentation - -**For Users:** -- [README.md](README.md) - Complete user documentation -- [BULLETPROOF_QUICKSTART.md](BULLETPROOF_QUICKSTART.md) - Beginner guide -- [TROUBLESHOOTING.md](TROUBLESHOOTING.md) - Common issues - -**For Developers:** -- [CHANGELOG.md](CHANGELOG.md) - Release history -- [ROADMAP.md](ROADMAP.md) - 136 tasks across 10 categories -- [docs/UNIFIED_SCRAPING.md](docs/UNIFIED_SCRAPING.md) - Multi-source scraping -- [docs/MCP_SETUP.md](docs/MCP_SETUP.md) - MCP server setup -- [docs/ENHANCEMENT_MODES.md](docs/ENHANCEMENT_MODES.md) - AI enhancement modes -- [docs/PATTERN_DETECTION.md](docs/PATTERN_DETECTION.md) - C3.1 pattern detection -- [docs/THREE_STREAM_STATUS_REPORT.md](docs/THREE_STREAM_STATUS_REPORT.md) - Three-stream architecture -- [docs/MULTI_LLM_SUPPORT.md](docs/MULTI_LLM_SUPPORT.md) - Multi-platform support - -## πŸŽ“ Understanding the Codebase - -### Why src/ Layout? - -Modern Python best practice (PEP 517/518): -- Prevents accidental imports from repo root -- Forces proper package installation -- Better isolation between package and tests -- Required: `pip install -e .` before running tests - -### Why Platform Adaptors? - -Strategy pattern benefits: -- Single codebase supports 4 platforms -- Platform-specific optimizations (format, APIs, models) -- Easy to add new platforms (implement BaseAdaptor) -- Clean separation of concerns -- Testable in isolation - -### Why Git-style CLI? 
- -User experience benefits: -- Familiar to developers (like `git`) -- Single entry point: `skill-seekers` -- Backward compatible: individual tools still work -- Cleaner than multiple separate commands -- Easier to document and teach - -### Three-Stream GitHub Architecture - -The `unified_codebase_analyzer.py` splits GitHub repositories into three independent streams: - -**Stream 1: Code Analysis** (C3.x features) -- Deep AST parsing (9 languages) -- Design pattern detection (C3.1) -- Test example extraction (C3.2) -- How-to guide generation (C3.3) -- Configuration extraction (C3.4) -- Architectural overview (C3.5) -- API reference + dependency graphs - -**Stream 2: Documentation** -- README, CONTRIBUTING, LICENSE -- docs/ directory markdown files -- Wiki pages (if available) -- CHANGELOG and version history - -**Stream 3: Community Insights** -- GitHub metadata (stars, forks, watchers) -- Issue analysis (top problems and solutions) -- PR trends and contributor stats -- Release history -- Label-based topic detection - -**Key Benefits:** -- Unified interface for GitHub URLs and local paths -- Analysis depth control: 'basic' (1-2 min) or 'c3x' (20-60 min) -- Enhanced router generation with GitHub context -- Smart keyword extraction weighted by GitHub labels (2x weight) -- 81 E2E tests passing (0.44 seconds) - -## πŸ”§ Helper Scripts - -The `scripts/` directory contains utility scripts: - -```bash -# Bootstrap skill generation - self-hosting skill-seekers as a Claude skill -./scripts/bootstrap_skill.sh - -# Start MCP server for HTTP transport -./scripts/start_mcp_server.sh - -# Script templates are in scripts/skill_header.md -``` - -**Bootstrap Skill Workflow:** -1. Analyzes skill-seekers codebase itself (dogfooding) -2. Combines handcrafted header with auto-generated analysis -3. Validates SKILL.md structure -4. 
Outputs ready-to-use skill for Claude Code - -## πŸ” Performance Characteristics - -| Operation | Time | Notes | -|-----------|------|-------| -| Scraping (sync) | 15-45 min | First time, thread-based | -| Scraping (async) | 5-15 min | 2-3x faster with `--async` | -| Building | 1-3 min | Fast rebuild from cache | -| Re-building | <1 min | With `--skip-scrape` | -| Enhancement (LOCAL) | 30-60 sec | Uses Claude Code Max | -| Enhancement (API) | 20-40 sec | Requires API key | -| Packaging | 5-10 sec | Final .zip creation | - -## πŸŽ‰ Recent Achievements - -**v3.1.4 (Unreleased) - "Selector Fallback & Dry-Run Fix":** -- πŸ› **Issue #300: `create https://reactflow.dev/` only found 1 page** β€” Now finds 20+ pages -- πŸ”§ **Centralized selector fallback** β€” `FALLBACK_MAIN_SELECTORS` constant + `_find_main_content()` helper replace 3 duplicated fallback loops -- πŸ”— **Link extraction before early return** β€” `extract_content()` now discovers links even when no content selector matches -- πŸ” **Dry-run full-page link discovery** β€” Both sync and async dry-run paths extract links from the full page (was main-content-only or missing entirely) -- πŸ›£οΈ **Smart `create --config` routing** β€” Peeks at JSON to route `base_url` configs to doc_scraper and `sources` configs to unified_scraper -- 🧹 **Removed `body` fallback** β€” `body` matched everything, hiding real selector failures -- βœ… **Pre-existing test fixes** β€” `test_auto_fetch_enabled` (react.json exists locally) and `test_mcp_validate_legacy_config` (react.json is now unified format) - -**v3.1.3 (Released) - "Unified Argument Interface":** -- πŸ”§ **Unified Scraper Arguments** - All scrapers (scrape, github, analyze, pdf) now share a common argument contract via `add_all_standard_arguments(parser)` in `arguments/common.py` -- πŸ› **Fix `create` Argument Forwarding** - `create --dry-run`, `create owner/repo --dry-run`, `create ./path --dry-run` all work now (previously crashed) -- πŸ—οΈ **Argument 
Deduplication** - Removed duplicated arg definitions from github.py, scrape.py, analyze.py, pdf.py; all import shared args -- βž• **New Flags** - GitHub and PDF scrapers gain `--dry-run`, `--verbose`, `--quiet`; analyze gains `--name`, `--description`, `--quiet` -- πŸ”€ **Route-Specific Forwarding** - `create` command's `_add_common_args()` now only forwards universal flags; route-specific flags moved to their respective methods - -**v3.1.0 - "Unified CLI & Developer Experience":** -- 🎯 **Unified `create` Command** - Auto-detects source type (web/GitHub/local/PDF/config) -- πŸ“‹ **Progressive Disclosure Help** - Default shows 13 universal flags, detailed help available per source -- ⚑ **-p Shortcut** - Quick preset selection (`-p quick|standard|comprehensive`) -- πŸ”§ **Enhancement Flag Consolidation** - `--enhance-level` (0-3) replaces 3 separate flags -- 🎨 **Smart Source Detection** - No need to specify whether input is URL, repo, or directory -- πŸ”„ **Enhancement Workflow Presets** - YAML-based presets; `skill-seekers workflows list/show/copy/add/remove/validate`; bundled presets: `default`, `minimal`, `security-focus`, `architecture-comprehensive`, `api-documentation` -- πŸ”€ **Multiple Workflows from CLI** - `--enhance-workflow wf-a --enhance-workflow wf-b` chains presets in a single command; `workflows copy/add/remove` all accept multiple names/files at once -- πŸ› **Bug Fix** - `create` command now correctly forwards multiple `--enhance-workflow` flags to sub-scrapers -- βœ… **2,121 Tests Passing** - All CLI refactor + workflow preset work verified -- πŸ“š **Improved Documentation** - CLAUDE.md, README, QUICK_REFERENCE updated with workflow preset details - -**v3.1.0 CI Stability (February 20, 2026):** -- πŸ”§ **Dependency Alignment** - Fixed MCP version mismatch between requirements.txt (was 1.18.0) and pyproject.toml (>=1.25) -- πŸ“¦ **PyYAML Core Dependency** - Added PyYAML>=6.0 to core dependencies (required by workflow_tools.py module-level import) -- 
⚑ **Benchmark Stability** - Relaxed timing-sensitive test thresholds for CI environment variability -- βœ… **2,121 Tests Passing** - All CI matrix jobs passing (ubuntu 3.10/3.11/3.12, macos 3.11/3.12) - -**v3.0.0 (February 10, 2026) - "Universal Intelligence Platform":** -- πŸš€ **16 Platform Adaptors** - RAG frameworks (LangChain, LlamaIndex, Haystack), vector DBs (Chroma, FAISS, Weaviate, Qdrant), AI coding assistants (Cursor, Windsurf, Cline, Continue.dev), LLM platforms (Claude, Gemini, OpenAI) -- πŸ› οΈ **26 MCP Tools** (up from 18) - Complete automation for any AI system -- βœ… **1,852 Tests Passing** (up from 700+) - Production-grade reliability -- ☁️ **Cloud Storage** - S3, GCS, Azure Blob Storage integration -- 🎯 **AI Coding Assistants** - Persistent context for Cursor, Windsurf, Cline, Continue.dev -- πŸ“Š **Quality Metrics** - Automated completeness scoring and content analysis -- 🌐 **Multilingual Support** - Language detection and translation -- πŸ”„ **Streaming Ingest** - Real-time data processing pipelines -- πŸ“ˆ **Benchmarking Tools** - Performance comparison and CI/CD integration -- πŸ”§ **Setup Wizard** - Interactive first-time configuration -- πŸ“¦ **12 Example Projects** - Complete working examples for every integration -- πŸ“š **18 Integration Guides** - Comprehensive documentation for all platforms - -**v2.9.0 (February 3, 2026):** -- **C3.10: Signal Flow Analysis** - Complete signal flow analysis for Godot projects -- Comprehensive Godot 4.x support (GDScript, .tscn, .tres, .gdshader files) -- GDScript test extraction (GUT, gdUnit4, WAT frameworks) -- Signal pattern detection (EventBus, Observer, Event Chains) -- Signal-based how-to guides generation - -**v2.8.0 (February 1, 2026):** -- C3.9: Project Documentation Extraction -- Granular AI enhancement control with `--enhance-level` (0-3) - -**v2.7.1 (January 18, 2026 - Hotfix):** -- 🚨 **Critical Bug Fix:** Config download 404 errors resolved -- Fixed manual URL construction bug - now uses 
`download_url` from API response -- All 15 source tools tests + 8 fetch_config tests passing - -**v2.7.0 (January 18, 2026):** -- πŸ” **Smart Rate Limit Management** - Multi-token GitHub configuration system -- πŸ§™ **Interactive Configuration Wizard** - Beautiful terminal UI (`skill-seekers config`) -- 🚦 **Intelligent Rate Limit Handler** - Four strategies (prompt/wait/switch/fail) -- πŸ“₯ **Resume Capability** - Continue interrupted jobs with progress tracking -- πŸ”§ **CI/CD Support** - Non-interactive mode for automation -- 🎯 **Bootstrap Skill** - Self-hosting skill-seekers as Claude Code skill - -**v2.6.0 (January 14, 2026):** -- **C3.x Codebase Analysis Suite Complete** (C3.1-C3.8) -- Multi-platform support with platform adaptor architecture (4 platforms) -- 18 MCP tools fully functional -- 700+ tests passing -- Unified multi-source scraping maturity - -**C3.x Series (Complete - Code Analysis Features):** -- **C3.1:** Design pattern detection (10 GoF patterns, 9 languages, 87% precision) -- **C3.2:** Test example extraction (5 categories, AST-based for Python) -- **C3.3:** How-to guide generation with AI enhancement (5 improvements) -- **C3.4:** Configuration pattern extraction (env vars, config files, CLI args) -- **C3.5:** Architectural overview & router skill generation -- **C3.6:** AI enhancement for patterns and test examples (Claude API integration) -- **C3.7:** Architectural pattern detection (8 patterns, framework-aware) -- **C3.8:** Standalone codebase scraper (300+ line SKILL.md from code alone) -- **C3.9:** Project documentation extraction (markdown categorization, AI enhancement) -- **C3.10:** Signal flow analysis (Godot event-driven architecture, pattern detection) - -**v2.5.2:** -- UX Improvement: Analysis features now default ON with --skip-* flags (BREAKING) -- Router quality improvements: 6.5/10 β†’ 8.5/10 (+31%) -- All 107 codebase analysis tests passing - -**v2.5.0:** -- Multi-platform support (Claude, Gemini, OpenAI, Markdown) -- Platform 
adaptor architecture -- 18 MCP tools (up from 9) -- Complete feature parity across platforms - -**v2.1.0:** -- Unified multi-source scraping (docs + GitHub + PDF) -- Conflict detection between sources -- 427 tests passing - -**v1.0.0:** -- Production release with MCP integration -- Documentation scraping with smart categorization -- 12 preset configurations +## Adding New Features + +### New platform adaptor +1. Create `src/skill_seekers/cli/adaptors/{platform}_adaptor.py` inheriting `BaseAdaptor` +2. Register in `adaptors/__init__.py` factory +3. Add optional dep to `pyproject.toml` +4. Add tests in `tests/` + +### New source type scraper +1. Create `src/skill_seekers/cli/{type}_scraper.py` with `main()` +2. Add to `COMMAND_MODULES` in `cli/main.py` +3. Add entry point in `pyproject.toml` `[project.scripts]` +4. Add auto-detection in `source_detector.py` +5. Add optional dep if needed +6. Add tests + +### New CLI argument +- Universal: `UNIVERSAL_ARGUMENTS` in `arguments/create.py` +- Source-specific: appropriate dict (`WEB_ARGUMENTS`, `GITHUB_ARGUMENTS`, etc.) 
+- Shared across scrapers: `add_all_standard_arguments()` in `arguments/common.py` diff --git a/README.md b/README.md index 14299c5..d2640e1 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ Instead of spending days on manual preprocessing, Skill Seekers: - βœ… **Backward Compatible** - Legacy single-source configs still work ### πŸ€– Multi-LLM Platform Support -- βœ… **4 LLM Platforms** - Claude AI, Google Gemini, OpenAI ChatGPT, Generic Markdown +- βœ… **5 LLM Platforms** - Claude AI, Google Gemini, OpenAI ChatGPT, MiniMax AI, Generic Markdown - βœ… **Universal Scraping** - Same documentation works for all platforms - βœ… **Platform-Specific Packaging** - Optimized formats for each LLM - βœ… **One-Command Export** - `--target` flag selects platform @@ -260,6 +260,7 @@ Instead of spending days on manual preprocessing, Skill Seekers: | **Claude AI** | ZIP + YAML | βœ… Auto | βœ… Yes | ANTHROPIC_API_KEY | ANTHROPIC_BASE_URL | | **Google Gemini** | tar.gz | βœ… Auto | βœ… Yes | GOOGLE_API_KEY | - | | **OpenAI ChatGPT** | ZIP + Vector Store | βœ… Auto | βœ… Yes | OPENAI_API_KEY | - | +| **MiniMax AI** | ZIP + Knowledge Files | βœ… Auto | βœ… Yes | MINIMAX_API_KEY | - | | **Generic Markdown** | ZIP | ❌ Manual | ❌ No | - | - | ```bash @@ -277,6 +278,11 @@ pip install skill-seekers[openai] skill-seekers package output/react/ --target openai skill-seekers upload react-openai.zip --target openai +# MiniMax AI +pip install skill-seekers[minimax] +skill-seekers package output/react/ --target minimax +skill-seekers upload react-minimax.zip --target minimax + # Generic Markdown (universal export) skill-seekers package output/react/ --target markdown # Use the markdown files directly in any LLM @@ -312,6 +318,9 @@ pip install skill-seekers[gemini] # Install with OpenAI support pip install skill-seekers[openai] +# Install with MiniMax support +pip install skill-seekers[minimax] + # Install with all LLM platforms pip install skill-seekers[all-llms] ``` @@ -698,21 +707,21 
@@ skill-seekers install --config react --dry-run ## πŸ“Š Feature Matrix -Skill Seekers supports **4 LLM platforms**, **17 source types**, and full feature parity across all targets. +Skill Seekers supports **5 LLM platforms**, **17 source types**, and full feature parity across all targets. -**Platforms:** Claude AI, Google Gemini, OpenAI ChatGPT, Generic Markdown +**Platforms:** Claude AI, Google Gemini, OpenAI ChatGPT, MiniMax AI, Generic Markdown **Source Types:** Documentation websites, GitHub repos, PDFs, Word (.docx), EPUB, Video, Local codebases, Jupyter Notebooks, Local HTML, OpenAPI/Swagger, AsciiDoc, PowerPoint (.pptx), RSS/Atom feeds, Man pages, Confluence wikis, Notion pages, Slack/Discord chat exports See [Complete Feature Matrix](docs/FEATURE_MATRIX.md) for detailed platform and feature support. ### Quick Platform Comparison -| Feature | Claude | Gemini | OpenAI | Markdown | -|---------|--------|--------|--------|----------| -| Format | ZIP + YAML | tar.gz | ZIP + Vector | ZIP | -| Upload | βœ… API | βœ… API | βœ… API | ❌ Manual | -| Enhancement | βœ… Sonnet 4 | βœ… 2.0 Flash | βœ… GPT-4o | ❌ None | -| All Skill Modes | βœ… | βœ… | βœ… | βœ… | +| Feature | Claude | Gemini | OpenAI | MiniMax | Markdown | +|---------|--------|--------|--------|--------|----------| +| Format | ZIP + YAML | tar.gz | ZIP + Vector | ZIP + Knowledge | ZIP | +| Upload | βœ… API | βœ… API | βœ… API | βœ… API | ❌ Manual | +| Enhancement | βœ… Sonnet 4 | βœ… 2.0 Flash | βœ… GPT-4o | βœ… M2.7 | ❌ None | +| All Skill Modes | βœ… | βœ… | βœ… | βœ… | βœ… | --- diff --git a/docs/getting-started/01-installation.md b/docs/getting-started/01-installation.md index 84ff3c6..6df3543 100644 --- a/docs/getting-started/01-installation.md +++ b/docs/getting-started/01-installation.md @@ -86,6 +86,7 @@ pip install skill-seekers[all-llms] - Claude AI support - Google Gemini support - OpenAI ChatGPT support +- MiniMax AI support - All vector databases - MCP server - Cloud storage (S3, GCS, 
Azure) @@ -98,6 +99,7 @@ Install only what you need: # Specific platform only pip install skill-seekers[gemini] # Google Gemini pip install skill-seekers[openai] # OpenAI +pip install skill-seekers[minimax] # MiniMax AI pip install skill-seekers[chroma] # ChromaDB # Multiple extras @@ -115,6 +117,7 @@ pip install skill-seekers[dev] |-------|-------------|-----------------| | `gemini` | Google Gemini support | `pip install skill-seekers[gemini]` | | `openai` | OpenAI ChatGPT support | `pip install skill-seekers[openai]` | +| `minimax` | MiniMax AI support | `pip install skill-seekers[minimax]` | | `mcp` | MCP server | `pip install skill-seekers[mcp]` | | `chroma` | ChromaDB export | `pip install skill-seekers[chroma]` | | `weaviate` | Weaviate export | `pip install skill-seekers[weaviate]` | diff --git a/docs/integrations/INTEGRATIONS.md b/docs/integrations/INTEGRATIONS.md index 19d2fa1..3adb0f3 100644 --- a/docs/integrations/INTEGRATIONS.md +++ b/docs/integrations/INTEGRATIONS.md @@ -112,6 +112,7 @@ Upload documentation as custom skills to AI chat platforms: | **[Claude](CLAUDE.md)** | Anthropic | ZIP + YAML | Claude.ai Projects | [Setup β†’](CLAUDE.md) | | **[Gemini](GEMINI_INTEGRATION.md)** | Google | tar.gz | Gemini AI | [Setup β†’](GEMINI_INTEGRATION.md) | | **[ChatGPT](OPENAI_INTEGRATION.md)** | OpenAI | ZIP + Vector Store | GPT Actions | [Setup β†’](OPENAI_INTEGRATION.md) | +| **[MiniMax](MINIMAX_INTEGRATION.md)** | MiniMax | ZIP | MiniMax AI Platform | [Setup β†’](MINIMAX_INTEGRATION.md) | **Quick Example:** ```bash @@ -139,7 +140,7 @@ skill-seekers upload output/vue-claude.zip --target claude | **AI coding (flow-based)** | Windsurf | Unique flow paradigm, Codeium AI | 5 min | | **AI coding (VS Code ext)** | Cline | Claude in VS Code, MCP integration | 10 min | | **AI coding (any IDE)** | Continue.dev | Works everywhere, open-source | 5 min | -| **Chat with documentation** | Claude/Gemini/ChatGPT | Direct upload as custom skill | 3 min | +| **Chat with 
documentation** | Claude/Gemini/ChatGPT/MiniMax | Direct upload as custom skill | 3 min | ### By Technical Requirements diff --git a/docs/integrations/MINIMAX_INTEGRATION.md b/docs/integrations/MINIMAX_INTEGRATION.md new file mode 100644 index 0000000..f162ae2 --- /dev/null +++ b/docs/integrations/MINIMAX_INTEGRATION.md @@ -0,0 +1,391 @@ +# MiniMax AI Integration Guide + +Complete guide for using Skill Seekers with MiniMax AI platform. + +--- + +## Overview + +**MiniMax AI** is a Chinese AI company offering OpenAI-compatible APIs with their M2.7 model. Skill Seekers packages documentation for use with MiniMax's platform. + +### Key Features + +- **OpenAI-Compatible API**: Uses standard OpenAI client library +- **MiniMax-M2.7 Model**: Powerful LLM for enhancement and chat +- **Simple ZIP Format**: Easy packaging with system instructions +- **Knowledge Files**: Reference documentation included in package + +--- + +## Prerequisites + +### 1. Get MiniMax API Key + +1. Visit [MiniMax Platform](https://platform.minimaxi.com/) +2. Create an account and verify +3. Navigate to API Keys section +4. Generate a new API key +5. Copy the key (starts with `eyJ` - JWT format) + +### 2. Install Dependencies + +```bash +# Install MiniMax support (includes openai library) +pip install skill-seekers[minimax] + +# Or install all LLM platforms +pip install skill-seekers[all-llms] +``` + +### 3. Configure Environment + +```bash +export MINIMAX_API_KEY=eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9... +``` + +Add to your `~/.bashrc`, `~/.zshrc`, or `.env` file for persistence. 
+ +--- + +## Complete Workflow + +### Step 1: Scrape Documentation + +```bash +# Scrape documentation website +skill-seekers scrape --config configs/react.json + +# Or use quick preset +skill-seekers create https://docs.python.org/3/ --preset quick +``` + +### Step 2: Enhance with MiniMax-M2.7 + +```bash +# Enhance SKILL.md using MiniMax AI +skill-seekers enhance output/react/ --target minimax + +# With custom model (if available) +skill-seekers enhance output/react/ --target minimax --model MiniMax-M2.7 +``` + +This step: +- Reads reference documentation +- Generates enhanced system instructions +- Creates backup of original SKILL.md +- Uses MiniMax-M2.7 for AI enhancement + +### Step 3: Package for MiniMax + +```bash +# Package as MiniMax-compatible ZIP +skill-seekers package output/react/ --target minimax + +# Custom output path +skill-seekers package output/react/ --target minimax --output my-skill.zip +``` + +**Output structure:** +``` +react-minimax.zip +β”œβ”€β”€ system_instructions.txt # Main instructions (from SKILL.md) +β”œβ”€β”€ knowledge_files/ # Reference documentation +β”‚ β”œβ”€β”€ guide.md +β”‚ β”œβ”€β”€ api-reference.md +β”‚ └── examples.md +└── minimax_metadata.json # Skill metadata +``` + +### Step 4: Validate Package + +```bash +# Validate package with MiniMax API +skill-seekers upload react-minimax.zip --target minimax +``` + +This validates: +- Package structure +- API connectivity +- System instructions format + +**Note:** MiniMax doesn't have persistent skill storage like Claude. The upload validates your package but you'll use the ZIP file directly with MiniMax's API. 
+ +--- + +## Using Your Skill + +### Direct API Usage + +```python +from openai import OpenAI +import zipfile +import json + +# Extract package +with zipfile.ZipFile('react-minimax.zip', 'r') as zf: + with zf.open('system_instructions.txt') as f: + system_instructions = f.read().decode('utf-8') + + # Load metadata + with zf.open('minimax_metadata.json') as f: + metadata = json.load(f) + +# Initialize MiniMax client (OpenAI-compatible) +client = OpenAI( + api_key="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9...", + base_url="https://api.minimax.io/v1" +) + +# Use with chat completions +response = client.chat.completions.create( + model="MiniMax-M2.7", + messages=[ + {"role": "system", "content": system_instructions}, + {"role": "user", "content": "How do I create a React component?"} + ], + temperature=0.3, + max_tokens=2000 +) + +print(response.choices[0].message.content) +``` + +### With Knowledge Files + +```python +import zipfile +from pathlib import Path + +# Extract knowledge files +with zipfile.ZipFile('react-minimax.zip', 'r') as zf: + zf.extractall('extracted_skill') + +# Read all knowledge files +knowledge_dir = Path('extracted_skill/knowledge_files') +knowledge_files = [] +for md_file in knowledge_dir.glob('*.md'): + knowledge_files.append({ + 'name': md_file.name, + 'content': md_file.read_text() + }) + +# Include in context (truncate if too long) +context = "\n\n".join([f"## {kf['name']}\n{kf['content'][:5000]}" + for kf in knowledge_files[:5]]) + +response = client.chat.completions.create( + model="MiniMax-M2.7", + messages=[ + {"role": "system", "content": system_instructions}, + {"role": "user", "content": f"Context: {context}\n\nQuestion: What are React hooks?"} + ] +) +``` + +--- + +## API Reference + +### SkillAdaptor Methods + +```python +from skill_seekers.cli.adaptors import get_adaptor + +# Get MiniMax adaptor +adaptor = get_adaptor('minimax') + +# Format SKILL.md as system instructions +instructions = adaptor.format_skill_md(skill_dir, metadata) + +# 
Package skill +package_path = adaptor.package(skill_dir, output_path) + +# Validate package with MiniMax API +result = adaptor.upload(package_path, api_key) +print(result['message']) # Validation result + +# Enhance SKILL.md +success = adaptor.enhance(skill_dir, api_key) +``` + +### Environment Variables + +| Variable | Description | Required | +|----------|-------------|----------| +| `MINIMAX_API_KEY` | Your MiniMax API key (JWT format) | Yes | + +--- + +## Troubleshooting + +### Invalid API Key Format + +**Error:** `Invalid API key format` + +**Solution:** MiniMax API keys use JWT format starting with `eyJ`. Check: +```bash +# Should start with 'eyJ' +echo $MINIMAX_API_KEY | head -c 10 +# Output: eyJhbGciOi +``` + +### OpenAI Library Not Installed + +**Error:** `ModuleNotFoundError: No module named 'openai'` + +**Solution:** +```bash +pip install skill-seekers[minimax] +# or +pip install openai>=1.0.0 +``` + +### Upload Timeout + +**Error:** `Upload timed out` + +**Solution:** +- Check internet connection +- Try again (temporary network issue) +- Verify API key is correct +- Check MiniMax platform status + +### Connection Error + +**Error:** `Connection error` + +**Solution:** +- Verify internet connectivity +- Check if MiniMax API endpoint is accessible: +```bash +curl https://api.minimax.io/v1/models +``` +- Try with VPN if in restricted region + +### Package Validation Failed + +**Error:** `Invalid package: system_instructions.txt not found` + +**Solution:** +- Ensure SKILL.md exists before packaging +- Check package contents: +```bash +unzip -l react-minimax.zip +``` +- Re-package the skill + +--- + +## Best Practices + +### 1. Keep References Organized + +Structure your documentation: +``` +output/react/ +β”œβ”€β”€ SKILL.md # Main instructions +β”œβ”€β”€ references/ +β”‚ β”œβ”€β”€ 01-getting-started.md +β”‚ β”œβ”€β”€ 02-components.md +β”‚ β”œβ”€β”€ 03-hooks.md +β”‚ └── 04-api-reference.md +└── assets/ + └── diagrams/ +``` + +### 2. 
Use Enhancement + +Always enhance before packaging: +```bash +# Enhancement improves system instructions quality +skill-seekers enhance output/react/ --target minimax +``` + +### 3. Test Before Deployment + +```bash +# Validate package +skill-seekers upload react-minimax.zip --target minimax + +# If successful, package is ready to use +``` + +### 4. Version Your Skills + +```bash +# Include version in output name +skill-seekers package output/react/ --target minimax --output react-v2.0-minimax.zip +``` + +--- + +## Comparison with Other Platforms + +| Feature | MiniMax | Claude | Gemini | OpenAI | +|---------|---------|--------|--------|--------| +| **Format** | ZIP | ZIP | tar.gz | ZIP | +| **Upload** | Validation | Full API | Full API | Full API | +| **Enhancement** | MiniMax-M2.7 | Claude Sonnet | Gemini 2.0 | GPT-4o | +| **API Type** | OpenAI-compatible | Anthropic | Google | OpenAI | +| **Key Format** | JWT (eyJ...) | sk-ant... | AIza... | sk-... | +| **Knowledge Files** | Included in ZIP | Included | Included | Vector Store | + +--- + +## Advanced Usage + +### Custom Enhancement Prompt + +Programmatically customize enhancement: + +```python +from skill_seekers.cli.adaptors import get_adaptor +from pathlib import Path + +adaptor = get_adaptor('minimax') +skill_dir = Path('output/react') + +# Build custom prompt +references = adaptor._read_reference_files(skill_dir / 'references') +prompt = adaptor._build_enhancement_prompt( + skill_name='React', + references=references, + current_skill_md=(skill_dir / 'SKILL.md').read_text() +) + +# Customize prompt +prompt += "\n\nADDITIONAL FOCUS: Emphasize React 18 concurrent features." 
+ +# Use with your own API call +``` + +### Batch Processing + +```bash +# Process multiple frameworks +for framework in react vue angular; do + skill-seekers scrape --config configs/${framework}.json + skill-seekers enhance output/${framework}/ --target minimax + skill-seekers package output/${framework}/ --target minimax --output ${framework}-minimax.zip +done +``` + +--- + +## Resources + +- [MiniMax Platform](https://platform.minimaxi.com/) +- [MiniMax API Documentation](https://platform.minimaxi.com/document) +- [OpenAI Python Client](https://github.com/openai/openai-python) +- [Multi-LLM Support Guide](MULTI_LLM_SUPPORT.md) + +--- + +## Next Steps + +1. Get your [MiniMax API key](https://platform.minimaxi.com/) +2. Install dependencies: `pip install skill-seekers[minimax]` +3. Try the [Quick Start example](#complete-workflow) +4. Explore [advanced usage](#advanced-usage) patterns + +For help, see [Troubleshooting](#troubleshooting) or open an issue on GitHub. diff --git a/docs/integrations/MULTI_LLM_SUPPORT.md b/docs/integrations/MULTI_LLM_SUPPORT.md index 0b96bd4..4313d46 100644 --- a/docs/integrations/MULTI_LLM_SUPPORT.md +++ b/docs/integrations/MULTI_LLM_SUPPORT.md @@ -9,6 +9,7 @@ Skill Seekers supports multiple LLM platforms through a clean adaptor system. 
Th | **Claude AI** | βœ… Full Support | ZIP + YAML | βœ… Automatic | βœ… Yes | ANTHROPIC_API_KEY | | **Google Gemini** | βœ… Full Support | tar.gz | βœ… Automatic | βœ… Yes | GOOGLE_API_KEY | | **OpenAI ChatGPT** | βœ… Full Support | ZIP + Vector Store | βœ… Automatic | βœ… Yes | OPENAI_API_KEY | +| **MiniMax AI** | βœ… Full Support | ZIP | βœ… Validation | βœ… Yes | MINIMAX_API_KEY | | **Generic Markdown** | βœ… Export Only | ZIP | ❌ Manual | ❌ No | None | ## Quick Start @@ -108,6 +109,9 @@ pip install skill-seekers[gemini] # OpenAI ChatGPT support pip install skill-seekers[openai] +# MiniMax AI support +pip install skill-seekers[minimax] + # All LLM platforms pip install skill-seekers[all-llms] @@ -150,6 +154,13 @@ pip install -e .[all-llms] - API: Assistants API + Vector Store - Enhancement: GPT-4o +**MiniMax AI:** +- Format: ZIP archive +- SKILL.md -> `system_instructions.txt` (plain text, no frontmatter) +- Structure: `system_instructions.txt`, `knowledge_files/`, `minimax_metadata.json` +- API: OpenAI-compatible chat completions +- Enhancement: MiniMax-M2.7 + **Generic Markdown:** - Format: ZIP archive - Structure: `README.md`, `references/`, `DOCUMENTATION.md` (combined) @@ -174,6 +185,11 @@ export GOOGLE_API_KEY=AIzaSy... export OPENAI_API_KEY=sk-proj-... ``` +**MiniMax AI:** +```bash +export MINIMAX_API_KEY=your-key +``` + ## Complete Workflow Examples ### Workflow 1: Claude AI (Default) @@ -238,7 +254,29 @@ skill-seekers upload react-openai.zip --target openai # Access at: https://platform.openai.com/assistants/ ``` -### Workflow 4: Export to All Platforms +### Workflow 4: MiniMax AI + +```bash +# Setup (one-time) +pip install skill-seekers[minimax] +export MINIMAX_API_KEY=your-key + +# 1. Scrape (universal) +skill-seekers scrape --config configs/react.json + +# 2. Enhance with MiniMax-M2.7 +skill-seekers enhance output/react/ --target minimax + +# 3. Package for MiniMax +skill-seekers package output/react/ --target minimax + +# 4. 
Upload to MiniMax (validates with API) +skill-seekers upload react-minimax.zip --target minimax + +# Access at: https://platform.minimaxi.com/ +``` + +### Workflow 5: Export to All Platforms ```bash # Install all platforms @@ -251,12 +289,14 @@ skill-seekers scrape --config configs/react.json skill-seekers package output/react/ --target claude skill-seekers package output/react/ --target gemini skill-seekers package output/react/ --target openai +skill-seekers package output/react/ --target minimax skill-seekers package output/react/ --target markdown # Result: # - react.zip (Claude) # - react-gemini.tar.gz (Gemini) # - react-openai.zip (OpenAI) +# - react-minimax.zip (MiniMax) # - react-markdown.zip (Universal) ``` @@ -300,7 +340,7 @@ from skill_seekers.cli.adaptors import list_platforms, is_platform_available # List all registered platforms platforms = list_platforms() -print(platforms) # ['claude', 'gemini', 'openai', 'markdown'] +print(platforms) # ['claude', 'gemini', 'minimax', 'openai', 'markdown'] # Check if platform is available if is_platform_available('gemini'): @@ -323,6 +363,7 @@ For detailed platform-specific instructions, see: - [Claude AI Integration](CLAUDE_INTEGRATION.md) (default) - [Google Gemini Integration](GEMINI_INTEGRATION.md) - [OpenAI ChatGPT Integration](OPENAI_INTEGRATION.md) +- [MiniMax AI Integration](MINIMAX_INTEGRATION.md) ## Troubleshooting @@ -340,6 +381,8 @@ pip install skill-seekers[gemini] **Solution:** ```bash pip install skill-seekers[openai] +# or for MiniMax (also uses openai library) +pip install skill-seekers[minimax] ``` ### API Key Issues @@ -350,6 +393,7 @@ pip install skill-seekers[openai] - Claude: `sk-ant-...` - Gemini: `AIza...` - OpenAI: `sk-proj-...` or `sk-...` +- MiniMax: Any valid API key string ### Package Format Errors @@ -380,6 +424,7 @@ A: Yes, each platform uses its own enhancement model: - Claude: Claude Sonnet 4 - Gemini: Gemini 2.0 Flash - OpenAI: GPT-4o +- MiniMax: MiniMax-M2.7 **Q: What if I don't 
want to upload automatically?** diff --git a/pyproject.toml b/pyproject.toml index 515e219..2bb1b8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,11 @@ openai = [ "openai>=1.0.0", ] +# MiniMax AI support (uses OpenAI-compatible API) +minimax = [ + "openai>=1.0.0", +] + # All LLM platforms combined all-llms = [ "google-generativeai>=0.8.0", diff --git a/src/skill_seekers/cli/adaptors/__init__.py b/src/skill_seekers/cli/adaptors/__init__.py index 6240082..2350858 100644 --- a/src/skill_seekers/cli/adaptors/__init__.py +++ b/src/skill_seekers/cli/adaptors/__init__.py @@ -3,7 +3,7 @@ Multi-LLM Adaptor Registry Provides factory function to get platform-specific adaptors for skill generation. -Supports Claude AI, Google Gemini, OpenAI ChatGPT, and generic Markdown export. +Supports Claude AI, Google Gemini, OpenAI ChatGPT, MiniMax AI, and generic Markdown export. """ from .base import SkillAdaptor, SkillMetadata @@ -69,6 +69,11 @@ try: except ImportError: PineconeAdaptor = None +try: + from .minimax import MiniMaxAdaptor +except ImportError: + MiniMaxAdaptor = None + # Registry of available adaptors ADAPTORS: dict[str, type[SkillAdaptor]] = {} @@ -98,6 +103,8 @@ if HaystackAdaptor: ADAPTORS["haystack"] = HaystackAdaptor if PineconeAdaptor: ADAPTORS["pinecone"] = PineconeAdaptor +if MiniMaxAdaptor: + ADAPTORS["minimax"] = MiniMaxAdaptor def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor: @@ -105,7 +112,7 @@ def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor: Factory function to get platform-specific adaptor instance. 
Args: - platform: Platform identifier ('claude', 'gemini', 'openai', 'markdown') + platform: Platform identifier ('claude', 'gemini', 'openai', 'minimax', 'markdown') config: Optional platform-specific configuration Returns: @@ -116,6 +123,7 @@ def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor: Examples: >>> adaptor = get_adaptor('claude') + >>> adaptor = get_adaptor('minimax') >>> adaptor = get_adaptor('gemini', {'api_version': 'v1beta'}) """ if platform not in ADAPTORS: @@ -141,7 +149,7 @@ def list_platforms() -> list[str]: Examples: >>> list_platforms() - ['claude', 'gemini', 'openai', 'markdown'] + ['claude', 'gemini', 'openai', 'minimax', 'markdown'] """ return list(ADAPTORS.keys()) diff --git a/src/skill_seekers/cli/adaptors/minimax.py b/src/skill_seekers/cli/adaptors/minimax.py new file mode 100644 index 0000000..ca9a272 --- /dev/null +++ b/src/skill_seekers/cli/adaptors/minimax.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +""" +MiniMax AI Adaptor + +Implements platform-specific handling for MiniMax AI skills. +Uses MiniMax's OpenAI-compatible API for AI enhancement with M2.7 model. +""" + +import json +import zipfile +from pathlib import Path +from typing import Any + +from .base import SkillAdaptor, SkillMetadata +from skill_seekers.cli.arguments.common import DEFAULT_CHUNK_TOKENS, DEFAULT_CHUNK_OVERLAP_TOKENS + + +class MiniMaxAdaptor(SkillAdaptor): + """ + MiniMax AI platform adaptor. + + Handles: + - System instructions format (plain text, no YAML frontmatter) + - ZIP packaging with knowledge files + - AI enhancement using MiniMax-M2.7 + """ + + PLATFORM = "minimax" + PLATFORM_NAME = "MiniMax AI" + DEFAULT_API_ENDPOINT = "https://api.minimax.io/v1" + + def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str: + """ + Format SKILL.md as system instructions for MiniMax AI. + + MiniMax uses OpenAI-compatible chat completions, so instructions + are formatted as clear system prompts without YAML frontmatter. 
+ + Args: + skill_dir: Path to skill directory + metadata: Skill metadata + + Returns: + Formatted instructions for MiniMax AI + """ + existing_content = self._read_existing_content(skill_dir) + + if existing_content and len(existing_content) > 100: + content_body = f"""You are an expert assistant for {metadata.name}. + +{metadata.description} + +Use the attached knowledge files to provide accurate, detailed answers about {metadata.name}. + +{existing_content} + +## How to Assist Users + +When users ask questions: +1. Search the knowledge files for relevant information +2. Provide clear, practical answers with code examples +3. Reference specific documentation sections when helpful +4. Be concise but thorough + +Always prioritize accuracy by consulting the knowledge base before responding.""" + else: + content_body = f"""You are an expert assistant for {metadata.name}. + +{metadata.description} + +## Your Knowledge Base + +You have access to comprehensive documentation files about {metadata.name}. Use these files to provide accurate answers to user questions. + +{self._generate_toc(skill_dir)} + +## Quick Reference + +{self._extract_quick_reference(skill_dir)} + +## How to Assist Users + +When users ask questions about {metadata.name}: + +1. **Search the knowledge files** - Find relevant information in the documentation +2. **Provide code examples** - Include practical, working code snippets +3. **Reference documentation** - Cite specific sections when helpful +4. **Be practical** - Focus on real-world usage and best practices +5. 
**Stay accurate** - Always verify information against the knowledge base + +## Response Guidelines + +- Keep answers clear and concise +- Use proper code formatting with language tags +- Provide both simple and detailed explanations as needed +- Suggest related topics when relevant +- Admit when information isn't in the knowledge base + +Always prioritize accuracy by consulting the attached documentation files before responding.""" + + return content_body + + def package( + self, + skill_dir: Path, + output_path: Path, + enable_chunking: bool = False, + chunk_max_tokens: int = DEFAULT_CHUNK_TOKENS, + preserve_code_blocks: bool = True, + chunk_overlap_tokens: int = DEFAULT_CHUNK_OVERLAP_TOKENS, + ) -> Path: + """ + Package skill into ZIP file for MiniMax AI. + + Creates MiniMax-compatible structure: + - system_instructions.txt (main instructions) + - knowledge_files/*.md (reference files) + - minimax_metadata.json (skill metadata) + + Args: + skill_dir: Path to skill directory + output_path: Output path/filename for ZIP + + Returns: + Path to created ZIP file + """ + skill_dir = Path(skill_dir) + output_path = Path(output_path) + + if output_path.is_dir() or str(output_path).endswith("/"): + output_path = Path(output_path) / f"{skill_dir.name}-minimax.zip" + elif not str(output_path).endswith(".zip") and not str(output_path).endswith( + "-minimax.zip" + ): + output_str = str(output_path).replace(".zip", "-minimax.zip") + if not output_str.endswith(".zip"): + output_str += ".zip" + output_path = Path(output_str) + + output_path.parent.mkdir(parents=True, exist_ok=True) + + with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf: + skill_md = skill_dir / "SKILL.md" + if skill_md.exists(): + instructions = skill_md.read_text(encoding="utf-8") + zf.writestr("system_instructions.txt", instructions) + + refs_dir = skill_dir / "references" + if refs_dir.exists(): + for ref_file in refs_dir.rglob("*.md"): + if ref_file.is_file() and not 
ref_file.name.startswith("."): + arcname = f"knowledge_files/{ref_file.name}" + zf.write(ref_file, arcname) + + metadata = { + "platform": "minimax", + "name": skill_dir.name, + "version": "1.0.0", + "created_with": "skill-seekers", + "model": "MiniMax-M2.7", + "api_base": self.DEFAULT_API_ENDPOINT, + } + + zf.writestr("minimax_metadata.json", json.dumps(metadata, indent=2)) + + return output_path + + def upload(self, package_path: Path, api_key: str, **kwargs) -> dict[str, Any]: + """ + Upload packaged skill to MiniMax AI. + + MiniMax uses an OpenAI-compatible chat completion API. + This method validates the package and prepares it for use + with the MiniMax API. + + Args: + package_path: Path to skill ZIP file + api_key: MiniMax API key + **kwargs: Additional arguments (model, etc.) + + Returns: + Dictionary with upload result + """ + package_path = Path(package_path) + if not package_path.exists(): + return { + "success": False, + "skill_id": None, + "url": None, + "message": f"File not found: {package_path}", + } + + if package_path.suffix != ".zip": + return { + "success": False, + "skill_id": None, + "url": None, + "message": f"Not a ZIP file: {package_path}", + } + + try: + from openai import OpenAI, APITimeoutError, APIConnectionError + except ImportError: + return { + "success": False, + "skill_id": None, + "url": None, + "message": "openai library not installed. 
Run: pip install openai", + } + + try: + import tempfile + + with tempfile.TemporaryDirectory() as temp_dir: + with zipfile.ZipFile(package_path, "r") as zf: + zf.extractall(temp_dir) + + temp_path = Path(temp_dir) + + instructions_file = temp_path / "system_instructions.txt" + if not instructions_file.exists(): + return { + "success": False, + "skill_id": None, + "url": None, + "message": "Invalid package: system_instructions.txt not found", + } + + instructions = instructions_file.read_text(encoding="utf-8") + + metadata_file = temp_path / "minimax_metadata.json" + skill_name = package_path.stem + model = kwargs.get("model", "MiniMax-M2.7") + + if metadata_file.exists(): + with open(metadata_file) as f: + metadata = json.load(f) + skill_name = metadata.get("name", skill_name) + model = metadata.get("model", model) + + knowledge_dir = temp_path / "knowledge_files" + knowledge_count = 0 + if knowledge_dir.exists(): + knowledge_count = len(list(knowledge_dir.glob("*.md"))) + + client = OpenAI( + api_key=api_key, + base_url=self.DEFAULT_API_ENDPOINT, + ) + + client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": instructions}, + { + "role": "user", + "content": f"Confirm you are ready to assist with {skill_name}. Reply briefly.", + }, + ], + temperature=0.3, + max_tokens=100, + ) + + return { + "success": True, + "skill_id": None, + "url": "https://platform.minimaxi.com/", + "message": f"Skill '{skill_name}' validated with MiniMax {model} ({knowledge_count} knowledge files)", + } + + except APITimeoutError: + return { + "success": False, + "skill_id": None, + "url": None, + "message": "Upload timed out. Try again.", + } + except APIConnectionError: + return { + "success": False, + "skill_id": None, + "url": None, + "message": "Connection error. 
Check your internet connection.", + } + except Exception as e: + return { + "success": False, + "skill_id": None, + "url": None, + "message": f"Upload failed: {str(e)}", + } + + def validate_api_key(self, api_key: str) -> bool: + """ + Validate MiniMax API key format. + + MiniMax API keys are opaque strings. We only check for + a non-empty key with a reasonable minimum length. + + Args: + api_key: API key to validate + + Returns: + True if key format appears valid + """ + key = api_key.strip() + return len(key) > 10 + + def get_env_var_name(self) -> str: + """ + Get environment variable name for MiniMax API key. + + Returns: + 'MINIMAX_API_KEY' + """ + return "MINIMAX_API_KEY" + + def supports_enhancement(self) -> bool: + """ + MiniMax supports AI enhancement via MiniMax-M2.7. + + Returns: + True + """ + return True + + def enhance(self, skill_dir: Path, api_key: str) -> bool: + """ + Enhance SKILL.md using MiniMax-M2.7 API. + + Uses MiniMax's OpenAI-compatible API endpoint for enhancement. 
+ + Args: + skill_dir: Path to skill directory + api_key: MiniMax API key + + Returns: + True if enhancement succeeded + """ + try: + from openai import OpenAI + except ImportError: + print("❌ Error: openai package not installed") + print("Install with: pip install openai") + return False + + skill_dir = Path(skill_dir) + references_dir = skill_dir / "references" + skill_md_path = skill_dir / "SKILL.md" + + print("πŸ“– Reading reference documentation...") + references = self._read_reference_files(references_dir) + + if not references: + print("❌ No reference files found to analyze") + return False + + print(f" βœ“ Read {len(references)} reference files") + total_size = sum(len(c) for c in references.values()) + print(f" βœ“ Total size: {total_size:,} characters\n") + + current_skill_md = None + if skill_md_path.exists(): + current_skill_md = skill_md_path.read_text(encoding="utf-8") + print(f" β„Ή Found existing SKILL.md ({len(current_skill_md)} chars)") + else: + print(" β„Ή No existing SKILL.md, will create new one") + + prompt = self._build_enhancement_prompt(skill_dir.name, references, current_skill_md) + + print("\nπŸ€– Asking MiniMax-M2.7 to enhance SKILL.md...") + print(f" Input: {len(prompt):,} characters") + + try: + client = OpenAI( + api_key=api_key, + base_url=self.DEFAULT_API_ENDPOINT, + ) + + response = client.chat.completions.create( + model="MiniMax-M2.7", + messages=[ + { + "role": "system", + "content": "You are an expert technical writer creating system instructions for MiniMax AI.", + }, + {"role": "user", "content": prompt}, + ], + temperature=0.3, + max_tokens=4096, + ) + + enhanced_content = response.choices[0].message.content + print(f" βœ“ Generated enhanced SKILL.md ({len(enhanced_content)} chars)\n") + + if skill_md_path.exists(): + backup_path = skill_md_path.with_suffix(".md.backup") + skill_md_path.rename(backup_path) + print(f" πŸ’Ύ Backed up original to: {backup_path.name}") + + skill_md_path.write_text(enhanced_content, 
encoding="utf-8") + print(" βœ… Saved enhanced SKILL.md") + + return True + + except Exception as e: + print(f"❌ Error calling MiniMax API: {e}") + return False + + def _read_reference_files( + self, references_dir: Path, max_chars: int = 200000 + ) -> dict[str, str]: + """ + Read reference markdown files from skill directory. + + Args: + references_dir: Path to references directory + max_chars: Maximum total characters to read + + Returns: + Dictionary mapping filename to content + """ + if not references_dir.exists(): + return {} + + references = {} + total_chars = 0 + + for ref_file in sorted(references_dir.glob("*.md")): + if total_chars >= max_chars: + break + + try: + content = ref_file.read_text(encoding="utf-8") + if len(content) > 30000: + content = content[:30000] + "\n\n...(truncated)" + + references[ref_file.name] = content + total_chars += len(content) + + except Exception as e: + print(f" ⚠️ Could not read {ref_file.name}: {e}") + + return references + + def _build_enhancement_prompt( + self, skill_name: str, references: dict[str, str], current_skill_md: str = None + ) -> str: + """ + Build MiniMax API prompt for enhancement. + + Args: + skill_name: Name of the skill + references: Dictionary of reference content + current_skill_md: Existing SKILL.md content (optional) + + Returns: + Enhancement prompt for MiniMax-M2.7 + """ + prompt = f"""You are creating system instructions for a MiniMax AI assistant about: {skill_name} + +I've scraped documentation and organized it into reference files. Your job is to create EXCELLENT system instructions that will help the assistant use this documentation effectively. 
+
+CURRENT INSTRUCTIONS:
+{"```" if current_skill_md else "(none - create from scratch)"}
+{current_skill_md or "No existing instructions"}
+{"```" if current_skill_md else ""}
+
+REFERENCE DOCUMENTATION:
+"""
+
+        for filename, content in references.items():
+            prompt += f"\n\n## {filename}\n```markdown\n{content[:30000]}\n```\n"
+
+        prompt += """
+
+YOUR TASK:
+Create enhanced system instructions that include:
+
+1. **Clear role definition** - "You are an expert assistant for [topic]"
+2. **Knowledge base description** - What documentation is attached
+3. **Excellent Quick Reference** - Extract 5-10 of the BEST, most practical code examples from the reference docs
+   - Choose SHORT, clear examples that demonstrate common tasks
+   - Include both simple and intermediate examples
+   - Annotate examples with clear descriptions
+   - Use proper language tags (cpp, python, javascript, json, etc.)
+4. **Response guidelines** - How the assistant should help users
+5. **Search strategy** - How to find information in the knowledge base
+6. **DO NOT use YAML frontmatter** - This is plain text instructions
+
+IMPORTANT:
+- Extract REAL examples from the reference docs, don't make them up
+- Prioritize SHORT, clear examples (5-20 lines max)
+- Make it actionable and practical
+- Write clear, direct instructions
+- Focus on how the assistant should behave and respond
+- NO YAML frontmatter (no --- blocks)
+
+OUTPUT:
+Return ONLY the complete system instructions as plain text. 
+""" + + return prompt diff --git a/tests/test_adaptors/test_minimax_adaptor.py b/tests/test_adaptors/test_minimax_adaptor.py new file mode 100644 index 0000000..94098c6 --- /dev/null +++ b/tests/test_adaptors/test_minimax_adaptor.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python3 +""" +Tests for MiniMax AI adaptor +""" + +import json +import os +import sys +import tempfile +import unittest +import zipfile +from pathlib import Path +from unittest.mock import patch, MagicMock + +try: + from openai import APITimeoutError, APIConnectionError +except ImportError: + APITimeoutError = None + APIConnectionError = None + +from skill_seekers.cli.adaptors import get_adaptor, is_platform_available +from skill_seekers.cli.adaptors.base import SkillMetadata + + +class TestMiniMaxAdaptor(unittest.TestCase): + """Test MiniMax AI adaptor functionality""" + + def setUp(self): + """Set up test adaptor""" + self.adaptor = get_adaptor("minimax") + + def test_platform_info(self): + """Test platform identifiers""" + self.assertEqual(self.adaptor.PLATFORM, "minimax") + self.assertEqual(self.adaptor.PLATFORM_NAME, "MiniMax AI") + self.assertIsNotNone(self.adaptor.DEFAULT_API_ENDPOINT) + self.assertIn("minimax", self.adaptor.DEFAULT_API_ENDPOINT) + + def test_platform_available(self): + """Test that minimax platform is registered""" + self.assertTrue(is_platform_available("minimax")) + + def test_validate_api_key_valid(self): + """Test valid MiniMax API keys (any string >10 chars)""" + self.assertTrue( + self.adaptor.validate_api_key("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.test.key") + ) + self.assertTrue(self.adaptor.validate_api_key("sk-some-long-api-key-string-here")) + self.assertTrue(self.adaptor.validate_api_key(" a-valid-key-with-spaces ")) + + def test_validate_api_key_invalid(self): + """Test invalid API keys""" + self.assertFalse(self.adaptor.validate_api_key("")) + self.assertFalse(self.adaptor.validate_api_key(" ")) + self.assertFalse(self.adaptor.validate_api_key("short")) + + def 
test_get_env_var_name(self): + """Test environment variable name""" + self.assertEqual(self.adaptor.get_env_var_name(), "MINIMAX_API_KEY") + + def test_supports_enhancement(self): + """Test enhancement support""" + self.assertTrue(self.adaptor.supports_enhancement()) + + def test_format_skill_md_no_frontmatter(self): + """Test that MiniMax format has no YAML frontmatter""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("# Test content") + + metadata = SkillMetadata(name="test-skill", description="Test skill description") + + formatted = self.adaptor.format_skill_md(skill_dir, metadata) + + self.assertFalse(formatted.startswith("---")) + self.assertIn("You are an expert assistant", formatted) + self.assertIn("test-skill", formatted) + self.assertIn("Test skill description", formatted) + + def test_format_skill_md_with_existing_content(self): + """Test formatting when SKILL.md already has substantial content""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + + (skill_dir / "references").mkdir() + existing_content = "# Existing Content\n\n" + "x" * 200 + (skill_dir / "SKILL.md").write_text(existing_content) + + metadata = SkillMetadata(name="test-skill", description="Test description") + + formatted = self.adaptor.format_skill_md(skill_dir, metadata) + + self.assertIn("You are an expert assistant", formatted) + self.assertIn("test-skill", formatted) + + def test_format_skill_md_without_references(self): + """Test formatting without references directory""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + + metadata = SkillMetadata(name="test-skill", description="Test description") + + formatted = self.adaptor.format_skill_md(skill_dir, metadata) + + self.assertIn("You are an expert assistant", formatted) + self.assertIn("test-skill", formatted) + + def test_package_creates_zip(self): 
+ """Test that package creates ZIP file with correct structure""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("You are an expert assistant") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("# Reference") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + + self.assertTrue(package_path.exists()) + self.assertTrue(str(package_path).endswith(".zip")) + self.assertIn("minimax", package_path.name) + + with zipfile.ZipFile(package_path, "r") as zf: + names = zf.namelist() + self.assertIn("system_instructions.txt", names) + self.assertIn("minimax_metadata.json", names) + self.assertTrue(any("knowledge_files" in name for name in names)) + + def test_package_metadata_content(self): + """Test that packaged ZIP contains correct metadata""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("Test instructions") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "guide.md").write_text("# User Guide") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + + with zipfile.ZipFile(package_path, "r") as zf: + instructions = zf.read("system_instructions.txt").decode("utf-8") + self.assertEqual(instructions, "Test instructions") + + self.assertIn("knowledge_files/guide.md", zf.namelist()) + + metadata_content = zf.read("minimax_metadata.json").decode("utf-8") + metadata = json.loads(metadata_content) + self.assertEqual(metadata["platform"], "minimax") + self.assertEqual(metadata["name"], "test-skill") + self.assertEqual(metadata["model"], "MiniMax-M2.7") + self.assertIn("minimax", metadata["api_base"]) + + def test_package_output_path_as_file(self): + """Test packaging 
when output_path is a file path""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("Test") + + output_file = Path(temp_dir) / "output" / "custom-name-minimax.zip" + output_file.parent.mkdir(parents=True, exist_ok=True) + + package_path = self.adaptor.package(skill_dir, output_file) + + self.assertTrue(package_path.exists()) + self.assertTrue(str(package_path).endswith(".zip")) + + def test_package_without_references(self): + """Test packaging without reference files""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("Test instructions") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + + self.assertTrue(package_path.exists()) + with zipfile.ZipFile(package_path, "r") as zf: + names = zf.namelist() + self.assertIn("system_instructions.txt", names) + self.assertIn("minimax_metadata.json", names) + self.assertFalse(any("knowledge_files" in name for name in names)) + + def test_upload_missing_library(self): + """Test upload when openai library is not installed""" + with tempfile.NamedTemporaryFile(suffix=".zip") as tmp: + with patch.dict(sys.modules, {"openai": None}): + result = self.adaptor.upload(Path(tmp.name), "test-api-key") + + self.assertFalse(result["success"]) + self.assertIn("openai", result["message"]) + self.assertIn("not installed", result["message"]) + + def test_upload_invalid_file(self): + """Test upload with invalid file""" + result = self.adaptor.upload(Path("/nonexistent/file.zip"), "test-api-key") + + self.assertFalse(result["success"]) + self.assertIn("not found", result["message"].lower()) + + def test_upload_wrong_format(self): + """Test upload with wrong file format""" + with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp: + result = 
self.adaptor.upload(Path(tmp.name), "test-api-key") + + self.assertFalse(result["success"]) + self.assertIn("not a zip", result["message"].lower()) + + @unittest.skip("covered by test_upload_success_mocked") + def test_upload_success(self): + """Test successful upload - skipped (needs real API for integration test)""" + pass + + def test_enhance_missing_references(self): + """Test enhance when no reference files exist""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + + success = self.adaptor.enhance(skill_dir, "test-api-key") + self.assertFalse(success) + + @patch("openai.OpenAI") + def test_enhance_success_mocked(self, mock_openai_class): + """Test successful enhancement with mocked OpenAI client""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "Enhanced SKILL.md content" + mock_client.chat.completions.create.return_value = mock_response + mock_openai_class.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / "test.md").write_text("# Test\nContent") + (skill_dir / "SKILL.md").write_text("Original content") + + success = self.adaptor.enhance(skill_dir, "test-api-key") + + self.assertTrue(success) + new_content = (skill_dir / "SKILL.md").read_text() + self.assertEqual(new_content, "Enhanced SKILL.md content") + backup = skill_dir / "SKILL.md.backup" + self.assertTrue(backup.exists()) + self.assertEqual(backup.read_text(), "Original content") + mock_client.chat.completions.create.assert_called_once() + + def test_enhance_missing_library(self): + """Test enhance when openai library is not installed""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / "test.md").write_text("Test content") + + with 
patch.dict(sys.modules, {"openai": None}): + success = self.adaptor.enhance(skill_dir, "test-api-key") + + self.assertFalse(success) + + def test_read_reference_files(self): + """Test reading reference files""" + with tempfile.TemporaryDirectory() as temp_dir: + refs_dir = Path(temp_dir) + (refs_dir / "guide.md").write_text("# Guide\nContent here") + (refs_dir / "api.md").write_text("# API\nAPI docs") + + references = self.adaptor._read_reference_files(refs_dir) + + self.assertEqual(len(references), 2) + self.assertIn("guide.md", references) + self.assertIn("api.md", references) + + def test_read_reference_files_empty_dir(self): + """Test reading from empty references directory""" + with tempfile.TemporaryDirectory() as temp_dir: + references = self.adaptor._read_reference_files(Path(temp_dir)) + self.assertEqual(len(references), 0) + + def test_read_reference_files_nonexistent(self): + """Test reading from nonexistent directory""" + references = self.adaptor._read_reference_files(Path("/nonexistent/path")) + self.assertEqual(len(references), 0) + + def test_read_reference_files_truncation(self): + """Test that large reference files are truncated""" + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "large.md").write_text("x" * 50000) + + references = self.adaptor._read_reference_files(Path(temp_dir)) + + self.assertIn("large.md", references) + self.assertIn("truncated", references["large.md"]) + self.assertLessEqual(len(references["large.md"]), 31000) + + def test_build_enhancement_prompt(self): + """Test enhancement prompt generation""" + references = { + "guide.md": "# User Guide\nContent here", + "api.md": "# API Reference\nAPI docs", + } + + prompt = self.adaptor._build_enhancement_prompt( + "test-skill", references, "Existing SKILL.md content" + ) + + self.assertIn("test-skill", prompt) + self.assertIn("guide.md", prompt) + self.assertIn("api.md", prompt) + self.assertIn("Existing SKILL.md content", prompt) + self.assertIn("MiniMax", 
prompt) + + def test_build_enhancement_prompt_no_existing(self): + """Test enhancement prompt when no existing SKILL.md""" + references = {"test.md": "# Test\nContent"} + + prompt = self.adaptor._build_enhancement_prompt("test-skill", references, None) + + self.assertIn("test-skill", prompt) + self.assertIn("create from scratch", prompt) + + def test_config_initialization(self): + """Test adaptor initializes with config""" + config = {"custom_model": "MiniMax-M2.5"} + adaptor = get_adaptor("minimax", config) + self.assertEqual(adaptor.config, config) + + def test_default_config(self): + """Test adaptor initializes with empty config by default""" + self.assertEqual(self.adaptor.config, {}) + + def test_package_excludes_backup_files(self): + """Test that backup files are excluded from package""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + + (skill_dir / "SKILL.md").write_text("Test instructions") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "guide.md").write_text("# Guide") + (skill_dir / "references" / "guide.md.backup").write_text("# Old backup") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + + with zipfile.ZipFile(package_path, "r") as zf: + names = zf.namelist() + self.assertIn("knowledge_files/guide.md", names) + self.assertNotIn("knowledge_files/guide.md.backup", names) + + @patch("openai.OpenAI") + def test_upload_success_mocked(self, mock_openai_class): + """Test successful upload with mocked OpenAI client""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "Ready to assist with Python testing" + mock_client.chat.completions.create.return_value = mock_response + mock_openai_class.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / 
"test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("You are an expert assistant") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("# Test") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + result = self.adaptor.upload(package_path, "test-long-api-key-string") + + self.assertTrue(result["success"]) + self.assertIn("validated", result["message"]) + self.assertEqual(result["url"], "https://platform.minimaxi.com/") + mock_client.chat.completions.create.assert_called_once() + + @unittest.skipUnless(APITimeoutError, "openai library not installed") + @patch("openai.OpenAI") + def test_upload_network_error(self, mock_openai_class): + """Test upload with network timeout error""" + mock_client = MagicMock() + mock_client.chat.completions.create.side_effect = APITimeoutError(request=MagicMock()) + mock_openai_class.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("Test") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("Content") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + result = self.adaptor.upload(package_path, "test-long-api-key-string") + + self.assertFalse(result["success"]) + self.assertIn("timed out", result["message"].lower()) + + @unittest.skipUnless(APIConnectionError, "openai library not installed") + @patch("openai.OpenAI") + def test_upload_connection_error(self, mock_openai_class): + """Test upload with connection error""" + mock_client = MagicMock() + mock_client.chat.completions.create.side_effect = APIConnectionError(request=MagicMock()) + mock_openai_class.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / 
"test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("Test") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("Content") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + result = self.adaptor.upload(package_path, "test-long-api-key-string") + + self.assertFalse(result["success"]) + self.assertIn("connection", result["message"].lower()) + + def test_validate_api_key_format(self): + """Test that API key validation uses length-based check""" + # Valid - long enough strings + self.assertTrue(self.adaptor.validate_api_key("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.test")) + self.assertTrue(self.adaptor.validate_api_key("sk-api-abc123-long-enough")) + # Invalid - too short + self.assertFalse(self.adaptor.validate_api_key("eyJshort")) + self.assertFalse(self.adaptor.validate_api_key("short")) + + +class TestMiniMaxAdaptorIntegration(unittest.TestCase): + """Integration tests for MiniMax AI adaptor (require MINIMAX_API_KEY)""" + + def setUp(self): + """Set up test adaptor""" + self.adaptor = get_adaptor("minimax") + + @unittest.skipUnless( + os.getenv("MINIMAX_API_KEY"), "MINIMAX_API_KEY not set - skipping integration test" + ) + def test_enhance_with_real_api(self): + """Test enhancement with real MiniMax API""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / "test.md").write_text( + "# Python Testing\n\n" + "Use pytest for testing:\n" + "```python\n" + "def test_example():\n" + " assert 1 + 1 == 2\n" + "```\n" + ) + + api_key = os.getenv("MINIMAX_API_KEY") + success = self.adaptor.enhance(skill_dir, api_key) + + self.assertTrue(success) + skill_md = (skill_dir / "SKILL.md").read_text() + self.assertTrue(len(skill_md) > 100) + + @unittest.skipUnless( + os.getenv("MINIMAX_API_KEY"), "MINIMAX_API_KEY not set - skipping integration test" + ) + 
def test_upload_with_real_api(self): + """Test upload validation with real MiniMax API""" + with tempfile.TemporaryDirectory() as temp_dir: + skill_dir = Path(temp_dir) / "test-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("You are an expert assistant for Python testing.") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "test.md").write_text("# Test\nContent") + + output_dir = Path(temp_dir) / "output" + output_dir.mkdir() + + package_path = self.adaptor.package(skill_dir, output_dir) + api_key = os.getenv("MINIMAX_API_KEY") + result = self.adaptor.upload(package_path, api_key) + + self.assertTrue(result["success"]) + self.assertIn("validated", result["message"]) + + @unittest.skipUnless( + os.getenv("MINIMAX_API_KEY"), "MINIMAX_API_KEY not set - skipping integration test" + ) + def test_validate_api_key_real(self): + """Test validating a real API key""" + api_key = os.getenv("MINIMAX_API_KEY") + self.assertTrue(self.adaptor.validate_api_key(api_key)) + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index ff87560..a754fd0 100644 --- a/uv.lock +++ b/uv.lock @@ -5699,7 +5699,7 @@ wheels = [ [[package]] name = "skill-seekers" -version = "3.2.0" +version = "3.3.0" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, @@ -5816,6 +5816,9 @@ mcp = [ { name = "starlette" }, { name = "uvicorn" }, ] +minimax = [ + { name = "openai" }, +] notion = [ { name = "notion-client" }, ] @@ -5930,6 +5933,7 @@ requires-dist = [ { name = "numpy", marker = "extra == 'embedding'", specifier = ">=1.24.0" }, { name = "openai", marker = "extra == 'all'", specifier = ">=1.0.0" }, { name = "openai", marker = "extra == 'all-llms'", specifier = ">=1.0.0" }, + { name = "openai", marker = "extra == 'minimax'", specifier = ">=1.0.0" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.0.0" }, { name = "opencv-python-headless", marker = "extra == 'video-full'", specifier = ">=4.9.0" }, { name = "pathspec", specifier = ">=0.12.1" }, @@ -5978,7 +5982,7 @@ requires-dist = [ { name = "yt-dlp", marker = "extra == 'video'", specifier = ">=2024.12.0" }, { name = "yt-dlp", marker = "extra == 'video-full'", specifier = ">=2024.12.0" }, ] -provides-extras = ["mcp", "gemini", "openai", "all-llms", "s3", "gcs", "azure", "docx", "epub", "video", "video-full", "chroma", "weaviate", "sentence-transformers", "pinecone", "rag-upload", "all-cloud", "jupyter", "asciidoc", "pptx", "confluence", "notion", "rss", "chat", "embedding", "all"] +provides-extras = ["mcp", "gemini", "openai", "minimax", "all-llms", "s3", "gcs", "azure", "docx", "epub", "video", "video-full", "chroma", "weaviate", "sentence-transformers", "pinecone", "rag-upload", "all-cloud", "jupyter", "asciidoc", "pptx", "confluence", "notion", "rss", "chat", "embedding", "all"] [package.metadata.requires-dev] dev = [