From 1c888e78173e424730d9c1862604601f752c4b95 Mon Sep 17 00:00:00 2001 From: yusyus Date: Sat, 7 Feb 2026 21:01:49 +0300 Subject: [PATCH] feat: Add Haystack RAG framework adaptor (Task 2.2) Implements complete Haystack 2.x integration for RAG pipelines: **Haystack Adaptor (src/skill_seekers/cli/adaptors/haystack.py):** - Document format: {content: str, meta: dict} - JSON packaging for Haystack pipelines - Compatible with InMemoryDocumentStore, BM25Retriever - Registered in adaptor factory as 'haystack' **Example Pipeline (examples/haystack-pipeline/):** - README.md with comprehensive guide and troubleshooting - quickstart.py demonstrating BM25 retrieval - requirements.txt (haystack-ai>=2.0.0) - Shows document loading, indexing, and querying **Tests (tests/test_adaptors/test_haystack_adaptor.py):** - 11 tests covering all adaptor functionality - Format validation, packaging, upload messages - Edge cases: empty dirs, references-only skills - All 93 adaptor tests passing (100% suite pass rate) **Features:** - No upload endpoint (local use only like LangChain/LlamaIndex) - No AI enhancement (enhance before packaging) - Same packaging pattern as other RAG frameworks - InMemoryDocumentStore + BM25Retriever example Test: pytest tests/test_adaptors/test_haystack_adaptor.py -v --- examples/haystack-pipeline/README.md | 278 ++++++++++++++++++ examples/haystack-pipeline/quickstart.py | 128 ++++++++ examples/haystack-pipeline/requirements.txt | 11 + src/skill_seekers/cli/adaptors/__init__.py | 7 + src/skill_seekers/cli/adaptors/haystack.py | 294 +++++++++++++++++++ tests/test_adaptors/test_haystack_adaptor.py | 192 ++++++++++++ 6 files changed, 910 insertions(+) create mode 100644 examples/haystack-pipeline/README.md create mode 100644 examples/haystack-pipeline/quickstart.py create mode 100644 examples/haystack-pipeline/requirements.txt create mode 100644 src/skill_seekers/cli/adaptors/haystack.py create mode 100644 tests/test_adaptors/test_haystack_adaptor.py diff --git 
a/examples/haystack-pipeline/README.md b/examples/haystack-pipeline/README.md new file mode 100644 index 0000000..d40ba08 --- /dev/null +++ b/examples/haystack-pipeline/README.md @@ -0,0 +1,278 @@ +# Haystack Pipeline Example + +Complete example showing how to use Skill Seekers with Haystack 2.x for building RAG pipelines. + +## What This Example Does + +- ✅ Converts documentation into Haystack Documents +- ✅ Creates an in-memory document store +- ✅ Builds a BM25 retriever for keyword search +- ✅ Shows complete RAG pipeline workflow + +## Prerequisites + +```bash +# Install Skill Seekers +pip install skill-seekers + +# Install Haystack 2.x +pip install haystack-ai +``` + +## Quick Start + +### 1. Generate React Documentation Skill + +```bash +# Scrape React documentation +skill-seekers scrape --config configs/react.json --max-pages 100 + +# Package for Haystack +skill-seekers package output/react --target haystack +``` + +This creates `output/react-haystack.json` with Haystack Documents. + +### 2. 
Run the Pipeline + +```bash +# Run the example script +python quickstart.py +``` + +## What the Example Does + +### Step 1: Load Documents + +```python +from haystack import Document +import json + +# Load Haystack documents +with open("../../output/react-haystack.json") as f: + docs_data = json.load(f) + +documents = [ + Document(content=doc["content"], meta=doc["meta"]) + for doc in docs_data +] + +print(f"📚 Loaded {len(documents)} documents") +``` + +### Step 2: Create Document Store + +```python +from haystack.document_stores.in_memory import InMemoryDocumentStore + +# Create in-memory store +document_store = InMemoryDocumentStore() +document_store.write_documents(documents) + +print(f"💾 Indexed {document_store.count_documents()} documents") +``` + +### Step 3: Build Retriever + +```python +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever + +# Create BM25 retriever +retriever = InMemoryBM25Retriever(document_store=document_store) + +# Query +results = retriever.run( + query="How do I use useState hook?", + top_k=3 +) + +# Display results +for doc in results["documents"]: + print(f"\n📖 Source: {doc.meta.get('file', 'unknown')}") + print(f" Category: {doc.meta.get('category', 'unknown')}") + print(f" Preview: {doc.content[:200]}...") +``` + +## Expected Output + +``` +📚 Loaded 15 documents +💾 Indexed 15 documents + +🔍 Query: How do I use useState hook? + +📖 Source: hooks.md + Category: hooks + Preview: # React Hooks + +React Hooks are functions that let you "hook into" React state and lifecycle features from function components. + +## useState + +The useState Hook lets you add React state to function components... + +📖 Source: getting_started.md + Category: getting started + Preview: # Getting Started with React + +React is a JavaScript library for building user interfaces... + +📖 Source: best_practices.md + Category: best practices + Preview: # React Best Practices + +When working with Hooks... 
+``` + +## Advanced Usage + +### With RAG Chunking + +For better retrieval quality, use semantic chunking: + +```bash +# Generate with chunking +skill-seekers scrape --config configs/react.json --max-pages 100 --chunk-for-rag --chunk-size 512 --chunk-overlap 50 + +# Use chunked output +python quickstart.py --chunked +``` + +### With Vector Embeddings + +For semantic search instead of BM25: + +```python +from haystack.components.embedders import SentenceTransformersDocumentEmbedder +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever + +# Create document store with embeddings +document_store = InMemoryDocumentStore() + +# Embed documents +embedder = SentenceTransformersDocumentEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2" +) +embedder.warm_up() + +# Process documents +docs_with_embeddings = embedder.run(documents) +document_store.write_documents(docs_with_embeddings["documents"]) + +# Create embedding retriever +retriever = InMemoryEmbeddingRetriever(document_store=document_store) + +# Query (requires query embedding) +from haystack.components.embedders import SentenceTransformersTextEmbedder + +query_embedder = SentenceTransformersTextEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2" +) +query_embedder.warm_up() + +query_embedding = query_embedder.run("How do I use useState?") + +results = retriever.run( + query_embedding=query_embedding["embedding"], + top_k=3 +) +``` + +### Building Complete RAG Pipeline + +For question answering with LLMs: + +```python +from haystack import Pipeline +from haystack.components.builders import PromptBuilder +from haystack.components.generators import OpenAIGenerator + +# Create RAG pipeline +rag_pipeline = Pipeline() + +# Add components +rag_pipeline.add_component("retriever", retriever) +rag_pipeline.add_component("prompt_builder", PromptBuilder( + template=""" + Based on the following context, answer the 
question. + + Context: + {% for doc in documents %} + {{ doc.content }} + {% endfor %} + + Question: {{ question }} + + Answer: + """ +)) +rag_pipeline.add_component("llm", OpenAIGenerator()) # API key is read from the OPENAI_API_KEY environment variable + +# Connect components +rag_pipeline.connect("retriever", "prompt_builder.documents") +rag_pipeline.connect("prompt_builder", "llm") + +# Run pipeline +response = rag_pipeline.run({ + "retriever": {"query": "How do I use useState?"}, + "prompt_builder": {"question": "How do I use useState?"} +}) + +print(response["llm"]["replies"][0]) +``` + +## Files in This Example + +- `README.md` - This file +- `quickstart.py` - Basic BM25 retrieval pipeline +- `requirements.txt` - Python dependencies + +## Troubleshooting + +### Issue: ModuleNotFoundError: No module named 'haystack' + +**Solution:** Install Haystack 2.x + +```bash +pip install haystack-ai +``` + +### Issue: Documents not found + +**Solution:** Run scraping first + +```bash +skill-seekers scrape --config configs/react.json +skill-seekers package output/react --target haystack +``` + +### Issue: Poor retrieval quality + +**Solution:** Use semantic chunking or vector embeddings + +```bash +# Semantic chunking +skill-seekers scrape --config configs/react.json --chunk-for-rag + +# Or use vector embeddings (see Advanced Usage) +``` + +## Next Steps + +1. Try different documentation sources (Django, FastAPI, etc.) +2. Experiment with vector embeddings for semantic search +3. Build a complete RAG pipeline with LLM generation +4. 
Deploy to production with persistent document stores + +## Related Examples + +- [LangChain RAG Pipeline](../langchain-rag-pipeline/) +- [LlamaIndex Query Engine](../llama-index-query-engine/) +- [Pinecone Vector Store](../pinecone-upsert/) + +## Resources + +- [Haystack Documentation](https://docs.haystack.deepset.ai/) +- [Skill Seekers Documentation](https://github.com/yusufkaraaslan/Skill_Seekers) +- [Haystack Tutorials](https://haystack.deepset.ai/tutorials) diff --git a/examples/haystack-pipeline/quickstart.py b/examples/haystack-pipeline/quickstart.py new file mode 100644 index 0000000..dccdd9a --- /dev/null +++ b/examples/haystack-pipeline/quickstart.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +""" +Haystack Pipeline Example + +Demonstrates how to use Skill Seekers documentation with Haystack 2.x +for building RAG pipelines. +""" + +import json +import sys +from pathlib import Path + + +def main(): + """Run Haystack pipeline example.""" + print("=" * 60) + print("Haystack Pipeline Example") + print("=" * 60) + + # Check if Haystack is installed + try: + from haystack import Document + from haystack.document_stores.in_memory import InMemoryDocumentStore + from haystack.components.retrievers.in_memory import InMemoryBM25Retriever + except ImportError: + print("āŒ Error: Haystack not installed") + print(" Install with: pip install haystack-ai") + sys.exit(1) + + # Find the Haystack documents file + docs_path = Path("../../output/react-haystack.json") + + if not docs_path.exists(): + print(f"āŒ Error: Documents not found at {docs_path}") + print("\nšŸ“ Generate documents first:") + print(" skill-seekers scrape --config configs/react.json --max-pages 100") + print(" skill-seekers package output/react --target haystack") + sys.exit(1) + + # Step 1: Load documents + print("\nšŸ“š Step 1: Loading documents...") + with open(docs_path) as f: + docs_data = json.load(f) + + documents = [ + Document(content=doc["content"], meta=doc["meta"]) for doc in docs_data + ] + 
+ print(f"āœ… Loaded {len(documents)} documents") + + # Show document breakdown + categories = {} + for doc in documents: + cat = doc.meta.get("category", "unknown") + categories[cat] = categories.get(cat, 0) + 1 + + print("\nšŸ“ Categories:") + for cat, count in sorted(categories.items()): + print(f" - {cat}: {count}") + + # Step 2: Create document store + print("\nšŸ’¾ Step 2: Creating document store...") + document_store = InMemoryDocumentStore() + document_store.write_documents(documents) + + indexed_count = document_store.count_documents() + print(f"āœ… Indexed {indexed_count} documents") + + # Step 3: Create retriever + print("\nšŸ” Step 3: Creating BM25 retriever...") + retriever = InMemoryBM25Retriever(document_store=document_store) + print("āœ… Retriever ready") + + # Step 4: Query examples + print("\nšŸŽÆ Step 4: Running queries...\n") + + queries = [ + "How do I use useState hook?", + "What are React components?", + "How to handle events in React?", + ] + + for i, query in enumerate(queries, 1): + print(f"\n{'=' * 60}") + print(f"Query {i}: {query}") + print("=" * 60) + + # Run query + results = retriever.run(query=query, top_k=3) + + if not results["documents"]: + print(" No results found") + continue + + # Display results + for j, doc in enumerate(results["documents"], 1): + print(f"\nšŸ“– Result {j}:") + print(f" Source: {doc.meta.get('file', 'unknown')}") + print(f" Category: {doc.meta.get('category', 'unknown')}") + + # Show preview (first 200 chars) + preview = doc.content[:200].replace("\n", " ") + print(f" Preview: {preview}...") + + # Summary + print("\n" + "=" * 60) + print("āœ… Example complete!") + print("=" * 60) + print("\nšŸ“Š Summary:") + print(f" • Documents loaded: {len(documents)}") + print(f" • Documents indexed: {indexed_count}") + print(f" • Queries executed: {len(queries)}") + print("\nšŸ’” Next steps:") + print(" • Try different queries") + print(" • Experiment with top_k parameter") + print(" • Build RAG pipeline with LLM 
generation") + print(" • Use vector embeddings for semantic search") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\nāš ļø Interrupted by user") + sys.exit(0) + except Exception as e: + print(f"\nāŒ Error: {e}") + sys.exit(1) diff --git a/examples/haystack-pipeline/requirements.txt b/examples/haystack-pipeline/requirements.txt new file mode 100644 index 0000000..16a99bb --- /dev/null +++ b/examples/haystack-pipeline/requirements.txt @@ -0,0 +1,11 @@ +# Haystack Pipeline Example Requirements + +# Haystack 2.x - RAG framework +haystack-ai>=2.0.0 + +# Optional: For vector embeddings +# sentence-transformers>=2.2.0 + +# Optional: For LLM generation +# openai>=1.0.0 +# anthropic>=0.7.0 diff --git a/src/skill_seekers/cli/adaptors/__init__.py b/src/skill_seekers/cli/adaptors/__init__.py index 40449aa..a012843 100644 --- a/src/skill_seekers/cli/adaptors/__init__.py +++ b/src/skill_seekers/cli/adaptors/__init__.py @@ -59,6 +59,11 @@ try: except ImportError: QdrantAdaptor = None +try: + from .haystack import HaystackAdaptor +except ImportError: + HaystackAdaptor = None + # Registry of available adaptors ADAPTORS: dict[str, type[SkillAdaptor]] = {} @@ -84,6 +89,8 @@ if FAISSHelpers: ADAPTORS["faiss"] = FAISSHelpers if QdrantAdaptor: ADAPTORS["qdrant"] = QdrantAdaptor +if HaystackAdaptor: + ADAPTORS["haystack"] = HaystackAdaptor def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor: diff --git a/src/skill_seekers/cli/adaptors/haystack.py b/src/skill_seekers/cli/adaptors/haystack.py new file mode 100644 index 0000000..eb5f24c --- /dev/null +++ b/src/skill_seekers/cli/adaptors/haystack.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +""" +Haystack Adaptor + +Implements Haystack Document format for RAG pipelines. +Converts Skill Seekers documentation into Haystack-compatible Document objects. 
+""" + +import json +from pathlib import Path +from typing import Any + +from .base import SkillAdaptor, SkillMetadata + + +class HaystackAdaptor(SkillAdaptor): + """ + Haystack platform adaptor. + + Handles: + - Haystack Document format (content + meta) + - JSON packaging with array of documents + - No upload (users import directly into code) + - Optimized for Haystack 2.x pipelines + """ + + PLATFORM = "haystack" + PLATFORM_NAME = "Haystack (RAG Framework)" + DEFAULT_API_ENDPOINT = None # No upload endpoint + + def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str: + """ + Format skill as JSON array of Haystack Documents. + + Converts SKILL.md and all references/*.md into Haystack Document format: + { + "content": "...", + "meta": {"source": "...", "category": "...", ...} + } + + Args: + skill_dir: Path to skill directory + metadata: Skill metadata + + Returns: + JSON string containing array of Haystack Documents + """ + documents = [] + + # Convert SKILL.md (main documentation) + skill_md_path = skill_dir / "SKILL.md" + if skill_md_path.exists(): + content = self._read_existing_content(skill_dir) + if content.strip(): + documents.append( + { + "content": content, + "meta": { + "source": metadata.name, + "category": "overview", + "file": "SKILL.md", + "type": "documentation", + "version": metadata.version, + }, + } + ) + + # Convert all reference files + refs_dir = skill_dir / "references" + if refs_dir.exists(): + for ref_file in sorted(refs_dir.glob("*.md")): + if ref_file.is_file() and not ref_file.name.startswith("."): + try: + ref_content = ref_file.read_text(encoding="utf-8") + if ref_content.strip(): + # Derive category from filename + category = ref_file.stem.replace("_", " ").lower() + + documents.append( + { + "content": ref_content, + "meta": { + "source": metadata.name, + "category": category, + "file": ref_file.name, + "type": "reference", + "version": metadata.version, + }, + } + ) + except Exception as e: + print(f"āš ļø 
Warning: Could not read {ref_file.name}: {e}") + continue + + # Return as formatted JSON + return json.dumps(documents, indent=2, ensure_ascii=False) + + def package(self, skill_dir: Path, output_path: Path) -> Path: + """ + Package skill into JSON file for Haystack. + + Creates a JSON file containing an array of Haystack Documents ready + for ingestion into Haystack 2.x pipelines and document stores. + + Args: + skill_dir: Path to skill directory + output_path: Output path/filename for JSON file + + Returns: + Path to created JSON file + """ + skill_dir = Path(skill_dir) + + # Determine output filename + if output_path.is_dir() or str(output_path).endswith("/"): + output_path = Path(output_path) / f"{skill_dir.name}-haystack.json" + elif not str(output_path).endswith(".json"): + # Replace extension if needed + output_str = str(output_path).replace(".zip", ".json").replace(".tar.gz", ".json") + if not output_str.endswith("-haystack.json"): + output_str = output_str.replace(".json", "-haystack.json") + if not output_str.endswith(".json"): + output_str += ".json" + output_path = Path(output_str) + + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Read metadata + metadata = SkillMetadata( + name=skill_dir.name, + description=f"Haystack documents for {skill_dir.name}", + version="1.0.0", + ) + + # Generate Haystack documents + documents_json = self.format_skill_md(skill_dir, metadata) + + # Write to file + output_path.write_text(documents_json, encoding="utf-8") + + print(f"\nāœ… Haystack documents packaged successfully!") + print(f"šŸ“¦ Output: {output_path}") + + # Parse and show stats + documents = json.loads(documents_json) + print(f"šŸ“Š Total documents: {len(documents)}") + + # Show category breakdown + categories = {} + for doc in documents: + cat = doc["meta"].get("category", "unknown") + categories[cat] = categories.get(cat, 0) + 1 + + print("šŸ“ Categories:") + for cat, count in sorted(categories.items()): + 
print(f" - {cat}: {count}") + + return output_path + + def upload(self, package_path: Path, _api_key: str, **_kwargs) -> dict[str, Any]: + """ + Haystack format does not support direct upload. + + Users should import the JSON file into their Haystack code: + + ```python + from haystack import Document + import json + + # Load documents + with open("skill-haystack.json") as f: + docs_data = json.load(f) + + # Convert to Haystack Documents + documents = [ + Document(content=doc["content"], meta=doc["meta"]) + for doc in docs_data + ] + + # Use with document store + from haystack.document_stores.in_memory import InMemoryDocumentStore + + document_store = InMemoryDocumentStore() + document_store.write_documents(documents) + + # Create pipeline + from haystack.components.retrievers.in_memory import InMemoryBM25Retriever + + retriever = InMemoryBM25Retriever(document_store=document_store) + results = retriever.run(query="your query here") + ``` + + Args: + package_path: Path to JSON file + api_key: Not used + **kwargs: Not used + + Returns: + Result indicating no upload capability + """ + example_code = """ +# Example: Load into Haystack 2.x + +from haystack import Document +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.components.retrievers.in_memory import InMemoryBM25Retriever +import json + +# Load documents +with open("{path}") as f: + docs_data = json.load(f) + +# Convert to Haystack Documents +documents = [ + Document(content=doc["content"], meta=doc["meta"]) + for doc in docs_data +] + +# Create document store +document_store = InMemoryDocumentStore() +document_store.write_documents(documents) + +# Create retriever +retriever = InMemoryBM25Retriever(document_store=document_store) + +# Query +results = retriever.run(query="your question here") +for doc in results["documents"]: + print(doc.content) +""".format( + path=package_path.name + ) + + return { + "success": False, + "skill_id": None, + "url": 
str(package_path.absolute()), + "message": ( + f"Haystack documents packaged at: {package_path.absolute()}\n\n" + "Load into your code:\n" + f"{example_code}" + ), + } + + def validate_api_key(self, _api_key: str) -> bool: + """ + Haystack format doesn't use API keys for packaging. + + Args: + api_key: Not used + + Returns: + Always False (no API needed for packaging) + """ + return False + + def get_env_var_name(self) -> str: + """ + No API key needed for Haystack packaging. + + Returns: + Empty string + """ + return "" + + def supports_enhancement(self) -> bool: + """ + Haystack format doesn't support AI enhancement. + + Enhancement should be done before conversion using: + skill-seekers enhance output/skill/ --mode LOCAL + + Returns: + False + """ + return False + + def enhance(self, _skill_dir: Path, _api_key: str) -> bool: + """ + Haystack format doesn't support enhancement. + + Args: + skill_dir: Not used + api_key: Not used + + Returns: + False + """ + print("āŒ Haystack format does not support enhancement") + print(" Enhance before packaging:") + print(" skill-seekers enhance output/skill/ --mode LOCAL") + print(" skill-seekers package output/skill/ --target haystack") + return False diff --git a/tests/test_adaptors/test_haystack_adaptor.py b/tests/test_adaptors/test_haystack_adaptor.py new file mode 100644 index 0000000..207e36d --- /dev/null +++ b/tests/test_adaptors/test_haystack_adaptor.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +""" +Tests for Haystack Adaptor +""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from skill_seekers.cli.adaptors import get_adaptor +from skill_seekers.cli.adaptors.base import SkillMetadata + + +class TestHaystackAdaptor: + """Test suite for HaystackAdaptor class.""" + + def test_adaptor_registration(self): + """Test that Haystack adaptor is registered.""" + adaptor = get_adaptor("haystack") + assert adaptor.PLATFORM == "haystack" + assert adaptor.PLATFORM_NAME == "Haystack (RAG Framework)" 
+ + def test_format_skill_md(self, tmp_path): + """Test formatting SKILL.md as Haystack Documents.""" + # Create test skill directory + skill_dir = tmp_path / "test_skill" + skill_dir.mkdir() + + # Create SKILL.md + skill_md = skill_dir / "SKILL.md" + skill_md.write_text( + "# Test Skill\n\nThis is a test skill for Haystack format." + ) + + # Create references directory with files + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / "getting_started.md").write_text("# Getting Started\n\nQuick start.") + (refs_dir / "api.md").write_text("# API Reference\n\nAPI docs.") + + # Format as Haystack Documents + adaptor = get_adaptor("haystack") + metadata = SkillMetadata( + name="test_skill", description="Test skill", version="1.0.0" + ) + + documents_json = adaptor.format_skill_md(skill_dir, metadata) + + # Parse and validate + documents = json.loads(documents_json) + + assert len(documents) == 3 # SKILL.md + 2 references + + # Check document structure + for doc in documents: + assert "content" in doc + assert "meta" in doc + assert doc["meta"]["source"] == "test_skill" + assert doc["meta"]["version"] == "1.0.0" + assert "category" in doc["meta"] + assert "file" in doc["meta"] + assert "type" in doc["meta"] + + # Check categories + categories = {doc["meta"]["category"] for doc in documents} + assert "overview" in categories # From SKILL.md + assert "getting started" in categories or "api" in categories # From references + + def test_package_creates_json(self, tmp_path): + """Test packaging skill into JSON file.""" + # Create test skill + skill_dir = tmp_path / "test_skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("# Test\n\nTest content.") + + # Package + adaptor = get_adaptor("haystack") + output_path = adaptor.package(skill_dir, tmp_path) + + # Verify output + assert output_path.exists() + assert output_path.suffix == ".json" + assert "haystack" in output_path.name + + # Verify content + with open(output_path) as f: + documents = 
json.load(f) + + assert isinstance(documents, list) + assert len(documents) > 0 + assert "content" in documents[0] + assert "meta" in documents[0] + + def test_package_output_filename(self, tmp_path): + """Test package output filename generation.""" + skill_dir = tmp_path / "react" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_text("# React\n\nReact docs.") + + adaptor = get_adaptor("haystack") + + # Test directory output + output_path = adaptor.package(skill_dir, tmp_path) + assert output_path.name == "react-haystack.json" + + # Test with .zip extension (should replace) + output_path = adaptor.package(skill_dir, tmp_path / "test.zip") + assert output_path.suffix == ".json" + assert "haystack" in output_path.name + + def test_upload_returns_message(self, tmp_path): + """Test upload returns instructions (no actual upload).""" + # Create test package + package_path = tmp_path / "test-haystack.json" + package_path.write_text('[]') + + adaptor = get_adaptor("haystack") + result = adaptor.upload(package_path, "fake-key") + + assert result["success"] is False # No upload capability + assert result["skill_id"] is None + assert "message" in result + assert "from haystack import Document" in result["message"] + assert "InMemoryDocumentStore" in result["message"] + + def test_validate_api_key_returns_false(self): + """Test that API key validation returns False (no API needed).""" + adaptor = get_adaptor("haystack") + assert adaptor.validate_api_key("any-key") is False + + def test_get_env_var_name_returns_empty(self): + """Test that env var name is empty (no API needed).""" + adaptor = get_adaptor("haystack") + assert adaptor.get_env_var_name() == "" + + def test_supports_enhancement_returns_false(self): + """Test that enhancement is not supported.""" + adaptor = get_adaptor("haystack") + assert adaptor.supports_enhancement() is False + + def test_enhance_returns_false(self, tmp_path): + """Test that enhance returns False.""" + skill_dir = tmp_path / "test_skill" + 
skill_dir.mkdir() + + adaptor = get_adaptor("haystack") + result = adaptor.enhance(skill_dir, "fake-key") + + assert result is False + + def test_empty_skill_directory(self, tmp_path): + """Test handling of empty skill directory.""" + skill_dir = tmp_path / "empty_skill" + skill_dir.mkdir() + + adaptor = get_adaptor("haystack") + metadata = SkillMetadata( + name="empty_skill", description="Empty", version="1.0.0" + ) + + documents_json = adaptor.format_skill_md(skill_dir, metadata) + documents = json.loads(documents_json) + + # Should return empty list + assert documents == [] + + def test_references_only(self, tmp_path): + """Test skill with references but no SKILL.md.""" + skill_dir = tmp_path / "refs_only" + skill_dir.mkdir() + + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / "test.md").write_text("# Test\n\nTest content.") + + adaptor = get_adaptor("haystack") + metadata = SkillMetadata( + name="refs_only", description="Refs only", version="1.0.0" + ) + + documents_json = adaptor.format_skill_md(skill_dir, metadata) + documents = json.loads(documents_json) + + assert len(documents) == 1 + assert documents[0]["meta"]["category"] == "test" + assert documents[0]["meta"]["type"] == "reference" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])