#!/usr/bin/env python3
"""
Haystack Pipeline Example

Demonstrates how to use Skill Seekers documentation with Haystack 2.x
for building RAG pipelines.
"""

import json
import sys
from collections import Counter
from pathlib import Path

# Relative path: it is resolved against the current working directory at
# run time, not against the location of this script.
DOCS_PATH = Path("../../output/react-haystack.json")

SEPARATOR = "=" * 60

EXAMPLE_QUERIES = [
    "How do I use useState hook?",
    "What are React components?",
    "How to handle events in React?",
]

TOP_K = 3            # number of documents requested per query
PREVIEW_CHARS = 200  # number of content characters shown per result


def _load_documents(docs_path, document_cls):
    """Read the JSON file at ``docs_path`` and build one document per entry.

    Each entry must provide ``"content"`` and ``"meta"`` keys; they are passed
    to ``document_cls`` as keyword arguments.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if an entry lacks ``"content"`` or ``"meta"``.
    """
    # An explicit encoding keeps the script independent of the platform's
    # locale default (which is not UTF-8 on every system).
    # NOTE(review): assumes the packaged file is UTF-8 encoded - confirm
    # against the tool that writes it.
    with open(docs_path, encoding="utf-8") as f:
        docs_data = json.load(f)

    return [
        document_cls(content=doc["content"], meta=doc["meta"])
        for doc in docs_data
    ]


def _print_category_breakdown(documents):
    """Print how many documents fall into each ``meta["category"]`` value."""
    categories = Counter(
        doc.meta.get("category", "unknown") for doc in documents
    )

    print("\nšŸ“ Categories:")
    for cat, count in sorted(categories.items()):
        print(f" - {cat}: {count}")


def _print_results(found):
    """Print source, category and a short content preview for each document."""
    for j, doc in enumerate(found, 1):
        print(f"\nšŸ“– Result {j}:")
        print(f" Source: {doc.meta.get('file', 'unknown')}")
        print(f" Category: {doc.meta.get('category', 'unknown')}")

        # Show preview (first PREVIEW_CHARS chars). Fall back to an empty
        # string so a document whose content is None does not raise.
        preview = (doc.content or "")[:PREVIEW_CHARS].replace("\n", " ")
        print(f" Preview: {preview}...")


def _run_queries(retriever, queries, top_k=TOP_K):
    """Run every query through ``retriever`` and print the retrieved documents."""
    for i, query in enumerate(queries, 1):
        print(f"\n{SEPARATOR}")
        print(f"Query {i}: {query}")
        print(SEPARATOR)

        results = retriever.run(query=query, top_k=top_k)
        found = results["documents"]

        if not found:
            print(" No results found")
            continue

        _print_results(found)


def _print_summary(loaded_count, indexed_count, query_count):
    """Print the closing summary and the suggested next steps."""
    print("\n" + SEPARATOR)
    print("āœ… Example complete!")
    print(SEPARATOR)

    print("\nšŸ“Š Summary:")
    print(f" • Documents loaded: {loaded_count}")
    print(f" • Documents indexed: {indexed_count}")
    print(f" • Queries executed: {query_count}")

    print("\nšŸ’” Next steps:")
    print(" • Try different queries")
    print(" • Experiment with top_k parameter")
    print(" • Build RAG pipeline with LLM generation")
    print(" • Use vector embeddings for semantic search")


def main():
    """Run Haystack pipeline example.

    Loads the documents from ``DOCS_PATH``, indexes them in an in-memory
    document store, runs the example queries through a BM25 retriever and
    prints the results.

    Raises:
        SystemExit: with code 1 when Haystack cannot be imported or the
            documents file does not exist.
    """
    print(SEPARATOR)
    print("Haystack Pipeline Example")
    print(SEPARATOR)

    # Haystack is imported lazily so that a missing installation produces an
    # install hint instead of an import traceback.
    try:
        from haystack import Document
        from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
        from haystack.document_stores.in_memory import InMemoryDocumentStore
    except ImportError:
        print("āŒ Error: Haystack not installed")
        print(" Install with: pip install haystack-ai")
        sys.exit(1)

    # Find the Haystack documents file
    docs_path = DOCS_PATH
    if not docs_path.exists():
        print(f"āŒ Error: Documents not found at {docs_path}")
        print("\nšŸ“ Generate documents first:")
        print(" skill-seekers scrape --config configs/react.json --max-pages 100")
        print(" skill-seekers package output/react --target haystack")
        sys.exit(1)

    # Step 1: Load documents
    print("\nšŸ“š Step 1: Loading documents...")
    documents = _load_documents(docs_path, Document)
    print(f"āœ… Loaded {len(documents)} documents")

    # Show document breakdown
    _print_category_breakdown(documents)

    # Step 2: Create document store
    print("\nšŸ’¾ Step 2: Creating document store...")
    document_store = InMemoryDocumentStore()
    document_store.write_documents(documents)
    indexed_count = document_store.count_documents()
    print(f"āœ… Indexed {indexed_count} documents")

    # Step 3: Create retriever
    print("\nšŸ” Step 3: Creating BM25 retriever...")
    retriever = InMemoryBM25Retriever(document_store=document_store)
    print("āœ… Retriever ready")

    # Step 4: Query examples
    print("\nšŸŽÆ Step 4: Running queries...\n")
    _run_queries(retriever, EXAMPLE_QUERIES)

    # Summary
    _print_summary(len(documents), indexed_count, len(EXAMPLE_QUERIES))


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nāš ļø Interrupted by user")
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: report any failure as a one-line error and
        # signal it through the exit code.
        print(f"\nāŒ Error: {e}")
        sys.exit(1)