docs: Add 4 comprehensive vector database examples (Weaviate, Chroma, FAISS, Qdrant)
Created complete working examples for all 4 vector databases with RAG adaptors:

Weaviate Example:
- Comprehensive README with hybrid search guide
- 3 Python scripts (generate, upload, query)
- Sample outputs and query results
- Covers hybrid search, filtering, schema design

Chroma Example:
- Simple, local-first approach
- In-memory and persistent storage options
- Semantic search and metadata filtering
- Comparison with Weaviate

FAISS Example:
- Facebook AI Similarity Search integration
- OpenAI embeddings generation
- Index building and persistence
- Performance-focused for scale

Qdrant Example:
- Advanced filtering capabilities
- Production-ready features
- Complex query patterns
- Rust-based performance

Each example includes:
- Detailed README with setup and troubleshooting
- requirements.txt with dependencies
- 3 working Python scripts
- Sample outputs directory

Total files: 20 (4 examples × 5 files each)
Documentation: 4 comprehensive READMEs (~800 lines total)

Phase 2 of optional enhancements complete.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
185
examples/weaviate-example/2_upload_to_weaviate.py
Normal file
185
examples/weaviate-example/2_upload_to_weaviate.py
Normal file
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Step 2: Upload to Weaviate
|
||||
|
||||
This script:
|
||||
1. Connects to Weaviate instance (local or cloud)
|
||||
2. Creates the schema (class + properties)
|
||||
3. Batch uploads all objects
|
||||
4. Verifies the upload
|
||||
|
||||
Usage:
|
||||
# Local Docker
|
||||
python 2_upload_to_weaviate.py
|
||||
|
||||
# Weaviate Cloud
|
||||
python 2_upload_to_weaviate.py --url https://your-cluster.weaviate.network --api-key YOUR_KEY
|
||||
|
||||
# Reset existing data
|
||||
python 2_upload_to_weaviate.py --reset
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import weaviate
|
||||
from weaviate.auth import AuthApiKey
|
||||
except ImportError:
|
||||
print("❌ weaviate-client not installed!")
|
||||
print("Install it with: pip install weaviate-client")
|
||||
sys.exit(1)
|
||||
|
||||
def connect_to_weaviate(url: str, api_key: str = None):
    """Create a Weaviate client for ``url`` and confirm the instance is ready.

    Args:
        url: Address of the Weaviate instance.
        api_key: API key for authenticated instances. When falsy, the client
            is created without credentials.

    Returns:
        The client object, once ``client.is_ready()`` reports true.

    The process exits with status 1 if the instance reports not ready, or if
    creating or probing the client raises an exception.
    """
    print(f"\n🔗 Connecting to Weaviate at {url}...")

    try:
        # Credentials are attached only when a key was supplied
        # (cloud instance); a local Docker instance needs just the URL.
        client_kwargs = {"url": url}
        if api_key:
            client_kwargs["auth_client_secret"] = AuthApiKey(api_key)
        client = weaviate.Client(**client_kwargs)

        # Guard clause: bail out unless the instance answers the readiness probe
        if not client.is_ready():
            print("❌ Weaviate is not ready")
            sys.exit(1)

        print("✅ Weaviate is ready!\n")
        return client

    except Exception as e:
        print(f"❌ Connection failed: {e}")
        for hint in (
            "\n💡 Tips:",
            " - For local: Ensure Docker is running (docker ps | grep weaviate)",
            " - For cloud: Check your URL and API key",
        ):
            print(hint)
        sys.exit(1)
|
||||
|
||||
def load_skill_data(filepath: str = "output/react-weaviate.json"):
    """Load the Weaviate-format skill JSON.

    Args:
        filepath: Path to the JSON file to read.

    Returns:
        The parsed JSON document.

    The process exits with status 1 (after printing a hint) when ``filepath``
    does not point to an existing regular file.
    """
    path = Path(filepath)

    # is_file() rather than exists(): a directory at this path would pass an
    # existence check and then fail inside open() with an unrelated error.
    if not path.is_file():
        print(f"❌ Skill file not found: {filepath}")
        print("Run '1_generate_skill.py' first!")
        sys.exit(1)

    # Decode explicitly as UTF-8; without it open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII text.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
def create_schema(client, schema: dict):
    """Create the Weaviate class described by ``schema``.

    If a class with the same name is already present in the schema returned
    by ``client.schema.get()``, the user is asked whether to delete and
    recreate it. Any answer other than ``y`` (case-insensitive, surrounding
    whitespace ignored) skips creation and leaves the existing class alone.

    Args:
        client: Weaviate client exposing ``schema.get``,
            ``schema.delete_class`` and ``schema.create_class``.
        schema: Class definition; must contain a ``"class"`` key holding the
            class name.
    """
    class_name = schema["class"]

    print(f"📊 Creating schema: {class_name}")

    # Check if class already exists ("classes" may be absent or null)
    existing_classes = client.schema.get().get("classes") or []
    class_exists = any(c["class"] == class_name for c in existing_classes)

    if class_exists:
        print(f"⚠️ Class '{class_name}' already exists")
        try:
            response = input("Delete and recreate? [y/N]: ")
        except EOFError:
            # No interactive stdin (CI, piped input): fall back to the
            # advertised default answer "N" instead of crashing.
            print()
            response = ""

        if response.strip().lower() != "y":
            print("Skipping schema creation")
            return

        client.schema.delete_class(class_name)
        print("🗑️ Deleted existing class")

    # Create the class
    client.schema.create_class(schema)
    print("✅ Schema created successfully!\n")
|
||||
|
||||
def upload_objects(client, class_name: str, objects: list, batch_size: int = 100):
    """Queue every object on the client's batch and report progress.

    Args:
        client: Weaviate client whose ``batch`` attribute is a context
            manager offering ``add_data_object``.
        class_name: Name of the class the objects are added to.
        objects: Items to upload; each must provide ``"properties"`` (the
            data object) and ``"id"`` (passed as the object's UUID).
        batch_size: Value assigned to ``batch.batch_size`` and the interval
            at which progress lines are printed. Defaults to 100.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # Would otherwise surface as a ZeroDivisionError in the progress math
        raise ValueError("batch_size must be a positive integer")

    total = len(objects)

    print(f"📤 Uploading {total} objects in batches...")

    with client.batch as batch:
        batch.batch_size = batch_size

        for count, obj in enumerate(objects, start=1):
            batch.add_data_object(
                data_object=obj["properties"],
                class_name=class_name,
                uuid=obj["id"],
            )

            # Progress line each time a full batch worth of objects is queued
            if count % batch_size == 0:
                print(f"✅ Batch {count // batch_size} uploaded ({count}/{total} objects)")

    # Trailing partial batch, if the total is not a multiple of batch_size
    full_batches, final_count = divmod(total, batch_size)
    if final_count:
        print(f"✅ Batch {full_batches + 1} uploaded ({final_count} objects)")

    print(f"\n✅ Successfully uploaded {total} documents to Weaviate")
|
||||
|
||||
def verify_upload(client, class_name: str):
    """Query the object count for ``class_name`` and print it.

    Args:
        client: Weaviate client exposing
            ``query.aggregate(...).with_meta_count().do()``.
        class_name: Name of the class to count.

    Returns:
        The count read from the aggregate response.

    The process exits with status 1 when the response does not contain the
    expected ``data -> Aggregate -> <class> -> [0] -> meta -> count`` path.
    """
    result = client.query.aggregate(class_name).with_meta_count().do()

    try:
        count = result["data"]["Aggregate"][class_name][0]["meta"]["count"]
    except (KeyError, IndexError, TypeError):
        # Show the response's "errors" entry when present, else the raw
        # response, instead of an opaque lookup traceback.
        errors = result.get("errors") if isinstance(result, dict) else None
        print(f"❌ Could not verify upload for class '{class_name}': {errors or result}")
        sys.exit(1)

    print(f"🔍 Class '{class_name}' now contains {count} objects")
    return count
|
||||
|
||||
def _delete_class_if_present(client, class_name: str) -> bool:
    """Delete ``class_name`` from the schema if it is listed; return whether it was."""
    classes = client.schema.get().get("classes") or []
    if not any(c["class"] == class_name for c in classes):
        return False
    client.schema.delete_class(class_name)
    return True


def main(argv=None):
    """Run the upload workflow: connect, load, create schema, upload, verify.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads ``sys.argv[1:]`` as before.
    """
    parser = argparse.ArgumentParser(description="Upload skill to Weaviate")
    parser.add_argument(
        "--url",
        default="http://localhost:8080",
        help="Weaviate URL (default: http://localhost:8080)",
    )
    parser.add_argument(
        "--api-key",
        help="Weaviate API key (for cloud instances)",
    )
    parser.add_argument(
        "--file",
        default="output/react-weaviate.json",
        help="Path to Weaviate JSON file",
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Delete existing class before uploading",
    )

    args = parser.parse_args(argv)

    print("=" * 60)
    print("Step 2: Upload to Weaviate")
    print("=" * 60)

    # Connect to Weaviate
    client = connect_to_weaviate(args.url, args.api_key)

    # Load skill data
    data = load_skill_data(args.file)

    # Honour --reset: drop the class up front so create_schema() finds
    # nothing to ask about. Previously the flag was parsed but never read.
    if args.reset:
        target_class = data["schema"]["class"]
        if _delete_class_if_present(client, target_class):
            print(f"🗑️ Deleted existing class '{target_class}' (--reset)")

    # Create schema
    create_schema(client, data["schema"])

    # Upload objects
    upload_objects(client, data["class_name"], data["objects"])

    # Verify
    verify_upload(client, data["class_name"])

    print("\n✅ Upload complete! Next step: python 3_query_example.py")
|
||||
|
||||
# Run the upload workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user