diff --git a/src/skill_seekers/cli/adaptors/__init__.py b/src/skill_seekers/cli/adaptors/__init__.py
index 6e05a66..40449aa 100644
--- a/src/skill_seekers/cli/adaptors/__init__.py
+++ b/src/skill_seekers/cli/adaptors/__init__.py
@@ -54,6 +54,11 @@ try:
 except ImportError:
     FAISSHelpers = None
 
+try:
+    from .qdrant import QdrantAdaptor
+except ImportError:
+    QdrantAdaptor = None
+
 # Registry of available adaptors
 ADAPTORS: dict[str, type[SkillAdaptor]] = {}
 
@@ -77,6 +82,8 @@ if ChromaAdaptor:
     ADAPTORS["chroma"] = ChromaAdaptor
 if FAISSHelpers:
     ADAPTORS["faiss"] = FAISSHelpers
+if QdrantAdaptor:
+    ADAPTORS["qdrant"] = QdrantAdaptor
 
 
 def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor:
diff --git a/src/skill_seekers/cli/adaptors/qdrant.py b/src/skill_seekers/cli/adaptors/qdrant.py
new file mode 100644
index 0000000..fce93a4
--- /dev/null
+++ b/src/skill_seekers/cli/adaptors/qdrant.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python3
+"""
+Qdrant Vector Database Adaptor
+
+Converts skill documentation to Qdrant format for vector similarity search.
+Qdrant stores vectors and metadata together in collections with points.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+import hashlib
+import uuid
+
+from .base import SkillAdaptor, SkillMetadata
+
+
+class QdrantAdaptor(SkillAdaptor):
+    """
+    Qdrant vector database adaptor.
+
+    Provides format conversion for:
+    - Qdrant collections (vector + payload format)
+    - Point-based storage with metadata payloads
+    - REST API compatible output
+    - Collection configuration with distance metrics
+
+    Note: Qdrant supports rich metadata payloads with filtering.
+    """
+
+    PLATFORM = "qdrant"
+    PLATFORM_NAME = "Qdrant Vector Database"
+    DEFAULT_API_ENDPOINT = "http://localhost:6333"
+
+    def _generate_point_id(self, content: str, metadata: dict) -> str:
+        """
+        Generate deterministic point ID from content and metadata.
+
+        The ID is a version-5 UUID derived from the source name, the file
+        name and the first 100 characters of the content.
+
+        Args:
+            content: Document content
+            metadata: Document metadata ("source" and "file" keys are used)
+
+        Returns:
+            UUID string (version 5, deterministic)
+        """
+        # Nil-UUID namespace + source + file + content prefix -> stable UUID
+        namespace = uuid.UUID("00000000-0000-0000-0000-000000000000")
+        id_string = f"{metadata.get('source', '')}-{metadata.get('file', '')}-{content[:100]}"
+        return str(uuid.uuid5(namespace, id_string))
+
+    def _build_point(
+        self,
+        content: str,
+        metadata: SkillMetadata,
+        file_name: str,
+        category: str,
+        doc_type: str,
+    ) -> dict[str, Any]:
+        """
+        Build one Qdrant point (id, null vector, payload).
+
+        Args:
+            content: Document text stored in the payload
+            metadata: Skill metadata (name and version are copied)
+            file_name: Name of the file the content came from
+            category: Payload category, usable for filtering
+            doc_type: Payload type ("documentation" or "reference")
+
+        Returns:
+            Point dictionary whose vector is left as None
+        """
+        point_id = self._generate_point_id(
+            content, {"source": metadata.name, "file": file_name}
+        )
+        return {
+            "id": point_id,
+            "vector": None,  # User will generate embeddings
+            "payload": {
+                "content": content,
+                "source": metadata.name,
+                "category": category,
+                "file": file_name,
+                "type": doc_type,
+                "version": metadata.version,
+            },
+        }
+
+    def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str:
+        """
+        Format skill as Qdrant collection JSON.
+
+        Creates a package with:
+        - collection_name: Collection identifier
+        - points: Array of point objects (id, vector, payload)
+        - config: Collection configuration (vector size, distance metric)
+
+        Args:
+            skill_dir: Path to skill directory
+            metadata: Skill metadata
+
+        Returns:
+            JSON string containing Qdrant-compatible data
+        """
+        points = []
+
+        # Convert SKILL.md (main documentation)
+        skill_md_path = skill_dir / "SKILL.md"
+        if skill_md_path.exists():
+            content = self._read_existing_content(skill_dir)
+            if content.strip():
+                points.append(
+                    self._build_point(content, metadata, "SKILL.md", "overview", "documentation")
+                )
+
+        # Convert all reference files
+        refs_dir = skill_dir / "references"
+        if refs_dir.exists():
+            for ref_file in sorted(refs_dir.glob("*.md")):
+                if not ref_file.is_file() or ref_file.name.startswith("."):
+                    continue
+                try:
+                    ref_content = ref_file.read_text(encoding="utf-8")
+                except (OSError, UnicodeDecodeError) as e:
+                    # Best effort: one unreadable file must not abort packaging
+                    print(f"⚠️ Warning: Could not read {ref_file.name}: {e}")
+                    continue
+                if not ref_content.strip():
+                    continue
+                category = ref_file.stem.replace("_", " ").lower()
+                points.append(
+                    self._build_point(ref_content, metadata, ref_file.name, category, "reference")
+                )
+
+        # Qdrant configuration
+        config = {
+            "vector_size": 1536,  # OpenAI ada-002 default
+            "distance": "Cosine",  # Recommended for semantic search
+            "description": (
+                "Qdrant requires embeddings. Use OpenAI, Cohere, or local models "
+                "to generate embeddings before uploading points."
+            ),
+        }
+
+        # Generate collection name (replace underscores, lowercase)
+        collection_name = metadata.name.replace("_", "-").lower()
+
+        return json.dumps(
+            {
+                "collection_name": collection_name,
+                "points": points,
+                "config": config,
+            },
+            indent=2,
+            ensure_ascii=False,
+        )
+
+    def package(self, skill_dir: Path, output_path: Path) -> Path:
+        """
+        Package skill into JSON file for Qdrant.
+
+        Creates a JSON file containing points, payloads, and config.
+
+        Args:
+            skill_dir: Path to skill directory
+            output_path: Output path/filename for JSON file (str or Path)
+
+        Returns:
+            Path to created JSON file
+        """
+        skill_dir = Path(skill_dir)
+
+        # Path() drops a trailing "/", so record the directory hint first
+        wants_directory = str(output_path).endswith("/")
+        output_path = Path(output_path)
+
+        # Determine output filename
+        if wants_directory or output_path.is_dir():
+            output_path = output_path / f"{skill_dir.name}-qdrant.json"
+        elif not output_path.name.endswith(".json"):
+            # Only rewrite the file name, never directory components
+            name = output_path.name
+            for archive_ext in (".tar.gz", ".zip"):
+                if name.endswith(archive_ext):
+                    name = name[: -len(archive_ext)]
+                    if not name.endswith("-qdrant"):
+                        name += "-qdrant"
+                    break
+            output_path = output_path.with_name(f"{name}.json")
+
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Read metadata
+        metadata = SkillMetadata(
+            name=skill_dir.name,
+            description=f"Qdrant data for {skill_dir.name}",
+            version="1.0.0",
+        )
+
+        # Generate Qdrant data
+        qdrant_json = self.format_skill_md(skill_dir, metadata)
+
+        # Write to file
+        output_path.write_text(qdrant_json, encoding="utf-8")
+
+        print("\n✅ Qdrant data packaged successfully!")
+        print(f"📦 Output: {output_path}")
+
+        # Parse and show stats
+        data = json.loads(qdrant_json)
+
+        print(f"📊 Collection: {data['collection_name']}")
+        print(f"📝 Total points: {len(data['points'])}")
+        print(f"📐 Vector size: {data['config']['vector_size']}")
+        print(f"📊 Distance metric: {data['config']['distance']}")
+
+        # Show category breakdown
+        categories = {}
+        for point in data["points"]:
+            cat = point["payload"].get("category", "unknown")
+            categories[cat] = categories.get(cat, 0) + 1
+
+        print("📁 Categories:")
+        for cat, count in sorted(categories.items()):
+            print(f"  - {cat}: {count}")
+
+        return output_path
+
+    def upload(self, package_path: Path, _api_key: str, **_kwargs) -> dict[str, Any]:
+        """
+        Qdrant format does not support direct upload via this tool.
+
+        Users should use the Qdrant client library or REST API.
+        Metadata is stored in payloads (native Qdrant feature).
+
+        Args:
+            package_path: Path to JSON file
+            _api_key: Not used (Qdrant can use API keys for cloud)
+            **_kwargs: Not used
+
+        Returns:
+            Result with usage instructions; "success" is always False
+            because nothing is uploaded
+        """
+        package_path = Path(package_path)
+
+        # Literal braces in the template are doubled because of str.format()
+        example_code = """
+# Example: Create Qdrant collection and upload points
+
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+import json
+from pathlib import Path
+from openai import OpenAI
+
+# Load data
+with open("{path}") as f:
+    data = json.load(f)
+
+# Connect to Qdrant (local or cloud)
+# Option 1: Local instance
+client = QdrantClient(host="localhost", port=6333)
+
+# Option 2: Qdrant Cloud
+# client = QdrantClient(
+#     url="https://your-cluster.qdrant.io",
+#     api_key="your-api-key"
+# )
+
+# Create collection
+collection_name = data["collection_name"]
+vector_size = data["config"]["vector_size"]
+distance = Distance(data["config"]["distance"])  # e.g. "Cosine", "Euclid", "Dot"
+
+print(f"Creating collection: {{collection_name}}")
+client.create_collection(
+    collection_name=collection_name,
+    vectors_config=VectorParams(size=vector_size, distance=distance)
+)
+
+# Generate embeddings and upload points
+print("Generating embeddings...")
+openai_client = OpenAI()
+points_to_upload = []
+
+for i, point in enumerate(data["points"]):
+    # Generate embedding
+    content = point["payload"]["content"]
+    response = openai_client.embeddings.create(
+        model="text-embedding-ada-002",
+        input=content
+    )
+    embedding = response.data[0].embedding
+
+    # Create point with vector and payload
+    points_to_upload.append(
+        PointStruct(
+            id=point["id"],
+            vector=embedding,
+            payload=point["payload"]
+        )
+    )
+
+    if (i + 1) % 10 == 0:
+        print(f"  Generated {{i + 1}}/{{len(data['points'])}} embeddings")
+
+# Upload points in batch
+print(f"\\nUploading {{len(points_to_upload)}} points...")
+client.upsert(
+    collection_name=collection_name,
+    points=points_to_upload
+)
+print(f"✅ Uploaded {{len(points_to_upload)}} points to Qdrant")
+
+# Search with metadata filtering
+def search(query_text: str, category_filter: str = None, k: int = 5):
+    # Generate query embedding
+    response = openai_client.embeddings.create(
+        model="text-embedding-ada-002",
+        input=query_text
+    )
+    query_vector = response.data[0].embedding
+
+    # Build filter
+    filter_dict = None
+    if category_filter:
+        filter_dict = {{
+            "must": [
+                {{"key": "category", "match": {{"value": category_filter}}}}
+            ]
+        }}
+
+    # Search
+    results = client.search(
+        collection_name=collection_name,
+        query_vector=query_vector,
+        limit=k,
+        query_filter=filter_dict
+    )
+
+    return results
+
+# Test search
+results = search("How do I get started?")
+for i, result in enumerate(results, 1):
+    print(f"\\nRank {{i}} (score={{result.score:.4f}}):")
+    print(f"  Category: {{result.payload['category']}}")
+    print(f"  File: {{result.payload['file']}}")
+    print(f"  Text: {{result.payload['content'][:200]}}...")
+
+# Advanced filtering examples
+# Filter by multiple conditions
+results = search(
+    "configuration options",
+    category_filter="api"  # Only search in "api" category
+)
+
+# Complex filter with multiple conditions
+from qdrant_client.models import Filter, FieldCondition, MatchValue
+
+filter_complex = Filter(
+    must=[
+        FieldCondition(key="category", match=MatchValue(value="api")),
+        FieldCondition(key="type", match=MatchValue(value="documentation"))
+    ]
+)
+
+# search() keeps its query vector local, so embed the query again here
+query_vector = openai_client.embeddings.create(
+    model="text-embedding-ada-002",
+    input="configuration options"
+).data[0].embedding
+
+results = client.search(
+    collection_name=collection_name,
+    query_vector=query_vector,
+    limit=5,
+    query_filter=filter_complex
+)
+
+# Update point payload
+client.set_payload(
+    collection_name=collection_name,
+    payload={{"updated": True, "last_updated": "2026-02-05"}},
+    points=["point-id-1", "point-id-2"]
+)
+
+# Delete points by filter
+client.delete(
+    collection_name=collection_name,
+    points_selector={{"filter": {{"must": [{{"key": "category", "match": {{"value": "deprecated"}}}}]}}}}
+)
+
+# Get collection info
+info = client.get_collection(collection_name)
+print(f"\\nCollection stats:")
+print(f"  Points: {{info.points_count}}")
+print(f"  Vectors: {{info.vectors_count}}")
+print(f"  Status: {{info.status}}")
+
+# Scroll through all points (pagination)
+offset = None
+all_points = []
+
+while True:
+    records, next_offset = client.scroll(
+        collection_name=collection_name,
+        limit=100,
+        offset=offset
+    )
+    all_points.extend(records)
+
+    if next_offset is None:
+        break
+    offset = next_offset
+
+print(f"\\nRetrieved {{len(all_points)}} total points")
+
+# Create snapshot (backup)
+snapshot_info = client.create_snapshot(collection_name)
+print(f"\\nSnapshot created: {{snapshot_info.name}}")
+
+# Recommend similar documents
+similar = client.recommend(
+    collection_name=collection_name,
+    positive=["point-id-1"],  # Similar to this
+    negative=["point-id-2"],  # But not this
+    limit=5
+)
+""".format(path=package_path.name)
+
+        return {
+            "success": False,
+            "skill_id": None,
+            "url": str(package_path.absolute()),
+            "message": (
+                f"Qdrant data packaged at: {package_path.absolute()}\n\n"
+                "Create Qdrant collection and upload points:\n"
+                f"{example_code}"
+            ),
+        }
+
+    def validate_api_key(self, _api_key: str) -> bool:
+        """Qdrant Cloud uses API keys, local instances don't."""
+        return False
+
+    def get_env_var_name(self) -> str:
+        """Qdrant Cloud API key (optional)."""
+        return "QDRANT_API_KEY"
+
+    def supports_enhancement(self) -> bool:
+        """Qdrant format doesn't support AI enhancement."""
+        return False
+
+    def enhance(self, _skill_dir: Path, _api_key: str) -> bool:
+        """Qdrant format doesn't support enhancement."""
+        print("❌ Qdrant format does not support enhancement")
+        print("   Enhance before packaging:")
+        print("     skill-seekers enhance output/skill/ --mode LOCAL")
+        print("     skill-seekers package output/skill/ --target qdrant")
+        return False
diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py
index 999f60b..3d8f192 100644
--- a/src/skill_seekers/cli/main.py
+++ b/src/skill_seekers/cli/main.py
@@ -215,7 +215,7 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
     package_parser.add_argument("--upload", action="store_true", help="Auto-upload after packaging")
     package_parser.add_argument(
         "--target",
-        choices=["claude", "gemini", "openai", "markdown", "langchain", "llama-index", "weaviate", "chroma", "faiss"],
+        choices=["claude", "gemini", "openai", "markdown", "langchain", "llama-index", "weaviate", "chroma", "faiss", "qdrant"],
         default="claude",
         help="Target LLM platform (default: claude)",
     )
diff --git a/src/skill_seekers/cli/package_skill.py b/src/skill_seekers/cli/package_skill.py
index 292db64..8eaa768 100644
--- a/src/skill_seekers/cli/package_skill.py
+++ b/src/skill_seekers/cli/package_skill.py
@@ -155,7 +155,7 @@ Examples:
 
     parser.add_argument(
         "--target",
-        choices=["claude", "gemini", "openai", "markdown", "langchain", "llama-index", "weaviate", "chroma", "faiss"],
+        choices=["claude", "gemini", "openai", "markdown", "langchain", "llama-index", "weaviate", "chroma", "faiss", "qdrant"],
         default="claude",
         help="Target LLM platform (default: claude)",
     )