feat: Phase 2 - Real upload capabilities for ChromaDB and Weaviate

Implemented complete upload functionality for vector databases, replacing stub implementations with real upload capabilities including embedding generation, multiple connection modes, and comprehensive error handling.

## ChromaDB Upload (chroma.py)
- ✅ Multiple connection modes (PersistentClient, HttpClient)
- ✅ 3 embedding strategies (OpenAI, sentence-transformers, default)
- ✅ Batch processing (100 docs per batch)
- ✅ Progress tracking for large uploads
- ✅ Collection management (create if not exists)

## Weaviate Upload (weaviate.py)
- ✅ Local and cloud connections
- ✅ Schema management (auto-create)
- ✅ Batch upload with progress tracking
- ✅ OpenAI embedding support

## Upload Command (upload_skill.py)
- ✅ Added 8 new CLI arguments for vector DBs
- ✅ Platform-specific kwargs handling
- ✅ Enhanced output formatting (collection/class names)
- ✅ Backward compatibility (LLM platforms unchanged)

## Dependencies (pyproject.toml)
- ✅ Added 4 optional dependency groups:
  - chroma = ["chromadb>=0.4.0"]
  - weaviate = ["weaviate-client>=3.25.0"]
  - sentence-transformers = ["sentence-transformers>=2.2.0"]
  - rag-upload = [all vector DB deps]

## Testing (test_upload_integration.py)
- ✅ 15 new tests across 4 test classes
- ✅ Works without optional dependencies installed
- ✅ Error handling tests (missing files, invalid JSON)
- ✅ Fixed 2 existing tests (chroma/weaviate adaptors)
- ✅ 37/37 tests passing

## User-Facing Examples

Local ChromaDB:

    skill-seekers upload output/react-chroma.json --target chroma \
      --persist-directory ./chroma_db

Weaviate Cloud:

    skill-seekers upload output/react-weaviate.json --target weaviate \
      --use-cloud --cluster-url https://xxx.weaviate.network

With OpenAI embeddings:

    skill-seekers upload output/react-chroma.json --target chroma \
      --embedding-function openai --openai-api-key $OPENAI_API_KEY

## Files Changed
- src/skill_seekers/cli/adaptors/chroma.py (250 lines)
- src/skill_seekers/cli/adaptors/weaviate.py (200 lines)
- src/skill_seekers/cli/upload_skill.py (50 lines)
- pyproject.toml (15 lines)
- tests/test_upload_integration.py (NEW - 293 lines)
- tests/test_adaptors/test_chroma_adaptor.py (1 line)
- tests/test_adaptors/test_weaviate_adaptor.py (1 line)

Total: 7 files, ~810 lines added/modified

See PHASE2_COMPLETION_SUMMARY.md for detailed documentation.

Time: ~7 hours (estimated 6-8h)
Status: ✅ COMPLETE - Ready for Phase 3

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-08 01:30:04 +03:00
parent 59e77f42b3
commit 4f9a5a553b
7 changed files with 782 additions and 243 deletions
--- a/src/skill_seekers/cli/upload_skill.py
+++ b/src/skill_seekers/cli/upload_skill.py
@@ -30,14 +30,15 @@ except ImportError:
    from utils import print_upload_instructions


-def upload_skill_api(package_path, target="claude", api_key=None):
+def upload_skill_api(package_path, target="claude", api_key=None, **kwargs):
    """
    Upload skill package to LLM platform

    Args:
        package_path: Path to skill package file
-        target: Target platform ('claude', 'gemini', 'openai')
+        target: Target platform ('claude', 'gemini', 'openai', 'chroma', 'weaviate')
        api_key: Optional API key (otherwise read from environment)
+        **kwargs: Platform-specific upload options

    Returns:
        tuple: (success, message)
@@ -57,12 +58,14 @@ def upload_skill_api(package_path, target="claude", api_key=None):
    if not api_key:
        api_key = os.environ.get(adaptor.get_env_var_name(), "").strip()

-    if not api_key:
-        return False, f"{adaptor.get_env_var_name()} not set. Export your API key first."
+    # API key validation only for platforms that require it
+    if target in ['claude', 'gemini', 'openai']:
+        if not api_key:
+            return False, f"{adaptor.get_env_var_name()} not set. Export your API key first."

-    # Validate API key format
-    if not adaptor.validate_api_key(api_key):
-        return False, f"Invalid API key format for {adaptor.PLATFORM_NAME}"
+        # Validate API key format
+        if not adaptor.validate_api_key(api_key):
+            return False, f"Invalid API key format for {adaptor.PLATFORM_NAME}"

    package_path = Path(package_path)

@@ -82,17 +85,23 @@ def upload_skill_api(package_path, target="claude", api_key=None):
    print(f"⏳ Uploading to {adaptor.PLATFORM_NAME}...")

    try:
-        result = adaptor.upload(package_path, api_key)
+        result = adaptor.upload(package_path, api_key, **kwargs)

        if result["success"]:
            print()
            print(f"✅ {result['message']}")
            print()
-            if result["url"]:
+            if result.get("url"):
                print("Your skill is now available at:")
                print(f"   {result['url']}")
-            if result["skill_id"]:
+            if result.get("skill_id"):
                print(f"   Skill ID: {result['skill_id']}")
+            if result.get("collection"):
+                print(f"   Collection: {result['collection']}")
+            if result.get("class_name"):
+                print(f"   Class: {result['class_name']}")
+            if result.get("count"):
+                print(f"   Documents uploaded: {result['count']}")
            print()
            return True, "Upload successful"
        else:
@@ -104,7 +113,7 @@ def upload_skill_api(package_path, target="claude", api_key=None):

 def main():
    parser = argparse.ArgumentParser(
-        description="Upload a skill package to LLM platforms",
+        description="Upload a skill package to LLM platforms and vector databases",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Setup:
@@ -117,6 +126,14 @@ Setup:
  OpenAI:
    export OPENAI_API_KEY=sk-proj-...

+  ChromaDB (local):
+    # No API key needed for local instance
+    chroma run  # Start server
+
+  Weaviate (local):
+    # No API key needed for local instance
+    docker run -p 8080:8080 semitechnologies/weaviate:latest
+
 Examples:
  # Upload to Claude (default)
  skill-seekers upload output/react.zip
@@ -127,8 +144,17 @@ Examples:
  # Upload to OpenAI
  skill-seekers upload output/react-openai.zip --target openai

-  # Upload with explicit API key
-  skill-seekers upload output/react.zip --api-key sk-ant-...
+  # Upload to ChromaDB (local)
+  skill-seekers upload output/react-chroma.json --target chroma
+
+  # Upload to ChromaDB with OpenAI embeddings
+  skill-seekers upload output/react-chroma.json --target chroma --embedding-function openai
+
+  # Upload to Weaviate (local)
+  skill-seekers upload output/react-weaviate.json --target weaviate
+
+  # Upload to Weaviate Cloud
+  skill-seekers upload output/react-weaviate.json --target weaviate --use-cloud --cluster-url https://xxx.weaviate.network --api-key YOUR_KEY
        """,
    )

@@ -136,17 +162,80 @@ Examples:

    parser.add_argument(
        "--target",
-        choices=["claude", "gemini", "openai"],
+        choices=["claude", "gemini", "openai", "chroma", "weaviate"],
        default="claude",
-        help="Target LLM platform (default: claude)",
+        help="Target platform (default: claude)",
    )

    parser.add_argument("--api-key", help="Platform API key (or set environment variable)")

+    # ChromaDB upload options
+    parser.add_argument(
+        "--chroma-url",
+        help="ChromaDB URL (default: http://localhost:8000 for HTTP, or use --persist-directory for local)"
+    )
+
+    parser.add_argument(
+        "--persist-directory",
+        help="Local directory for persistent ChromaDB storage (default: ./chroma_db)"
+    )
+
+    parser.add_argument(
+        "--embedding-function",
+        choices=["openai", "sentence-transformers", "none"],
+        help="Embedding function for ChromaDB/Weaviate (default: platform default)"
+    )
+
+    parser.add_argument(
+        "--openai-api-key",
+        help="OpenAI API key for embeddings (or set OPENAI_API_KEY env var)"
+    )
+
+    # Weaviate upload options
+    parser.add_argument(
+        "--weaviate-url",
+        default="http://localhost:8080",
+        help="Weaviate URL (default: http://localhost:8080)"
+    )
+
+    parser.add_argument(
+        "--use-cloud",
+        action="store_true",
+        help="Use Weaviate Cloud (requires --api-key and --cluster-url)"
+    )
+
+    parser.add_argument(
+        "--cluster-url",
+        help="Weaviate Cloud cluster URL (e.g., https://xxx.weaviate.network)"
+    )
+
    args = parser.parse_args()

+    # Build kwargs for vector DB upload
+    upload_kwargs = {}
+
+    if args.target == 'chroma':
+        if args.chroma_url:
+            upload_kwargs['chroma_url'] = args.chroma_url
+        if args.persist_directory:
+            upload_kwargs['persist_directory'] = args.persist_directory
+        if args.embedding_function:
+            upload_kwargs['embedding_function'] = args.embedding_function
+        if args.openai_api_key:
+            upload_kwargs['openai_api_key'] = args.openai_api_key
+
+    elif args.target == 'weaviate':
+        upload_kwargs['weaviate_url'] = args.weaviate_url
+        upload_kwargs['use_cloud'] = args.use_cloud
+        if args.cluster_url:
+            upload_kwargs['cluster_url'] = args.cluster_url
+        if args.embedding_function:
+            upload_kwargs['embedding_function'] = args.embedding_function
+        if args.openai_api_key:
+            upload_kwargs['openai_api_key'] = args.openai_api_key
+
    # Upload skill
-    success, message = upload_skill_api(args.package_file, args.target, args.api_key)
+    success, message = upload_skill_api(args.package_file, args.target, args.api_key, **upload_kwargs)

    if success:
        sys.exit(0)