Files
skill-seekers-reference/tests/test_upload_integration.py
yusyus 064405c052 fix: resolve 18 bugs and code quality issues across adaptors, CLI, and chunking pipeline
Bug fixes:
- Fix --var flag silently dropped in create routing (args.workflow_var → args.var)
- Fix double _score_code_quality() call in word scraper
- Add .docx file extension validation in WordToSkillConverter
- Fix weaviate ImportError masked by generic Exception handler
- Fix RAG chunking crash using non-existent converter.output_dir

Chunking pipeline improvements:
- Wire --chunk-overlap-tokens through entire package pipeline
  (package_skill → adaptor.package → format_skill_md → _maybe_chunk_content → RAGChunker)
- Add auto-scaling overlap: max(50, chunk_tokens//10) when chunk size is non-default
- Rename --no-preserve-code to --no-preserve-code-blocks (backward-compat alias kept)
- Replace hardcoded 512/50 chunk defaults with DEFAULT_CHUNK_TOKENS/DEFAULT_CHUNK_OVERLAP_TOKENS
  constants across all 12 concrete adaptors, rag_chunker, base, and package_skill

Code quality:
- Extract shared _generate_openai_embeddings() and _generate_st_embeddings() to SkillAdaptor
  base class, removing ~150 lines of duplication from chroma/weaviate/pinecone
- Add Pinecone adaptor with full upload support (pinecone_adaptor.py)

Tests (14 new):
- chunk_overlap_tokens parameter wiring, auto-scaling overlap, preserve_code_blocks flag
- .docx/.doc/no-extension file validation, --var flag routing E2E
- Embedding method inheritance verification, backward-compatible flag aliases

Docs:
- Update CHANGELOG, CLI_REFERENCE, API_REFERENCE, packaging guide (EN+ZH)
- Update README test count badge (1880+ → 2283+)

All 2283 tests passing, 8 skipped, 0 failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 21:57:59 +03:00

317 lines
11 KiB
Python

#!/usr/bin/env python3
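"""
Integration tests for the vector-database upload adaptors (ChromaDB, Weaviate, Pinecone).

Covers adaptor lookup, graceful failure when a client library is missing,
inheritance of the shared embedding helpers, package file structure, the
upload command signature, and handling of missing or malformed package files.
"""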
import json
import pytest
# Import adaptors
from skill_seekers.cli.adaptors import get_adaptor
@pytest.fixture
def sample_chroma_package(tmp_path):
    """Write a three-document ChromaDB package to ``tmp_path`` and return its path.

    The JSON file holds a collection name plus parallel ``documents``,
    ``metadatas`` and ``ids`` lists of equal length.
    """
    documents = ["Test doc 1", "Test doc 2", "Test doc 3"]
    ids = ["id1", "id2", "id3"]
    # One (category, file) pair per document; all entries share the same source.
    metadatas = [
        {"source": "test", "category": category, "file": filename}
        for category, filename in (
            ("overview", "SKILL.md"),
            ("api", "API.md"),
            ("guide", "GUIDE.md"),
        )
    ]

    payload = {
        "collection_name": "test_collection",
        "documents": documents,
        "metadatas": metadatas,
        "ids": ids,
    }

    target = tmp_path / "test-chroma.json"
    target.write_text(json.dumps(payload))
    return target
@pytest.fixture
def sample_weaviate_package(tmp_path):
    """Write a two-object Weaviate package to ``tmp_path`` and return its path.

    The JSON file holds a class name, a schema describing three properties
    (``content``, ``source``, ``category``) and two objects with fixed UUIDs.
    """
    schema = {
        "class": "TestSkill",
        "description": "Test skill documentation",
        "vectorizer": "none",
        "properties": [
            {"name": "content", "dataType": ["text"]},
            *(
                {"name": field, "dataType": ["string"]}
                for field in ("source", "category")
            ),
        ],
    }

    # (id, content, category) per object; every object shares the same source.
    objects = [
        {
            "id": object_id,
            "properties": {"content": content, "source": "test", "category": category},
        }
        for object_id, content, category in (
            ("00000000-0000-0000-0000-000000000001", "Test content 1", "overview"),
            ("00000000-0000-0000-0000-000000000002", "Test content 2", "api"),
        )
    ]

    payload = {"class_name": "TestSkill", "schema": schema, "objects": objects}

    target = tmp_path / "test-weaviate.json"
    target.write_text(json.dumps(payload))
    return target
class TestChromaUploadBasics:
    """Test ChromaDB upload basic functionality."""

    def test_chroma_adaptor_exists(self):
        """Test that ChromaDB adaptor can be loaded."""
        adaptor = get_adaptor("chroma")
        assert adaptor is not None
        assert adaptor.PLATFORM == "chroma"

    def test_chroma_upload_without_chromadb_installed(self, sample_chroma_package):
        """Test upload fails gracefully without chromadb installed.

        The missing dependency is simulated, so the test behaves the same
        whether or not chromadb is actually installed in the environment.
        """
        import sys

        adaptor = get_adaptor("chroma")

        # A ``None`` entry in sys.modules makes ``import chromadb`` raise
        # ModuleNotFoundError (an ImportError subclass). Merely deleting the
        # entry is not enough: an installed package would just be re-imported.
        not_loaded = object()
        chromadb_backup = sys.modules.get("chromadb", not_loaded)
        sys.modules["chromadb"] = None
        try:
            result = adaptor.upload(sample_chroma_package)
        finally:
            # Restore the exact previous state, including "not loaded at all".
            if chromadb_backup is not_loaded:
                sys.modules.pop("chromadb", None)
            else:
                sys.modules["chromadb"] = chromadb_backup

        assert result["success"] is False
        assert "chromadb not installed" in result["message"]
        assert "pip install chromadb" in result["message"]

    def test_chroma_upload_api_signature(self, sample_chroma_package):
        """Test ChromaDB upload has correct API signature."""
        adaptor = get_adaptor("chroma")
        # Verify upload method exists and is callable
        assert hasattr(adaptor, "upload")
        assert callable(adaptor.upload)
        # Verify adaptor methods exist
        assert hasattr(adaptor, "_generate_openai_embeddings")
class TestWeaviateUploadBasics:
    """Test Weaviate upload basic functionality."""

    def test_weaviate_adaptor_exists(self):
        """Test that Weaviate adaptor can be loaded."""
        adaptor = get_adaptor("weaviate")
        assert adaptor is not None
        assert adaptor.PLATFORM == "weaviate"

    def test_weaviate_upload_without_weaviate_installed(self, sample_weaviate_package):
        """Test upload fails gracefully without weaviate-client installed.

        The missing dependency is simulated, so the test behaves the same
        whether or not weaviate-client is actually installed in the environment.
        """
        import sys

        adaptor = get_adaptor("weaviate")

        # A ``None`` entry in sys.modules makes ``import weaviate`` raise
        # ModuleNotFoundError (an ImportError subclass). Merely deleting the
        # entry is not enough: an installed package would just be re-imported.
        not_loaded = object()
        weaviate_backup = sys.modules.get("weaviate", not_loaded)
        sys.modules["weaviate"] = None
        try:
            result = adaptor.upload(sample_weaviate_package)
        finally:
            # Restore the exact previous state, including "not loaded at all".
            if weaviate_backup is not_loaded:
                sys.modules.pop("weaviate", None)
            else:
                sys.modules["weaviate"] = weaviate_backup

        assert result["success"] is False
        assert "weaviate-client not installed" in result["message"]
        assert "pip install weaviate-client" in result["message"]

    def test_weaviate_upload_api_signature(self, sample_weaviate_package):
        """Test Weaviate upload has correct API signature."""
        adaptor = get_adaptor("weaviate")
        # Verify upload method exists and is callable
        assert hasattr(adaptor, "upload")
        assert callable(adaptor.upload)
        # Verify adaptor methods exist
        assert hasattr(adaptor, "_generate_openai_embeddings")
class TestEmbeddingMethodInheritance:
    """Check that adaptors reuse the embedding helpers defined on the base class."""

    @staticmethod
    def _assert_inherited_from_base(adaptor, *method_names):
        """Assert each named method exists on ``adaptor`` and is SkillAdaptor's own function."""
        for method_name in method_names:
            assert hasattr(adaptor, method_name)

        from skill_seekers.cli.adaptors.base import SkillAdaptor

        # Comparing the bound method's underlying function with the attribute
        # on the base class rules out a local override in the subclass.
        for method_name in method_names:
            bound = getattr(adaptor, method_name)
            assert bound.__func__ is getattr(SkillAdaptor, method_name)

    def test_chroma_inherits_openai_embeddings(self):
        """Chroma adaptor takes _generate_openai_embeddings from the base class."""
        self._assert_inherited_from_base(
            get_adaptor("chroma"),
            "_generate_openai_embeddings",
        )

    def test_weaviate_inherits_both_embedding_methods(self):
        """Weaviate adaptor takes both embedding helpers from the base class."""
        self._assert_inherited_from_base(
            get_adaptor("weaviate"),
            "_generate_openai_embeddings",
            "_generate_st_embeddings",
        )

    def test_pinecone_inherits_both_embedding_methods(self):
        """Pinecone adaptor takes both embedding helpers from the base class."""
        self._assert_inherited_from_base(
            get_adaptor("pinecone"),
            "_generate_openai_embeddings",
            "_generate_st_embeddings",
        )
class TestPackageStructure:
    """Check that the sample package files have the layout expected for upload."""

    def test_chroma_package_structure(self, sample_chroma_package):
        """ChromaDB package exposes all required keys with equally long lists."""
        with open(sample_chroma_package) as handle:
            package = json.load(handle)

        for required_key in ("collection_name", "documents", "metadatas", "ids"):
            assert required_key in package

        # documents, metadatas and ids are parallel lists and must line up.
        document_count = len(package["documents"])
        assert document_count == len(package["metadatas"]) == len(package["ids"])

    def test_weaviate_package_structure(self, sample_weaviate_package):
        """Weaviate package exposes class name, schema and two well-formed objects."""
        with open(sample_weaviate_package) as handle:
            package = json.load(handle)

        for required_key in ("class_name", "schema", "objects"):
            assert required_key in package
        assert len(package["objects"]) == 2

        # Schema must name its class and list its properties.
        schema = package["schema"]
        assert "class" in schema
        assert "properties" in schema

        # Every object carries an id and a properties mapping.
        for entry in package["objects"]:
            assert "id" in entry
            assert "properties" in entry
class TestUploadCommandIntegration:
    """Check how the upload command ties into the vector-database adaptors."""

    def test_upload_skill_api_signature(self):
        """upload_skill_api is callable and declares the expected parameters."""
        import inspect

        from skill_seekers.cli.upload_skill import upload_skill_api

        assert callable(upload_skill_api)

        parameter_names = list(inspect.signature(upload_skill_api).parameters)
        # "kwargs" carries the platform-specific options for vector DBs.
        for expected in ("package_path", "target", "api_key", "kwargs"):
            assert expected in parameter_names

    def test_upload_command_supports_chroma(self):
        """Looking up the chroma target yields an adaptor instead of raising ValueError."""
        assert get_adaptor("chroma") is not None

    def test_upload_command_supports_weaviate(self):
        """Looking up the weaviate target yields an adaptor instead of raising ValueError."""
        assert get_adaptor("weaviate") is not None
class TestErrorHandling:
    """Check that uploads cope with missing or malformed package files.

    Each test accepts either outcome: the adaptor raises the specific
    exception, or it returns a result dict whose ``success`` is False.
    """

    def test_chroma_handles_missing_file(self, tmp_path):
        """ChromaDB upload of a nonexistent file raises or reports failure."""
        nonexistent = tmp_path / "nonexistent.json"
        chroma = get_adaptor("chroma")
        try:
            outcome = chroma.upload(nonexistent)
        except FileNotFoundError:
            # Raising is an acceptable way to report the missing file.
            pass
        else:
            assert outcome["success"] is False

    def test_weaviate_handles_missing_file(self, tmp_path):
        """Weaviate upload of a nonexistent file raises or reports failure."""
        nonexistent = tmp_path / "nonexistent.json"
        weaviate = get_adaptor("weaviate")
        try:
            outcome = weaviate.upload(nonexistent)
        except FileNotFoundError:
            # Raising is an acceptable way to report the missing file.
            pass
        else:
            assert outcome["success"] is False

    def test_chroma_handles_invalid_json(self, tmp_path):
        """ChromaDB upload of a malformed JSON file raises or reports failure."""
        chroma = get_adaptor("chroma")
        malformed = tmp_path / "invalid.json"
        malformed.write_text("not valid json{")
        try:
            outcome = chroma.upload(malformed)
        except json.JSONDecodeError:
            # Raising is an acceptable way to report the parse error.
            pass
        else:
            assert outcome["success"] is False

    def test_weaviate_handles_invalid_json(self, tmp_path):
        """Weaviate upload of a malformed JSON file raises or reports failure."""
        weaviate = get_adaptor("weaviate")
        malformed = tmp_path / "invalid.json"
        malformed.write_text("not valid json{")
        try:
            outcome = weaviate.upload(malformed)
        except json.JSONDecodeError:
            # Raising is an acceptable way to report the parse error.
            pass
        else:
            assert outcome["success"] is False