style: Format all Python files with ruff
- Formatted 103 files to comply with ruff format requirements
- No code logic changes, only formatting/whitespace
- Fixes CI formatting check failures
This commit is contained in:
@@ -85,9 +85,17 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
|
||||
# Platforms to benchmark
|
||||
platforms = [
|
||||
"claude", "gemini", "openai", "markdown", # IDE integrations
|
||||
"langchain", "llama-index", "haystack", # RAG frameworks
|
||||
"weaviate", "chroma", "faiss", "qdrant" # Vector DBs
|
||||
"claude",
|
||||
"gemini",
|
||||
"openai",
|
||||
"markdown", # IDE integrations
|
||||
"langchain",
|
||||
"llama-index",
|
||||
"haystack", # RAG frameworks
|
||||
"weaviate",
|
||||
"chroma",
|
||||
"faiss",
|
||||
"qdrant", # Vector DBs
|
||||
]
|
||||
|
||||
results = {}
|
||||
@@ -115,20 +123,19 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
min_time = min(times)
|
||||
max_time = max(times)
|
||||
|
||||
results[platform] = {
|
||||
"avg": avg_time,
|
||||
"min": min_time,
|
||||
"max": max_time
|
||||
}
|
||||
results[platform] = {"avg": avg_time, "min": min_time, "max": max_time}
|
||||
|
||||
print(f"{platform:15} - Avg: {avg_time*1000:6.2f}ms | "
|
||||
f"Min: {min_time*1000:6.2f}ms | Max: {max_time*1000:6.2f}ms")
|
||||
print(
|
||||
f"{platform:15} - Avg: {avg_time * 1000:6.2f}ms | "
|
||||
f"Min: {min_time * 1000:6.2f}ms | Max: {max_time * 1000:6.2f}ms"
|
||||
)
|
||||
|
||||
# Performance assertions (should complete in reasonable time)
|
||||
for platform, metrics in results.items():
|
||||
self.assertLess(
|
||||
metrics["avg"], 0.5, # Should average < 500ms
|
||||
f"{platform} format_skill_md too slow: {metrics['avg']*1000:.2f}ms"
|
||||
metrics["avg"],
|
||||
0.5, # Should average < 500ms
|
||||
f"{platform} format_skill_md too slow: {metrics['avg'] * 1000:.2f}ms",
|
||||
)
|
||||
|
||||
def test_benchmark_package_operations(self):
|
||||
@@ -158,12 +165,9 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
# Get file size
|
||||
file_size_kb = package_path.stat().st_size / 1024
|
||||
|
||||
results[platform] = {
|
||||
"time": elapsed,
|
||||
"size_kb": file_size_kb
|
||||
}
|
||||
results[platform] = {"time": elapsed, "size_kb": file_size_kb}
|
||||
|
||||
print(f"{platform:15} - Time: {elapsed*1000:7.2f}ms | Size: {file_size_kb:7.1f} KB")
|
||||
print(f"{platform:15} - Time: {elapsed * 1000:7.2f}ms | Size: {file_size_kb:7.1f} KB")
|
||||
|
||||
# Validate output
|
||||
self.assertTrue(package_path.exists())
|
||||
@@ -171,12 +175,14 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
# Performance assertions
|
||||
for platform, metrics in results.items():
|
||||
self.assertLess(
|
||||
metrics["time"], 1.0, # Should complete < 1 second
|
||||
f"{platform} packaging too slow: {metrics['time']*1000:.2f}ms"
|
||||
metrics["time"],
|
||||
1.0, # Should complete < 1 second
|
||||
f"{platform} packaging too slow: {metrics['time'] * 1000:.2f}ms",
|
||||
)
|
||||
self.assertLess(
|
||||
metrics["size_kb"], 1000, # Should be < 1MB for 10 refs
|
||||
f"{platform} package too large: {metrics['size_kb']:.1f}KB"
|
||||
metrics["size_kb"],
|
||||
1000, # Should be < 1MB for 10 refs
|
||||
f"{platform} package too large: {metrics['size_kb']:.1f}KB",
|
||||
)
|
||||
|
||||
def test_benchmark_scaling_with_reference_count(self):
|
||||
@@ -210,14 +216,18 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
json.loads(formatted)
|
||||
size_kb = len(formatted) / 1024
|
||||
|
||||
results.append({
|
||||
"count": ref_count,
|
||||
"time": elapsed,
|
||||
"time_per_ref": time_per_ref,
|
||||
"size_kb": size_kb
|
||||
})
|
||||
results.append(
|
||||
{
|
||||
"count": ref_count,
|
||||
"time": elapsed,
|
||||
"time_per_ref": time_per_ref,
|
||||
"size_kb": size_kb,
|
||||
}
|
||||
)
|
||||
|
||||
print(f"{ref_count:4} | {elapsed*1000:10.2f} | {time_per_ref*1000:10.3f} | {size_kb:10.1f}")
|
||||
print(
|
||||
f"{ref_count:4} | {elapsed * 1000:10.2f} | {time_per_ref * 1000:10.3f} | {size_kb:10.1f}"
|
||||
)
|
||||
|
||||
# Analyze scaling behavior
|
||||
# Time per ref should not increase significantly (linear scaling)
|
||||
@@ -230,10 +240,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
print(f"(Time per ref at 50 refs / Time per ref at 1 ref)")
|
||||
|
||||
# Assert linear or sub-linear scaling (not exponential)
|
||||
self.assertLess(
|
||||
scaling_factor, 3.0,
|
||||
f"Non-linear scaling detected: {scaling_factor:.2f}x"
|
||||
)
|
||||
self.assertLess(scaling_factor, 3.0, f"Non-linear scaling detected: {scaling_factor:.2f}x")
|
||||
|
||||
def test_benchmark_json_vs_zip_size_comparison(self):
|
||||
"""Compare output sizes: JSON vs ZIP/tar.gz"""
|
||||
@@ -263,16 +270,15 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
|
||||
size_kb = package_path.stat().st_size / 1024
|
||||
|
||||
results[platform] = {
|
||||
"format": format_name,
|
||||
"size_kb": size_kb
|
||||
}
|
||||
results[platform] = {"format": format_name, "size_kb": size_kb}
|
||||
|
||||
print(f"{platform:15} | {format_name:8} | {size_kb:10.1f}")
|
||||
|
||||
# Analyze results
|
||||
json_sizes = [v["size_kb"] for k, v in results.items() if v["format"] == "JSON"]
|
||||
compressed_sizes = [v["size_kb"] for k, v in results.items() if v["format"] in ["ZIP", "tar.gz"]]
|
||||
compressed_sizes = [
|
||||
v["size_kb"] for k, v in results.items() if v["format"] in ["ZIP", "tar.gz"]
|
||||
]
|
||||
|
||||
if json_sizes and compressed_sizes:
|
||||
avg_json = sum(json_sizes) / len(json_sizes)
|
||||
@@ -280,7 +286,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
|
||||
print(f"\nAverage JSON size: {avg_json:.1f} KB")
|
||||
print(f"Average compressed size: {avg_compressed:.1f} KB")
|
||||
print(f"Compression ratio: {avg_json/avg_compressed:.2f}x")
|
||||
print(f"Compression ratio: {avg_json / avg_compressed:.2f}x")
|
||||
|
||||
def test_benchmark_metadata_overhead(self):
|
||||
"""Measure metadata processing overhead"""
|
||||
@@ -299,7 +305,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
description="A comprehensive test skill for benchmarking purposes",
|
||||
version="2.5.0",
|
||||
author="Benchmark Suite",
|
||||
tags=["test", "benchmark", "performance", "validation", "quality"]
|
||||
tags=["test", "benchmark", "performance", "validation", "quality"],
|
||||
)
|
||||
|
||||
adaptor = get_adaptor("langchain")
|
||||
@@ -326,15 +332,12 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
overhead = avg_rich - avg_minimal
|
||||
overhead_pct = (overhead / avg_minimal) * 100
|
||||
|
||||
print(f"\nMinimal metadata: {avg_minimal*1000:.2f}ms")
|
||||
print(f"Rich metadata: {avg_rich*1000:.2f}ms")
|
||||
print(f"Overhead: {overhead*1000:.2f}ms ({overhead_pct:.1f}%)")
|
||||
print(f"\nMinimal metadata: {avg_minimal * 1000:.2f}ms")
|
||||
print(f"Rich metadata: {avg_rich * 1000:.2f}ms")
|
||||
print(f"Overhead: {overhead * 1000:.2f}ms ({overhead_pct:.1f}%)")
|
||||
|
||||
# Overhead should be negligible (< 10%)
|
||||
self.assertLess(
|
||||
overhead_pct, 10.0,
|
||||
f"Metadata overhead too high: {overhead_pct:.1f}%"
|
||||
)
|
||||
self.assertLess(overhead_pct, 10.0, f"Metadata overhead too high: {overhead_pct:.1f}%")
|
||||
|
||||
def test_benchmark_empty_vs_full_skill(self):
|
||||
"""Compare performance: empty skill vs full skill"""
|
||||
@@ -360,9 +363,9 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
adaptor.format_skill_md(full_dir, metadata)
|
||||
full_time = time.perf_counter() - start
|
||||
|
||||
print(f"\nEmpty skill: {empty_time*1000:.2f}ms")
|
||||
print(f"Full skill (50 refs): {full_time*1000:.2f}ms")
|
||||
print(f"Ratio: {full_time/empty_time:.1f}x")
|
||||
print(f"\nEmpty skill: {empty_time * 1000:.2f}ms")
|
||||
print(f"Full skill (50 refs): {full_time * 1000:.2f}ms")
|
||||
print(f"Ratio: {full_time / empty_time:.1f}x")
|
||||
|
||||
# Empty should be very fast
|
||||
self.assertLess(empty_time, 0.01, "Empty skill processing too slow")
|
||||
|
||||
@@ -662,8 +662,13 @@ export default {
|
||||
def test_e2e_all_rag_adaptors_from_same_skill(self):
|
||||
"""Test all 7 RAG adaptors can package the same skill"""
|
||||
rag_platforms = [
|
||||
"langchain", "llama-index", "haystack",
|
||||
"weaviate", "chroma", "faiss", "qdrant"
|
||||
"langchain",
|
||||
"llama-index",
|
||||
"haystack",
|
||||
"weaviate",
|
||||
"chroma",
|
||||
"faiss",
|
||||
"qdrant",
|
||||
]
|
||||
packages = {}
|
||||
|
||||
@@ -674,15 +679,11 @@ export default {
|
||||
package_path = adaptor.package(self.skill_dir, self.output_dir)
|
||||
|
||||
# Verify package was created
|
||||
self.assertTrue(
|
||||
package_path.exists(),
|
||||
f"Package not created for {platform}"
|
||||
)
|
||||
self.assertTrue(package_path.exists(), f"Package not created for {platform}")
|
||||
|
||||
# Verify it's a JSON file
|
||||
self.assertTrue(
|
||||
str(package_path).endswith(".json"),
|
||||
f"{platform} should produce JSON file"
|
||||
str(package_path).endswith(".json"), f"{platform} should produce JSON file"
|
||||
)
|
||||
|
||||
# Store for later verification
|
||||
@@ -696,10 +697,7 @@ export default {
|
||||
with open(path) as f:
|
||||
data = json.load(f)
|
||||
# Should be valid JSON (dict or list)
|
||||
self.assertIsInstance(
|
||||
data, (dict, list),
|
||||
f"{platform} should produce valid JSON"
|
||||
)
|
||||
self.assertIsInstance(data, (dict, list), f"{platform} should produce valid JSON")
|
||||
|
||||
def test_e2e_rag_adaptors_preserve_metadata(self):
|
||||
"""Test that metadata is preserved across RAG adaptors"""
|
||||
@@ -708,7 +706,7 @@ export default {
|
||||
description="Vue.js framework skill",
|
||||
version="2.0.0",
|
||||
author="Test Author",
|
||||
tags=["vue", "javascript", "frontend"]
|
||||
tags=["vue", "javascript", "frontend"],
|
||||
)
|
||||
|
||||
# Test subset of platforms (representative sample)
|
||||
@@ -758,33 +756,30 @@ export default {
|
||||
# Define expected structure for each platform
|
||||
validations = {
|
||||
"langchain": lambda d: (
|
||||
isinstance(d, list) and
|
||||
all("page_content" in item and "metadata" in item for item in d)
|
||||
isinstance(d, list)
|
||||
and all("page_content" in item and "metadata" in item for item in d)
|
||||
),
|
||||
"llama-index": lambda d: (
|
||||
isinstance(d, list) and
|
||||
all("text" in item and "metadata" in item for item in d)
|
||||
isinstance(d, list) and all("text" in item and "metadata" in item for item in d)
|
||||
),
|
||||
"haystack": lambda d: (
|
||||
isinstance(d, list) and
|
||||
all("content" in item and "meta" in item for item in d)
|
||||
isinstance(d, list) and all("content" in item and "meta" in item for item in d)
|
||||
),
|
||||
"weaviate": lambda d: (
|
||||
isinstance(d, dict) and
|
||||
"schema" in d and "objects" in d and "class_name" in d
|
||||
isinstance(d, dict) and "schema" in d and "objects" in d and "class_name" in d
|
||||
),
|
||||
"chroma": lambda d: (
|
||||
isinstance(d, dict) and
|
||||
"documents" in d and "metadatas" in d and "ids" in d and
|
||||
"collection_name" in d
|
||||
isinstance(d, dict)
|
||||
and "documents" in d
|
||||
and "metadatas" in d
|
||||
and "ids" in d
|
||||
and "collection_name" in d
|
||||
),
|
||||
"faiss": lambda d: (
|
||||
isinstance(d, dict) and
|
||||
"documents" in d and "metadatas" in d and "ids" in d
|
||||
isinstance(d, dict) and "documents" in d and "metadatas" in d and "ids" in d
|
||||
),
|
||||
"qdrant": lambda d: (
|
||||
isinstance(d, dict) and
|
||||
"collection_name" in d and "points" in d and "config" in d
|
||||
isinstance(d, dict) and "collection_name" in d and "points" in d and "config" in d
|
||||
),
|
||||
}
|
||||
|
||||
@@ -795,8 +790,7 @@ export default {
|
||||
|
||||
# Validate structure
|
||||
self.assertTrue(
|
||||
validate_func(data),
|
||||
f"{platform} validation failed: incorrect JSON structure"
|
||||
validate_func(data), f"{platform} validation failed: incorrect JSON structure"
|
||||
)
|
||||
|
||||
def test_e2e_rag_empty_skill_handling(self):
|
||||
@@ -838,9 +832,7 @@ export default {
|
||||
if platform == "langchain":
|
||||
categories = {item["metadata"]["category"] for item in data}
|
||||
elif platform == "weaviate":
|
||||
categories = {
|
||||
obj["properties"]["category"] for obj in data["objects"]
|
||||
}
|
||||
categories = {obj["properties"]["category"] for obj in data["objects"]}
|
||||
elif platform == "chroma":
|
||||
categories = {meta["category"] for meta in data["metadatas"]}
|
||||
|
||||
@@ -854,8 +846,7 @@ export default {
|
||||
# Check that at least one reference category exists
|
||||
ref_categories = categories - {"overview"}
|
||||
self.assertGreater(
|
||||
len(ref_categories), 0,
|
||||
f"{platform}: Should have at least one reference category"
|
||||
len(ref_categories), 0, f"{platform}: Should have at least one reference category"
|
||||
)
|
||||
|
||||
def test_e2e_rag_integration_workflow_chromadb(self):
|
||||
@@ -878,17 +869,10 @@ export default {
|
||||
|
||||
# Create collection and add documents
|
||||
collection = client.create_collection(data["collection_name"])
|
||||
collection.add(
|
||||
documents=data["documents"],
|
||||
metadatas=data["metadatas"],
|
||||
ids=data["ids"]
|
||||
)
|
||||
collection.add(documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"])
|
||||
|
||||
# Query
|
||||
results = collection.query(
|
||||
query_texts=["reactivity"],
|
||||
n_results=2
|
||||
)
|
||||
results = collection.query(query_texts=["reactivity"], n_results=2)
|
||||
|
||||
# Verify results
|
||||
self.assertGreater(len(results["documents"][0]), 0, "Should return results")
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestChromaAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for Chroma format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for Chroma format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestChromaAdaptor:
|
||||
|
||||
# Format as Chroma collection
|
||||
adaptor = get_adaptor("chroma")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
collection_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -124,7 +120,10 @@ class TestChromaAdaptor:
|
||||
# Upload may fail if chromadb not installed (expected)
|
||||
assert "message" in result
|
||||
# Either chromadb not installed or connection error
|
||||
assert ("chromadb not installed" in result["message"] or "Failed to connect" in result["message"])
|
||||
assert (
|
||||
"chromadb not installed" in result["message"]
|
||||
or "Failed to connect" in result["message"]
|
||||
)
|
||||
|
||||
def test_validate_api_key_returns_false(self):
|
||||
"""Test that API key validation returns False (no API needed)."""
|
||||
@@ -157,9 +156,7 @@ class TestChromaAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("chroma")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
collection_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
collection = json.loads(collection_json)
|
||||
@@ -179,9 +176,7 @@ class TestChromaAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("chroma")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
collection_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
collection = json.loads(collection_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestFAISSAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for FAISS format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for FAISS format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestFAISSAdaptor:
|
||||
|
||||
# Format as FAISS index data
|
||||
adaptor = get_adaptor("faiss")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
index_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -158,9 +154,7 @@ class TestFAISSAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("faiss")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
index_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
index_data = json.loads(index_json)
|
||||
@@ -180,9 +174,7 @@ class TestFAISSAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("faiss")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
index_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
index_data = json.loads(index_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestHaystackAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for Haystack format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for Haystack format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestHaystackAdaptor:
|
||||
|
||||
# Format as Haystack Documents
|
||||
adaptor = get_adaptor("haystack")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -112,7 +108,7 @@ class TestHaystackAdaptor:
|
||||
"""Test upload returns instructions (no actual upload)."""
|
||||
# Create test package
|
||||
package_path = tmp_path / "test-haystack.json"
|
||||
package_path.write_text('[]')
|
||||
package_path.write_text("[]")
|
||||
|
||||
adaptor = get_adaptor("haystack")
|
||||
result = adaptor.upload(package_path, "fake-key")
|
||||
@@ -154,9 +150,7 @@ class TestHaystackAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("haystack")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
@@ -174,9 +168,7 @@ class TestHaystackAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("haystack")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestLangChainAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for LangChain format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for LangChain format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestLangChainAdaptor:
|
||||
|
||||
# Format as LangChain Documents
|
||||
adaptor = get_adaptor("langchain")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -112,7 +108,7 @@ class TestLangChainAdaptor:
|
||||
"""Test upload returns instructions (no actual upload)."""
|
||||
# Create test package
|
||||
package_path = tmp_path / "test-langchain.json"
|
||||
package_path.write_text('[]')
|
||||
package_path.write_text("[]")
|
||||
|
||||
adaptor = get_adaptor("langchain")
|
||||
result = adaptor.upload(package_path, "fake-key")
|
||||
@@ -153,9 +149,7 @@ class TestLangChainAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("langchain")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
@@ -173,9 +167,7 @@ class TestLangChainAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("langchain")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestLlamaIndexAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for LlamaIndex format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for LlamaIndex format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestLlamaIndexAdaptor:
|
||||
|
||||
# Format as LlamaIndex Documents
|
||||
adaptor = get_adaptor("llama-index")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -112,7 +108,7 @@ class TestLlamaIndexAdaptor:
|
||||
"""Test upload returns instructions (no actual upload)."""
|
||||
# Create test package
|
||||
package_path = tmp_path / "test-llama-index.json"
|
||||
package_path.write_text('[]')
|
||||
package_path.write_text("[]")
|
||||
|
||||
adaptor = get_adaptor("llama-index")
|
||||
result = adaptor.upload(package_path, "fake-key")
|
||||
@@ -153,9 +149,7 @@ class TestLlamaIndexAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("llama-index")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
@@ -173,9 +167,7 @@ class TestLlamaIndexAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("llama-index")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
documents_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
documents = json.loads(documents_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestQdrantAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for Qdrant format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for Qdrant format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestQdrantAdaptor:
|
||||
|
||||
# Format as Qdrant points
|
||||
adaptor = get_adaptor("qdrant")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
points_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -119,7 +115,7 @@ class TestQdrantAdaptor:
|
||||
"""Test upload returns instructions (no actual upload)."""
|
||||
# Create test package
|
||||
package_path = tmp_path / "test-qdrant.json"
|
||||
package_path.write_text('[]')
|
||||
package_path.write_text("[]")
|
||||
|
||||
adaptor = get_adaptor("qdrant")
|
||||
result = adaptor.upload(package_path, "fake-key")
|
||||
@@ -160,9 +156,7 @@ class TestQdrantAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("qdrant")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
points_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
result = json.loads(points_json)
|
||||
@@ -181,9 +175,7 @@ class TestQdrantAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("qdrant")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
points_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
result = json.loads(points_json)
|
||||
|
||||
@@ -28,9 +28,7 @@ class TestWeaviateAdaptor:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
"# Test Skill\n\nThis is a test skill for Weaviate format."
|
||||
)
|
||||
skill_md.write_text("# Test Skill\n\nThis is a test skill for Weaviate format.")
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = skill_dir / "references"
|
||||
@@ -40,9 +38,7 @@ class TestWeaviateAdaptor:
|
||||
|
||||
# Format as Weaviate objects
|
||||
adaptor = get_adaptor("weaviate")
|
||||
metadata = SkillMetadata(
|
||||
name="test_skill", description="Test skill", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
|
||||
|
||||
objects_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
|
||||
@@ -119,7 +115,7 @@ class TestWeaviateAdaptor:
|
||||
"""Test upload returns instructions (no actual upload)."""
|
||||
# Create test package
|
||||
package_path = tmp_path / "test-weaviate.json"
|
||||
package_path.write_text('[]')
|
||||
package_path.write_text("[]")
|
||||
|
||||
adaptor = get_adaptor("weaviate")
|
||||
result = adaptor.upload(package_path, "fake-key")
|
||||
@@ -127,7 +123,11 @@ class TestWeaviateAdaptor:
|
||||
# Upload may fail if weaviate not installed (expected)
|
||||
assert "message" in result
|
||||
# Either weaviate not installed, invalid JSON, or connection error
|
||||
assert ("import weaviate" in result["message"] or "Failed to connect" in result["message"] or result["success"] is False)
|
||||
assert (
|
||||
"import weaviate" in result["message"]
|
||||
or "Failed to connect" in result["message"]
|
||||
or result["success"] is False
|
||||
)
|
||||
|
||||
def test_validate_api_key_returns_false(self):
|
||||
"""Test that API key validation returns False (no API needed)."""
|
||||
@@ -160,9 +160,7 @@ class TestWeaviateAdaptor:
|
||||
skill_dir.mkdir()
|
||||
|
||||
adaptor = get_adaptor("weaviate")
|
||||
metadata = SkillMetadata(
|
||||
name="empty_skill", description="Empty", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
|
||||
|
||||
objects_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
result = json.loads(objects_json)
|
||||
@@ -181,9 +179,7 @@ class TestWeaviateAdaptor:
|
||||
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
|
||||
|
||||
adaptor = get_adaptor("weaviate")
|
||||
metadata = SkillMetadata(
|
||||
name="refs_only", description="Refs only", version="1.0.0"
|
||||
)
|
||||
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
|
||||
|
||||
objects_json = adaptor.format_skill_md(skill_dir, metadata)
|
||||
result = json.loads(objects_json)
|
||||
|
||||
@@ -12,7 +12,7 @@ from skill_seekers.benchmark import (
|
||||
BenchmarkResult,
|
||||
BenchmarkRunner,
|
||||
BenchmarkReport,
|
||||
Metric
|
||||
Metric,
|
||||
)
|
||||
from skill_seekers.benchmark.models import TimingResult, MemoryUsage
|
||||
|
||||
@@ -37,12 +37,7 @@ class TestBenchmarkResult:
|
||||
"""Test adding timing result."""
|
||||
result = BenchmarkResult("test")
|
||||
|
||||
timing = TimingResult(
|
||||
operation="test_op",
|
||||
duration=1.5,
|
||||
iterations=1,
|
||||
avg_duration=1.5
|
||||
)
|
||||
timing = TimingResult(operation="test_op", duration=1.5, iterations=1, avg_duration=1.5)
|
||||
|
||||
result.add_timing(timing)
|
||||
|
||||
@@ -55,11 +50,7 @@ class TestBenchmarkResult:
|
||||
result = BenchmarkResult("test")
|
||||
|
||||
usage = MemoryUsage(
|
||||
operation="test_op",
|
||||
before_mb=100.0,
|
||||
after_mb=150.0,
|
||||
peak_mb=160.0,
|
||||
allocated_mb=50.0
|
||||
operation="test_op", before_mb=100.0, after_mb=150.0, peak_mb=160.0, allocated_mb=50.0
|
||||
)
|
||||
|
||||
result.add_memory(usage)
|
||||
@@ -72,11 +63,7 @@ class TestBenchmarkResult:
|
||||
"""Test adding custom metric."""
|
||||
result = BenchmarkResult("test")
|
||||
|
||||
metric = Metric(
|
||||
name="pages_per_sec",
|
||||
value=12.5,
|
||||
unit="pages/sec"
|
||||
)
|
||||
metric = Metric(name="pages_per_sec", value=12.5, unit="pages/sec")
|
||||
|
||||
result.add_metric(metric)
|
||||
|
||||
@@ -107,12 +94,7 @@ class TestBenchmarkResult:
|
||||
"""Test report generation."""
|
||||
result = BenchmarkResult("test")
|
||||
|
||||
timing = TimingResult(
|
||||
operation="test_op",
|
||||
duration=1.0,
|
||||
iterations=1,
|
||||
avg_duration=1.0
|
||||
)
|
||||
timing = TimingResult(operation="test_op", duration=1.0, iterations=1, avg_duration=1.0)
|
||||
result.add_timing(timing)
|
||||
|
||||
report = result.to_report()
|
||||
@@ -303,7 +285,7 @@ class TestBenchmark:
|
||||
before_mb=100.0,
|
||||
after_mb=1200.0,
|
||||
peak_mb=1500.0,
|
||||
allocated_mb=1100.0
|
||||
allocated_mb=1100.0,
|
||||
)
|
||||
benchmark.result.add_memory(usage)
|
||||
|
||||
@@ -370,10 +352,7 @@ class TestBenchmarkRunner:
|
||||
with bench.timer("op2"):
|
||||
time.sleep(0.03)
|
||||
|
||||
reports = runner.run_suite({
|
||||
"test1": bench1,
|
||||
"test2": bench2
|
||||
})
|
||||
reports = runner.run_suite({"test1": bench1, "test2": bench2})
|
||||
|
||||
assert len(reports) == 2
|
||||
assert "test1" in reports
|
||||
@@ -405,6 +384,7 @@ class TestBenchmarkRunner:
|
||||
|
||||
# Compare
|
||||
from skill_seekers.benchmark.models import ComparisonReport
|
||||
|
||||
comparison = runner.compare(baseline_path, improved_path)
|
||||
|
||||
assert isinstance(comparison, ComparisonReport)
|
||||
@@ -458,6 +438,7 @@ class TestBenchmarkRunner:
|
||||
def test_cleanup_old(self, tmp_path):
|
||||
"""Test cleaning up old benchmarks."""
|
||||
import os
|
||||
|
||||
runner = BenchmarkRunner(output_dir=tmp_path)
|
||||
|
||||
# Create 10 benchmark files with different timestamps
|
||||
@@ -476,10 +457,10 @@ class TestBenchmarkRunner:
|
||||
"memory": [],
|
||||
"metrics": [],
|
||||
"system_info": {},
|
||||
"recommendations": []
|
||||
"recommendations": [],
|
||||
}
|
||||
|
||||
with open(file_path, 'w') as f:
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(report_data, f)
|
||||
|
||||
# Set different modification times
|
||||
@@ -505,12 +486,7 @@ class TestBenchmarkModels:
|
||||
|
||||
def test_timing_result_model(self):
|
||||
"""Test TimingResult model."""
|
||||
timing = TimingResult(
|
||||
operation="test",
|
||||
duration=1.5,
|
||||
iterations=10,
|
||||
avg_duration=0.15
|
||||
)
|
||||
timing = TimingResult(operation="test", duration=1.5, iterations=10, avg_duration=0.15)
|
||||
|
||||
assert timing.operation == "test"
|
||||
assert timing.duration == 1.5
|
||||
@@ -520,11 +496,7 @@ class TestBenchmarkModels:
|
||||
def test_memory_usage_model(self):
|
||||
"""Test MemoryUsage model."""
|
||||
usage = MemoryUsage(
|
||||
operation="allocate",
|
||||
before_mb=100.0,
|
||||
after_mb=200.0,
|
||||
peak_mb=250.0,
|
||||
allocated_mb=100.0
|
||||
operation="allocate", before_mb=100.0, after_mb=200.0, peak_mb=250.0, allocated_mb=100.0
|
||||
)
|
||||
|
||||
assert usage.operation == "allocate"
|
||||
@@ -533,11 +505,7 @@ class TestBenchmarkModels:
|
||||
|
||||
def test_metric_model(self):
|
||||
"""Test Metric model."""
|
||||
metric = Metric(
|
||||
name="throughput",
|
||||
value=125.5,
|
||||
unit="ops/sec"
|
||||
)
|
||||
metric = Metric(name="throughput", value=125.5, unit="ops/sec")
|
||||
|
||||
assert metric.name == "throughput"
|
||||
assert metric.value == 125.5
|
||||
@@ -551,26 +519,19 @@ class TestBenchmarkModels:
|
||||
started_at=datetime.utcnow(),
|
||||
finished_at=datetime.utcnow(),
|
||||
total_duration=5.0,
|
||||
timings=[
|
||||
TimingResult(
|
||||
operation="op1",
|
||||
duration=2.0,
|
||||
iterations=1,
|
||||
avg_duration=2.0
|
||||
)
|
||||
],
|
||||
timings=[TimingResult(operation="op1", duration=2.0, iterations=1, avg_duration=2.0)],
|
||||
memory=[
|
||||
MemoryUsage(
|
||||
operation="op1",
|
||||
before_mb=100.0,
|
||||
after_mb=200.0,
|
||||
peak_mb=250.0,
|
||||
allocated_mb=100.0
|
||||
allocated_mb=100.0,
|
||||
)
|
||||
],
|
||||
metrics=[],
|
||||
system_info={},
|
||||
recommendations=[]
|
||||
recommendations=[],
|
||||
)
|
||||
|
||||
summary = report.summary
|
||||
@@ -592,7 +553,7 @@ class TestBenchmarkModels:
|
||||
memory=[],
|
||||
metrics=[],
|
||||
system_info={},
|
||||
recommendations=[]
|
||||
recommendations=[],
|
||||
)
|
||||
|
||||
current = BenchmarkReport(
|
||||
@@ -604,7 +565,7 @@ class TestBenchmarkModels:
|
||||
memory=[],
|
||||
metrics=[],
|
||||
system_info={},
|
||||
recommendations=[]
|
||||
recommendations=[],
|
||||
)
|
||||
|
||||
comparison = ComparisonReport(
|
||||
@@ -614,7 +575,7 @@ class TestBenchmarkModels:
|
||||
improvements=[],
|
||||
regressions=["Slower performance"],
|
||||
speedup_factor=0.5,
|
||||
memory_change_mb=0.0
|
||||
memory_change_mb=0.0,
|
||||
)
|
||||
|
||||
assert comparison.has_regressions is True
|
||||
@@ -632,7 +593,7 @@ class TestBenchmarkModels:
|
||||
memory=[],
|
||||
metrics=[],
|
||||
system_info={},
|
||||
recommendations=[]
|
||||
recommendations=[],
|
||||
)
|
||||
|
||||
current = BenchmarkReport(
|
||||
@@ -644,7 +605,7 @@ class TestBenchmarkModels:
|
||||
memory=[],
|
||||
metrics=[],
|
||||
system_info={},
|
||||
recommendations=[]
|
||||
recommendations=[],
|
||||
)
|
||||
|
||||
comparison = ComparisonReport(
|
||||
@@ -654,7 +615,7 @@ class TestBenchmarkModels:
|
||||
improvements=[],
|
||||
regressions=[],
|
||||
speedup_factor=2.0,
|
||||
memory_change_mb=0.0
|
||||
memory_change_mb=0.0,
|
||||
)
|
||||
|
||||
improvement = comparison.overall_improvement
|
||||
|
||||
@@ -60,7 +60,7 @@ class TestChunkingDisabledByDefault:
|
||||
"""Test that LangChain doesn't chunk by default."""
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=True)
|
||||
|
||||
adaptor = get_adaptor('langchain')
|
||||
adaptor = get_adaptor("langchain")
|
||||
package_path = adaptor.package(skill_dir, tmp_path)
|
||||
|
||||
with open(package_path) as f:
|
||||
@@ -71,8 +71,8 @@ class TestChunkingDisabledByDefault:
|
||||
|
||||
# No chunking metadata
|
||||
for doc in data:
|
||||
assert 'is_chunked' not in doc['metadata']
|
||||
assert 'chunk_index' not in doc['metadata']
|
||||
assert "is_chunked" not in doc["metadata"]
|
||||
assert "chunk_index" not in doc["metadata"]
|
||||
|
||||
|
||||
class TestChunkingEnabled:
|
||||
@@ -82,12 +82,9 @@ class TestChunkingEnabled:
|
||||
"""Test that LangChain chunks large documents when enabled."""
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=True)
|
||||
|
||||
adaptor = get_adaptor('langchain')
|
||||
adaptor = get_adaptor("langchain")
|
||||
package_path = adaptor.package(
|
||||
skill_dir,
|
||||
tmp_path,
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=512
|
||||
skill_dir, tmp_path, enable_chunking=True, chunk_max_tokens=512
|
||||
)
|
||||
|
||||
with open(package_path) as f:
|
||||
@@ -97,25 +94,22 @@ class TestChunkingEnabled:
|
||||
assert len(data) > 2, f"Large doc should be chunked, got {len(data)} docs"
|
||||
|
||||
# Check for chunking metadata
|
||||
chunked_docs = [doc for doc in data if doc['metadata'].get('is_chunked')]
|
||||
chunked_docs = [doc for doc in data if doc["metadata"].get("is_chunked")]
|
||||
assert len(chunked_docs) > 0, "Should have chunked documents"
|
||||
|
||||
# Verify chunk metadata structure
|
||||
for doc in chunked_docs:
|
||||
assert 'chunk_index' in doc['metadata']
|
||||
assert 'total_chunks' in doc['metadata']
|
||||
assert 'chunk_id' in doc['metadata']
|
||||
assert "chunk_index" in doc["metadata"]
|
||||
assert "total_chunks" in doc["metadata"]
|
||||
assert "chunk_id" in doc["metadata"]
|
||||
|
||||
def test_chunking_preserves_small_docs(self, tmp_path):
|
||||
"""Test that small documents are not chunked."""
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=False)
|
||||
|
||||
adaptor = get_adaptor('langchain')
|
||||
adaptor = get_adaptor("langchain")
|
||||
package_path = adaptor.package(
|
||||
skill_dir,
|
||||
tmp_path,
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=512
|
||||
skill_dir, tmp_path, enable_chunking=True, chunk_max_tokens=512
|
||||
)
|
||||
|
||||
with open(package_path) as f:
|
||||
@@ -125,7 +119,7 @@ class TestChunkingEnabled:
|
||||
assert len(data) == 2, "Small docs should not be chunked"
|
||||
|
||||
for doc in data:
|
||||
assert 'is_chunked' not in doc['metadata']
|
||||
assert "is_chunked" not in doc["metadata"]
|
||||
|
||||
|
||||
class TestCodeBlockPreservation:
|
||||
@@ -158,43 +152,43 @@ More content after code block.
|
||||
# Create references dir (required)
|
||||
(skill_dir / "references").mkdir()
|
||||
|
||||
adaptor = get_adaptor('langchain')
|
||||
adaptor = get_adaptor("langchain")
|
||||
package_path = adaptor.package(
|
||||
skill_dir,
|
||||
tmp_path,
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=200, # Small chunks to force splitting
|
||||
preserve_code_blocks=True
|
||||
preserve_code_blocks=True,
|
||||
)
|
||||
|
||||
with open(package_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Find chunks with code block
|
||||
code_chunks = [
|
||||
doc for doc in data
|
||||
if '```python' in doc['page_content']
|
||||
]
|
||||
code_chunks = [doc for doc in data if "```python" in doc["page_content"]]
|
||||
|
||||
# Code block should be in at least one chunk
|
||||
assert len(code_chunks) >= 1, "Code block should be preserved"
|
||||
|
||||
# Code block should be complete (opening and closing backticks)
|
||||
for chunk in code_chunks:
|
||||
content = chunk['page_content']
|
||||
if '```python' in content:
|
||||
content = chunk["page_content"]
|
||||
if "```python" in content:
|
||||
# Should also have closing backticks
|
||||
assert content.count('```') >= 2, "Code block should be complete"
|
||||
assert content.count("```") >= 2, "Code block should be complete"
|
||||
|
||||
|
||||
class TestAutoChunkingForRAGPlatforms:
|
||||
"""Test that chunking is auto-enabled for RAG platforms."""
|
||||
|
||||
@pytest.mark.parametrize("platform", [
|
||||
'langchain',
|
||||
# Add others after they're updated:
|
||||
# 'llama-index', 'haystack', 'weaviate', 'chroma', 'faiss', 'qdrant'
|
||||
])
|
||||
@pytest.mark.parametrize(
|
||||
"platform",
|
||||
[
|
||||
"langchain",
|
||||
# Add others after they're updated:
|
||||
# 'llama-index', 'haystack', 'weaviate', 'chroma', 'faiss', 'qdrant'
|
||||
],
|
||||
)
|
||||
def test_rag_platforms_auto_chunk(self, platform, tmp_path):
|
||||
"""Test that RAG platforms auto-enable chunking."""
|
||||
skill_dir = create_test_skill(tmp_path, large_doc=True)
|
||||
@@ -208,7 +202,7 @@ class TestAutoChunkingForRAGPlatforms:
|
||||
open_folder_after=False,
|
||||
skip_quality_check=True,
|
||||
target=platform,
|
||||
enable_chunking=False # Explicitly disabled, but should be auto-enabled
|
||||
enable_chunking=False, # Explicitly disabled, but should be auto-enabled
|
||||
)
|
||||
|
||||
assert success, f"Packaging failed for {platform}"
|
||||
@@ -221,8 +215,8 @@ class TestAutoChunkingForRAGPlatforms:
|
||||
# Should have multiple documents/chunks
|
||||
if isinstance(data, list):
|
||||
assert len(data) > 2, f"{platform}: Should auto-chunk large docs"
|
||||
elif isinstance(data, dict) and 'documents' in data:
|
||||
assert len(data['documents']) > 2, f"{platform}: Should auto-chunk large docs"
|
||||
elif isinstance(data, dict) and "documents" in data:
|
||||
assert len(data["documents"]) > 2, f"{platform}: Should auto-chunk large docs"
|
||||
|
||||
|
||||
class TestBaseAdaptorChunkingHelper:
|
||||
@@ -237,11 +231,7 @@ class TestBaseAdaptorChunkingHelper:
|
||||
content = "Test content " * 1000 # Large content
|
||||
metadata = {"source": "test"}
|
||||
|
||||
chunks = adaptor._maybe_chunk_content(
|
||||
content,
|
||||
metadata,
|
||||
enable_chunking=False
|
||||
)
|
||||
chunks = adaptor._maybe_chunk_content(content, metadata, enable_chunking=False)
|
||||
|
||||
# Should return single chunk
|
||||
assert len(chunks) == 1
|
||||
@@ -258,10 +248,7 @@ class TestBaseAdaptorChunkingHelper:
|
||||
metadata = {"source": "test"}
|
||||
|
||||
chunks = adaptor._maybe_chunk_content(
|
||||
content,
|
||||
metadata,
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=512
|
||||
content, metadata, enable_chunking=True, chunk_max_tokens=512
|
||||
)
|
||||
|
||||
# Should return single chunk
|
||||
@@ -282,7 +269,7 @@ class TestBaseAdaptorChunkingHelper:
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=512,
|
||||
preserve_code_blocks=True,
|
||||
source_file="test.md"
|
||||
source_file="test.md",
|
||||
)
|
||||
|
||||
# Should return multiple chunks
|
||||
@@ -292,12 +279,12 @@ class TestBaseAdaptorChunkingHelper:
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
assert isinstance(chunk_text, str)
|
||||
assert isinstance(chunk_meta, dict)
|
||||
assert chunk_meta['is_chunked']
|
||||
assert 'chunk_index' in chunk_meta
|
||||
assert 'chunk_id' in chunk_meta
|
||||
assert chunk_meta["is_chunked"]
|
||||
assert "chunk_index" in chunk_meta
|
||||
assert "chunk_id" in chunk_meta
|
||||
# Original metadata preserved
|
||||
assert chunk_meta['source'] == 'test'
|
||||
assert chunk_meta['file'] == 'test.md'
|
||||
assert chunk_meta["source"] == "test"
|
||||
assert chunk_meta["file"] == "test.md"
|
||||
|
||||
|
||||
class TestChunkingCLIIntegration:
|
||||
@@ -313,10 +300,10 @@ class TestChunkingCLIIntegration:
|
||||
skill_dir=skill_dir,
|
||||
open_folder_after=False,
|
||||
skip_quality_check=True,
|
||||
target='langchain',
|
||||
target="langchain",
|
||||
enable_chunking=True, # --chunk flag
|
||||
chunk_max_tokens=512,
|
||||
preserve_code_blocks=True
|
||||
preserve_code_blocks=True,
|
||||
)
|
||||
|
||||
assert success
|
||||
@@ -339,10 +326,10 @@ class TestChunkingCLIIntegration:
|
||||
skill_dir=skill_dir,
|
||||
open_folder_after=False,
|
||||
skip_quality_check=True,
|
||||
target='langchain',
|
||||
target="langchain",
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=256, # Small chunks
|
||||
preserve_code_blocks=True
|
||||
preserve_code_blocks=True,
|
||||
)
|
||||
|
||||
assert success
|
||||
@@ -355,10 +342,10 @@ class TestChunkingCLIIntegration:
|
||||
skill_dir=skill_dir,
|
||||
open_folder_after=False,
|
||||
skip_quality_check=True,
|
||||
target='langchain',
|
||||
target="langchain",
|
||||
enable_chunking=True,
|
||||
chunk_max_tokens=1024, # Large chunks
|
||||
preserve_code_blocks=True
|
||||
preserve_code_blocks=True,
|
||||
)
|
||||
|
||||
assert success
|
||||
@@ -367,9 +354,10 @@ class TestChunkingCLIIntegration:
|
||||
data_large = json.load(f)
|
||||
|
||||
# Small chunk size should produce more chunks
|
||||
assert len(data_small) > len(data_large), \
|
||||
assert len(data_small) > len(data_large), (
|
||||
f"Small chunks ({len(data_small)}) should be more than large chunks ({len(data_large)})"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__, '-v'])
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
||||
@@ -30,12 +30,12 @@ class TestParserRegistry:
|
||||
"""Test getting list of parser names."""
|
||||
names = get_parser_names()
|
||||
assert len(names) == 19
|
||||
assert 'scrape' in names
|
||||
assert 'github' in names
|
||||
assert 'package' in names
|
||||
assert 'upload' in names
|
||||
assert 'analyze' in names
|
||||
assert 'config' in names
|
||||
assert "scrape" in names
|
||||
assert "github" in names
|
||||
assert "package" in names
|
||||
assert "upload" in names
|
||||
assert "analyze" in names
|
||||
assert "config" in names
|
||||
|
||||
def test_all_parsers_are_subcommand_parsers(self):
|
||||
"""Test that all parsers inherit from SubcommandParser."""
|
||||
@@ -45,9 +45,9 @@ class TestParserRegistry:
|
||||
def test_all_parsers_have_required_properties(self):
|
||||
"""Test that all parsers have name, help, description."""
|
||||
for parser in PARSERS:
|
||||
assert hasattr(parser, 'name')
|
||||
assert hasattr(parser, 'help')
|
||||
assert hasattr(parser, 'description')
|
||||
assert hasattr(parser, "name")
|
||||
assert hasattr(parser, "help")
|
||||
assert hasattr(parser, "description")
|
||||
assert isinstance(parser.name, str)
|
||||
assert isinstance(parser.help, str)
|
||||
assert isinstance(parser.description, str)
|
||||
@@ -57,7 +57,7 @@ class TestParserRegistry:
|
||||
def test_all_parsers_have_add_arguments_method(self):
|
||||
"""Test that all parsers implement add_arguments."""
|
||||
for parser in PARSERS:
|
||||
assert hasattr(parser, 'add_arguments')
|
||||
assert hasattr(parser, "add_arguments")
|
||||
assert callable(parser.add_arguments)
|
||||
|
||||
def test_no_duplicate_parser_names(self):
|
||||
@@ -106,21 +106,21 @@ class TestParserCreation:
|
||||
def test_register_parsers_creates_all_subcommands(self):
|
||||
"""Test that register_parsers creates all 19 subcommands."""
|
||||
main_parser = argparse.ArgumentParser()
|
||||
subparsers = main_parser.add_subparsers(dest='command')
|
||||
subparsers = main_parser.add_subparsers(dest="command")
|
||||
|
||||
# Register all parsers
|
||||
register_parsers(subparsers)
|
||||
|
||||
# Test that all commands can be parsed
|
||||
test_commands = [
|
||||
'config --show',
|
||||
'scrape --config test.json',
|
||||
'github --repo owner/repo',
|
||||
'package output/test/',
|
||||
'upload test.zip',
|
||||
'analyze --directory .',
|
||||
'enhance output/test/',
|
||||
'estimate test.json',
|
||||
"config --show",
|
||||
"scrape --config test.json",
|
||||
"github --repo owner/repo",
|
||||
"package output/test/",
|
||||
"upload test.zip",
|
||||
"analyze --directory .",
|
||||
"enhance output/test/",
|
||||
"estimate test.json",
|
||||
]
|
||||
|
||||
for cmd in test_commands:
|
||||
@@ -134,75 +134,76 @@ class TestSpecificParsers:
|
||||
def test_scrape_parser_arguments(self):
|
||||
"""Test ScrapeParser has correct arguments."""
|
||||
main_parser = argparse.ArgumentParser()
|
||||
subparsers = main_parser.add_subparsers(dest='command')
|
||||
subparsers = main_parser.add_subparsers(dest="command")
|
||||
|
||||
scrape_parser = ScrapeParser()
|
||||
scrape_parser.create_parser(subparsers)
|
||||
|
||||
# Test various argument combinations
|
||||
args = main_parser.parse_args(['scrape', '--config', 'test.json'])
|
||||
assert args.command == 'scrape'
|
||||
assert args.config == 'test.json'
|
||||
args = main_parser.parse_args(["scrape", "--config", "test.json"])
|
||||
assert args.command == "scrape"
|
||||
assert args.config == "test.json"
|
||||
|
||||
args = main_parser.parse_args(['scrape', '--config', 'test.json', '--max-pages', '100'])
|
||||
args = main_parser.parse_args(["scrape", "--config", "test.json", "--max-pages", "100"])
|
||||
assert args.max_pages == 100
|
||||
|
||||
args = main_parser.parse_args(['scrape', '--enhance'])
|
||||
args = main_parser.parse_args(["scrape", "--enhance"])
|
||||
assert args.enhance is True
|
||||
|
||||
def test_github_parser_arguments(self):
|
||||
"""Test GitHubParser has correct arguments."""
|
||||
main_parser = argparse.ArgumentParser()
|
||||
subparsers = main_parser.add_subparsers(dest='command')
|
||||
subparsers = main_parser.add_subparsers(dest="command")
|
||||
|
||||
github_parser = GitHubParser()
|
||||
github_parser.create_parser(subparsers)
|
||||
|
||||
args = main_parser.parse_args(['github', '--repo', 'owner/repo'])
|
||||
assert args.command == 'github'
|
||||
assert args.repo == 'owner/repo'
|
||||
args = main_parser.parse_args(["github", "--repo", "owner/repo"])
|
||||
assert args.command == "github"
|
||||
assert args.repo == "owner/repo"
|
||||
|
||||
args = main_parser.parse_args(['github', '--repo', 'owner/repo', '--non-interactive'])
|
||||
args = main_parser.parse_args(["github", "--repo", "owner/repo", "--non-interactive"])
|
||||
assert args.non_interactive is True
|
||||
|
||||
def test_package_parser_arguments(self):
|
||||
"""Test PackageParser has correct arguments."""
|
||||
main_parser = argparse.ArgumentParser()
|
||||
subparsers = main_parser.add_subparsers(dest='command')
|
||||
subparsers = main_parser.add_subparsers(dest="command")
|
||||
|
||||
package_parser = PackageParser()
|
||||
package_parser.create_parser(subparsers)
|
||||
|
||||
args = main_parser.parse_args(['package', 'output/test/'])
|
||||
assert args.command == 'package'
|
||||
assert args.skill_directory == 'output/test/'
|
||||
args = main_parser.parse_args(["package", "output/test/"])
|
||||
assert args.command == "package"
|
||||
assert args.skill_directory == "output/test/"
|
||||
|
||||
args = main_parser.parse_args(['package', 'output/test/', '--target', 'gemini'])
|
||||
assert args.target == 'gemini'
|
||||
args = main_parser.parse_args(["package", "output/test/", "--target", "gemini"])
|
||||
assert args.target == "gemini"
|
||||
|
||||
args = main_parser.parse_args(['package', 'output/test/', '--no-open'])
|
||||
args = main_parser.parse_args(["package", "output/test/", "--no-open"])
|
||||
assert args.no_open is True
|
||||
|
||||
def test_analyze_parser_arguments(self):
|
||||
"""Test AnalyzeParser has correct arguments."""
|
||||
main_parser = argparse.ArgumentParser()
|
||||
subparsers = main_parser.add_subparsers(dest='command')
|
||||
subparsers = main_parser.add_subparsers(dest="command")
|
||||
|
||||
from skill_seekers.cli.parsers.analyze_parser import AnalyzeParser
|
||||
|
||||
analyze_parser = AnalyzeParser()
|
||||
analyze_parser.create_parser(subparsers)
|
||||
|
||||
args = main_parser.parse_args(['analyze', '--directory', '.'])
|
||||
assert args.command == 'analyze'
|
||||
assert args.directory == '.'
|
||||
args = main_parser.parse_args(["analyze", "--directory", "."])
|
||||
assert args.command == "analyze"
|
||||
assert args.directory == "."
|
||||
|
||||
args = main_parser.parse_args(['analyze', '--directory', '.', '--quick'])
|
||||
args = main_parser.parse_args(["analyze", "--directory", ".", "--quick"])
|
||||
assert args.quick is True
|
||||
|
||||
args = main_parser.parse_args(['analyze', '--directory', '.', '--comprehensive'])
|
||||
args = main_parser.parse_args(["analyze", "--directory", ".", "--comprehensive"])
|
||||
assert args.comprehensive is True
|
||||
|
||||
args = main_parser.parse_args(['analyze', '--directory', '.', '--skip-patterns'])
|
||||
args = main_parser.parse_args(["analyze", "--directory", ".", "--skip-patterns"])
|
||||
assert args.skip_patterns is True
|
||||
|
||||
|
||||
@@ -215,11 +216,25 @@ class TestBackwardCompatibility:
|
||||
|
||||
# Original commands from old main.py
|
||||
original_commands = [
|
||||
'config', 'scrape', 'github', 'pdf', 'unified',
|
||||
'enhance', 'enhance-status', 'package', 'upload',
|
||||
'estimate', 'extract-test-examples', 'install-agent',
|
||||
'analyze', 'install', 'resume', 'stream',
|
||||
'update', 'multilang', 'quality'
|
||||
"config",
|
||||
"scrape",
|
||||
"github",
|
||||
"pdf",
|
||||
"unified",
|
||||
"enhance",
|
||||
"enhance-status",
|
||||
"package",
|
||||
"upload",
|
||||
"estimate",
|
||||
"extract-test-examples",
|
||||
"install-agent",
|
||||
"analyze",
|
||||
"install",
|
||||
"resume",
|
||||
"stream",
|
||||
"update",
|
||||
"multilang",
|
||||
"quality",
|
||||
]
|
||||
|
||||
for cmd in original_commands:
|
||||
|
||||
@@ -20,18 +20,21 @@ from skill_seekers.cli.storage import (
|
||||
# Check if cloud storage dependencies are available
|
||||
try:
|
||||
import boto3 # noqa: F401
|
||||
|
||||
BOTO3_AVAILABLE = True
|
||||
except ImportError:
|
||||
BOTO3_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from google.cloud import storage # noqa: F401
|
||||
|
||||
GCS_AVAILABLE = True
|
||||
except ImportError:
|
||||
GCS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from azure.storage.blob import BlobServiceClient # noqa: F401
|
||||
|
||||
AZURE_AVAILABLE = True
|
||||
except ImportError:
|
||||
AZURE_AVAILABLE = False
|
||||
@@ -41,12 +44,13 @@ except ImportError:
|
||||
# Factory Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_get_storage_adaptor_s3():
|
||||
"""Test S3 adaptor factory."""
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3'):
|
||||
adaptor = get_storage_adaptor('s3', bucket='test-bucket')
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3"):
|
||||
adaptor = get_storage_adaptor("s3", bucket="test-bucket")
|
||||
assert isinstance(adaptor, S3StorageAdaptor)
|
||||
|
||||
|
||||
@@ -54,8 +58,8 @@ def test_get_storage_adaptor_gcs():
|
||||
"""Test GCS adaptor factory."""
|
||||
if not GCS_AVAILABLE:
|
||||
pytest.skip("google-cloud-storage not installed")
|
||||
with patch('skill_seekers.cli.storage.gcs_storage.storage'):
|
||||
adaptor = get_storage_adaptor('gcs', bucket='test-bucket')
|
||||
with patch("skill_seekers.cli.storage.gcs_storage.storage"):
|
||||
adaptor = get_storage_adaptor("gcs", bucket="test-bucket")
|
||||
assert isinstance(adaptor, GCSStorageAdaptor)
|
||||
|
||||
|
||||
@@ -63,11 +67,11 @@ def test_get_storage_adaptor_azure():
|
||||
"""Test Azure adaptor factory."""
|
||||
if not AZURE_AVAILABLE:
|
||||
pytest.skip("azure-storage-blob not installed")
|
||||
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient'):
|
||||
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient"):
|
||||
adaptor = get_storage_adaptor(
|
||||
'azure',
|
||||
container='test-container',
|
||||
connection_string='DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
|
||||
"azure",
|
||||
container="test-container",
|
||||
connection_string="DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key",
|
||||
)
|
||||
assert isinstance(adaptor, AzureStorageAdaptor)
|
||||
|
||||
@@ -75,36 +79,37 @@ def test_get_storage_adaptor_azure():
|
||||
def test_get_storage_adaptor_invalid_provider():
|
||||
"""Test invalid provider raises error."""
|
||||
with pytest.raises(ValueError, match="Unsupported storage provider"):
|
||||
get_storage_adaptor('invalid', bucket='test')
|
||||
get_storage_adaptor("invalid", bucket="test")
|
||||
|
||||
|
||||
# ========================================
|
||||
# S3 Storage Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_s3_upload_file():
|
||||
"""Test S3 file upload."""
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Create temporary file
|
||||
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
||||
tmp_file.write(b'test content')
|
||||
tmp_file.write(b"test content")
|
||||
tmp_path = tmp_file.name
|
||||
|
||||
try:
|
||||
# Test upload
|
||||
result = adaptor.upload_file(tmp_path, 'test.txt')
|
||||
result = adaptor.upload_file(tmp_path, "test.txt")
|
||||
|
||||
assert result == 's3://test-bucket/test.txt'
|
||||
assert result == "s3://test-bucket/test.txt"
|
||||
mock_client.upload_file.assert_called_once()
|
||||
finally:
|
||||
Path(tmp_path).unlink()
|
||||
@@ -115,23 +120,21 @@ def test_s3_download_file():
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
local_path = os.path.join(tmp_dir, 'downloaded.txt')
|
||||
local_path = os.path.join(tmp_dir, "downloaded.txt")
|
||||
|
||||
# Test download
|
||||
adaptor.download_file('test.txt', local_path)
|
||||
adaptor.download_file("test.txt", local_path)
|
||||
|
||||
mock_client.download_file.assert_called_once_with(
|
||||
'test-bucket', 'test.txt', local_path
|
||||
)
|
||||
mock_client.download_file.assert_called_once_with("test-bucket", "test.txt", local_path)
|
||||
|
||||
|
||||
def test_s3_list_files():
|
||||
@@ -139,18 +142,18 @@ def test_s3_list_files():
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_paginator = Mock()
|
||||
mock_page_iterator = [
|
||||
{
|
||||
'Contents': [
|
||||
"Contents": [
|
||||
{
|
||||
'Key': 'file1.txt',
|
||||
'Size': 100,
|
||||
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
|
||||
'ETag': '"abc123"'
|
||||
"Key": "file1.txt",
|
||||
"Size": 100,
|
||||
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
|
||||
"ETag": '"abc123"',
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -161,15 +164,15 @@ def test_s3_list_files():
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Test list
|
||||
files = adaptor.list_files('prefix/')
|
||||
files = adaptor.list_files("prefix/")
|
||||
|
||||
assert len(files) == 1
|
||||
assert files[0].key == 'file1.txt'
|
||||
assert files[0].key == "file1.txt"
|
||||
assert files[0].size == 100
|
||||
assert files[0].etag == 'abc123'
|
||||
assert files[0].etag == "abc123"
|
||||
|
||||
|
||||
def test_s3_file_exists():
|
||||
@@ -177,17 +180,17 @@ def test_s3_file_exists():
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_client.head_object.return_value = {}
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Test exists
|
||||
assert adaptor.file_exists('test.txt') is True
|
||||
assert adaptor.file_exists("test.txt") is True
|
||||
|
||||
|
||||
def test_s3_get_file_url():
|
||||
@@ -195,19 +198,19 @@ def test_s3_get_file_url():
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_client.generate_presigned_url.return_value = 'https://s3.amazonaws.com/signed-url'
|
||||
mock_client.generate_presigned_url.return_value = "https://s3.amazonaws.com/signed-url"
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Test URL generation
|
||||
url = adaptor.get_file_url('test.txt', expires_in=7200)
|
||||
url = adaptor.get_file_url("test.txt", expires_in=7200)
|
||||
|
||||
assert url == 'https://s3.amazonaws.com/signed-url'
|
||||
assert url == "https://s3.amazonaws.com/signed-url"
|
||||
mock_client.generate_presigned_url.assert_called_once()
|
||||
|
||||
|
||||
@@ -215,12 +218,13 @@ def test_s3_get_file_url():
|
||||
# GCS Storage Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_gcs_upload_file():
|
||||
"""Test GCS file upload."""
|
||||
if not GCS_AVAILABLE:
|
||||
pytest.skip("google-cloud-storage not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
|
||||
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_bucket = Mock()
|
||||
@@ -230,18 +234,18 @@ def test_gcs_upload_file():
|
||||
mock_bucket.blob.return_value = mock_blob
|
||||
mock_storage.Client.return_value = mock_client
|
||||
|
||||
adaptor = GCSStorageAdaptor(bucket='test-bucket')
|
||||
adaptor = GCSStorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Create temporary file
|
||||
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
||||
tmp_file.write(b'test content')
|
||||
tmp_file.write(b"test content")
|
||||
tmp_path = tmp_file.name
|
||||
|
||||
try:
|
||||
# Test upload
|
||||
result = adaptor.upload_file(tmp_path, 'test.txt')
|
||||
result = adaptor.upload_file(tmp_path, "test.txt")
|
||||
|
||||
assert result == 'gs://test-bucket/test.txt'
|
||||
assert result == "gs://test-bucket/test.txt"
|
||||
mock_blob.upload_from_filename.assert_called_once()
|
||||
finally:
|
||||
Path(tmp_path).unlink()
|
||||
@@ -252,7 +256,7 @@ def test_gcs_download_file():
|
||||
if not GCS_AVAILABLE:
|
||||
pytest.skip("google-cloud-storage not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
|
||||
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_bucket = Mock()
|
||||
@@ -262,13 +266,13 @@ def test_gcs_download_file():
|
||||
mock_bucket.blob.return_value = mock_blob
|
||||
mock_storage.Client.return_value = mock_client
|
||||
|
||||
adaptor = GCSStorageAdaptor(bucket='test-bucket')
|
||||
adaptor = GCSStorageAdaptor(bucket="test-bucket")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
local_path = os.path.join(tmp_dir, 'downloaded.txt')
|
||||
local_path = os.path.join(tmp_dir, "downloaded.txt")
|
||||
|
||||
# Test download
|
||||
adaptor.download_file('test.txt', local_path)
|
||||
adaptor.download_file("test.txt", local_path)
|
||||
|
||||
mock_blob.download_to_filename.assert_called_once()
|
||||
|
||||
@@ -278,27 +282,27 @@ def test_gcs_list_files():
|
||||
if not GCS_AVAILABLE:
|
||||
pytest.skip("google-cloud-storage not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
|
||||
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_blob = Mock()
|
||||
mock_blob.name = 'file1.txt'
|
||||
mock_blob.name = "file1.txt"
|
||||
mock_blob.size = 100
|
||||
mock_blob.updated = Mock(isoformat=lambda: '2024-01-01T00:00:00')
|
||||
mock_blob.etag = 'abc123'
|
||||
mock_blob.updated = Mock(isoformat=lambda: "2024-01-01T00:00:00")
|
||||
mock_blob.etag = "abc123"
|
||||
mock_blob.metadata = {}
|
||||
|
||||
mock_client.list_blobs.return_value = [mock_blob]
|
||||
mock_storage.Client.return_value = mock_client
|
||||
mock_client.bucket.return_value = Mock()
|
||||
|
||||
adaptor = GCSStorageAdaptor(bucket='test-bucket')
|
||||
adaptor = GCSStorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Test list
|
||||
files = adaptor.list_files('prefix/')
|
||||
files = adaptor.list_files("prefix/")
|
||||
|
||||
assert len(files) == 1
|
||||
assert files[0].key == 'file1.txt'
|
||||
assert files[0].key == "file1.txt"
|
||||
assert files[0].size == 100
|
||||
|
||||
|
||||
@@ -306,12 +310,13 @@ def test_gcs_list_files():
|
||||
# Azure Storage Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_azure_upload_file():
|
||||
"""Test Azure file upload."""
|
||||
if not AZURE_AVAILABLE:
|
||||
pytest.skip("azure-storage-blob not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
|
||||
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
|
||||
# Setup mocks
|
||||
mock_service_client = Mock()
|
||||
mock_container_client = Mock()
|
||||
@@ -321,19 +326,21 @@ def test_azure_upload_file():
|
||||
mock_container_client.get_blob_client.return_value = mock_blob_client
|
||||
mock_blob_service.from_connection_string.return_value = mock_service_client
|
||||
|
||||
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
|
||||
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
|
||||
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
|
||||
adaptor = AzureStorageAdaptor(
|
||||
container="test-container", connection_string=connection_string
|
||||
)
|
||||
|
||||
# Create temporary file
|
||||
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
||||
tmp_file.write(b'test content')
|
||||
tmp_file.write(b"test content")
|
||||
tmp_path = tmp_file.name
|
||||
|
||||
try:
|
||||
# Test upload
|
||||
result = adaptor.upload_file(tmp_path, 'test.txt')
|
||||
result = adaptor.upload_file(tmp_path, "test.txt")
|
||||
|
||||
assert 'test.blob.core.windows.net' in result
|
||||
assert "test.blob.core.windows.net" in result
|
||||
mock_blob_client.upload_blob.assert_called_once()
|
||||
finally:
|
||||
Path(tmp_path).unlink()
|
||||
@@ -344,30 +351,32 @@ def test_azure_download_file():
|
||||
if not AZURE_AVAILABLE:
|
||||
pytest.skip("azure-storage-blob not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
|
||||
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
|
||||
# Setup mocks
|
||||
mock_service_client = Mock()
|
||||
mock_container_client = Mock()
|
||||
mock_blob_client = Mock()
|
||||
mock_download_stream = Mock()
|
||||
mock_download_stream.readall.return_value = b'test content'
|
||||
mock_download_stream.readall.return_value = b"test content"
|
||||
|
||||
mock_service_client.get_container_client.return_value = mock_container_client
|
||||
mock_container_client.get_blob_client.return_value = mock_blob_client
|
||||
mock_blob_client.download_blob.return_value = mock_download_stream
|
||||
mock_blob_service.from_connection_string.return_value = mock_service_client
|
||||
|
||||
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
|
||||
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
|
||||
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
|
||||
adaptor = AzureStorageAdaptor(
|
||||
container="test-container", connection_string=connection_string
|
||||
)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
local_path = os.path.join(tmp_dir, 'downloaded.txt')
|
||||
local_path = os.path.join(tmp_dir, "downloaded.txt")
|
||||
|
||||
# Test download
|
||||
adaptor.download_file('test.txt', local_path)
|
||||
adaptor.download_file("test.txt", local_path)
|
||||
|
||||
assert Path(local_path).exists()
|
||||
assert Path(local_path).read_bytes() == b'test content'
|
||||
assert Path(local_path).read_bytes() == b"test content"
|
||||
|
||||
|
||||
def test_azure_list_files():
|
||||
@@ -375,29 +384,31 @@ def test_azure_list_files():
|
||||
if not AZURE_AVAILABLE:
|
||||
pytest.skip("azure-storage-blob not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
|
||||
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
|
||||
# Setup mocks
|
||||
mock_service_client = Mock()
|
||||
mock_container_client = Mock()
|
||||
mock_blob = Mock()
|
||||
mock_blob.name = 'file1.txt'
|
||||
mock_blob.name = "file1.txt"
|
||||
mock_blob.size = 100
|
||||
mock_blob.last_modified = Mock(isoformat=lambda: '2024-01-01T00:00:00')
|
||||
mock_blob.etag = 'abc123'
|
||||
mock_blob.last_modified = Mock(isoformat=lambda: "2024-01-01T00:00:00")
|
||||
mock_blob.etag = "abc123"
|
||||
mock_blob.metadata = {}
|
||||
|
||||
mock_container_client.list_blobs.return_value = [mock_blob]
|
||||
mock_service_client.get_container_client.return_value = mock_container_client
|
||||
mock_blob_service.from_connection_string.return_value = mock_service_client
|
||||
|
||||
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
|
||||
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
|
||||
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
|
||||
adaptor = AzureStorageAdaptor(
|
||||
container="test-container", connection_string=connection_string
|
||||
)
|
||||
|
||||
# Test list
|
||||
files = adaptor.list_files('prefix/')
|
||||
files = adaptor.list_files("prefix/")
|
||||
|
||||
assert len(files) == 1
|
||||
assert files[0].key == 'file1.txt'
|
||||
assert files[0].key == "file1.txt"
|
||||
assert files[0].size == 100
|
||||
|
||||
|
||||
@@ -405,53 +416,55 @@ def test_azure_list_files():
|
||||
# Base Adaptor Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_storage_object():
|
||||
"""Test StorageObject dataclass."""
|
||||
obj = StorageObject(
|
||||
key='test.txt',
|
||||
key="test.txt",
|
||||
size=100,
|
||||
last_modified='2024-01-01T00:00:00',
|
||||
etag='abc123',
|
||||
metadata={'key': 'value'}
|
||||
last_modified="2024-01-01T00:00:00",
|
||||
etag="abc123",
|
||||
metadata={"key": "value"},
|
||||
)
|
||||
|
||||
assert obj.key == 'test.txt'
|
||||
assert obj.key == "test.txt"
|
||||
assert obj.size == 100
|
||||
assert obj.metadata == {'key': 'value'}
|
||||
assert obj.metadata == {"key": "value"}
|
||||
|
||||
|
||||
def test_base_adaptor_abstract():
|
||||
"""Test that BaseStorageAdaptor cannot be instantiated."""
|
||||
with pytest.raises(TypeError):
|
||||
BaseStorageAdaptor(bucket='test')
|
||||
BaseStorageAdaptor(bucket="test")
|
||||
|
||||
|
||||
# ========================================
|
||||
# Integration-style Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_upload_directory():
|
||||
"""Test directory upload."""
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
# Create temporary directory with files
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
(Path(tmp_dir) / 'file1.txt').write_text('content1')
|
||||
(Path(tmp_dir) / 'file2.txt').write_text('content2')
|
||||
(Path(tmp_dir) / 'subdir').mkdir()
|
||||
(Path(tmp_dir) / 'subdir' / 'file3.txt').write_text('content3')
|
||||
(Path(tmp_dir) / "file1.txt").write_text("content1")
|
||||
(Path(tmp_dir) / "file2.txt").write_text("content2")
|
||||
(Path(tmp_dir) / "subdir").mkdir()
|
||||
(Path(tmp_dir) / "subdir" / "file3.txt").write_text("content3")
|
||||
|
||||
# Test upload directory
|
||||
uploaded_files = adaptor.upload_directory(tmp_dir, 'skills/')
|
||||
uploaded_files = adaptor.upload_directory(tmp_dir, "skills/")
|
||||
|
||||
assert len(uploaded_files) == 3
|
||||
assert mock_client.upload_file.call_count == 3
|
||||
@@ -462,25 +475,25 @@ def test_download_directory():
|
||||
if not BOTO3_AVAILABLE:
|
||||
pytest.skip("boto3 not installed")
|
||||
|
||||
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
|
||||
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
|
||||
# Setup mocks
|
||||
mock_client = Mock()
|
||||
mock_paginator = Mock()
|
||||
mock_page_iterator = [
|
||||
{
|
||||
'Contents': [
|
||||
"Contents": [
|
||||
{
|
||||
'Key': 'skills/file1.txt',
|
||||
'Size': 100,
|
||||
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
|
||||
'ETag': '"abc"'
|
||||
"Key": "skills/file1.txt",
|
||||
"Size": 100,
|
||||
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
|
||||
"ETag": '"abc"',
|
||||
},
|
||||
{
|
||||
'Key': 'skills/file2.txt',
|
||||
'Size': 200,
|
||||
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
|
||||
'ETag': '"def"'
|
||||
}
|
||||
"Key": "skills/file2.txt",
|
||||
"Size": 200,
|
||||
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
|
||||
"ETag": '"def"',
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -490,11 +503,11 @@ def test_download_directory():
|
||||
mock_boto3.client.return_value = mock_client
|
||||
mock_boto3.resource.return_value = Mock()
|
||||
|
||||
adaptor = S3StorageAdaptor(bucket='test-bucket')
|
||||
adaptor = S3StorageAdaptor(bucket="test-bucket")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
# Test download directory
|
||||
downloaded_files = adaptor.download_directory('skills/', tmp_dir)
|
||||
downloaded_files = adaptor.download_directory("skills/", tmp_dir)
|
||||
|
||||
assert len(downloaded_files) == 2
|
||||
assert mock_client.download_file.call_count == 2
|
||||
|
||||
@@ -23,6 +23,7 @@ from skill_seekers.embedding.cache import EmbeddingCache
|
||||
# Cache Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_cache_init():
|
||||
"""Test cache initialization."""
|
||||
cache = EmbeddingCache(":memory:")
|
||||
@@ -121,6 +122,7 @@ def test_cache_context_manager():
|
||||
# Generator Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_generator_init():
|
||||
"""Test generator initialization."""
|
||||
generator = EmbeddingGenerator()
|
||||
@@ -174,7 +176,7 @@ def test_generator_compute_hash():
|
||||
assert hash1 != hash4
|
||||
|
||||
|
||||
@patch('skill_seekers.embedding.generator.SENTENCE_TRANSFORMERS_AVAILABLE', False)
|
||||
@patch("skill_seekers.embedding.generator.SENTENCE_TRANSFORMERS_AVAILABLE", False)
|
||||
def test_generator_sentence_transformers_not_available():
|
||||
"""Test sentence-transformers not available."""
|
||||
generator = EmbeddingGenerator()
|
||||
@@ -183,7 +185,7 @@ def test_generator_sentence_transformers_not_available():
|
||||
generator.generate("test", model="all-MiniLM-L6-v2")
|
||||
|
||||
|
||||
@patch('skill_seekers.embedding.generator.OPENAI_AVAILABLE', False)
|
||||
@patch("skill_seekers.embedding.generator.OPENAI_AVAILABLE", False)
|
||||
def test_generator_openai_not_available():
|
||||
"""Test OpenAI not available."""
|
||||
generator = EmbeddingGenerator()
|
||||
@@ -192,7 +194,7 @@ def test_generator_openai_not_available():
|
||||
generator.generate("test", model="text-embedding-3-small")
|
||||
|
||||
|
||||
@patch('skill_seekers.embedding.generator.VOYAGE_AVAILABLE', False)
|
||||
@patch("skill_seekers.embedding.generator.VOYAGE_AVAILABLE", False)
|
||||
def test_generator_voyage_not_available():
|
||||
"""Test Voyage AI not available."""
|
||||
generator = EmbeddingGenerator()
|
||||
@@ -227,13 +229,10 @@ def test_generator_voyage_large_2_model_info():
|
||||
# Model Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_embedding_request():
|
||||
"""Test EmbeddingRequest model."""
|
||||
request = EmbeddingRequest(
|
||||
text="Hello world",
|
||||
model="text-embedding-3-small",
|
||||
normalize=True
|
||||
)
|
||||
request = EmbeddingRequest(text="Hello world", model="text-embedding-3-small", normalize=True)
|
||||
|
||||
assert request.text == "Hello world"
|
||||
assert request.model == "text-embedding-3-small"
|
||||
@@ -243,9 +242,7 @@ def test_embedding_request():
|
||||
def test_batch_embedding_request():
|
||||
"""Test BatchEmbeddingRequest model."""
|
||||
request = BatchEmbeddingRequest(
|
||||
texts=["text1", "text2", "text3"],
|
||||
model="text-embedding-3-small",
|
||||
batch_size=32
|
||||
texts=["text1", "text2", "text3"], model="text-embedding-3-small", batch_size=32
|
||||
)
|
||||
|
||||
assert len(request.texts) == 3
|
||||
@@ -255,10 +252,7 @@ def test_batch_embedding_request():
|
||||
def test_embedding_response():
|
||||
"""Test EmbeddingResponse model."""
|
||||
response = EmbeddingResponse(
|
||||
embedding=[0.1, 0.2, 0.3],
|
||||
model="test-model",
|
||||
dimensions=3,
|
||||
cached=False
|
||||
embedding=[0.1, 0.2, 0.3], model="test-model", dimensions=3, cached=False
|
||||
)
|
||||
|
||||
assert len(response.embedding) == 3
|
||||
@@ -273,7 +267,7 @@ def test_batch_embedding_response():
|
||||
model="test-model",
|
||||
dimensions=2,
|
||||
count=2,
|
||||
cached_count=1
|
||||
cached_count=1,
|
||||
)
|
||||
|
||||
assert len(response.embeddings) == 2
|
||||
@@ -288,7 +282,7 @@ def test_health_response():
|
||||
version="1.0.0",
|
||||
models=["model1", "model2"],
|
||||
cache_enabled=True,
|
||||
cache_size=100
|
||||
cache_size=100,
|
||||
)
|
||||
|
||||
assert response.status == "ok"
|
||||
@@ -303,7 +297,7 @@ def test_model_info():
|
||||
provider="openai",
|
||||
dimensions=1536,
|
||||
max_tokens=8191,
|
||||
cost_per_million=0.02
|
||||
cost_per_million=0.02,
|
||||
)
|
||||
|
||||
assert info.name == "test-model"
|
||||
@@ -315,6 +309,7 @@ def test_model_info():
|
||||
# Integration Tests
|
||||
# ========================================
|
||||
|
||||
|
||||
def test_cache_batch_operations():
|
||||
"""Test cache batch operations."""
|
||||
cache = EmbeddingCache(":memory:")
|
||||
|
||||
@@ -23,7 +23,7 @@ from skill_seekers.cli.embedding_pipeline import (
|
||||
EmbeddingPipeline,
|
||||
LocalEmbeddingProvider,
|
||||
EmbeddingCache,
|
||||
CostTracker
|
||||
CostTracker,
|
||||
)
|
||||
|
||||
|
||||
@@ -112,21 +112,16 @@ def test_cost_tracker():
|
||||
|
||||
stats = tracker.get_stats()
|
||||
|
||||
assert stats['total_requests'] == 2
|
||||
assert stats['total_tokens'] == 1500
|
||||
assert stats['cache_hits'] == 1
|
||||
assert stats['cache_misses'] == 1
|
||||
assert '50.0%' in stats['cache_rate']
|
||||
assert stats["total_requests"] == 2
|
||||
assert stats["total_tokens"] == 1500
|
||||
assert stats["cache_hits"] == 1
|
||||
assert stats["cache_misses"] == 1
|
||||
assert "50.0%" in stats["cache_rate"]
|
||||
|
||||
|
||||
def test_pipeline_initialization():
|
||||
"""Test pipeline initialization."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=128,
|
||||
batch_size=10
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=128, batch_size=10)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -137,12 +132,7 @@ def test_pipeline_initialization():
|
||||
|
||||
def test_pipeline_generate_batch():
|
||||
"""Test batch embedding generation."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=64,
|
||||
batch_size=2
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=64, batch_size=2)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -159,11 +149,11 @@ def test_pipeline_caching():
|
||||
"""Test pipeline uses caching."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
provider="local",
|
||||
model="test-model",
|
||||
dimension=32,
|
||||
batch_size=10,
|
||||
cache_dir=Path(tmpdir)
|
||||
cache_dir=Path(tmpdir),
|
||||
)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
@@ -184,10 +174,10 @@ def test_pipeline_caching():
|
||||
def test_pipeline_batch_processing():
|
||||
"""Test large batch is processed in chunks."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
provider="local",
|
||||
model="test-model",
|
||||
dimension=16,
|
||||
batch_size=3 # Small batch size
|
||||
batch_size=3, # Small batch size
|
||||
)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
@@ -201,11 +191,7 @@ def test_pipeline_batch_processing():
|
||||
|
||||
def test_validate_dimensions_valid():
|
||||
"""Test dimension validation with valid embeddings."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=128
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -217,11 +203,7 @@ def test_validate_dimensions_valid():
|
||||
|
||||
def test_validate_dimensions_invalid():
|
||||
"""Test dimension validation with invalid embeddings."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=128
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -234,30 +216,22 @@ def test_validate_dimensions_invalid():
|
||||
|
||||
def test_embedding_result_metadata():
|
||||
"""Test embedding result includes metadata."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=256
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=256)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
texts = ["test"]
|
||||
result = pipeline.generate_batch(texts, show_progress=False)
|
||||
|
||||
assert 'provider' in result.metadata
|
||||
assert 'model' in result.metadata
|
||||
assert 'dimension' in result.metadata
|
||||
assert result.metadata['dimension'] == 256
|
||||
assert "provider" in result.metadata
|
||||
assert "model" in result.metadata
|
||||
assert "dimension" in result.metadata
|
||||
assert result.metadata["dimension"] == 256
|
||||
|
||||
|
||||
def test_cost_stats():
|
||||
"""Test cost statistics tracking."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=64
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=64)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -266,18 +240,14 @@ def test_cost_stats():
|
||||
|
||||
stats = pipeline.get_cost_stats()
|
||||
|
||||
assert 'total_requests' in stats
|
||||
assert 'cache_hits' in stats
|
||||
assert 'estimated_cost' in stats
|
||||
assert "total_requests" in stats
|
||||
assert "cache_hits" in stats
|
||||
assert "estimated_cost" in stats
|
||||
|
||||
|
||||
def test_empty_batch():
|
||||
"""Test handling empty batch."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=32
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=32)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -289,11 +259,7 @@ def test_empty_batch():
|
||||
|
||||
def test_single_document():
|
||||
"""Test single document generation."""
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=128
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
|
||||
@@ -306,11 +272,7 @@ def test_single_document():
|
||||
def test_different_dimensions():
|
||||
"""Test different embedding dimensions."""
|
||||
for dim in [64, 128, 256, 512]:
|
||||
config = EmbeddingConfig(
|
||||
provider='local',
|
||||
model='test-model',
|
||||
dimension=dim
|
||||
)
|
||||
config = EmbeddingConfig(provider="local", model="test-model", dimension=dim)
|
||||
|
||||
pipeline = EmbeddingPipeline(config)
|
||||
result = pipeline.generate_batch(["test"], show_progress=False)
|
||||
|
||||
@@ -152,9 +152,7 @@ class TestMultiAgentSupport:
|
||||
|
||||
def test_rejects_missing_executable(self, tmp_path, monkeypatch):
|
||||
"""Test rejection when executable is not found on PATH."""
|
||||
monkeypatch.setattr(
|
||||
"skill_seekers.cli.enhance_skill_local.shutil.which", lambda _exe: None
|
||||
)
|
||||
monkeypatch.setattr("skill_seekers.cli.enhance_skill_local.shutil.which", lambda _exe: None)
|
||||
skill_dir = _make_skill_dir(tmp_path)
|
||||
|
||||
with pytest.raises(ValueError, match="not found in PATH"):
|
||||
|
||||
@@ -80,8 +80,9 @@ class TestFrameworkDetection(unittest.TestCase):
|
||||
arch_data = json.load(f)
|
||||
|
||||
self.assertIn("frameworks_detected", arch_data)
|
||||
self.assertIn("Flask", arch_data["frameworks_detected"],
|
||||
"Flask should be detected from imports")
|
||||
self.assertIn(
|
||||
"Flask", arch_data["frameworks_detected"], "Flask should be detected from imports"
|
||||
)
|
||||
|
||||
def test_files_with_imports_are_included(self):
|
||||
"""Test that files with only imports are included in analysis (Issue #239)."""
|
||||
@@ -119,24 +120,19 @@ class TestFrameworkDetection(unittest.TestCase):
|
||||
analysis_data = json.load(f)
|
||||
|
||||
# File should be included
|
||||
self.assertGreater(len(analysis_data["files"]), 0,
|
||||
"Files with imports should be included")
|
||||
self.assertGreater(len(analysis_data["files"]), 0, "Files with imports should be included")
|
||||
|
||||
# Find our import-only file
|
||||
import_file = next(
|
||||
(f for f in analysis_data["files"] if "imports_only.py" in f["file"]),
|
||||
None
|
||||
(f for f in analysis_data["files"] if "imports_only.py" in f["file"]), None
|
||||
)
|
||||
self.assertIsNotNone(import_file, "Import-only file should be in analysis")
|
||||
|
||||
# Verify imports were extracted
|
||||
self.assertIn("imports", import_file, "Imports should be extracted")
|
||||
self.assertGreater(len(import_file["imports"]), 0,
|
||||
"Should have captured imports")
|
||||
self.assertIn("django", import_file["imports"],
|
||||
"Django import should be captured")
|
||||
self.assertIn("flask", import_file["imports"],
|
||||
"Flask import should be captured")
|
||||
self.assertGreater(len(import_file["imports"]), 0, "Should have captured imports")
|
||||
self.assertIn("django", import_file["imports"], "Django import should be captured")
|
||||
self.assertIn("flask", import_file["imports"], "Flask import should be captured")
|
||||
|
||||
def test_no_false_positive_frameworks(self):
|
||||
"""Test that framework detection doesn't produce false positives (Issue #239)."""
|
||||
@@ -145,10 +141,7 @@ class TestFrameworkDetection(unittest.TestCase):
|
||||
app_dir.mkdir()
|
||||
|
||||
# File with no framework imports
|
||||
(app_dir / "utils.py").write_text(
|
||||
"def my_function():\n"
|
||||
" return 'hello'\n"
|
||||
)
|
||||
(app_dir / "utils.py").write_text("def my_function():\n return 'hello'\n")
|
||||
|
||||
# Run codebase analyzer
|
||||
from skill_seekers.cli.codebase_scraper import main as scraper_main
|
||||
@@ -180,12 +173,10 @@ class TestFrameworkDetection(unittest.TestCase):
|
||||
|
||||
frameworks = arch_data.get("frameworks_detected", [])
|
||||
# Should not detect Flask just from "app" directory name
|
||||
self.assertNotIn("Flask", frameworks,
|
||||
"Should not detect Flask without imports")
|
||||
self.assertNotIn("Flask", frameworks, "Should not detect Flask without imports")
|
||||
# Should not detect other frameworks with "app" in markers
|
||||
for fw in ["ASP.NET", "Rails", "Laravel"]:
|
||||
self.assertNotIn(fw, frameworks,
|
||||
f"Should not detect {fw} without real evidence")
|
||||
self.assertNotIn(fw, frameworks, f"Should not detect {fw} without real evidence")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -20,9 +20,7 @@ import time
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.incremental_updater import (
|
||||
IncrementalUpdater
|
||||
)
|
||||
from skill_seekers.cli.incremental_updater import IncrementalUpdater
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -281,15 +279,15 @@ def test_apply_update_package(temp_skill_dir):
|
||||
"timestamp": "2026-02-05T12:00:00",
|
||||
"skill_name": "test_skill",
|
||||
"change_summary": {"modified": 1},
|
||||
"total_changes": 1
|
||||
"total_changes": 1,
|
||||
},
|
||||
"changes": {
|
||||
"SKILL.md": {
|
||||
"action": "modify",
|
||||
"version": 2,
|
||||
"content": "# Updated Content\n\nApplied from package"
|
||||
"content": "# Updated Content\n\nApplied from package",
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
package_path.write_text(json.dumps(update_data))
|
||||
@@ -298,7 +296,9 @@ def test_apply_update_package(temp_skill_dir):
|
||||
success = updater.apply_update_package(package_path)
|
||||
|
||||
assert success
|
||||
assert (temp_skill_dir / "SKILL.md").read_text() == "# Updated Content\n\nApplied from package"
|
||||
assert (
|
||||
temp_skill_dir / "SKILL.md"
|
||||
).read_text() == "# Updated Content\n\nApplied from package"
|
||||
|
||||
|
||||
def test_content_hash_consistency(temp_skill_dir):
|
||||
|
||||
@@ -92,7 +92,11 @@ class TestConfigLoading(unittest.TestCase):
|
||||
{
|
||||
"type": "documentation",
|
||||
"base_url": "https://example.com/",
|
||||
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
|
||||
"selectors": {
|
||||
"main_content": "article",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre code",
|
||||
},
|
||||
"rate_limit": 0.5,
|
||||
"max_pages": 100,
|
||||
}
|
||||
|
||||
@@ -113,6 +113,7 @@ def check_service_available(url: str, timeout: int = 5) -> bool:
|
||||
"""Check if a service is available."""
|
||||
try:
|
||||
import requests
|
||||
|
||||
response = requests.get(url, timeout=timeout)
|
||||
return response.status_code == 200
|
||||
except Exception:
|
||||
@@ -133,7 +134,9 @@ class TestWeaviateIntegration:
|
||||
|
||||
# Check if Weaviate is running
|
||||
if not check_service_available("http://localhost:8080/v1/.well-known/ready"):
|
||||
pytest.skip("Weaviate not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
|
||||
pytest.skip(
|
||||
"Weaviate not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
|
||||
)
|
||||
|
||||
# Connect to Weaviate
|
||||
try:
|
||||
@@ -144,10 +147,7 @@ class TestWeaviateIntegration:
|
||||
|
||||
# Package skill
|
||||
adaptor = get_adaptor("weaviate")
|
||||
SkillMetadata(
|
||||
name="integration_test",
|
||||
description="Integration test skill for Weaviate"
|
||||
)
|
||||
SkillMetadata(name="integration_test", description="Integration test skill for Weaviate")
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
assert package_path.exists(), "Package not created"
|
||||
@@ -173,19 +173,16 @@ class TestWeaviateIntegration:
|
||||
with client.batch as batch:
|
||||
for obj in data["objects"]:
|
||||
batch.add_data_object(
|
||||
data_object=obj["properties"],
|
||||
class_name=class_name,
|
||||
uuid=obj["id"]
|
||||
data_object=obj["properties"], class_name=class_name, uuid=obj["id"]
|
||||
)
|
||||
|
||||
# Wait for indexing
|
||||
time.sleep(1)
|
||||
|
||||
# Query - Get all objects
|
||||
result = client.query.get(
|
||||
class_name,
|
||||
["content", "source", "category"]
|
||||
).with_limit(10).do()
|
||||
result = (
|
||||
client.query.get(class_name, ["content", "source", "category"]).with_limit(10).do()
|
||||
)
|
||||
|
||||
# Verify results
|
||||
assert "data" in result, "Query returned no data"
|
||||
@@ -203,8 +200,9 @@ class TestWeaviateIntegration:
|
||||
|
||||
# Verify content
|
||||
contents = [obj["content"] for obj in objects]
|
||||
assert any("vector" in content.lower() for content in contents), \
|
||||
assert any("vector" in content.lower() for content in contents), (
|
||||
"Expected content not found"
|
||||
)
|
||||
|
||||
finally:
|
||||
# Cleanup - Delete collection
|
||||
@@ -234,7 +232,7 @@ class TestWeaviateIntegration:
|
||||
description="Test metadata preservation",
|
||||
version="2.0.0",
|
||||
author="Integration Test Suite",
|
||||
tags=["test", "integration", "weaviate"]
|
||||
tags=["test", "integration", "weaviate"],
|
||||
)
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
@@ -249,18 +247,17 @@ class TestWeaviateIntegration:
|
||||
with client.batch as batch:
|
||||
for obj in data["objects"]:
|
||||
batch.add_data_object(
|
||||
data_object=obj["properties"],
|
||||
class_name=class_name,
|
||||
uuid=obj["id"]
|
||||
data_object=obj["properties"], class_name=class_name, uuid=obj["id"]
|
||||
)
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
# Query and verify metadata
|
||||
result = client.query.get(
|
||||
class_name,
|
||||
["source", "version", "author", "tags"]
|
||||
).with_limit(1).do()
|
||||
result = (
|
||||
client.query.get(class_name, ["source", "version", "author", "tags"])
|
||||
.with_limit(1)
|
||||
.do()
|
||||
)
|
||||
|
||||
obj = result["data"]["Get"][class_name][0]
|
||||
assert obj["source"] == "metadata_test", "Source not preserved"
|
||||
@@ -287,7 +284,9 @@ class TestChromaIntegration:
|
||||
|
||||
# Check if Chroma is running
|
||||
if not check_service_available("http://localhost:8000/api/v1/heartbeat"):
|
||||
pytest.skip("ChromaDB not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
|
||||
pytest.skip(
|
||||
"ChromaDB not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
|
||||
)
|
||||
|
||||
# Connect to ChromaDB
|
||||
try:
|
||||
@@ -299,8 +298,7 @@ class TestChromaIntegration:
|
||||
# Package skill
|
||||
adaptor = get_adaptor("chroma")
|
||||
SkillMetadata(
|
||||
name="chroma_integration_test",
|
||||
description="Integration test skill for ChromaDB"
|
||||
name="chroma_integration_test", description="Integration test skill for ChromaDB"
|
||||
)
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
@@ -326,9 +324,7 @@ class TestChromaIntegration:
|
||||
|
||||
# Add documents
|
||||
collection.add(
|
||||
documents=data["documents"],
|
||||
metadatas=data["metadatas"],
|
||||
ids=data["ids"]
|
||||
documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"]
|
||||
)
|
||||
|
||||
# Wait for indexing
|
||||
@@ -340,8 +336,7 @@ class TestChromaIntegration:
|
||||
# Verify results
|
||||
assert "documents" in results, "Query returned no documents"
|
||||
assert len(results["documents"]) > 0, "No documents returned"
|
||||
assert len(results["documents"]) == len(data["documents"]), \
|
||||
"Document count mismatch"
|
||||
assert len(results["documents"]) == len(data["documents"]), "Document count mismatch"
|
||||
|
||||
# Verify metadata
|
||||
assert "metadatas" in results, "Query returned no metadatas"
|
||||
@@ -350,8 +345,9 @@ class TestChromaIntegration:
|
||||
assert "category" in first_metadata, "Missing category in metadata"
|
||||
|
||||
# Verify content
|
||||
assert any("vector" in doc.lower() for doc in results["documents"]), \
|
||||
assert any("vector" in doc.lower() for doc in results["documents"]), (
|
||||
"Expected content not found"
|
||||
)
|
||||
|
||||
finally:
|
||||
# Cleanup - Delete collection
|
||||
@@ -377,8 +373,7 @@ class TestChromaIntegration:
|
||||
# Package and upload
|
||||
adaptor = get_adaptor("chroma")
|
||||
metadata = SkillMetadata(
|
||||
name="chroma_filter_test",
|
||||
description="Test filtering capabilities"
|
||||
name="chroma_filter_test", description="Test filtering capabilities"
|
||||
)
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
@@ -390,23 +385,18 @@ class TestChromaIntegration:
|
||||
try:
|
||||
collection = client.get_or_create_collection(name=collection_name)
|
||||
collection.add(
|
||||
documents=data["documents"],
|
||||
metadatas=data["metadatas"],
|
||||
ids=data["ids"]
|
||||
documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"]
|
||||
)
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
# Query with category filter
|
||||
results = collection.get(
|
||||
where={"category": "getting started"}
|
||||
)
|
||||
results = collection.get(where={"category": "getting started"})
|
||||
|
||||
# Verify filtering worked
|
||||
assert len(results["documents"]) > 0, "No documents matched filter"
|
||||
for metadata in results["metadatas"]:
|
||||
assert metadata["category"] == "getting started", \
|
||||
"Filter returned wrong category"
|
||||
assert metadata["category"] == "getting started", "Filter returned wrong category"
|
||||
|
||||
finally:
|
||||
with contextlib.suppress(Exception):
|
||||
@@ -428,7 +418,9 @@ class TestQdrantIntegration:
|
||||
|
||||
# Check if Qdrant is running
|
||||
if not check_service_available("http://localhost:6333/"):
|
||||
pytest.skip("Qdrant not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
|
||||
pytest.skip(
|
||||
"Qdrant not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
|
||||
)
|
||||
|
||||
# Connect to Qdrant
|
||||
try:
|
||||
@@ -440,8 +432,7 @@ class TestQdrantIntegration:
|
||||
# Package skill
|
||||
adaptor = get_adaptor("qdrant")
|
||||
SkillMetadata(
|
||||
name="qdrant_integration_test",
|
||||
description="Integration test skill for Qdrant"
|
||||
name="qdrant_integration_test", description="Integration test skill for Qdrant"
|
||||
)
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
@@ -465,25 +456,21 @@ class TestQdrantIntegration:
|
||||
# Create collection
|
||||
client.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(
|
||||
size=vector_size,
|
||||
distance=Distance.COSINE
|
||||
)
|
||||
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
|
||||
)
|
||||
|
||||
# Upload points (with placeholder vectors for testing)
|
||||
points = []
|
||||
for point in data["points"]:
|
||||
points.append(PointStruct(
|
||||
id=point["id"],
|
||||
vector=[0.0] * vector_size, # Placeholder vectors
|
||||
payload=point["payload"]
|
||||
))
|
||||
points.append(
|
||||
PointStruct(
|
||||
id=point["id"],
|
||||
vector=[0.0] * vector_size, # Placeholder vectors
|
||||
payload=point["payload"],
|
||||
)
|
||||
)
|
||||
|
||||
client.upsert(
|
||||
collection_name=collection_name,
|
||||
points=points
|
||||
)
|
||||
client.upsert(collection_name=collection_name, points=points)
|
||||
|
||||
# Wait for indexing
|
||||
time.sleep(1)
|
||||
@@ -493,14 +480,10 @@ class TestQdrantIntegration:
|
||||
|
||||
# Verify collection
|
||||
assert collection_info.points_count > 0, "No points in collection"
|
||||
assert collection_info.points_count == len(data["points"]), \
|
||||
"Point count mismatch"
|
||||
assert collection_info.points_count == len(data["points"]), "Point count mismatch"
|
||||
|
||||
# Query - Scroll through points
|
||||
scroll_result = client.scroll(
|
||||
collection_name=collection_name,
|
||||
limit=10
|
||||
)
|
||||
scroll_result = client.scroll(collection_name=collection_name, limit=10)
|
||||
|
||||
points_list = scroll_result[0]
|
||||
assert len(points_list) > 0, "No points returned"
|
||||
@@ -514,8 +497,9 @@ class TestQdrantIntegration:
|
||||
|
||||
# Verify content
|
||||
contents = [p.payload["content"] for p in points_list]
|
||||
assert any("vector" in content.lower() for content in contents), \
|
||||
assert any("vector" in content.lower() for content in contents), (
|
||||
"Expected content not found"
|
||||
)
|
||||
|
||||
finally:
|
||||
# Cleanup - Delete collection
|
||||
@@ -527,8 +511,12 @@ class TestQdrantIntegration:
|
||||
try:
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import (
|
||||
Distance, VectorParams, PointStruct,
|
||||
Filter, FieldCondition, MatchValue
|
||||
Distance,
|
||||
VectorParams,
|
||||
PointStruct,
|
||||
Filter,
|
||||
FieldCondition,
|
||||
MatchValue,
|
||||
)
|
||||
except ImportError:
|
||||
pytest.skip("qdrant-client not installed")
|
||||
@@ -544,10 +532,7 @@ class TestQdrantIntegration:
|
||||
|
||||
# Package and upload
|
||||
adaptor = get_adaptor("qdrant")
|
||||
SkillMetadata(
|
||||
name="qdrant_filter_test",
|
||||
description="Test filtering capabilities"
|
||||
)
|
||||
SkillMetadata(name="qdrant_filter_test", description="Test filtering capabilities")
|
||||
package_path = adaptor.package(sample_skill_dir, tmp_path)
|
||||
|
||||
with open(package_path) as f:
|
||||
@@ -560,19 +545,16 @@ class TestQdrantIntegration:
|
||||
# Create and upload
|
||||
client.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(
|
||||
size=vector_size,
|
||||
distance=Distance.COSINE
|
||||
)
|
||||
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
|
||||
)
|
||||
|
||||
points = []
|
||||
for point in data["points"]:
|
||||
points.append(PointStruct(
|
||||
id=point["id"],
|
||||
vector=[0.0] * vector_size,
|
||||
payload=point["payload"]
|
||||
))
|
||||
points.append(
|
||||
PointStruct(
|
||||
id=point["id"], vector=[0.0] * vector_size, payload=point["payload"]
|
||||
)
|
||||
)
|
||||
|
||||
client.upsert(collection_name=collection_name, points=points)
|
||||
time.sleep(1)
|
||||
@@ -581,14 +563,9 @@ class TestQdrantIntegration:
|
||||
scroll_result = client.scroll(
|
||||
collection_name=collection_name,
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(
|
||||
key="type",
|
||||
match=MatchValue(value="reference")
|
||||
)
|
||||
]
|
||||
must=[FieldCondition(key="type", match=MatchValue(value="reference"))]
|
||||
),
|
||||
limit=10
|
||||
limit=10,
|
||||
)
|
||||
|
||||
points_list = scroll_result[0]
|
||||
@@ -596,8 +573,7 @@ class TestQdrantIntegration:
|
||||
# Verify filtering worked
|
||||
assert len(points_list) > 0, "No points matched filter"
|
||||
for point in points_list:
|
||||
assert point.payload["type"] == "reference", \
|
||||
"Filter returned wrong type"
|
||||
assert point.payload["type"] == "reference", "Filter returned wrong type"
|
||||
|
||||
finally:
|
||||
with contextlib.suppress(Exception):
|
||||
@@ -607,4 +583,5 @@ class TestQdrantIntegration:
|
||||
if __name__ == "__main__":
|
||||
# Run integration tests
|
||||
import sys
|
||||
|
||||
sys.exit(pytest.main([__file__, "-v", "-m", "integration"]))
|
||||
|
||||
@@ -192,9 +192,7 @@ https://mikro-orm.io/docs/defining-entities#formulas
|
||||
|
||||
# Verify converted URLs are valid
|
||||
# In real scenario, these would be added to pending_urls and scraped
|
||||
self.assertTrue(
|
||||
len(converted_urls) > 0, "Should generate at least one URL to scrape"
|
||||
)
|
||||
self.assertTrue(len(converted_urls) > 0, "Should generate at least one URL to scrape")
|
||||
|
||||
# Verify no URLs would cause 404 (no anchors in middle of path)
|
||||
for url in converted_urls:
|
||||
|
||||
@@ -464,13 +464,15 @@ class TestValidateConfigTool(unittest.IsolatedAsyncioTestCase):
|
||||
valid_config = {
|
||||
"name": "valid-test",
|
||||
"description": "Test configuration",
|
||||
"sources": [{
|
||||
"type": "documentation",
|
||||
"base_url": "https://example.com/",
|
||||
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
|
||||
"rate_limit": 0.5,
|
||||
"max_pages": 100,
|
||||
}],
|
||||
"sources": [
|
||||
{
|
||||
"type": "documentation",
|
||||
"base_url": "https://example.com/",
|
||||
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
|
||||
"rate_limit": 0.5,
|
||||
"max_pages": 100,
|
||||
}
|
||||
],
|
||||
}
|
||||
with open(config_path, "w") as f:
|
||||
json.dump(valid_config, f)
|
||||
|
||||
@@ -19,10 +19,7 @@ import json
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.multilang_support import (
|
||||
LanguageDetector,
|
||||
MultiLanguageManager
|
||||
)
|
||||
from skill_seekers.cli.multilang_support import LanguageDetector, MultiLanguageManager
|
||||
|
||||
|
||||
def test_detect_english():
|
||||
@@ -32,8 +29,8 @@ def test_detect_english():
|
||||
text = "This is an English document. It contains common English words."
|
||||
lang_info = detector.detect(text)
|
||||
|
||||
assert lang_info.code == 'en'
|
||||
assert lang_info.name == 'English'
|
||||
assert lang_info.code == "en"
|
||||
assert lang_info.name == "English"
|
||||
assert lang_info.confidence > 0.0
|
||||
|
||||
|
||||
@@ -44,8 +41,8 @@ def test_detect_spanish():
|
||||
text = "Este es un documento en español. Contiene palabras comunes en español."
|
||||
lang_info = detector.detect(text)
|
||||
|
||||
assert lang_info.code == 'es'
|
||||
assert lang_info.name == 'Spanish'
|
||||
assert lang_info.code == "es"
|
||||
assert lang_info.name == "Spanish"
|
||||
|
||||
|
||||
def test_detect_french():
|
||||
@@ -55,8 +52,8 @@ def test_detect_french():
|
||||
text = "Ceci est un document en français. Il contient des mots français communs."
|
||||
lang_info = detector.detect(text)
|
||||
|
||||
assert lang_info.code == 'fr'
|
||||
assert lang_info.name == 'French'
|
||||
assert lang_info.code == "fr"
|
||||
assert lang_info.name == "French"
|
||||
|
||||
|
||||
def test_detect_german():
|
||||
@@ -66,8 +63,8 @@ def test_detect_german():
|
||||
text = "Dies ist ein deutsches Dokument. Es enthält übliche deutsche Wörter."
|
||||
lang_info = detector.detect(text)
|
||||
|
||||
assert lang_info.code == 'de'
|
||||
assert lang_info.name == 'German'
|
||||
assert lang_info.code == "de"
|
||||
assert lang_info.name == "German"
|
||||
|
||||
|
||||
def test_detect_chinese():
|
||||
@@ -77,33 +74,33 @@ def test_detect_chinese():
|
||||
text = "这是一个中文文档。它包含常见的中文字符。"
|
||||
lang_info = detector.detect(text)
|
||||
|
||||
assert lang_info.code == 'zh'
|
||||
assert lang_info.name == 'Chinese'
|
||||
assert lang_info.code == "zh"
|
||||
assert lang_info.name == "Chinese"
|
||||
|
||||
|
||||
def test_detect_from_filename_dot_pattern():
|
||||
"""Test language detection from filename (file.en.md pattern)."""
|
||||
detector = LanguageDetector()
|
||||
|
||||
assert detector.detect_from_filename("README.en.md") == 'en'
|
||||
assert detector.detect_from_filename("guide.es.md") == 'es'
|
||||
assert detector.detect_from_filename("doc.fr.md") == 'fr'
|
||||
assert detector.detect_from_filename("README.en.md") == "en"
|
||||
assert detector.detect_from_filename("guide.es.md") == "es"
|
||||
assert detector.detect_from_filename("doc.fr.md") == "fr"
|
||||
|
||||
|
||||
def test_detect_from_filename_underscore_pattern():
|
||||
"""Test language detection from filename (file_en.md pattern)."""
|
||||
detector = LanguageDetector()
|
||||
|
||||
assert detector.detect_from_filename("README_en.md") == 'en'
|
||||
assert detector.detect_from_filename("guide_es.md") == 'es'
|
||||
assert detector.detect_from_filename("README_en.md") == "en"
|
||||
assert detector.detect_from_filename("guide_es.md") == "es"
|
||||
|
||||
|
||||
def test_detect_from_filename_dash_pattern():
|
||||
"""Test language detection from filename (file-en.md pattern)."""
|
||||
detector = LanguageDetector()
|
||||
|
||||
assert detector.detect_from_filename("README-en.md") == 'en'
|
||||
assert detector.detect_from_filename("guide-es.md") == 'es'
|
||||
assert detector.detect_from_filename("README-en.md") == "en"
|
||||
assert detector.detect_from_filename("guide-es.md") == "es"
|
||||
|
||||
|
||||
def test_detect_from_filename_no_match():
|
||||
@@ -118,15 +115,11 @@ def test_add_document_single_language():
|
||||
"""Test adding documents in single language."""
|
||||
manager = MultiLanguageManager()
|
||||
|
||||
manager.add_document(
|
||||
"README.md",
|
||||
"This is an English document.",
|
||||
{"category": "overview"}
|
||||
)
|
||||
manager.add_document("README.md", "This is an English document.", {"category": "overview"})
|
||||
|
||||
assert len(manager.get_languages()) == 1
|
||||
assert 'en' in manager.get_languages()
|
||||
assert manager.get_document_count('en') == 1
|
||||
assert "en" in manager.get_languages()
|
||||
assert manager.get_document_count("en") == 1
|
||||
|
||||
|
||||
def test_add_document_multiple_languages():
|
||||
@@ -138,9 +131,9 @@ def test_add_document_multiple_languages():
|
||||
manager.add_document("README.fr.md", "Ceci est français.", {})
|
||||
|
||||
assert len(manager.get_languages()) == 3
|
||||
assert 'en' in manager.get_languages()
|
||||
assert 'es' in manager.get_languages()
|
||||
assert 'fr' in manager.get_languages()
|
||||
assert "en" in manager.get_languages()
|
||||
assert "es" in manager.get_languages()
|
||||
assert "fr" in manager.get_languages()
|
||||
|
||||
|
||||
def test_force_language():
|
||||
@@ -148,15 +141,10 @@ def test_force_language():
|
||||
manager = MultiLanguageManager()
|
||||
|
||||
# Force Spanish despite English content
|
||||
manager.add_document(
|
||||
"file.md",
|
||||
"This is actually English content.",
|
||||
{},
|
||||
force_language='es'
|
||||
)
|
||||
manager.add_document("file.md", "This is actually English content.", {}, force_language="es")
|
||||
|
||||
assert 'es' in manager.get_languages()
|
||||
assert manager.get_document_count('es') == 1
|
||||
assert "es" in manager.get_languages()
|
||||
assert manager.get_document_count("es") == 1
|
||||
|
||||
|
||||
def test_filename_language_priority():
|
||||
@@ -164,14 +152,10 @@ def test_filename_language_priority():
|
||||
manager = MultiLanguageManager()
|
||||
|
||||
# Filename says Spanish, but content is English
|
||||
manager.add_document(
|
||||
"guide.es.md",
|
||||
"This is English content.",
|
||||
{}
|
||||
)
|
||||
manager.add_document("guide.es.md", "This is English content.", {})
|
||||
|
||||
# Should use filename language
|
||||
assert 'es' in manager.get_languages()
|
||||
assert "es" in manager.get_languages()
|
||||
|
||||
|
||||
def test_document_count_all():
|
||||
@@ -183,8 +167,8 @@ def test_document_count_all():
|
||||
manager.add_document("file3.es.md", "Spanish doc", {})
|
||||
|
||||
assert manager.get_document_count() == 3
|
||||
assert manager.get_document_count('en') == 2
|
||||
assert manager.get_document_count('es') == 1
|
||||
assert manager.get_document_count("en") == 2
|
||||
assert manager.get_document_count("es") == 1
|
||||
|
||||
|
||||
def test_primary_language():
|
||||
@@ -195,7 +179,7 @@ def test_primary_language():
|
||||
manager.add_document("file2.es.md", "Spanish doc", {})
|
||||
|
||||
# Primary should be first added
|
||||
assert manager.primary_language == 'en'
|
||||
assert manager.primary_language == "en"
|
||||
|
||||
|
||||
def test_translation_status():
|
||||
@@ -208,9 +192,9 @@ def test_translation_status():
|
||||
|
||||
status = manager.get_translation_status()
|
||||
|
||||
assert status.source_language == 'en'
|
||||
assert 'es' in status.translated_languages
|
||||
assert 'fr' in status.translated_languages
|
||||
assert status.source_language == "en"
|
||||
assert "es" in status.translated_languages
|
||||
assert "fr" in status.translated_languages
|
||||
assert len(status.translated_languages) == 2
|
||||
|
||||
|
||||
@@ -225,17 +209,17 @@ def test_export_by_language():
|
||||
exports = manager.export_by_language(Path(tmpdir))
|
||||
|
||||
assert len(exports) == 2
|
||||
assert 'en' in exports
|
||||
assert 'es' in exports
|
||||
assert "en" in exports
|
||||
assert "es" in exports
|
||||
|
||||
# Check files exist
|
||||
assert exports['en'].exists()
|
||||
assert exports['es'].exists()
|
||||
assert exports["en"].exists()
|
||||
assert exports["es"].exists()
|
||||
|
||||
# Check content
|
||||
en_data = json.loads(exports['en'].read_text())
|
||||
assert en_data['language'] == 'en'
|
||||
assert en_data['document_count'] == 1
|
||||
en_data = json.loads(exports["en"].read_text())
|
||||
assert en_data["language"] == "en"
|
||||
assert en_data["document_count"] == 1
|
||||
|
||||
|
||||
def test_translation_report_generation():
|
||||
@@ -268,11 +252,11 @@ def test_script_detection():
|
||||
|
||||
# English uses Latin script
|
||||
en_info = detector.detect("This is English")
|
||||
assert en_info.script == 'Latin'
|
||||
assert en_info.script == "Latin"
|
||||
|
||||
# Chinese uses Han script
|
||||
zh_info = detector.detect("这是中文")
|
||||
assert zh_info.script == 'Han'
|
||||
assert zh_info.script == "Han"
|
||||
|
||||
|
||||
def test_confidence_scoring():
|
||||
@@ -283,7 +267,7 @@ def test_confidence_scoring():
|
||||
strong_en = "The quick brown fox jumps over the lazy dog. This is clearly English."
|
||||
lang_info = detector.detect(strong_en)
|
||||
|
||||
assert lang_info.code == 'en'
|
||||
assert lang_info.code == "en"
|
||||
assert lang_info.confidence > 0.3 # Should have decent confidence
|
||||
|
||||
|
||||
@@ -294,9 +278,9 @@ def test_metadata_preservation():
|
||||
metadata = {"category": "guide", "version": "1.0"}
|
||||
manager.add_document("file.md", "English content", metadata)
|
||||
|
||||
docs = manager.documents['en']
|
||||
docs = manager.documents["en"]
|
||||
assert len(docs) == 1
|
||||
assert docs[0]['metadata'] == metadata
|
||||
assert docs[0]["metadata"] == metadata
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -14,9 +14,9 @@ class TestPresetDefinitions:
|
||||
|
||||
def test_all_presets_defined(self):
|
||||
"""Test that all expected presets are defined."""
|
||||
assert 'quick' in PRESETS
|
||||
assert 'standard' in PRESETS
|
||||
assert 'comprehensive' in PRESETS
|
||||
assert "quick" in PRESETS
|
||||
assert "standard" in PRESETS
|
||||
assert "comprehensive" in PRESETS
|
||||
assert len(PRESETS) == 3
|
||||
|
||||
def test_preset_structure(self):
|
||||
@@ -25,7 +25,7 @@ class TestPresetDefinitions:
|
||||
assert isinstance(preset, AnalysisPreset)
|
||||
assert preset.name
|
||||
assert preset.description
|
||||
assert preset.depth in ['surface', 'deep', 'full']
|
||||
assert preset.depth in ["surface", "deep", "full"]
|
||||
assert isinstance(preset.features, dict)
|
||||
assert 0 <= preset.enhance_level <= 3
|
||||
assert preset.estimated_time
|
||||
@@ -33,45 +33,45 @@ class TestPresetDefinitions:
|
||||
|
||||
def test_quick_preset(self):
|
||||
"""Test quick preset configuration."""
|
||||
quick = PRESETS['quick']
|
||||
assert quick.name == 'Quick'
|
||||
assert quick.depth == 'surface'
|
||||
quick = PRESETS["quick"]
|
||||
assert quick.name == "Quick"
|
||||
assert quick.depth == "surface"
|
||||
assert quick.enhance_level == 0
|
||||
assert quick.estimated_time == '1-2 minutes'
|
||||
assert quick.icon == '⚡'
|
||||
assert quick.estimated_time == "1-2 minutes"
|
||||
assert quick.icon == "⚡"
|
||||
# Quick should disable slow features
|
||||
assert quick.features['api_reference'] # Essential
|
||||
assert not quick.features['dependency_graph'] # Slow
|
||||
assert not quick.features['patterns'] # Slow
|
||||
assert not quick.features['test_examples'] # Slow
|
||||
assert not quick.features['how_to_guides'] # Requires AI
|
||||
assert quick.features['docs'] # Essential
|
||||
assert quick.features["api_reference"] # Essential
|
||||
assert not quick.features["dependency_graph"] # Slow
|
||||
assert not quick.features["patterns"] # Slow
|
||||
assert not quick.features["test_examples"] # Slow
|
||||
assert not quick.features["how_to_guides"] # Requires AI
|
||||
assert quick.features["docs"] # Essential
|
||||
|
||||
def test_standard_preset(self):
|
||||
"""Test standard preset configuration."""
|
||||
standard = PRESETS['standard']
|
||||
assert standard.name == 'Standard'
|
||||
assert standard.depth == 'deep'
|
||||
standard = PRESETS["standard"]
|
||||
assert standard.name == "Standard"
|
||||
assert standard.depth == "deep"
|
||||
assert standard.enhance_level == 1
|
||||
assert standard.estimated_time == '5-10 minutes'
|
||||
assert standard.icon == '🎯'
|
||||
assert standard.estimated_time == "5-10 minutes"
|
||||
assert standard.icon == "🎯"
|
||||
# Standard should enable core features
|
||||
assert standard.features['api_reference']
|
||||
assert standard.features['dependency_graph']
|
||||
assert standard.features['patterns']
|
||||
assert standard.features['test_examples']
|
||||
assert not standard.features['how_to_guides'] # Slow
|
||||
assert standard.features['config_patterns']
|
||||
assert standard.features['docs']
|
||||
assert standard.features["api_reference"]
|
||||
assert standard.features["dependency_graph"]
|
||||
assert standard.features["patterns"]
|
||||
assert standard.features["test_examples"]
|
||||
assert not standard.features["how_to_guides"] # Slow
|
||||
assert standard.features["config_patterns"]
|
||||
assert standard.features["docs"]
|
||||
|
||||
def test_comprehensive_preset(self):
|
||||
"""Test comprehensive preset configuration."""
|
||||
comprehensive = PRESETS['comprehensive']
|
||||
assert comprehensive.name == 'Comprehensive'
|
||||
assert comprehensive.depth == 'full'
|
||||
comprehensive = PRESETS["comprehensive"]
|
||||
assert comprehensive.name == "Comprehensive"
|
||||
assert comprehensive.depth == "full"
|
||||
assert comprehensive.enhance_level == 3
|
||||
assert comprehensive.estimated_time == '20-60 minutes'
|
||||
assert comprehensive.icon == '🚀'
|
||||
assert comprehensive.estimated_time == "20-60 minutes"
|
||||
assert comprehensive.icon == "🚀"
|
||||
# Comprehensive should enable ALL features
|
||||
assert all(comprehensive.features.values())
|
||||
|
||||
@@ -81,44 +81,44 @@ class TestPresetManager:
|
||||
|
||||
def test_get_preset(self):
|
||||
"""Test PresetManager.get_preset()."""
|
||||
quick = PresetManager.get_preset('quick')
|
||||
quick = PresetManager.get_preset("quick")
|
||||
assert quick is not None
|
||||
assert quick.name == 'Quick'
|
||||
assert quick.depth == 'surface'
|
||||
assert quick.name == "Quick"
|
||||
assert quick.depth == "surface"
|
||||
|
||||
# Case insensitive
|
||||
standard = PresetManager.get_preset('STANDARD')
|
||||
standard = PresetManager.get_preset("STANDARD")
|
||||
assert standard is not None
|
||||
assert standard.name == 'Standard'
|
||||
assert standard.name == "Standard"
|
||||
|
||||
def test_get_preset_invalid(self):
|
||||
"""Test PresetManager.get_preset() with invalid name."""
|
||||
invalid = PresetManager.get_preset('nonexistent')
|
||||
invalid = PresetManager.get_preset("nonexistent")
|
||||
assert invalid is None
|
||||
|
||||
def test_list_presets(self):
|
||||
"""Test PresetManager.list_presets()."""
|
||||
presets = PresetManager.list_presets()
|
||||
assert len(presets) == 3
|
||||
assert 'quick' in presets
|
||||
assert 'standard' in presets
|
||||
assert 'comprehensive' in presets
|
||||
assert "quick" in presets
|
||||
assert "standard" in presets
|
||||
assert "comprehensive" in presets
|
||||
|
||||
def test_format_preset_help(self):
|
||||
"""Test PresetManager.format_preset_help()."""
|
||||
help_text = PresetManager.format_preset_help()
|
||||
assert 'Available presets:' in help_text
|
||||
assert '⚡ quick' in help_text
|
||||
assert '🎯 standard' in help_text
|
||||
assert '🚀 comprehensive' in help_text
|
||||
assert '1-2 minutes' in help_text
|
||||
assert '5-10 minutes' in help_text
|
||||
assert '20-60 minutes' in help_text
|
||||
assert "Available presets:" in help_text
|
||||
assert "⚡ quick" in help_text
|
||||
assert "🎯 standard" in help_text
|
||||
assert "🚀 comprehensive" in help_text
|
||||
assert "1-2 minutes" in help_text
|
||||
assert "5-10 minutes" in help_text
|
||||
assert "20-60 minutes" in help_text
|
||||
|
||||
def test_get_default_preset(self):
|
||||
"""Test PresetManager.get_default_preset()."""
|
||||
default = PresetManager.get_default_preset()
|
||||
assert default == 'standard'
|
||||
assert default == "standard"
|
||||
|
||||
|
||||
class TestPresetApplication:
|
||||
@@ -126,85 +126,85 @@ class TestPresetApplication:
|
||||
|
||||
def test_apply_preset_quick(self):
|
||||
"""Test applying quick preset."""
|
||||
args = {'directory': '/tmp/test'}
|
||||
updated = PresetManager.apply_preset('quick', args)
|
||||
args = {"directory": "/tmp/test"}
|
||||
updated = PresetManager.apply_preset("quick", args)
|
||||
|
||||
assert updated['depth'] == 'surface'
|
||||
assert updated['enhance_level'] == 0
|
||||
assert updated['skip_patterns'] # Quick disables patterns
|
||||
assert updated['skip_dependency_graph'] # Quick disables dep graph
|
||||
assert updated['skip_test_examples'] # Quick disables tests
|
||||
assert updated['skip_how_to_guides'] # Quick disables guides
|
||||
assert not updated['skip_api_reference'] # Quick enables API ref
|
||||
assert not updated['skip_docs'] # Quick enables docs
|
||||
assert updated["depth"] == "surface"
|
||||
assert updated["enhance_level"] == 0
|
||||
assert updated["skip_patterns"] # Quick disables patterns
|
||||
assert updated["skip_dependency_graph"] # Quick disables dep graph
|
||||
assert updated["skip_test_examples"] # Quick disables tests
|
||||
assert updated["skip_how_to_guides"] # Quick disables guides
|
||||
assert not updated["skip_api_reference"] # Quick enables API ref
|
||||
assert not updated["skip_docs"] # Quick enables docs
|
||||
|
||||
def test_apply_preset_standard(self):
|
||||
"""Test applying standard preset."""
|
||||
args = {'directory': '/tmp/test'}
|
||||
updated = PresetManager.apply_preset('standard', args)
|
||||
args = {"directory": "/tmp/test"}
|
||||
updated = PresetManager.apply_preset("standard", args)
|
||||
|
||||
assert updated['depth'] == 'deep'
|
||||
assert updated['enhance_level'] == 1
|
||||
assert not updated['skip_patterns'] # Standard enables patterns
|
||||
assert not updated['skip_dependency_graph'] # Standard enables dep graph
|
||||
assert not updated['skip_test_examples'] # Standard enables tests
|
||||
assert updated['skip_how_to_guides'] # Standard disables guides (slow)
|
||||
assert not updated['skip_api_reference'] # Standard enables API ref
|
||||
assert not updated['skip_docs'] # Standard enables docs
|
||||
assert updated["depth"] == "deep"
|
||||
assert updated["enhance_level"] == 1
|
||||
assert not updated["skip_patterns"] # Standard enables patterns
|
||||
assert not updated["skip_dependency_graph"] # Standard enables dep graph
|
||||
assert not updated["skip_test_examples"] # Standard enables tests
|
||||
assert updated["skip_how_to_guides"] # Standard disables guides (slow)
|
||||
assert not updated["skip_api_reference"] # Standard enables API ref
|
||||
assert not updated["skip_docs"] # Standard enables docs
|
||||
|
||||
def test_apply_preset_comprehensive(self):
|
||||
"""Test applying comprehensive preset."""
|
||||
args = {'directory': '/tmp/test'}
|
||||
updated = PresetManager.apply_preset('comprehensive', args)
|
||||
args = {"directory": "/tmp/test"}
|
||||
updated = PresetManager.apply_preset("comprehensive", args)
|
||||
|
||||
assert updated['depth'] == 'full'
|
||||
assert updated['enhance_level'] == 3
|
||||
assert updated["depth"] == "full"
|
||||
assert updated["enhance_level"] == 3
|
||||
# Comprehensive enables ALL features
|
||||
assert not updated['skip_patterns']
|
||||
assert not updated['skip_dependency_graph']
|
||||
assert not updated['skip_test_examples']
|
||||
assert not updated['skip_how_to_guides']
|
||||
assert not updated['skip_api_reference']
|
||||
assert not updated['skip_config_patterns']
|
||||
assert not updated['skip_docs']
|
||||
assert not updated["skip_patterns"]
|
||||
assert not updated["skip_dependency_graph"]
|
||||
assert not updated["skip_test_examples"]
|
||||
assert not updated["skip_how_to_guides"]
|
||||
assert not updated["skip_api_reference"]
|
||||
assert not updated["skip_config_patterns"]
|
||||
assert not updated["skip_docs"]
|
||||
|
||||
def test_cli_overrides_preset(self):
|
||||
"""Test that CLI args override preset defaults."""
|
||||
args = {
|
||||
'directory': '/tmp/test',
|
||||
'enhance_level': 2, # Override preset default
|
||||
'skip_patterns': False # Override preset default
|
||||
"directory": "/tmp/test",
|
||||
"enhance_level": 2, # Override preset default
|
||||
"skip_patterns": False, # Override preset default
|
||||
}
|
||||
|
||||
updated = PresetManager.apply_preset('quick', args)
|
||||
updated = PresetManager.apply_preset("quick", args)
|
||||
|
||||
# Preset says enhance_level=0, but CLI said 2
|
||||
assert updated['enhance_level'] == 2 # CLI wins
|
||||
assert updated["enhance_level"] == 2 # CLI wins
|
||||
|
||||
# Preset says skip_patterns=True (disabled), but CLI said False (enabled)
|
||||
assert not updated['skip_patterns'] # CLI wins
|
||||
assert not updated["skip_patterns"] # CLI wins
|
||||
|
||||
def test_apply_preset_preserves_args(self):
|
||||
"""Test that apply_preset preserves existing args."""
|
||||
args = {
|
||||
'directory': '/tmp/test',
|
||||
'output': 'custom_output/',
|
||||
'languages': 'Python,JavaScript'
|
||||
"directory": "/tmp/test",
|
||||
"output": "custom_output/",
|
||||
"languages": "Python,JavaScript",
|
||||
}
|
||||
|
||||
updated = PresetManager.apply_preset('standard', args)
|
||||
updated = PresetManager.apply_preset("standard", args)
|
||||
|
||||
# Existing args should be preserved
|
||||
assert updated['directory'] == '/tmp/test'
|
||||
assert updated['output'] == 'custom_output/'
|
||||
assert updated['languages'] == 'Python,JavaScript'
|
||||
assert updated["directory"] == "/tmp/test"
|
||||
assert updated["output"] == "custom_output/"
|
||||
assert updated["languages"] == "Python,JavaScript"
|
||||
|
||||
def test_apply_preset_invalid(self):
|
||||
"""Test applying invalid preset raises error."""
|
||||
args = {'directory': '/tmp/test'}
|
||||
args = {"directory": "/tmp/test"}
|
||||
|
||||
with pytest.raises(ValueError, match="Unknown preset: nonexistent"):
|
||||
PresetManager.apply_preset('nonexistent', args)
|
||||
PresetManager.apply_preset("nonexistent", args)
|
||||
|
||||
|
||||
class TestDeprecationWarnings:
|
||||
@@ -215,12 +215,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=True,
|
||||
comprehensive=False,
|
||||
depth=None,
|
||||
ai_mode='auto'
|
||||
)
|
||||
args = argparse.Namespace(quick=True, comprehensive=False, depth=None, ai_mode="auto")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -235,12 +230,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=False,
|
||||
comprehensive=True,
|
||||
depth=None,
|
||||
ai_mode='auto'
|
||||
)
|
||||
args = argparse.Namespace(quick=False, comprehensive=True, depth=None, ai_mode="auto")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -255,12 +245,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=False,
|
||||
comprehensive=False,
|
||||
depth='full',
|
||||
ai_mode='auto'
|
||||
)
|
||||
args = argparse.Namespace(quick=False, comprehensive=False, depth="full", ai_mode="auto")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -275,12 +260,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=False,
|
||||
comprehensive=False,
|
||||
depth=None,
|
||||
ai_mode='api'
|
||||
)
|
||||
args = argparse.Namespace(quick=False, comprehensive=False, depth=None, ai_mode="api")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -295,12 +275,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=True,
|
||||
comprehensive=False,
|
||||
depth='surface',
|
||||
ai_mode='local'
|
||||
)
|
||||
args = argparse.Namespace(quick=True, comprehensive=False, depth="surface", ai_mode="local")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -317,12 +292,7 @@ class TestDeprecationWarnings:
|
||||
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
|
||||
import argparse
|
||||
|
||||
args = argparse.Namespace(
|
||||
quick=False,
|
||||
comprehensive=False,
|
||||
depth=None,
|
||||
ai_mode='auto'
|
||||
)
|
||||
args = argparse.Namespace(quick=False, comprehensive=False, depth=None, ai_mode="auto")
|
||||
|
||||
_check_deprecated_flags(args)
|
||||
|
||||
@@ -337,31 +307,31 @@ class TestBackwardCompatibility:
|
||||
def test_old_flags_still_work(self):
|
||||
"""Test that old flags still work (with warnings)."""
|
||||
# --quick flag
|
||||
args = {'quick': True}
|
||||
updated = PresetManager.apply_preset('quick', args)
|
||||
assert updated['depth'] == 'surface'
|
||||
args = {"quick": True}
|
||||
updated = PresetManager.apply_preset("quick", args)
|
||||
assert updated["depth"] == "surface"
|
||||
|
||||
# --comprehensive flag
|
||||
args = {'comprehensive': True}
|
||||
updated = PresetManager.apply_preset('comprehensive', args)
|
||||
assert updated['depth'] == 'full'
|
||||
args = {"comprehensive": True}
|
||||
updated = PresetManager.apply_preset("comprehensive", args)
|
||||
assert updated["depth"] == "full"
|
||||
|
||||
def test_preset_flag_preferred(self):
|
||||
"""Test that --preset flag is the recommended way."""
|
||||
# Using --preset quick
|
||||
args = {'preset': 'quick'}
|
||||
updated = PresetManager.apply_preset('quick', args)
|
||||
assert updated['depth'] == 'surface'
|
||||
args = {"preset": "quick"}
|
||||
updated = PresetManager.apply_preset("quick", args)
|
||||
assert updated["depth"] == "surface"
|
||||
|
||||
# Using --preset standard
|
||||
args = {'preset': 'standard'}
|
||||
updated = PresetManager.apply_preset('standard', args)
|
||||
assert updated['depth'] == 'deep'
|
||||
args = {"preset": "standard"}
|
||||
updated = PresetManager.apply_preset("standard", args)
|
||||
assert updated["depth"] == "deep"
|
||||
|
||||
# Using --preset comprehensive
|
||||
args = {'preset': 'comprehensive'}
|
||||
updated = PresetManager.apply_preset('comprehensive', args)
|
||||
assert updated['depth'] == 'full'
|
||||
args = {"preset": "comprehensive"}
|
||||
updated = PresetManager.apply_preset("comprehensive", args)
|
||||
assert updated["depth"] == "full"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -19,10 +19,7 @@ import tempfile
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.quality_metrics import (
|
||||
QualityAnalyzer,
|
||||
MetricLevel
|
||||
)
|
||||
from skill_seekers.cli.quality_metrics import QualityAnalyzer, MetricLevel
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -176,9 +173,9 @@ def test_calculate_statistics(complete_skill_dir):
|
||||
analyzer = QualityAnalyzer(complete_skill_dir)
|
||||
stats = analyzer.calculate_statistics()
|
||||
|
||||
assert stats['total_files'] > 0
|
||||
assert stats['markdown_files'] > 0
|
||||
assert stats['total_words'] > 0
|
||||
assert stats["total_files"] > 0
|
||||
assert stats["markdown_files"] > 0
|
||||
assert stats["total_words"] > 0
|
||||
|
||||
|
||||
def test_overall_score_calculation():
|
||||
@@ -197,9 +194,7 @@ def test_overall_score_calculation():
|
||||
coverage = 70.0
|
||||
health = 85.0
|
||||
|
||||
overall = analyzer.calculate_overall_score(
|
||||
completeness, accuracy, coverage, health
|
||||
)
|
||||
overall = analyzer.calculate_overall_score(completeness, accuracy, coverage, health)
|
||||
|
||||
assert overall.completeness == 80.0
|
||||
assert overall.accuracy == 90.0
|
||||
@@ -218,13 +213,13 @@ def test_grade_assignment():
|
||||
|
||||
# Test various scores
|
||||
score_95 = analyzer.calculate_overall_score(95, 95, 95, 95)
|
||||
assert score_95.grade == 'A+'
|
||||
assert score_95.grade == "A+"
|
||||
|
||||
score_85 = analyzer.calculate_overall_score(85, 85, 85, 85)
|
||||
assert score_85.grade in ['A-', 'B+']
|
||||
assert score_85.grade in ["A-", "B+"]
|
||||
|
||||
score_70 = analyzer.calculate_overall_score(70, 70, 70, 70)
|
||||
assert score_70.grade in ['B-', 'C+', 'C']
|
||||
assert score_70.grade in ["B-", "C+", "C"]
|
||||
|
||||
|
||||
def test_generate_recommendations():
|
||||
@@ -240,7 +235,7 @@ def test_generate_recommendations():
|
||||
recommendations = analyzer.generate_recommendations(score)
|
||||
|
||||
assert len(recommendations) > 0
|
||||
assert any('completeness' in r.lower() for r in recommendations)
|
||||
assert any("completeness" in r.lower() for r in recommendations)
|
||||
|
||||
|
||||
def test_generate_report(complete_skill_dir):
|
||||
|
||||
@@ -28,7 +28,7 @@ class TestRAGChunker:
|
||||
chunk_overlap=100,
|
||||
preserve_code_blocks=False,
|
||||
preserve_paragraphs=False,
|
||||
min_chunk_size=50
|
||||
min_chunk_size=50,
|
||||
)
|
||||
|
||||
assert chunker.chunk_size == 1024
|
||||
@@ -180,13 +180,17 @@ class TestRAGChunker:
|
||||
|
||||
# Create SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text("# Main Skill\n\nThis is the main skill content.\n\nWith multiple paragraphs.")
|
||||
skill_md.write_text(
|
||||
"# Main Skill\n\nThis is the main skill content.\n\nWith multiple paragraphs."
|
||||
)
|
||||
|
||||
# Create references directory with files
|
||||
references_dir = skill_dir / "references"
|
||||
references_dir.mkdir()
|
||||
|
||||
(references_dir / "getting_started.md").write_text("# Getting Started\n\nQuick start guide.")
|
||||
(references_dir / "getting_started.md").write_text(
|
||||
"# Getting Started\n\nQuick start guide."
|
||||
)
|
||||
(references_dir / "api.md").write_text("# API Reference\n\nAPI documentation.")
|
||||
|
||||
# Chunk skill
|
||||
@@ -209,7 +213,7 @@ class TestRAGChunker:
|
||||
{
|
||||
"chunk_id": "test_0",
|
||||
"page_content": "Test content",
|
||||
"metadata": {"source": "test", "chunk_index": 0}
|
||||
"metadata": {"source": "test", "chunk_index": 0},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -340,7 +344,7 @@ class TestRAGChunker:
|
||||
metadata = {
|
||||
"source": "react-docs",
|
||||
"category": "hooks",
|
||||
"url": "https://react.dev/reference/react"
|
||||
"url": "https://react.dev/reference/react",
|
||||
}
|
||||
|
||||
chunks = chunker.chunk_document(text, metadata)
|
||||
@@ -379,10 +383,7 @@ class TestRAGChunkerIntegration:
|
||||
|
||||
# Convert to LangChain Documents
|
||||
docs = [
|
||||
Document(
|
||||
page_content=chunk["page_content"],
|
||||
metadata=chunk["metadata"]
|
||||
)
|
||||
Document(page_content=chunk["page_content"], metadata=chunk["metadata"])
|
||||
for chunk in chunks
|
||||
]
|
||||
|
||||
@@ -407,11 +408,7 @@ class TestRAGChunkerIntegration:
|
||||
|
||||
# Convert to LlamaIndex TextNodes
|
||||
nodes = [
|
||||
TextNode(
|
||||
text=chunk["page_content"],
|
||||
metadata=chunk["metadata"],
|
||||
id_=chunk["chunk_id"]
|
||||
)
|
||||
TextNode(text=chunk["page_content"], metadata=chunk["metadata"], id_=chunk["chunk_id"])
|
||||
for chunk in chunks
|
||||
]
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ pytest.importorskip("mcp.server")
|
||||
# Check if starlette is available
|
||||
try:
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
STARLETTE_AVAILABLE = True
|
||||
except ImportError:
|
||||
STARLETTE_AVAILABLE = False
|
||||
@@ -21,8 +22,7 @@ from skill_seekers.mcp.server_fastmcp import mcp
|
||||
|
||||
# Skip all tests if starlette is not installed
|
||||
pytestmark = pytest.mark.skipif(
|
||||
not STARLETTE_AVAILABLE,
|
||||
reason="starlette not installed (pip install starlette httpx)"
|
||||
not STARLETTE_AVAILABLE, reason="starlette not installed (pip install starlette httpx)"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -18,10 +18,7 @@ import tempfile
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.streaming_ingest import (
|
||||
StreamingIngester,
|
||||
IngestionProgress
|
||||
)
|
||||
from skill_seekers.cli.streaming_ingest import StreamingIngester, IngestionProgress
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -158,11 +155,13 @@ def test_progress_tracking(temp_skill_dir):
|
||||
progress_updates = []
|
||||
|
||||
def callback(progress: IngestionProgress):
|
||||
progress_updates.append({
|
||||
"processed_docs": progress.processed_documents,
|
||||
"processed_chunks": progress.processed_chunks,
|
||||
"percent": progress.progress_percent
|
||||
})
|
||||
progress_updates.append(
|
||||
{
|
||||
"processed_docs": progress.processed_documents,
|
||||
"processed_chunks": progress.processed_chunks,
|
||||
"percent": progress.progress_percent,
|
||||
}
|
||||
)
|
||||
|
||||
list(ingester.stream_skill_directory(temp_skill_dir, callback=callback))
|
||||
|
||||
@@ -171,7 +170,9 @@ def test_progress_tracking(temp_skill_dir):
|
||||
|
||||
# Progress should increase
|
||||
for i in range(len(progress_updates) - 1):
|
||||
assert progress_updates[i + 1]["processed_chunks"] >= progress_updates[i]["processed_chunks"]
|
||||
assert (
|
||||
progress_updates[i + 1]["processed_chunks"] >= progress_updates[i]["processed_chunks"]
|
||||
)
|
||||
|
||||
|
||||
def test_checkpoint_save_load():
|
||||
@@ -189,7 +190,7 @@ def test_checkpoint_save_load():
|
||||
processed_chunks=50,
|
||||
failed_chunks=2,
|
||||
bytes_processed=10000,
|
||||
start_time=1234567890.0
|
||||
start_time=1234567890.0,
|
||||
)
|
||||
|
||||
# Save checkpoint
|
||||
@@ -215,7 +216,7 @@ def test_format_progress():
|
||||
processed_chunks=50,
|
||||
failed_chunks=0,
|
||||
bytes_processed=10000,
|
||||
start_time=0.0
|
||||
start_time=0.0,
|
||||
)
|
||||
|
||||
progress_str = ingester.format_progress()
|
||||
@@ -245,17 +246,19 @@ def test_chunk_size_validation():
|
||||
|
||||
# Small chunks
|
||||
ingester_small = StreamingIngester(chunk_size=100, chunk_overlap=10)
|
||||
chunks_small = list(ingester_small.chunk_document(
|
||||
content,
|
||||
{"source": "test", "file": "test.md", "category": "test"}
|
||||
))
|
||||
chunks_small = list(
|
||||
ingester_small.chunk_document(
|
||||
content, {"source": "test", "file": "test.md", "category": "test"}
|
||||
)
|
||||
)
|
||||
|
||||
# Large chunks
|
||||
ingester_large = StreamingIngester(chunk_size=500, chunk_overlap=50)
|
||||
chunks_large = list(ingester_large.chunk_document(
|
||||
content,
|
||||
{"source": "test", "file": "test.md", "category": "test"}
|
||||
))
|
||||
chunks_large = list(
|
||||
ingester_large.chunk_document(
|
||||
content, {"source": "test", "file": "test.md", "category": "test"}
|
||||
)
|
||||
)
|
||||
|
||||
# Smaller chunk size should create more chunks
|
||||
assert len(chunks_small) > len(chunks_large)
|
||||
|
||||
@@ -21,9 +21,9 @@ def sample_chroma_package(tmp_path):
|
||||
"metadatas": [
|
||||
{"source": "test", "category": "overview", "file": "SKILL.md"},
|
||||
{"source": "test", "category": "api", "file": "API.md"},
|
||||
{"source": "test", "category": "guide", "file": "GUIDE.md"}
|
||||
{"source": "test", "category": "guide", "file": "GUIDE.md"},
|
||||
],
|
||||
"ids": ["id1", "id2", "id3"]
|
||||
"ids": ["id1", "id2", "id3"],
|
||||
}
|
||||
|
||||
package_path = tmp_path / "test-chroma.json"
|
||||
@@ -43,8 +43,8 @@ def sample_weaviate_package(tmp_path):
|
||||
"properties": [
|
||||
{"name": "content", "dataType": ["text"]},
|
||||
{"name": "source", "dataType": ["string"]},
|
||||
{"name": "category", "dataType": ["string"]}
|
||||
]
|
||||
{"name": "category", "dataType": ["string"]},
|
||||
],
|
||||
},
|
||||
"objects": [
|
||||
{
|
||||
@@ -52,18 +52,14 @@ def sample_weaviate_package(tmp_path):
|
||||
"properties": {
|
||||
"content": "Test content 1",
|
||||
"source": "test",
|
||||
"category": "overview"
|
||||
}
|
||||
"category": "overview",
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "00000000-0000-0000-0000-000000000002",
|
||||
"properties": {
|
||||
"content": "Test content 2",
|
||||
"source": "test",
|
||||
"category": "api"
|
||||
}
|
||||
}
|
||||
]
|
||||
"properties": {"content": "Test content 2", "source": "test", "category": "api"},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
package_path = tmp_path / "test-weaviate.json"
|
||||
@@ -76,40 +72,41 @@ class TestChromaUploadBasics:
|
||||
|
||||
def test_chroma_adaptor_exists(self):
|
||||
"""Test that ChromaDB adaptor can be loaded."""
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
assert adaptor is not None
|
||||
assert adaptor.PLATFORM == 'chroma'
|
||||
assert adaptor.PLATFORM == "chroma"
|
||||
|
||||
def test_chroma_upload_without_chromadb_installed(self, sample_chroma_package):
|
||||
"""Test upload fails gracefully without chromadb installed."""
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
|
||||
# Temporarily remove chromadb if it exists
|
||||
import sys
|
||||
chromadb_backup = sys.modules.get('chromadb')
|
||||
if 'chromadb' in sys.modules:
|
||||
del sys.modules['chromadb']
|
||||
|
||||
chromadb_backup = sys.modules.get("chromadb")
|
||||
if "chromadb" in sys.modules:
|
||||
del sys.modules["chromadb"]
|
||||
|
||||
try:
|
||||
result = adaptor.upload(sample_chroma_package)
|
||||
|
||||
assert result['success'] is False
|
||||
assert 'chromadb not installed' in result['message']
|
||||
assert 'pip install chromadb' in result['message']
|
||||
assert result["success"] is False
|
||||
assert "chromadb not installed" in result["message"]
|
||||
assert "pip install chromadb" in result["message"]
|
||||
finally:
|
||||
if chromadb_backup:
|
||||
sys.modules['chromadb'] = chromadb_backup
|
||||
sys.modules["chromadb"] = chromadb_backup
|
||||
|
||||
def test_chroma_upload_api_signature(self, sample_chroma_package):
|
||||
"""Test ChromaDB upload has correct API signature."""
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
|
||||
# Verify upload method exists and accepts kwargs
|
||||
assert hasattr(adaptor, 'upload')
|
||||
assert hasattr(adaptor, "upload")
|
||||
assert callable(adaptor.upload)
|
||||
|
||||
# Verify adaptor methods exist
|
||||
assert hasattr(adaptor, '_generate_openai_embeddings')
|
||||
assert hasattr(adaptor, "_generate_openai_embeddings")
|
||||
|
||||
|
||||
class TestWeaviateUploadBasics:
|
||||
@@ -117,40 +114,41 @@ class TestWeaviateUploadBasics:
|
||||
|
||||
def test_weaviate_adaptor_exists(self):
|
||||
"""Test that Weaviate adaptor can be loaded."""
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
assert adaptor is not None
|
||||
assert adaptor.PLATFORM == 'weaviate'
|
||||
assert adaptor.PLATFORM == "weaviate"
|
||||
|
||||
def test_weaviate_upload_without_weaviate_installed(self, sample_weaviate_package):
|
||||
"""Test upload fails gracefully without weaviate-client installed."""
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
|
||||
# Temporarily remove weaviate if it exists
|
||||
import sys
|
||||
weaviate_backup = sys.modules.get('weaviate')
|
||||
if 'weaviate' in sys.modules:
|
||||
del sys.modules['weaviate']
|
||||
|
||||
weaviate_backup = sys.modules.get("weaviate")
|
||||
if "weaviate" in sys.modules:
|
||||
del sys.modules["weaviate"]
|
||||
|
||||
try:
|
||||
result = adaptor.upload(sample_weaviate_package)
|
||||
|
||||
assert result['success'] is False
|
||||
assert 'weaviate-client not installed' in result['message']
|
||||
assert 'pip install weaviate-client' in result['message']
|
||||
assert result["success"] is False
|
||||
assert "weaviate-client not installed" in result["message"]
|
||||
assert "pip install weaviate-client" in result["message"]
|
||||
finally:
|
||||
if weaviate_backup:
|
||||
sys.modules['weaviate'] = weaviate_backup
|
||||
sys.modules["weaviate"] = weaviate_backup
|
||||
|
||||
def test_weaviate_upload_api_signature(self, sample_weaviate_package):
|
||||
"""Test Weaviate upload has correct API signature."""
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
|
||||
# Verify upload method exists and accepts kwargs
|
||||
assert hasattr(adaptor, 'upload')
|
||||
assert hasattr(adaptor, "upload")
|
||||
assert callable(adaptor.upload)
|
||||
|
||||
# Verify adaptor methods exist
|
||||
assert hasattr(adaptor, '_generate_openai_embeddings')
|
||||
assert hasattr(adaptor, "_generate_openai_embeddings")
|
||||
|
||||
|
||||
class TestPackageStructure:
|
||||
@@ -161,30 +159,30 @@ class TestPackageStructure:
|
||||
with open(sample_chroma_package) as f:
|
||||
data = json.load(f)
|
||||
|
||||
assert 'collection_name' in data
|
||||
assert 'documents' in data
|
||||
assert 'metadatas' in data
|
||||
assert 'ids' in data
|
||||
assert len(data['documents']) == len(data['metadatas']) == len(data['ids'])
|
||||
assert "collection_name" in data
|
||||
assert "documents" in data
|
||||
assert "metadatas" in data
|
||||
assert "ids" in data
|
||||
assert len(data["documents"]) == len(data["metadatas"]) == len(data["ids"])
|
||||
|
||||
def test_weaviate_package_structure(self, sample_weaviate_package):
|
||||
"""Test Weaviate package has required fields."""
|
||||
with open(sample_weaviate_package) as f:
|
||||
data = json.load(f)
|
||||
|
||||
assert 'class_name' in data
|
||||
assert 'schema' in data
|
||||
assert 'objects' in data
|
||||
assert len(data['objects']) == 2
|
||||
assert "class_name" in data
|
||||
assert "schema" in data
|
||||
assert "objects" in data
|
||||
assert len(data["objects"]) == 2
|
||||
|
||||
# Verify schema structure
|
||||
assert 'class' in data['schema']
|
||||
assert 'properties' in data['schema']
|
||||
assert "class" in data["schema"]
|
||||
assert "properties" in data["schema"]
|
||||
|
||||
# Verify object structure
|
||||
for obj in data['objects']:
|
||||
assert 'id' in obj
|
||||
assert 'properties' in obj
|
||||
for obj in data["objects"]:
|
||||
assert "id" in obj
|
||||
assert "properties" in obj
|
||||
|
||||
|
||||
class TestUploadCommandIntegration:
|
||||
@@ -199,25 +197,26 @@ class TestUploadCommandIntegration:
|
||||
|
||||
# Verify it accepts kwargs for vector DBs
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(upload_skill_api)
|
||||
params = list(sig.parameters.keys())
|
||||
assert 'package_path' in params
|
||||
assert 'target' in params
|
||||
assert 'api_key' in params
|
||||
assert 'kwargs' in params # For platform-specific options
|
||||
assert "package_path" in params
|
||||
assert "target" in params
|
||||
assert "api_key" in params
|
||||
assert "kwargs" in params # For platform-specific options
|
||||
|
||||
def test_upload_command_supports_chroma(self):
|
||||
"""Test upload command recognizes chroma as target."""
|
||||
|
||||
# This should not raise ValueError
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
assert adaptor is not None
|
||||
|
||||
def test_upload_command_supports_weaviate(self):
|
||||
"""Test upload command recognizes weaviate as target."""
|
||||
|
||||
# This should not raise ValueError
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
assert adaptor is not None
|
||||
|
||||
|
||||
@@ -226,7 +225,7 @@ class TestErrorHandling:
|
||||
|
||||
def test_chroma_handles_missing_file(self, tmp_path):
|
||||
"""Test ChromaDB upload handles missing files gracefully."""
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
|
||||
missing_file = tmp_path / "nonexistent.json"
|
||||
|
||||
@@ -234,14 +233,14 @@ class TestErrorHandling:
|
||||
try:
|
||||
result = adaptor.upload(missing_file)
|
||||
# If it returns a dict, it should indicate failure
|
||||
assert result['success'] is False
|
||||
assert result["success"] is False
|
||||
except FileNotFoundError:
|
||||
# This is also acceptable
|
||||
pass
|
||||
|
||||
def test_weaviate_handles_missing_file(self, tmp_path):
|
||||
"""Test Weaviate upload handles missing files gracefully."""
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
|
||||
missing_file = tmp_path / "nonexistent.json"
|
||||
|
||||
@@ -249,14 +248,14 @@ class TestErrorHandling:
|
||||
try:
|
||||
result = adaptor.upload(missing_file)
|
||||
# If it returns a dict, it should indicate failure
|
||||
assert result['success'] is False
|
||||
assert result["success"] is False
|
||||
except FileNotFoundError:
|
||||
# This is also acceptable
|
||||
pass
|
||||
|
||||
def test_chroma_handles_invalid_json(self, tmp_path):
|
||||
"""Test ChromaDB upload handles invalid JSON gracefully."""
|
||||
adaptor = get_adaptor('chroma')
|
||||
adaptor = get_adaptor("chroma")
|
||||
|
||||
invalid_file = tmp_path / "invalid.json"
|
||||
invalid_file.write_text("not valid json{")
|
||||
@@ -265,14 +264,14 @@ class TestErrorHandling:
|
||||
try:
|
||||
result = adaptor.upload(invalid_file)
|
||||
# If it returns a dict, it should indicate failure
|
||||
assert result['success'] is False
|
||||
assert result["success"] is False
|
||||
except json.JSONDecodeError:
|
||||
# This is also acceptable
|
||||
pass
|
||||
|
||||
def test_weaviate_handles_invalid_json(self, tmp_path):
|
||||
"""Test Weaviate upload handles invalid JSON gracefully."""
|
||||
adaptor = get_adaptor('weaviate')
|
||||
adaptor = get_adaptor("weaviate")
|
||||
|
||||
invalid_file = tmp_path / "invalid.json"
|
||||
invalid_file.write_text("not valid json{")
|
||||
@@ -281,7 +280,7 @@ class TestErrorHandling:
|
||||
try:
|
||||
result = adaptor.upload(invalid_file)
|
||||
# If it returns a dict, it should indicate failure
|
||||
assert result['success'] is False
|
||||
assert result["success"] is False
|
||||
except json.JSONDecodeError:
|
||||
# This is also acceptable
|
||||
pass
|
||||
|
||||
@@ -155,13 +155,9 @@ class TestConvertToMdUrls(unittest.TestCase):
|
||||
|
||||
# Should deduplicate to 3 unique base URLs
|
||||
self.assertEqual(len(result), 3)
|
||||
self.assertIn(
|
||||
"https://mikro-orm.io/docs/quick-start/index.html.md", result
|
||||
)
|
||||
self.assertIn("https://mikro-orm.io/docs/quick-start/index.html.md", result)
|
||||
self.assertIn("https://mikro-orm.io/docs/propagation/index.html.md", result)
|
||||
self.assertIn(
|
||||
"https://mikro-orm.io/docs/defining-entities/index.html.md", result
|
||||
)
|
||||
self.assertIn("https://mikro-orm.io/docs/defining-entities/index.html.md", result)
|
||||
|
||||
# Should NOT contain any URLs with anchor fragments
|
||||
for url in result:
|
||||
|
||||
Reference in New Issue
Block a user