style: Format all Python files with ruff

- Formatted 103 files to comply with ruff format requirements
- No code logic changes, only formatting/whitespace
- Fixes CI formatting check failures
Author: yusyus
Date:   2026-02-08 14:42:27 +03:00
Parent: 6e4f623b9d
Commit: 0265de5816
103 changed files with 2241 additions and 2627 deletions
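
For context, a pass like this is normally produced and verified with ruff's formatter; a minimal sketch (hypothetical invocation — the project's actual CI flags may differ):

    ruff format .          # rewrite files in place
    ruff format --check .  # what CI runs: exits non-zero if any file would change

Two inferences from the hunks below, not confirmed by the commit itself: interpolations gaining spaces (avg_time*1000 becoming avg_time * 1000) point to a ruff release with stable f-string formatting (0.9 or later), and the ~100-character lines left intact point to a line-length setting near 100 rather than the default 88.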

View File

@@ -85,9 +85,17 @@ class TestAdaptorBenchmarks(unittest.TestCase):
# Platforms to benchmark
platforms = [
"claude", "gemini", "openai", "markdown", # IDE integrations
"langchain", "llama-index", "haystack", # RAG frameworks
"weaviate", "chroma", "faiss", "qdrant" # Vector DBs
"claude",
"gemini",
"openai",
"markdown", # IDE integrations
"langchain",
"llama-index",
"haystack", # RAG frameworks
"weaviate",
"chroma",
"faiss",
"qdrant", # Vector DBs
]
results = {}
@@ -115,20 +123,19 @@ class TestAdaptorBenchmarks(unittest.TestCase):
min_time = min(times)
max_time = max(times)
results[platform] = {
"avg": avg_time,
"min": min_time,
"max": max_time
}
results[platform] = {"avg": avg_time, "min": min_time, "max": max_time}
print(f"{platform:15} - Avg: {avg_time*1000:6.2f}ms | "
f"Min: {min_time*1000:6.2f}ms | Max: {max_time*1000:6.2f}ms")
print(
f"{platform:15} - Avg: {avg_time * 1000:6.2f}ms | "
f"Min: {min_time * 1000:6.2f}ms | Max: {max_time * 1000:6.2f}ms"
)
# Performance assertions (should complete in reasonable time)
for platform, metrics in results.items():
self.assertLess(
metrics["avg"], 0.5, # Should average < 500ms
f"{platform} format_skill_md too slow: {metrics['avg']*1000:.2f}ms"
metrics["avg"],
0.5, # Should average < 500ms
f"{platform} format_skill_md too slow: {metrics['avg'] * 1000:.2f}ms",
)
def test_benchmark_package_operations(self):
@@ -158,12 +165,9 @@ class TestAdaptorBenchmarks(unittest.TestCase):
# Get file size
file_size_kb = package_path.stat().st_size / 1024
results[platform] = {
"time": elapsed,
"size_kb": file_size_kb
}
results[platform] = {"time": elapsed, "size_kb": file_size_kb}
print(f"{platform:15} - Time: {elapsed*1000:7.2f}ms | Size: {file_size_kb:7.1f} KB")
print(f"{platform:15} - Time: {elapsed * 1000:7.2f}ms | Size: {file_size_kb:7.1f} KB")
# Validate output
self.assertTrue(package_path.exists())
@@ -171,12 +175,14 @@ class TestAdaptorBenchmarks(unittest.TestCase):
# Performance assertions
for platform, metrics in results.items():
self.assertLess(
metrics["time"], 1.0, # Should complete < 1 second
f"{platform} packaging too slow: {metrics['time']*1000:.2f}ms"
metrics["time"],
1.0, # Should complete < 1 second
f"{platform} packaging too slow: {metrics['time'] * 1000:.2f}ms",
)
self.assertLess(
metrics["size_kb"], 1000, # Should be < 1MB for 10 refs
f"{platform} package too large: {metrics['size_kb']:.1f}KB"
metrics["size_kb"],
1000, # Should be < 1MB for 10 refs
f"{platform} package too large: {metrics['size_kb']:.1f}KB",
)
def test_benchmark_scaling_with_reference_count(self):
@@ -210,14 +216,18 @@ class TestAdaptorBenchmarks(unittest.TestCase):
json.loads(formatted)
size_kb = len(formatted) / 1024
results.append({
"count": ref_count,
"time": elapsed,
"time_per_ref": time_per_ref,
"size_kb": size_kb
})
results.append(
{
"count": ref_count,
"time": elapsed,
"time_per_ref": time_per_ref,
"size_kb": size_kb,
}
)
print(f"{ref_count:4} | {elapsed*1000:10.2f} | {time_per_ref*1000:10.3f} | {size_kb:10.1f}")
print(
f"{ref_count:4} | {elapsed * 1000:10.2f} | {time_per_ref * 1000:10.3f} | {size_kb:10.1f}"
)
# Analyze scaling behavior
# Time per ref should not increase significantly (linear scaling)
@@ -230,10 +240,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
print(f"(Time per ref at 50 refs / Time per ref at 1 ref)")
# Assert linear or sub-linear scaling (not exponential)
self.assertLess(
scaling_factor, 3.0,
f"Non-linear scaling detected: {scaling_factor:.2f}x"
)
self.assertLess(scaling_factor, 3.0, f"Non-linear scaling detected: {scaling_factor:.2f}x")
def test_benchmark_json_vs_zip_size_comparison(self):
"""Compare output sizes: JSON vs ZIP/tar.gz"""
@@ -263,16 +270,15 @@ class TestAdaptorBenchmarks(unittest.TestCase):
size_kb = package_path.stat().st_size / 1024
results[platform] = {
"format": format_name,
"size_kb": size_kb
}
results[platform] = {"format": format_name, "size_kb": size_kb}
print(f"{platform:15} | {format_name:8} | {size_kb:10.1f}")
# Analyze results
json_sizes = [v["size_kb"] for k, v in results.items() if v["format"] == "JSON"]
compressed_sizes = [v["size_kb"] for k, v in results.items() if v["format"] in ["ZIP", "tar.gz"]]
compressed_sizes = [
v["size_kb"] for k, v in results.items() if v["format"] in ["ZIP", "tar.gz"]
]
if json_sizes and compressed_sizes:
avg_json = sum(json_sizes) / len(json_sizes)
@@ -280,7 +286,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
print(f"\nAverage JSON size: {avg_json:.1f} KB")
print(f"Average compressed size: {avg_compressed:.1f} KB")
print(f"Compression ratio: {avg_json/avg_compressed:.2f}x")
print(f"Compression ratio: {avg_json / avg_compressed:.2f}x")
def test_benchmark_metadata_overhead(self):
"""Measure metadata processing overhead"""
@@ -299,7 +305,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
description="A comprehensive test skill for benchmarking purposes",
version="2.5.0",
author="Benchmark Suite",
tags=["test", "benchmark", "performance", "validation", "quality"]
tags=["test", "benchmark", "performance", "validation", "quality"],
)
adaptor = get_adaptor("langchain")
@@ -326,15 +332,12 @@ class TestAdaptorBenchmarks(unittest.TestCase):
overhead = avg_rich - avg_minimal
overhead_pct = (overhead / avg_minimal) * 100
print(f"\nMinimal metadata: {avg_minimal*1000:.2f}ms")
print(f"Rich metadata: {avg_rich*1000:.2f}ms")
print(f"Overhead: {overhead*1000:.2f}ms ({overhead_pct:.1f}%)")
print(f"\nMinimal metadata: {avg_minimal * 1000:.2f}ms")
print(f"Rich metadata: {avg_rich * 1000:.2f}ms")
print(f"Overhead: {overhead * 1000:.2f}ms ({overhead_pct:.1f}%)")
# Overhead should be negligible (< 10%)
self.assertLess(
overhead_pct, 10.0,
f"Metadata overhead too high: {overhead_pct:.1f}%"
)
self.assertLess(overhead_pct, 10.0, f"Metadata overhead too high: {overhead_pct:.1f}%")
def test_benchmark_empty_vs_full_skill(self):
"""Compare performance: empty skill vs full skill"""
@@ -360,9 +363,9 @@ class TestAdaptorBenchmarks(unittest.TestCase):
adaptor.format_skill_md(full_dir, metadata)
full_time = time.perf_counter() - start
print(f"\nEmpty skill: {empty_time*1000:.2f}ms")
print(f"Full skill (50 refs): {full_time*1000:.2f}ms")
print(f"Ratio: {full_time/empty_time:.1f}x")
print(f"\nEmpty skill: {empty_time * 1000:.2f}ms")
print(f"Full skill (50 refs): {full_time * 1000:.2f}ms")
print(f"Ratio: {full_time / empty_time:.1f}x")
# Empty should be very fast
self.assertLess(empty_time, 0.01, "Empty skill processing too slow")
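
The hunks in this file show both directions of ruff's collapsing rule: a bracketed literal with no trailing comma that fits the configured line length gets joined onto one line (the results dicts above), while one that overflows, or that carries a magic trailing comma in the source, gets one element per line with a trailing comma added (the platforms list). A minimal sketch of the rule, assuming ruff's default black-compatible style (names are illustrative, not from this repo):

    # Fits on one line and has no trailing comma in the source:
    # ruff joins it, as with the results dicts in the hunks above.
    metrics = {"avg": 0.12, "min": 0.10, "max": 0.15}

    # Overflows the line length (or was written with a trailing comma):
    # ruff expands it to one element per line and adds a trailing comma,
    # as with the platforms list in the first hunk.
    platforms = [
        "claude",
        "gemini",
        "openai",
        "markdown",
    ]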

View File

@@ -662,8 +662,13 @@ export default {
def test_e2e_all_rag_adaptors_from_same_skill(self):
"""Test all 7 RAG adaptors can package the same skill"""
rag_platforms = [
"langchain", "llama-index", "haystack",
"weaviate", "chroma", "faiss", "qdrant"
"langchain",
"llama-index",
"haystack",
"weaviate",
"chroma",
"faiss",
"qdrant",
]
packages = {}
@@ -674,15 +679,11 @@ export default {
package_path = adaptor.package(self.skill_dir, self.output_dir)
# Verify package was created
self.assertTrue(
package_path.exists(),
f"Package not created for {platform}"
)
self.assertTrue(package_path.exists(), f"Package not created for {platform}")
# Verify it's a JSON file
self.assertTrue(
str(package_path).endswith(".json"),
f"{platform} should produce JSON file"
str(package_path).endswith(".json"), f"{platform} should produce JSON file"
)
# Store for later verification
@@ -696,10 +697,7 @@ export default {
with open(path) as f:
data = json.load(f)
# Should be valid JSON (dict or list)
self.assertIsInstance(
data, (dict, list),
f"{platform} should produce valid JSON"
)
self.assertIsInstance(data, (dict, list), f"{platform} should produce valid JSON")
def test_e2e_rag_adaptors_preserve_metadata(self):
"""Test that metadata is preserved across RAG adaptors"""
@@ -708,7 +706,7 @@ export default {
description="Vue.js framework skill",
version="2.0.0",
author="Test Author",
tags=["vue", "javascript", "frontend"]
tags=["vue", "javascript", "frontend"],
)
# Test subset of platforms (representative sample)
@@ -758,33 +756,30 @@ export default {
# Define expected structure for each platform
validations = {
"langchain": lambda d: (
isinstance(d, list) and
all("page_content" in item and "metadata" in item for item in d)
isinstance(d, list)
and all("page_content" in item and "metadata" in item for item in d)
),
"llama-index": lambda d: (
isinstance(d, list) and
all("text" in item and "metadata" in item for item in d)
isinstance(d, list) and all("text" in item and "metadata" in item for item in d)
),
"haystack": lambda d: (
isinstance(d, list) and
all("content" in item and "meta" in item for item in d)
isinstance(d, list) and all("content" in item and "meta" in item for item in d)
),
"weaviate": lambda d: (
isinstance(d, dict) and
"schema" in d and "objects" in d and "class_name" in d
isinstance(d, dict) and "schema" in d and "objects" in d and "class_name" in d
),
"chroma": lambda d: (
isinstance(d, dict) and
"documents" in d and "metadatas" in d and "ids" in d and
"collection_name" in d
isinstance(d, dict)
and "documents" in d
and "metadatas" in d
and "ids" in d
and "collection_name" in d
),
"faiss": lambda d: (
isinstance(d, dict) and
"documents" in d and "metadatas" in d and "ids" in d
isinstance(d, dict) and "documents" in d and "metadatas" in d and "ids" in d
),
"qdrant": lambda d: (
isinstance(d, dict) and
"collection_name" in d and "points" in d and "config" in d
isinstance(d, dict) and "collection_name" in d and "points" in d and "config" in d
),
}
@@ -795,8 +790,7 @@ export default {
# Validate structure
self.assertTrue(
validate_func(data),
f"{platform} validation failed: incorrect JSON structure"
validate_func(data), f"{platform} validation failed: incorrect JSON structure"
)
def test_e2e_rag_empty_skill_handling(self):
@@ -838,9 +832,7 @@ export default {
if platform == "langchain":
categories = {item["metadata"]["category"] for item in data}
elif platform == "weaviate":
categories = {
obj["properties"]["category"] for obj in data["objects"]
}
categories = {obj["properties"]["category"] for obj in data["objects"]}
elif platform == "chroma":
categories = {meta["category"] for meta in data["metadatas"]}
@@ -854,8 +846,7 @@ export default {
# Check that at least one reference category exists
ref_categories = categories - {"overview"}
self.assertGreater(
len(ref_categories), 0,
f"{platform}: Should have at least one reference category"
len(ref_categories), 0, f"{platform}: Should have at least one reference category"
)
def test_e2e_rag_integration_workflow_chromadb(self):
@@ -878,17 +869,10 @@ export default {
# Create collection and add documents
collection = client.create_collection(data["collection_name"])
collection.add(
documents=data["documents"],
metadatas=data["metadatas"],
ids=data["ids"]
)
collection.add(documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"])
# Query
results = collection.query(
query_texts=["reactivity"],
n_results=2
)
results = collection.query(query_texts=["reactivity"], n_results=2)
# Verify results
self.assertGreater(len(results["documents"][0]), 0, "Should return results")

View File

@@ -28,9 +28,7 @@ class TestChromaAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for Chroma format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for Chroma format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestChromaAdaptor:
# Format as Chroma collection
adaptor = get_adaptor("chroma")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
collection_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -124,7 +120,10 @@ class TestChromaAdaptor:
# Upload may fail if chromadb not installed (expected)
assert "message" in result
# Either chromadb not installed or connection error
assert ("chromadb not installed" in result["message"] or "Failed to connect" in result["message"])
assert (
"chromadb not installed" in result["message"]
or "Failed to connect" in result["message"]
)
def test_validate_api_key_returns_false(self):
"""Test that API key validation returns False (no API needed)."""
@@ -157,9 +156,7 @@ class TestChromaAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("chroma")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
collection_json = adaptor.format_skill_md(skill_dir, metadata)
collection = json.loads(collection_json)
@@ -179,9 +176,7 @@ class TestChromaAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("chroma")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
collection_json = adaptor.format_skill_md(skill_dir, metadata)
collection = json.loads(collection_json)

View File

@@ -28,9 +28,7 @@ class TestFAISSAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for FAISS format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for FAISS format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestFAISSAdaptor:
# Format as FAISS index data
adaptor = get_adaptor("faiss")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
index_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -158,9 +154,7 @@ class TestFAISSAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("faiss")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
index_json = adaptor.format_skill_md(skill_dir, metadata)
index_data = json.loads(index_json)
@@ -180,9 +174,7 @@ class TestFAISSAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("faiss")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
index_json = adaptor.format_skill_md(skill_dir, metadata)
index_data = json.loads(index_json)

View File

@@ -28,9 +28,7 @@ class TestHaystackAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for Haystack format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for Haystack format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestHaystackAdaptor:
# Format as Haystack Documents
adaptor = get_adaptor("haystack")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -112,7 +108,7 @@ class TestHaystackAdaptor:
"""Test upload returns instructions (no actual upload)."""
# Create test package
package_path = tmp_path / "test-haystack.json"
package_path.write_text('[]')
package_path.write_text("[]")
adaptor = get_adaptor("haystack")
result = adaptor.upload(package_path, "fake-key")
@@ -154,9 +150,7 @@ class TestHaystackAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("haystack")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)
@@ -174,9 +168,7 @@ class TestHaystackAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("haystack")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)

View File

@@ -28,9 +28,7 @@ class TestLangChainAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for LangChain format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for LangChain format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestLangChainAdaptor:
# Format as LangChain Documents
adaptor = get_adaptor("langchain")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -112,7 +108,7 @@ class TestLangChainAdaptor:
"""Test upload returns instructions (no actual upload)."""
# Create test package
package_path = tmp_path / "test-langchain.json"
package_path.write_text('[]')
package_path.write_text("[]")
adaptor = get_adaptor("langchain")
result = adaptor.upload(package_path, "fake-key")
@@ -153,9 +149,7 @@ class TestLangChainAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("langchain")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)
@@ -173,9 +167,7 @@ class TestLangChainAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("langchain")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)

View File

@@ -28,9 +28,7 @@ class TestLlamaIndexAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for LlamaIndex format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for LlamaIndex format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestLlamaIndexAdaptor:
# Format as LlamaIndex Documents
adaptor = get_adaptor("llama-index")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -112,7 +108,7 @@ class TestLlamaIndexAdaptor:
"""Test upload returns instructions (no actual upload)."""
# Create test package
package_path = tmp_path / "test-llama-index.json"
package_path.write_text('[]')
package_path.write_text("[]")
adaptor = get_adaptor("llama-index")
result = adaptor.upload(package_path, "fake-key")
@@ -153,9 +149,7 @@ class TestLlamaIndexAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("llama-index")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)
@@ -173,9 +167,7 @@ class TestLlamaIndexAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("llama-index")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
documents_json = adaptor.format_skill_md(skill_dir, metadata)
documents = json.loads(documents_json)

View File

@@ -28,9 +28,7 @@ class TestQdrantAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for Qdrant format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for Qdrant format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestQdrantAdaptor:
# Format as Qdrant points
adaptor = get_adaptor("qdrant")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
points_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -119,7 +115,7 @@ class TestQdrantAdaptor:
"""Test upload returns instructions (no actual upload)."""
# Create test package
package_path = tmp_path / "test-qdrant.json"
package_path.write_text('[]')
package_path.write_text("[]")
adaptor = get_adaptor("qdrant")
result = adaptor.upload(package_path, "fake-key")
@@ -160,9 +156,7 @@ class TestQdrantAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("qdrant")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
points_json = adaptor.format_skill_md(skill_dir, metadata)
result = json.loads(points_json)
@@ -181,9 +175,7 @@ class TestQdrantAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("qdrant")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
points_json = adaptor.format_skill_md(skill_dir, metadata)
result = json.loads(points_json)

View File

@@ -28,9 +28,7 @@ class TestWeaviateAdaptor:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
"# Test Skill\n\nThis is a test skill for Weaviate format."
)
skill_md.write_text("# Test Skill\n\nThis is a test skill for Weaviate format.")
# Create references directory with files
refs_dir = skill_dir / "references"
@@ -40,9 +38,7 @@ class TestWeaviateAdaptor:
# Format as Weaviate objects
adaptor = get_adaptor("weaviate")
metadata = SkillMetadata(
name="test_skill", description="Test skill", version="1.0.0"
)
metadata = SkillMetadata(name="test_skill", description="Test skill", version="1.0.0")
objects_json = adaptor.format_skill_md(skill_dir, metadata)
@@ -119,7 +115,7 @@ class TestWeaviateAdaptor:
"""Test upload returns instructions (no actual upload)."""
# Create test package
package_path = tmp_path / "test-weaviate.json"
package_path.write_text('[]')
package_path.write_text("[]")
adaptor = get_adaptor("weaviate")
result = adaptor.upload(package_path, "fake-key")
@@ -127,7 +123,11 @@ class TestWeaviateAdaptor:
# Upload may fail if weaviate not installed (expected)
assert "message" in result
# Either weaviate not installed, invalid JSON, or connection error
assert ("import weaviate" in result["message"] or "Failed to connect" in result["message"] or result["success"] is False)
assert (
"import weaviate" in result["message"]
or "Failed to connect" in result["message"]
or result["success"] is False
)
def test_validate_api_key_returns_false(self):
"""Test that API key validation returns False (no API needed)."""
@@ -160,9 +160,7 @@ class TestWeaviateAdaptor:
skill_dir.mkdir()
adaptor = get_adaptor("weaviate")
metadata = SkillMetadata(
name="empty_skill", description="Empty", version="1.0.0"
)
metadata = SkillMetadata(name="empty_skill", description="Empty", version="1.0.0")
objects_json = adaptor.format_skill_md(skill_dir, metadata)
result = json.loads(objects_json)
@@ -181,9 +179,7 @@ class TestWeaviateAdaptor:
(refs_dir / "test.md").write_text("# Test\n\nTest content.")
adaptor = get_adaptor("weaviate")
metadata = SkillMetadata(
name="refs_only", description="Refs only", version="1.0.0"
)
metadata = SkillMetadata(name="refs_only", description="Refs only", version="1.0.0")
objects_json = adaptor.format_skill_md(skill_dir, metadata)
result = json.loads(objects_json)

View File

@@ -12,7 +12,7 @@ from skill_seekers.benchmark import (
BenchmarkResult,
BenchmarkRunner,
BenchmarkReport,
Metric
Metric,
)
from skill_seekers.benchmark.models import TimingResult, MemoryUsage
@@ -37,12 +37,7 @@ class TestBenchmarkResult:
"""Test adding timing result."""
result = BenchmarkResult("test")
timing = TimingResult(
operation="test_op",
duration=1.5,
iterations=1,
avg_duration=1.5
)
timing = TimingResult(operation="test_op", duration=1.5, iterations=1, avg_duration=1.5)
result.add_timing(timing)
@@ -55,11 +50,7 @@ class TestBenchmarkResult:
result = BenchmarkResult("test")
usage = MemoryUsage(
operation="test_op",
before_mb=100.0,
after_mb=150.0,
peak_mb=160.0,
allocated_mb=50.0
operation="test_op", before_mb=100.0, after_mb=150.0, peak_mb=160.0, allocated_mb=50.0
)
result.add_memory(usage)
@@ -72,11 +63,7 @@ class TestBenchmarkResult:
"""Test adding custom metric."""
result = BenchmarkResult("test")
metric = Metric(
name="pages_per_sec",
value=12.5,
unit="pages/sec"
)
metric = Metric(name="pages_per_sec", value=12.5, unit="pages/sec")
result.add_metric(metric)
@@ -107,12 +94,7 @@ class TestBenchmarkResult:
"""Test report generation."""
result = BenchmarkResult("test")
timing = TimingResult(
operation="test_op",
duration=1.0,
iterations=1,
avg_duration=1.0
)
timing = TimingResult(operation="test_op", duration=1.0, iterations=1, avg_duration=1.0)
result.add_timing(timing)
report = result.to_report()
@@ -303,7 +285,7 @@ class TestBenchmark:
before_mb=100.0,
after_mb=1200.0,
peak_mb=1500.0,
allocated_mb=1100.0
allocated_mb=1100.0,
)
benchmark.result.add_memory(usage)
@@ -370,10 +352,7 @@ class TestBenchmarkRunner:
with bench.timer("op2"):
time.sleep(0.03)
reports = runner.run_suite({
"test1": bench1,
"test2": bench2
})
reports = runner.run_suite({"test1": bench1, "test2": bench2})
assert len(reports) == 2
assert "test1" in reports
@@ -405,6 +384,7 @@ class TestBenchmarkRunner:
# Compare
from skill_seekers.benchmark.models import ComparisonReport
comparison = runner.compare(baseline_path, improved_path)
assert isinstance(comparison, ComparisonReport)
@@ -458,6 +438,7 @@ class TestBenchmarkRunner:
def test_cleanup_old(self, tmp_path):
"""Test cleaning up old benchmarks."""
import os
runner = BenchmarkRunner(output_dir=tmp_path)
# Create 10 benchmark files with different timestamps
@@ -476,10 +457,10 @@ class TestBenchmarkRunner:
"memory": [],
"metrics": [],
"system_info": {},
"recommendations": []
"recommendations": [],
}
with open(file_path, 'w') as f:
with open(file_path, "w") as f:
json.dump(report_data, f)
# Set different modification times
@@ -505,12 +486,7 @@ class TestBenchmarkModels:
def test_timing_result_model(self):
"""Test TimingResult model."""
timing = TimingResult(
operation="test",
duration=1.5,
iterations=10,
avg_duration=0.15
)
timing = TimingResult(operation="test", duration=1.5, iterations=10, avg_duration=0.15)
assert timing.operation == "test"
assert timing.duration == 1.5
@@ -520,11 +496,7 @@ class TestBenchmarkModels:
def test_memory_usage_model(self):
"""Test MemoryUsage model."""
usage = MemoryUsage(
operation="allocate",
before_mb=100.0,
after_mb=200.0,
peak_mb=250.0,
allocated_mb=100.0
operation="allocate", before_mb=100.0, after_mb=200.0, peak_mb=250.0, allocated_mb=100.0
)
assert usage.operation == "allocate"
@@ -533,11 +505,7 @@ class TestBenchmarkModels:
def test_metric_model(self):
"""Test Metric model."""
metric = Metric(
name="throughput",
value=125.5,
unit="ops/sec"
)
metric = Metric(name="throughput", value=125.5, unit="ops/sec")
assert metric.name == "throughput"
assert metric.value == 125.5
@@ -551,26 +519,19 @@ class TestBenchmarkModels:
started_at=datetime.utcnow(),
finished_at=datetime.utcnow(),
total_duration=5.0,
timings=[
TimingResult(
operation="op1",
duration=2.0,
iterations=1,
avg_duration=2.0
)
],
timings=[TimingResult(operation="op1", duration=2.0, iterations=1, avg_duration=2.0)],
memory=[
MemoryUsage(
operation="op1",
before_mb=100.0,
after_mb=200.0,
peak_mb=250.0,
allocated_mb=100.0
allocated_mb=100.0,
)
],
metrics=[],
system_info={},
recommendations=[]
recommendations=[],
)
summary = report.summary
@@ -592,7 +553,7 @@ class TestBenchmarkModels:
memory=[],
metrics=[],
system_info={},
recommendations=[]
recommendations=[],
)
current = BenchmarkReport(
@@ -604,7 +565,7 @@ class TestBenchmarkModels:
memory=[],
metrics=[],
system_info={},
recommendations=[]
recommendations=[],
)
comparison = ComparisonReport(
@@ -614,7 +575,7 @@ class TestBenchmarkModels:
improvements=[],
regressions=["Slower performance"],
speedup_factor=0.5,
memory_change_mb=0.0
memory_change_mb=0.0,
)
assert comparison.has_regressions is True
@@ -632,7 +593,7 @@ class TestBenchmarkModels:
memory=[],
metrics=[],
system_info={},
recommendations=[]
recommendations=[],
)
current = BenchmarkReport(
@@ -644,7 +605,7 @@ class TestBenchmarkModels:
memory=[],
metrics=[],
system_info={},
recommendations=[]
recommendations=[],
)
comparison = ComparisonReport(
@@ -654,7 +615,7 @@ class TestBenchmarkModels:
improvements=[],
regressions=[],
speedup_factor=2.0,
memory_change_mb=0.0
memory_change_mb=0.0,
)
improvement = comparison.overall_improvement

View File

@@ -60,7 +60,7 @@ class TestChunkingDisabledByDefault:
"""Test that LangChain doesn't chunk by default."""
skill_dir = create_test_skill(tmp_path, large_doc=True)
adaptor = get_adaptor('langchain')
adaptor = get_adaptor("langchain")
package_path = adaptor.package(skill_dir, tmp_path)
with open(package_path) as f:
@@ -71,8 +71,8 @@ class TestChunkingDisabledByDefault:
# No chunking metadata
for doc in data:
assert 'is_chunked' not in doc['metadata']
assert 'chunk_index' not in doc['metadata']
assert "is_chunked" not in doc["metadata"]
assert "chunk_index" not in doc["metadata"]
class TestChunkingEnabled:
@@ -82,12 +82,9 @@ class TestChunkingEnabled:
"""Test that LangChain chunks large documents when enabled."""
skill_dir = create_test_skill(tmp_path, large_doc=True)
adaptor = get_adaptor('langchain')
adaptor = get_adaptor("langchain")
package_path = adaptor.package(
skill_dir,
tmp_path,
enable_chunking=True,
chunk_max_tokens=512
skill_dir, tmp_path, enable_chunking=True, chunk_max_tokens=512
)
with open(package_path) as f:
@@ -97,25 +94,22 @@ class TestChunkingEnabled:
assert len(data) > 2, f"Large doc should be chunked, got {len(data)} docs"
# Check for chunking metadata
chunked_docs = [doc for doc in data if doc['metadata'].get('is_chunked')]
chunked_docs = [doc for doc in data if doc["metadata"].get("is_chunked")]
assert len(chunked_docs) > 0, "Should have chunked documents"
# Verify chunk metadata structure
for doc in chunked_docs:
assert 'chunk_index' in doc['metadata']
assert 'total_chunks' in doc['metadata']
assert 'chunk_id' in doc['metadata']
assert "chunk_index" in doc["metadata"]
assert "total_chunks" in doc["metadata"]
assert "chunk_id" in doc["metadata"]
def test_chunking_preserves_small_docs(self, tmp_path):
"""Test that small documents are not chunked."""
skill_dir = create_test_skill(tmp_path, large_doc=False)
adaptor = get_adaptor('langchain')
adaptor = get_adaptor("langchain")
package_path = adaptor.package(
skill_dir,
tmp_path,
enable_chunking=True,
chunk_max_tokens=512
skill_dir, tmp_path, enable_chunking=True, chunk_max_tokens=512
)
with open(package_path) as f:
@@ -125,7 +119,7 @@ class TestChunkingEnabled:
assert len(data) == 2, "Small docs should not be chunked"
for doc in data:
assert 'is_chunked' not in doc['metadata']
assert "is_chunked" not in doc["metadata"]
class TestCodeBlockPreservation:
@@ -158,43 +152,43 @@ More content after code block.
# Create references dir (required)
(skill_dir / "references").mkdir()
adaptor = get_adaptor('langchain')
adaptor = get_adaptor("langchain")
package_path = adaptor.package(
skill_dir,
tmp_path,
enable_chunking=True,
chunk_max_tokens=200, # Small chunks to force splitting
preserve_code_blocks=True
preserve_code_blocks=True,
)
with open(package_path) as f:
data = json.load(f)
# Find chunks with code block
code_chunks = [
doc for doc in data
if '```python' in doc['page_content']
]
code_chunks = [doc for doc in data if "```python" in doc["page_content"]]
# Code block should be in at least one chunk
assert len(code_chunks) >= 1, "Code block should be preserved"
# Code block should be complete (opening and closing backticks)
for chunk in code_chunks:
content = chunk['page_content']
if '```python' in content:
content = chunk["page_content"]
if "```python" in content:
# Should also have closing backticks
assert content.count('```') >= 2, "Code block should be complete"
assert content.count("```") >= 2, "Code block should be complete"
class TestAutoChunkingForRAGPlatforms:
"""Test that chunking is auto-enabled for RAG platforms."""
@pytest.mark.parametrize("platform", [
'langchain',
# Add others after they're updated:
# 'llama-index', 'haystack', 'weaviate', 'chroma', 'faiss', 'qdrant'
])
@pytest.mark.parametrize(
"platform",
[
"langchain",
# Add others after they're updated:
# 'llama-index', 'haystack', 'weaviate', 'chroma', 'faiss', 'qdrant'
],
)
def test_rag_platforms_auto_chunk(self, platform, tmp_path):
"""Test that RAG platforms auto-enable chunking."""
skill_dir = create_test_skill(tmp_path, large_doc=True)
@@ -208,7 +202,7 @@ class TestAutoChunkingForRAGPlatforms:
open_folder_after=False,
skip_quality_check=True,
target=platform,
enable_chunking=False # Explicitly disabled, but should be auto-enabled
enable_chunking=False, # Explicitly disabled, but should be auto-enabled
)
assert success, f"Packaging failed for {platform}"
@@ -221,8 +215,8 @@ class TestAutoChunkingForRAGPlatforms:
# Should have multiple documents/chunks
if isinstance(data, list):
assert len(data) > 2, f"{platform}: Should auto-chunk large docs"
elif isinstance(data, dict) and 'documents' in data:
assert len(data['documents']) > 2, f"{platform}: Should auto-chunk large docs"
elif isinstance(data, dict) and "documents" in data:
assert len(data["documents"]) > 2, f"{platform}: Should auto-chunk large docs"
class TestBaseAdaptorChunkingHelper:
@@ -237,11 +231,7 @@ class TestBaseAdaptorChunkingHelper:
content = "Test content " * 1000 # Large content
metadata = {"source": "test"}
chunks = adaptor._maybe_chunk_content(
content,
metadata,
enable_chunking=False
)
chunks = adaptor._maybe_chunk_content(content, metadata, enable_chunking=False)
# Should return single chunk
assert len(chunks) == 1
@@ -258,10 +248,7 @@ class TestBaseAdaptorChunkingHelper:
metadata = {"source": "test"}
chunks = adaptor._maybe_chunk_content(
content,
metadata,
enable_chunking=True,
chunk_max_tokens=512
content, metadata, enable_chunking=True, chunk_max_tokens=512
)
# Should return single chunk
@@ -282,7 +269,7 @@ class TestBaseAdaptorChunkingHelper:
enable_chunking=True,
chunk_max_tokens=512,
preserve_code_blocks=True,
source_file="test.md"
source_file="test.md",
)
# Should return multiple chunks
@@ -292,12 +279,12 @@ class TestBaseAdaptorChunkingHelper:
for chunk_text, chunk_meta in chunks:
assert isinstance(chunk_text, str)
assert isinstance(chunk_meta, dict)
assert chunk_meta['is_chunked']
assert 'chunk_index' in chunk_meta
assert 'chunk_id' in chunk_meta
assert chunk_meta["is_chunked"]
assert "chunk_index" in chunk_meta
assert "chunk_id" in chunk_meta
# Original metadata preserved
assert chunk_meta['source'] == 'test'
assert chunk_meta['file'] == 'test.md'
assert chunk_meta["source"] == "test"
assert chunk_meta["file"] == "test.md"
class TestChunkingCLIIntegration:
@@ -313,10 +300,10 @@ class TestChunkingCLIIntegration:
skill_dir=skill_dir,
open_folder_after=False,
skip_quality_check=True,
target='langchain',
target="langchain",
enable_chunking=True, # --chunk flag
chunk_max_tokens=512,
preserve_code_blocks=True
preserve_code_blocks=True,
)
assert success
@@ -339,10 +326,10 @@ class TestChunkingCLIIntegration:
skill_dir=skill_dir,
open_folder_after=False,
skip_quality_check=True,
target='langchain',
target="langchain",
enable_chunking=True,
chunk_max_tokens=256, # Small chunks
preserve_code_blocks=True
preserve_code_blocks=True,
)
assert success
@@ -355,10 +342,10 @@ class TestChunkingCLIIntegration:
skill_dir=skill_dir,
open_folder_after=False,
skip_quality_check=True,
target='langchain',
target="langchain",
enable_chunking=True,
chunk_max_tokens=1024, # Large chunks
preserve_code_blocks=True
preserve_code_blocks=True,
)
assert success
@@ -367,9 +354,10 @@ class TestChunkingCLIIntegration:
data_large = json.load(f)
# Small chunk size should produce more chunks
assert len(data_small) > len(data_large), \
assert len(data_small) > len(data_large), (
f"Small chunks ({len(data_small)}) should be more than large chunks ({len(data_large)})"
)
if __name__ == '__main__':
pytest.main([__file__, '-v'])
if __name__ == "__main__":
pytest.main([__file__, "-v"])

View File

@@ -30,12 +30,12 @@ class TestParserRegistry:
"""Test getting list of parser names."""
names = get_parser_names()
assert len(names) == 19
assert 'scrape' in names
assert 'github' in names
assert 'package' in names
assert 'upload' in names
assert 'analyze' in names
assert 'config' in names
assert "scrape" in names
assert "github" in names
assert "package" in names
assert "upload" in names
assert "analyze" in names
assert "config" in names
def test_all_parsers_are_subcommand_parsers(self):
"""Test that all parsers inherit from SubcommandParser."""
@@ -45,9 +45,9 @@ class TestParserRegistry:
def test_all_parsers_have_required_properties(self):
"""Test that all parsers have name, help, description."""
for parser in PARSERS:
assert hasattr(parser, 'name')
assert hasattr(parser, 'help')
assert hasattr(parser, 'description')
assert hasattr(parser, "name")
assert hasattr(parser, "help")
assert hasattr(parser, "description")
assert isinstance(parser.name, str)
assert isinstance(parser.help, str)
assert isinstance(parser.description, str)
@@ -57,7 +57,7 @@ class TestParserRegistry:
def test_all_parsers_have_add_arguments_method(self):
"""Test that all parsers implement add_arguments."""
for parser in PARSERS:
assert hasattr(parser, 'add_arguments')
assert hasattr(parser, "add_arguments")
assert callable(parser.add_arguments)
def test_no_duplicate_parser_names(self):
@@ -106,21 +106,21 @@ class TestParserCreation:
def test_register_parsers_creates_all_subcommands(self):
"""Test that register_parsers creates all 19 subcommands."""
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(dest='command')
subparsers = main_parser.add_subparsers(dest="command")
# Register all parsers
register_parsers(subparsers)
# Test that all commands can be parsed
test_commands = [
'config --show',
'scrape --config test.json',
'github --repo owner/repo',
'package output/test/',
'upload test.zip',
'analyze --directory .',
'enhance output/test/',
'estimate test.json',
"config --show",
"scrape --config test.json",
"github --repo owner/repo",
"package output/test/",
"upload test.zip",
"analyze --directory .",
"enhance output/test/",
"estimate test.json",
]
for cmd in test_commands:
@@ -134,75 +134,76 @@ class TestSpecificParsers:
def test_scrape_parser_arguments(self):
"""Test ScrapeParser has correct arguments."""
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(dest='command')
subparsers = main_parser.add_subparsers(dest="command")
scrape_parser = ScrapeParser()
scrape_parser.create_parser(subparsers)
# Test various argument combinations
args = main_parser.parse_args(['scrape', '--config', 'test.json'])
assert args.command == 'scrape'
assert args.config == 'test.json'
args = main_parser.parse_args(["scrape", "--config", "test.json"])
assert args.command == "scrape"
assert args.config == "test.json"
args = main_parser.parse_args(['scrape', '--config', 'test.json', '--max-pages', '100'])
args = main_parser.parse_args(["scrape", "--config", "test.json", "--max-pages", "100"])
assert args.max_pages == 100
args = main_parser.parse_args(['scrape', '--enhance'])
args = main_parser.parse_args(["scrape", "--enhance"])
assert args.enhance is True
def test_github_parser_arguments(self):
"""Test GitHubParser has correct arguments."""
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(dest='command')
subparsers = main_parser.add_subparsers(dest="command")
github_parser = GitHubParser()
github_parser.create_parser(subparsers)
args = main_parser.parse_args(['github', '--repo', 'owner/repo'])
assert args.command == 'github'
assert args.repo == 'owner/repo'
args = main_parser.parse_args(["github", "--repo", "owner/repo"])
assert args.command == "github"
assert args.repo == "owner/repo"
args = main_parser.parse_args(['github', '--repo', 'owner/repo', '--non-interactive'])
args = main_parser.parse_args(["github", "--repo", "owner/repo", "--non-interactive"])
assert args.non_interactive is True
def test_package_parser_arguments(self):
"""Test PackageParser has correct arguments."""
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(dest='command')
subparsers = main_parser.add_subparsers(dest="command")
package_parser = PackageParser()
package_parser.create_parser(subparsers)
args = main_parser.parse_args(['package', 'output/test/'])
assert args.command == 'package'
assert args.skill_directory == 'output/test/'
args = main_parser.parse_args(["package", "output/test/"])
assert args.command == "package"
assert args.skill_directory == "output/test/"
args = main_parser.parse_args(['package', 'output/test/', '--target', 'gemini'])
assert args.target == 'gemini'
args = main_parser.parse_args(["package", "output/test/", "--target", "gemini"])
assert args.target == "gemini"
args = main_parser.parse_args(['package', 'output/test/', '--no-open'])
args = main_parser.parse_args(["package", "output/test/", "--no-open"])
assert args.no_open is True
def test_analyze_parser_arguments(self):
"""Test AnalyzeParser has correct arguments."""
main_parser = argparse.ArgumentParser()
subparsers = main_parser.add_subparsers(dest='command')
subparsers = main_parser.add_subparsers(dest="command")
from skill_seekers.cli.parsers.analyze_parser import AnalyzeParser
analyze_parser = AnalyzeParser()
analyze_parser.create_parser(subparsers)
args = main_parser.parse_args(['analyze', '--directory', '.'])
assert args.command == 'analyze'
assert args.directory == '.'
args = main_parser.parse_args(["analyze", "--directory", "."])
assert args.command == "analyze"
assert args.directory == "."
args = main_parser.parse_args(['analyze', '--directory', '.', '--quick'])
args = main_parser.parse_args(["analyze", "--directory", ".", "--quick"])
assert args.quick is True
args = main_parser.parse_args(['analyze', '--directory', '.', '--comprehensive'])
args = main_parser.parse_args(["analyze", "--directory", ".", "--comprehensive"])
assert args.comprehensive is True
args = main_parser.parse_args(['analyze', '--directory', '.', '--skip-patterns'])
args = main_parser.parse_args(["analyze", "--directory", ".", "--skip-patterns"])
assert args.skip_patterns is True
@@ -215,11 +216,25 @@ class TestBackwardCompatibility:
# Original commands from old main.py
original_commands = [
'config', 'scrape', 'github', 'pdf', 'unified',
'enhance', 'enhance-status', 'package', 'upload',
'estimate', 'extract-test-examples', 'install-agent',
'analyze', 'install', 'resume', 'stream',
'update', 'multilang', 'quality'
"config",
"scrape",
"github",
"pdf",
"unified",
"enhance",
"enhance-status",
"package",
"upload",
"estimate",
"extract-test-examples",
"install-agent",
"analyze",
"install",
"resume",
"stream",
"update",
"multilang",
"quality",
]
for cmd in original_commands:

View File

@@ -20,18 +20,21 @@ from skill_seekers.cli.storage import (
# Check if cloud storage dependencies are available
try:
import boto3 # noqa: F401
BOTO3_AVAILABLE = True
except ImportError:
BOTO3_AVAILABLE = False
try:
from google.cloud import storage # noqa: F401
GCS_AVAILABLE = True
except ImportError:
GCS_AVAILABLE = False
try:
from azure.storage.blob import BlobServiceClient # noqa: F401
AZURE_AVAILABLE = True
except ImportError:
AZURE_AVAILABLE = False
@@ -41,12 +44,13 @@ except ImportError:
# Factory Tests
# ========================================
def test_get_storage_adaptor_s3():
"""Test S3 adaptor factory."""
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3'):
adaptor = get_storage_adaptor('s3', bucket='test-bucket')
with patch("skill_seekers.cli.storage.s3_storage.boto3"):
adaptor = get_storage_adaptor("s3", bucket="test-bucket")
assert isinstance(adaptor, S3StorageAdaptor)
@@ -54,8 +58,8 @@ def test_get_storage_adaptor_gcs():
"""Test GCS adaptor factory."""
if not GCS_AVAILABLE:
pytest.skip("google-cloud-storage not installed")
with patch('skill_seekers.cli.storage.gcs_storage.storage'):
adaptor = get_storage_adaptor('gcs', bucket='test-bucket')
with patch("skill_seekers.cli.storage.gcs_storage.storage"):
adaptor = get_storage_adaptor("gcs", bucket="test-bucket")
assert isinstance(adaptor, GCSStorageAdaptor)
@@ -63,11 +67,11 @@ def test_get_storage_adaptor_azure():
"""Test Azure adaptor factory."""
if not AZURE_AVAILABLE:
pytest.skip("azure-storage-blob not installed")
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient'):
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient"):
adaptor = get_storage_adaptor(
'azure',
container='test-container',
connection_string='DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
"azure",
container="test-container",
connection_string="DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key",
)
assert isinstance(adaptor, AzureStorageAdaptor)
@@ -75,36 +79,37 @@ def test_get_storage_adaptor_azure():
def test_get_storage_adaptor_invalid_provider():
"""Test invalid provider raises error."""
with pytest.raises(ValueError, match="Unsupported storage provider"):
get_storage_adaptor('invalid', bucket='test')
get_storage_adaptor("invalid", bucket="test")
# ========================================
# S3 Storage Tests
# ========================================
def test_s3_upload_file():
"""Test S3 file upload."""
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
# Create temporary file
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(b'test content')
tmp_file.write(b"test content")
tmp_path = tmp_file.name
try:
# Test upload
result = adaptor.upload_file(tmp_path, 'test.txt')
result = adaptor.upload_file(tmp_path, "test.txt")
assert result == 's3://test-bucket/test.txt'
assert result == "s3://test-bucket/test.txt"
mock_client.upload_file.assert_called_once()
finally:
Path(tmp_path).unlink()
@@ -115,23 +120,21 @@ def test_s3_download_file():
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
with tempfile.TemporaryDirectory() as tmp_dir:
local_path = os.path.join(tmp_dir, 'downloaded.txt')
local_path = os.path.join(tmp_dir, "downloaded.txt")
# Test download
adaptor.download_file('test.txt', local_path)
adaptor.download_file("test.txt", local_path)
mock_client.download_file.assert_called_once_with(
'test-bucket', 'test.txt', local_path
)
mock_client.download_file.assert_called_once_with("test-bucket", "test.txt", local_path)
def test_s3_list_files():
@@ -139,18 +142,18 @@ def test_s3_list_files():
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_paginator = Mock()
mock_page_iterator = [
{
'Contents': [
"Contents": [
{
'Key': 'file1.txt',
'Size': 100,
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
'ETag': '"abc123"'
"Key": "file1.txt",
"Size": 100,
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
"ETag": '"abc123"',
}
]
}
@@ -161,15 +164,15 @@ def test_s3_list_files():
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
# Test list
files = adaptor.list_files('prefix/')
files = adaptor.list_files("prefix/")
assert len(files) == 1
assert files[0].key == 'file1.txt'
assert files[0].key == "file1.txt"
assert files[0].size == 100
assert files[0].etag == 'abc123'
assert files[0].etag == "abc123"
def test_s3_file_exists():
@@ -177,17 +180,17 @@ def test_s3_file_exists():
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_client.head_object.return_value = {}
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
# Test exists
assert adaptor.file_exists('test.txt') is True
assert adaptor.file_exists("test.txt") is True
def test_s3_get_file_url():
@@ -195,19 +198,19 @@ def test_s3_get_file_url():
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_client.generate_presigned_url.return_value = 'https://s3.amazonaws.com/signed-url'
mock_client.generate_presigned_url.return_value = "https://s3.amazonaws.com/signed-url"
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
# Test URL generation
url = adaptor.get_file_url('test.txt', expires_in=7200)
url = adaptor.get_file_url("test.txt", expires_in=7200)
assert url == 'https://s3.amazonaws.com/signed-url'
assert url == "https://s3.amazonaws.com/signed-url"
mock_client.generate_presigned_url.assert_called_once()
@@ -215,12 +218,13 @@ def test_s3_get_file_url():
# GCS Storage Tests
# ========================================
def test_gcs_upload_file():
"""Test GCS file upload."""
if not GCS_AVAILABLE:
pytest.skip("google-cloud-storage not installed")
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
# Setup mocks
mock_client = Mock()
mock_bucket = Mock()
@@ -230,18 +234,18 @@ def test_gcs_upload_file():
mock_bucket.blob.return_value = mock_blob
mock_storage.Client.return_value = mock_client
adaptor = GCSStorageAdaptor(bucket='test-bucket')
adaptor = GCSStorageAdaptor(bucket="test-bucket")
# Create temporary file
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(b'test content')
tmp_file.write(b"test content")
tmp_path = tmp_file.name
try:
# Test upload
result = adaptor.upload_file(tmp_path, 'test.txt')
result = adaptor.upload_file(tmp_path, "test.txt")
assert result == 'gs://test-bucket/test.txt'
assert result == "gs://test-bucket/test.txt"
mock_blob.upload_from_filename.assert_called_once()
finally:
Path(tmp_path).unlink()
@@ -252,7 +256,7 @@ def test_gcs_download_file():
if not GCS_AVAILABLE:
pytest.skip("google-cloud-storage not installed")
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
# Setup mocks
mock_client = Mock()
mock_bucket = Mock()
@@ -262,13 +266,13 @@ def test_gcs_download_file():
mock_bucket.blob.return_value = mock_blob
mock_storage.Client.return_value = mock_client
adaptor = GCSStorageAdaptor(bucket='test-bucket')
adaptor = GCSStorageAdaptor(bucket="test-bucket")
with tempfile.TemporaryDirectory() as tmp_dir:
local_path = os.path.join(tmp_dir, 'downloaded.txt')
local_path = os.path.join(tmp_dir, "downloaded.txt")
# Test download
adaptor.download_file('test.txt', local_path)
adaptor.download_file("test.txt", local_path)
mock_blob.download_to_filename.assert_called_once()
@@ -278,27 +282,27 @@ def test_gcs_list_files():
if not GCS_AVAILABLE:
pytest.skip("google-cloud-storage not installed")
with patch('skill_seekers.cli.storage.gcs_storage.storage') as mock_storage:
with patch("skill_seekers.cli.storage.gcs_storage.storage") as mock_storage:
# Setup mocks
mock_client = Mock()
mock_blob = Mock()
mock_blob.name = 'file1.txt'
mock_blob.name = "file1.txt"
mock_blob.size = 100
mock_blob.updated = Mock(isoformat=lambda: '2024-01-01T00:00:00')
mock_blob.etag = 'abc123'
mock_blob.updated = Mock(isoformat=lambda: "2024-01-01T00:00:00")
mock_blob.etag = "abc123"
mock_blob.metadata = {}
mock_client.list_blobs.return_value = [mock_blob]
mock_storage.Client.return_value = mock_client
mock_client.bucket.return_value = Mock()
adaptor = GCSStorageAdaptor(bucket='test-bucket')
adaptor = GCSStorageAdaptor(bucket="test-bucket")
# Test list
files = adaptor.list_files('prefix/')
files = adaptor.list_files("prefix/")
assert len(files) == 1
assert files[0].key == 'file1.txt'
assert files[0].key == "file1.txt"
assert files[0].size == 100
@@ -306,12 +310,13 @@ def test_gcs_list_files():
# Azure Storage Tests
# ========================================
def test_azure_upload_file():
"""Test Azure file upload."""
if not AZURE_AVAILABLE:
pytest.skip("azure-storage-blob not installed")
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
# Setup mocks
mock_service_client = Mock()
mock_container_client = Mock()
@@ -321,19 +326,21 @@ def test_azure_upload_file():
mock_container_client.get_blob_client.return_value = mock_blob_client
mock_blob_service.from_connection_string.return_value = mock_service_client
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
adaptor = AzureStorageAdaptor(
container="test-container", connection_string=connection_string
)
# Create temporary file
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(b'test content')
tmp_file.write(b"test content")
tmp_path = tmp_file.name
try:
# Test upload
result = adaptor.upload_file(tmp_path, 'test.txt')
result = adaptor.upload_file(tmp_path, "test.txt")
assert 'test.blob.core.windows.net' in result
assert "test.blob.core.windows.net" in result
mock_blob_client.upload_blob.assert_called_once()
finally:
Path(tmp_path).unlink()
@@ -344,30 +351,32 @@ def test_azure_download_file():
if not AZURE_AVAILABLE:
pytest.skip("azure-storage-blob not installed")
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
# Setup mocks
mock_service_client = Mock()
mock_container_client = Mock()
mock_blob_client = Mock()
mock_download_stream = Mock()
mock_download_stream.readall.return_value = b'test content'
mock_download_stream.readall.return_value = b"test content"
mock_service_client.get_container_client.return_value = mock_container_client
mock_container_client.get_blob_client.return_value = mock_blob_client
mock_blob_client.download_blob.return_value = mock_download_stream
mock_blob_service.from_connection_string.return_value = mock_service_client
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
adaptor = AzureStorageAdaptor(
container="test-container", connection_string=connection_string
)
with tempfile.TemporaryDirectory() as tmp_dir:
local_path = os.path.join(tmp_dir, 'downloaded.txt')
local_path = os.path.join(tmp_dir, "downloaded.txt")
# Test download
adaptor.download_file('test.txt', local_path)
adaptor.download_file("test.txt", local_path)
assert Path(local_path).exists()
assert Path(local_path).read_bytes() == b'test content'
assert Path(local_path).read_bytes() == b"test content"
def test_azure_list_files():
@@ -375,29 +384,31 @@ def test_azure_list_files():
if not AZURE_AVAILABLE:
pytest.skip("azure-storage-blob not installed")
with patch('skill_seekers.cli.storage.azure_storage.BlobServiceClient') as mock_blob_service:
with patch("skill_seekers.cli.storage.azure_storage.BlobServiceClient") as mock_blob_service:
# Setup mocks
mock_service_client = Mock()
mock_container_client = Mock()
mock_blob = Mock()
mock_blob.name = 'file1.txt'
mock_blob.name = "file1.txt"
mock_blob.size = 100
mock_blob.last_modified = Mock(isoformat=lambda: '2024-01-01T00:00:00')
mock_blob.etag = 'abc123'
mock_blob.last_modified = Mock(isoformat=lambda: "2024-01-01T00:00:00")
mock_blob.etag = "abc123"
mock_blob.metadata = {}
mock_container_client.list_blobs.return_value = [mock_blob]
mock_service_client.get_container_client.return_value = mock_container_client
mock_blob_service.from_connection_string.return_value = mock_service_client
connection_string = 'DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key'
adaptor = AzureStorageAdaptor(container='test-container', connection_string=connection_string)
connection_string = "DefaultEndpointsProtocol=https;AccountName=test;AccountKey=key"
adaptor = AzureStorageAdaptor(
container="test-container", connection_string=connection_string
)
# Test list
files = adaptor.list_files('prefix/')
files = adaptor.list_files("prefix/")
assert len(files) == 1
assert files[0].key == 'file1.txt'
assert files[0].key == "file1.txt"
assert files[0].size == 100
@@ -405,53 +416,55 @@ def test_azure_list_files():
# Base Adaptor Tests
# ========================================
def test_storage_object():
"""Test StorageObject dataclass."""
obj = StorageObject(
key='test.txt',
key="test.txt",
size=100,
last_modified='2024-01-01T00:00:00',
etag='abc123',
metadata={'key': 'value'}
last_modified="2024-01-01T00:00:00",
etag="abc123",
metadata={"key": "value"},
)
assert obj.key == 'test.txt'
assert obj.key == "test.txt"
assert obj.size == 100
assert obj.metadata == {'key': 'value'}
assert obj.metadata == {"key": "value"}
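# Hedged sketch: a StorageObject shape consistent with the assertions above
# (field names taken from this test; the real dataclass may differ):
from dataclasses import dataclass, field


@dataclass
class StorageObjectSketch:
    key: str
    size: int
    last_modified: str
    etag: str
    metadata: dict = field(default_factory=dict)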
def test_base_adaptor_abstract():
"""Test that BaseStorageAdaptor cannot be instantiated."""
with pytest.raises(TypeError):
BaseStorageAdaptor(bucket='test')
BaseStorageAdaptor(bucket="test")
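# Hedged sketch: the abstract base exercised above is not shown in this diff,
# but for TypeError-on-instantiation to hold it presumably looks roughly like
# this (method names inferred from the adaptor tests):
from abc import ABC, abstractmethod


class BaseStorageAdaptorSketch(ABC):
    def __init__(self, bucket: str):
        self.bucket = bucket

    @abstractmethod
    def upload_file(self, local_path: str, key: str) -> str: ...

    @abstractmethod
    def download_file(self, key: str, local_path: str) -> None: ...

    @abstractmethod
    def list_files(self, prefix: str = "") -> list: ...


# Python raises TypeError when an ABC with unimplemented abstract methods is
# instantiated, which is exactly what the pytest.raises block above asserts.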
# ========================================
# Integration-style Tests
# ========================================
def test_upload_directory():
"""Test directory upload."""
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
# Create temporary directory with files
with tempfile.TemporaryDirectory() as tmp_dir:
(Path(tmp_dir) / 'file1.txt').write_text('content1')
(Path(tmp_dir) / 'file2.txt').write_text('content2')
(Path(tmp_dir) / 'subdir').mkdir()
(Path(tmp_dir) / 'subdir' / 'file3.txt').write_text('content3')
(Path(tmp_dir) / "file1.txt").write_text("content1")
(Path(tmp_dir) / "file2.txt").write_text("content2")
(Path(tmp_dir) / "subdir").mkdir()
(Path(tmp_dir) / "subdir" / "file3.txt").write_text("content3")
# Test upload directory
uploaded_files = adaptor.upload_directory(tmp_dir, 'skills/')
uploaded_files = adaptor.upload_directory(tmp_dir, "skills/")
assert len(uploaded_files) == 3
assert mock_client.upload_file.call_count == 3
@@ -462,25 +475,25 @@ def test_download_directory():
if not BOTO3_AVAILABLE:
pytest.skip("boto3 not installed")
with patch('skill_seekers.cli.storage.s3_storage.boto3') as mock_boto3:
with patch("skill_seekers.cli.storage.s3_storage.boto3") as mock_boto3:
# Setup mocks
mock_client = Mock()
mock_paginator = Mock()
mock_page_iterator = [
{
'Contents': [
"Contents": [
{
'Key': 'skills/file1.txt',
'Size': 100,
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
'ETag': '"abc"'
"Key": "skills/file1.txt",
"Size": 100,
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
"ETag": '"abc"',
},
{
'Key': 'skills/file2.txt',
'Size': 200,
'LastModified': Mock(isoformat=lambda: '2024-01-01T00:00:00'),
'ETag': '"def"'
}
"Key": "skills/file2.txt",
"Size": 200,
"LastModified": Mock(isoformat=lambda: "2024-01-01T00:00:00"),
"ETag": '"def"',
},
]
}
]
@@ -490,11 +503,11 @@ def test_download_directory():
mock_boto3.client.return_value = mock_client
mock_boto3.resource.return_value = Mock()
adaptor = S3StorageAdaptor(bucket='test-bucket')
adaptor = S3StorageAdaptor(bucket="test-bucket")
with tempfile.TemporaryDirectory() as tmp_dir:
# Test download directory
downloaded_files = adaptor.download_directory('skills/', tmp_dir)
downloaded_files = adaptor.download_directory("skills/", tmp_dir)
assert len(downloaded_files) == 2
assert mock_client.download_file.call_count == 2
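# Hedged sketch of the paginator loop the mocks above simulate; the real
# S3StorageAdaptor.download_directory is not part of this diff:
import os


def download_directory_sketch(client, bucket: str, prefix: str, dest: str) -> list:
    paginator = client.get_paginator("list_objects_v2")
    downloaded = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            key = obj["Key"]
            local_path = os.path.join(dest, os.path.relpath(key, prefix))
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            client.download_file(bucket, key, local_path)  # one call per object
            downloaded.append(local_path)
    return downloaded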

View File

@@ -23,6 +23,7 @@ from skill_seekers.embedding.cache import EmbeddingCache
# Cache Tests
# ========================================
def test_cache_init():
"""Test cache initialization."""
cache = EmbeddingCache(":memory:")
@@ -121,6 +122,7 @@ def test_cache_context_manager():
# Generator Tests
# ========================================
def test_generator_init():
"""Test generator initialization."""
generator = EmbeddingGenerator()
@@ -174,7 +176,7 @@ def test_generator_compute_hash():
assert hash1 != hash4
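# Hedged sketch: a cache key that varies with both text and model would
# explain the hash1 != hash4 assertion above (the generator's real hashing
# code is not shown in this diff):
import hashlib


def compute_hash_sketch(text: str, model: str) -> str:
    return hashlib.sha256(f"{model}:{text}".encode("utf-8")).hexdigest()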
@patch('skill_seekers.embedding.generator.SENTENCE_TRANSFORMERS_AVAILABLE', False)
@patch("skill_seekers.embedding.generator.SENTENCE_TRANSFORMERS_AVAILABLE", False)
def test_generator_sentence_transformers_not_available():
"""Test sentence-transformers not available."""
generator = EmbeddingGenerator()
@@ -183,7 +185,7 @@ def test_generator_sentence_transformers_not_available():
generator.generate("test", model="all-MiniLM-L6-v2")
@patch('skill_seekers.embedding.generator.OPENAI_AVAILABLE', False)
@patch("skill_seekers.embedding.generator.OPENAI_AVAILABLE", False)
def test_generator_openai_not_available():
"""Test OpenAI not available."""
generator = EmbeddingGenerator()
@@ -192,7 +194,7 @@ def test_generator_openai_not_available():
generator.generate("test", model="text-embedding-3-small")
@patch('skill_seekers.embedding.generator.VOYAGE_AVAILABLE', False)
@patch("skill_seekers.embedding.generator.VOYAGE_AVAILABLE", False)
def test_generator_voyage_not_available():
"""Test Voyage AI not available."""
generator = EmbeddingGenerator()
@@ -227,13 +229,10 @@ def test_generator_voyage_large_2_model_info():
# Model Tests
# ========================================
def test_embedding_request():
"""Test EmbeddingRequest model."""
request = EmbeddingRequest(
text="Hello world",
model="text-embedding-3-small",
normalize=True
)
request = EmbeddingRequest(text="Hello world", model="text-embedding-3-small", normalize=True)
assert request.text == "Hello world"
assert request.model == "text-embedding-3-small"
@@ -243,9 +242,7 @@ def test_embedding_request():
def test_batch_embedding_request():
"""Test BatchEmbeddingRequest model."""
request = BatchEmbeddingRequest(
texts=["text1", "text2", "text3"],
model="text-embedding-3-small",
batch_size=32
texts=["text1", "text2", "text3"], model="text-embedding-3-small", batch_size=32
)
assert len(request.texts) == 3
@@ -255,10 +252,7 @@ def test_batch_embedding_request():
def test_embedding_response():
"""Test EmbeddingResponse model."""
response = EmbeddingResponse(
embedding=[0.1, 0.2, 0.3],
model="test-model",
dimensions=3,
cached=False
embedding=[0.1, 0.2, 0.3], model="test-model", dimensions=3, cached=False
)
assert len(response.embedding) == 3
@@ -273,7 +267,7 @@ def test_batch_embedding_response():
model="test-model",
dimensions=2,
count=2,
cached_count=1
cached_count=1,
)
assert len(response.embeddings) == 2
@@ -288,7 +282,7 @@ def test_health_response():
version="1.0.0",
models=["model1", "model2"],
cache_enabled=True,
cache_size=100
cache_size=100,
)
assert response.status == "ok"
@@ -303,7 +297,7 @@ def test_model_info():
provider="openai",
dimensions=1536,
max_tokens=8191,
cost_per_million=0.02
cost_per_million=0.02,
)
assert info.name == "test-model"
@@ -315,6 +309,7 @@ def test_model_info():
# Integration Tests
# ========================================
def test_cache_batch_operations():
"""Test cache batch operations."""
cache = EmbeddingCache(":memory:")

View File

@@ -23,7 +23,7 @@ from skill_seekers.cli.embedding_pipeline import (
EmbeddingPipeline,
LocalEmbeddingProvider,
EmbeddingCache,
CostTracker
CostTracker,
)
@@ -112,21 +112,16 @@ def test_cost_tracker():
stats = tracker.get_stats()
assert stats['total_requests'] == 2
assert stats['total_tokens'] == 1500
assert stats['cache_hits'] == 1
assert stats['cache_misses'] == 1
assert '50.0%' in stats['cache_rate']
assert stats["total_requests"] == 2
assert stats["total_tokens"] == 1500
assert stats["cache_hits"] == 1
assert stats["cache_misses"] == 1
assert "50.0%" in stats["cache_rate"]
def test_pipeline_initialization():
"""Test pipeline initialization."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=128,
batch_size=10
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=128, batch_size=10)
pipeline = EmbeddingPipeline(config)
@@ -137,12 +132,7 @@ def test_pipeline_initialization():
def test_pipeline_generate_batch():
"""Test batch embedding generation."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=64,
batch_size=2
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=64, batch_size=2)
pipeline = EmbeddingPipeline(config)
@@ -159,11 +149,11 @@ def test_pipeline_caching():
"""Test pipeline uses caching."""
with tempfile.TemporaryDirectory() as tmpdir:
config = EmbeddingConfig(
provider='local',
model='test-model',
provider="local",
model="test-model",
dimension=32,
batch_size=10,
cache_dir=Path(tmpdir)
cache_dir=Path(tmpdir),
)
pipeline = EmbeddingPipeline(config)
@@ -184,10 +174,10 @@ def test_pipeline_caching():
def test_pipeline_batch_processing():
"""Test large batch is processed in chunks."""
config = EmbeddingConfig(
provider='local',
model='test-model',
provider="local",
model="test-model",
dimension=16,
batch_size=3 # Small batch size
batch_size=3, # Small batch size
)
pipeline = EmbeddingPipeline(config)
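# Hedged sketch of the chunking behavior batch_size=3 implies (illustration
# only; the pipeline's real batching code is not shown in this diff):
def split_batches_sketch(texts: list, batch_size: int = 3) -> list:
    return [texts[i : i + batch_size] for i in range(0, len(texts), batch_size)]


# split_batches_sketch(["a", "b", "c", "d", "e"]) -> [["a", "b", "c"], ["d", "e"]]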
@@ -201,11 +191,7 @@ def test_pipeline_batch_processing():
def test_validate_dimensions_valid():
"""Test dimension validation with valid embeddings."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=128
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
pipeline = EmbeddingPipeline(config)
@@ -217,11 +203,7 @@ def test_validate_dimensions_valid():
def test_validate_dimensions_invalid():
"""Test dimension validation with invalid embeddings."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=128
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
pipeline = EmbeddingPipeline(config)
@@ -234,30 +216,22 @@ def test_validate_dimensions_invalid():
def test_embedding_result_metadata():
"""Test embedding result includes metadata."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=256
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=256)
pipeline = EmbeddingPipeline(config)
texts = ["test"]
result = pipeline.generate_batch(texts, show_progress=False)
assert 'provider' in result.metadata
assert 'model' in result.metadata
assert 'dimension' in result.metadata
assert result.metadata['dimension'] == 256
assert "provider" in result.metadata
assert "model" in result.metadata
assert "dimension" in result.metadata
assert result.metadata["dimension"] == 256
def test_cost_stats():
"""Test cost statistics tracking."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=64
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=64)
pipeline = EmbeddingPipeline(config)
@@ -266,18 +240,14 @@ def test_cost_stats():
stats = pipeline.get_cost_stats()
assert 'total_requests' in stats
assert 'cache_hits' in stats
assert 'estimated_cost' in stats
assert "total_requests" in stats
assert "cache_hits" in stats
assert "estimated_cost" in stats
def test_empty_batch():
"""Test handling empty batch."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=32
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=32)
pipeline = EmbeddingPipeline(config)
@@ -289,11 +259,7 @@ def test_empty_batch():
def test_single_document():
"""Test single document generation."""
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=128
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=128)
pipeline = EmbeddingPipeline(config)
@@ -306,11 +272,7 @@ def test_single_document():
def test_different_dimensions():
"""Test different embedding dimensions."""
for dim in [64, 128, 256, 512]:
config = EmbeddingConfig(
provider='local',
model='test-model',
dimension=dim
)
config = EmbeddingConfig(provider="local", model="test-model", dimension=dim)
pipeline = EmbeddingPipeline(config)
result = pipeline.generate_batch(["test"], show_progress=False)

View File

@@ -152,9 +152,7 @@ class TestMultiAgentSupport:
def test_rejects_missing_executable(self, tmp_path, monkeypatch):
"""Test rejection when executable is not found on PATH."""
monkeypatch.setattr(
"skill_seekers.cli.enhance_skill_local.shutil.which", lambda _exe: None
)
monkeypatch.setattr("skill_seekers.cli.enhance_skill_local.shutil.which", lambda _exe: None)
skill_dir = _make_skill_dir(tmp_path)
with pytest.raises(ValueError, match="not found in PATH"):

View File

@@ -80,8 +80,9 @@ class TestFrameworkDetection(unittest.TestCase):
arch_data = json.load(f)
self.assertIn("frameworks_detected", arch_data)
self.assertIn("Flask", arch_data["frameworks_detected"],
"Flask should be detected from imports")
self.assertIn(
"Flask", arch_data["frameworks_detected"], "Flask should be detected from imports"
)
def test_files_with_imports_are_included(self):
"""Test that files with only imports are included in analysis (Issue #239)."""
@@ -119,24 +120,19 @@ class TestFrameworkDetection(unittest.TestCase):
analysis_data = json.load(f)
# File should be included
self.assertGreater(len(analysis_data["files"]), 0,
"Files with imports should be included")
self.assertGreater(len(analysis_data["files"]), 0, "Files with imports should be included")
# Find our import-only file
import_file = next(
(f for f in analysis_data["files"] if "imports_only.py" in f["file"]),
None
(f for f in analysis_data["files"] if "imports_only.py" in f["file"]), None
)
self.assertIsNotNone(import_file, "Import-only file should be in analysis")
# Verify imports were extracted
self.assertIn("imports", import_file, "Imports should be extracted")
self.assertGreater(len(import_file["imports"]), 0,
"Should have captured imports")
self.assertIn("django", import_file["imports"],
"Django import should be captured")
self.assertIn("flask", import_file["imports"],
"Flask import should be captured")
self.assertGreater(len(import_file["imports"]), 0, "Should have captured imports")
self.assertIn("django", import_file["imports"], "Django import should be captured")
self.assertIn("flask", import_file["imports"], "Flask import should be captured")
def test_no_false_positive_frameworks(self):
"""Test that framework detection doesn't produce false positives (Issue #239)."""
@@ -145,10 +141,7 @@ class TestFrameworkDetection(unittest.TestCase):
app_dir.mkdir()
# File with no framework imports
(app_dir / "utils.py").write_text(
"def my_function():\n"
" return 'hello'\n"
)
(app_dir / "utils.py").write_text("def my_function():\n return 'hello'\n")
# Run codebase analyzer
from skill_seekers.cli.codebase_scraper import main as scraper_main
@@ -180,12 +173,10 @@ class TestFrameworkDetection(unittest.TestCase):
frameworks = arch_data.get("frameworks_detected", [])
# Should not detect Flask just from "app" directory name
self.assertNotIn("Flask", frameworks,
"Should not detect Flask without imports")
self.assertNotIn("Flask", frameworks, "Should not detect Flask without imports")
# Should not detect other frameworks with "app" in markers
for fw in ["ASP.NET", "Rails", "Laravel"]:
self.assertNotIn(fw, frameworks,
f"Should not detect {fw} without real evidence")
self.assertNotIn(fw, frameworks, f"Should not detect {fw} without real evidence")
if __name__ == "__main__":

View File

@@ -20,9 +20,7 @@ import time
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from skill_seekers.cli.incremental_updater import (
IncrementalUpdater
)
from skill_seekers.cli.incremental_updater import IncrementalUpdater
@pytest.fixture
@@ -281,15 +279,15 @@ def test_apply_update_package(temp_skill_dir):
"timestamp": "2026-02-05T12:00:00",
"skill_name": "test_skill",
"change_summary": {"modified": 1},
"total_changes": 1
"total_changes": 1,
},
"changes": {
"SKILL.md": {
"action": "modify",
"version": 2,
"content": "# Updated Content\n\nApplied from package"
"content": "# Updated Content\n\nApplied from package",
}
}
},
}
package_path.write_text(json.dumps(update_data))
@@ -298,7 +296,9 @@ def test_apply_update_package(temp_skill_dir):
success = updater.apply_update_package(package_path)
assert success
assert (temp_skill_dir / "SKILL.md").read_text() == "# Updated Content\n\nApplied from package"
assert (
temp_skill_dir / "SKILL.md"
).read_text() == "# Updated Content\n\nApplied from package"
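# Hedged sketch of what applying the package above involves: read the JSON,
# then write each "modify" change to its target file (the real
# apply_update_package also handles other actions and version checks):
import json
from pathlib import Path


def apply_update_sketch(package_path: Path, skill_dir: Path) -> bool:
    data = json.loads(package_path.read_text())
    for rel_path, change in data["changes"].items():
        if change["action"] == "modify":
            (skill_dir / rel_path).write_text(change["content"])
    return True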
def test_content_hash_consistency(temp_skill_dir):

View File

@@ -92,7 +92,11 @@ class TestConfigLoading(unittest.TestCase):
{
"type": "documentation",
"base_url": "https://example.com/",
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre code"},
"selectors": {
"main_content": "article",
"title": "h1",
"code_blocks": "pre code",
},
"rate_limit": 0.5,
"max_pages": 100,
}

View File

@@ -113,6 +113,7 @@ def check_service_available(url: str, timeout: int = 5) -> bool:
"""Check if a service is available."""
try:
import requests
response = requests.get(url, timeout=timeout)
return response.status_code == 200
except Exception:
@@ -133,7 +134,9 @@ class TestWeaviateIntegration:
# Check if Weaviate is running
if not check_service_available("http://localhost:8080/v1/.well-known/ready"):
pytest.skip("Weaviate not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
pytest.skip(
"Weaviate not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
)
# Connect to Weaviate
try:
@@ -144,10 +147,7 @@ class TestWeaviateIntegration:
# Package skill
adaptor = get_adaptor("weaviate")
SkillMetadata(
name="integration_test",
description="Integration test skill for Weaviate"
)
SkillMetadata(name="integration_test", description="Integration test skill for Weaviate")
package_path = adaptor.package(sample_skill_dir, tmp_path)
assert package_path.exists(), "Package not created"
@@ -173,19 +173,16 @@ class TestWeaviateIntegration:
with client.batch as batch:
for obj in data["objects"]:
batch.add_data_object(
data_object=obj["properties"],
class_name=class_name,
uuid=obj["id"]
data_object=obj["properties"], class_name=class_name, uuid=obj["id"]
)
# Wait for indexing
time.sleep(1)
# Query - Get all objects
result = client.query.get(
class_name,
["content", "source", "category"]
).with_limit(10).do()
result = (
client.query.get(class_name, ["content", "source", "category"]).with_limit(10).do()
)
# Verify results
assert "data" in result, "Query returned no data"
@@ -203,8 +200,9 @@ class TestWeaviateIntegration:
# Verify content
contents = [obj["content"] for obj in objects]
assert any("vector" in content.lower() for content in contents), \
assert any("vector" in content.lower() for content in contents), (
"Expected content not found"
)
finally:
# Cleanup - Delete collection
@@ -234,7 +232,7 @@ class TestWeaviateIntegration:
description="Test metadata preservation",
version="2.0.0",
author="Integration Test Suite",
tags=["test", "integration", "weaviate"]
tags=["test", "integration", "weaviate"],
)
package_path = adaptor.package(sample_skill_dir, tmp_path)
@@ -249,18 +247,17 @@ class TestWeaviateIntegration:
with client.batch as batch:
for obj in data["objects"]:
batch.add_data_object(
data_object=obj["properties"],
class_name=class_name,
uuid=obj["id"]
data_object=obj["properties"], class_name=class_name, uuid=obj["id"]
)
time.sleep(1)
# Query and verify metadata
result = client.query.get(
class_name,
["source", "version", "author", "tags"]
).with_limit(1).do()
result = (
client.query.get(class_name, ["source", "version", "author", "tags"])
.with_limit(1)
.do()
)
obj = result["data"]["Get"][class_name][0]
assert obj["source"] == "metadata_test", "Source not preserved"
@@ -287,7 +284,9 @@ class TestChromaIntegration:
# Check if Chroma is running
if not check_service_available("http://localhost:8000/api/v1/heartbeat"):
pytest.skip("ChromaDB not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
pytest.skip(
"ChromaDB not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
)
# Connect to ChromaDB
try:
@@ -299,8 +298,7 @@ class TestChromaIntegration:
# Package skill
adaptor = get_adaptor("chroma")
SkillMetadata(
name="chroma_integration_test",
description="Integration test skill for ChromaDB"
name="chroma_integration_test", description="Integration test skill for ChromaDB"
)
package_path = adaptor.package(sample_skill_dir, tmp_path)
@@ -326,9 +324,7 @@ class TestChromaIntegration:
# Add documents
collection.add(
documents=data["documents"],
metadatas=data["metadatas"],
ids=data["ids"]
documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"]
)
# Wait for indexing
@@ -340,8 +336,7 @@ class TestChromaIntegration:
# Verify results
assert "documents" in results, "Query returned no documents"
assert len(results["documents"]) > 0, "No documents returned"
assert len(results["documents"]) == len(data["documents"]), \
"Document count mismatch"
assert len(results["documents"]) == len(data["documents"]), "Document count mismatch"
# Verify metadata
assert "metadatas" in results, "Query returned no metadatas"
@@ -350,8 +345,9 @@ class TestChromaIntegration:
assert "category" in first_metadata, "Missing category in metadata"
# Verify content
assert any("vector" in doc.lower() for doc in results["documents"]), \
assert any("vector" in doc.lower() for doc in results["documents"]), (
"Expected content not found"
)
finally:
# Cleanup - Delete collection
@@ -377,8 +373,7 @@ class TestChromaIntegration:
# Package and upload
adaptor = get_adaptor("chroma")
metadata = SkillMetadata(
name="chroma_filter_test",
description="Test filtering capabilities"
name="chroma_filter_test", description="Test filtering capabilities"
)
package_path = adaptor.package(sample_skill_dir, tmp_path)
@@ -390,23 +385,18 @@ class TestChromaIntegration:
try:
collection = client.get_or_create_collection(name=collection_name)
collection.add(
documents=data["documents"],
metadatas=data["metadatas"],
ids=data["ids"]
documents=data["documents"], metadatas=data["metadatas"], ids=data["ids"]
)
time.sleep(1)
# Query with category filter
results = collection.get(
where={"category": "getting started"}
)
results = collection.get(where={"category": "getting started"})
# Verify filtering worked
assert len(results["documents"]) > 0, "No documents matched filter"
for metadata in results["metadatas"]:
assert metadata["category"] == "getting started", \
"Filter returned wrong category"
assert metadata["category"] == "getting started", "Filter returned wrong category"
finally:
with contextlib.suppress(Exception):
@@ -428,7 +418,9 @@ class TestQdrantIntegration:
# Check if Qdrant is running
if not check_service_available("http://localhost:6333/"):
pytest.skip("Qdrant not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)")
pytest.skip(
"Qdrant not running (start with: docker-compose -f tests/docker-compose.test.yml up -d)"
)
# Connect to Qdrant
try:
@@ -440,8 +432,7 @@ class TestQdrantIntegration:
# Package skill
adaptor = get_adaptor("qdrant")
SkillMetadata(
name="qdrant_integration_test",
description="Integration test skill for Qdrant"
name="qdrant_integration_test", description="Integration test skill for Qdrant"
)
package_path = adaptor.package(sample_skill_dir, tmp_path)
@@ -465,25 +456,21 @@ class TestQdrantIntegration:
# Create collection
client.create_collection(
collection_name=collection_name,
vectors_config=VectorParams(
size=vector_size,
distance=Distance.COSINE
)
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
)
# Upload points (with placeholder vectors for testing)
points = []
for point in data["points"]:
points.append(PointStruct(
id=point["id"],
vector=[0.0] * vector_size, # Placeholder vectors
payload=point["payload"]
))
points.append(
PointStruct(
id=point["id"],
vector=[0.0] * vector_size, # Placeholder vectors
payload=point["payload"],
)
)
client.upsert(
collection_name=collection_name,
points=points
)
client.upsert(collection_name=collection_name, points=points)
# Wait for indexing
time.sleep(1)
@@ -493,14 +480,10 @@ class TestQdrantIntegration:
# Verify collection
assert collection_info.points_count > 0, "No points in collection"
assert collection_info.points_count == len(data["points"]), \
"Point count mismatch"
assert collection_info.points_count == len(data["points"]), "Point count mismatch"
# Query - Scroll through points
scroll_result = client.scroll(
collection_name=collection_name,
limit=10
)
scroll_result = client.scroll(collection_name=collection_name, limit=10)
points_list = scroll_result[0]
assert len(points_list) > 0, "No points returned"
@@ -514,8 +497,9 @@ class TestQdrantIntegration:
# Verify content
contents = [p.payload["content"] for p in points_list]
assert any("vector" in content.lower() for content in contents), \
assert any("vector" in content.lower() for content in contents), (
"Expected content not found"
)
finally:
# Cleanup - Delete collection
@@ -527,8 +511,12 @@ class TestQdrantIntegration:
try:
from qdrant_client import QdrantClient
from qdrant_client.models import (
Distance, VectorParams, PointStruct,
Filter, FieldCondition, MatchValue
Distance,
VectorParams,
PointStruct,
Filter,
FieldCondition,
MatchValue,
)
except ImportError:
pytest.skip("qdrant-client not installed")
@@ -544,10 +532,7 @@ class TestQdrantIntegration:
# Package and upload
adaptor = get_adaptor("qdrant")
SkillMetadata(
name="qdrant_filter_test",
description="Test filtering capabilities"
)
SkillMetadata(name="qdrant_filter_test", description="Test filtering capabilities")
package_path = adaptor.package(sample_skill_dir, tmp_path)
with open(package_path) as f:
@@ -560,19 +545,16 @@ class TestQdrantIntegration:
# Create and upload
client.create_collection(
collection_name=collection_name,
vectors_config=VectorParams(
size=vector_size,
distance=Distance.COSINE
)
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
)
points = []
for point in data["points"]:
points.append(PointStruct(
id=point["id"],
vector=[0.0] * vector_size,
payload=point["payload"]
))
points.append(
PointStruct(
id=point["id"], vector=[0.0] * vector_size, payload=point["payload"]
)
)
client.upsert(collection_name=collection_name, points=points)
time.sleep(1)
@@ -581,14 +563,9 @@ class TestQdrantIntegration:
scroll_result = client.scroll(
collection_name=collection_name,
scroll_filter=Filter(
must=[
FieldCondition(
key="type",
match=MatchValue(value="reference")
)
]
must=[FieldCondition(key="type", match=MatchValue(value="reference"))]
),
limit=10
limit=10,
)
points_list = scroll_result[0]
@@ -596,8 +573,7 @@ class TestQdrantIntegration:
# Verify filtering worked
assert len(points_list) > 0, "No points matched filter"
for point in points_list:
assert point.payload["type"] == "reference", \
"Filter returned wrong type"
assert point.payload["type"] == "reference", "Filter returned wrong type"
finally:
with contextlib.suppress(Exception):
@@ -607,4 +583,5 @@ class TestQdrantIntegration:
if __name__ == "__main__":
# Run integration tests
import sys
sys.exit(pytest.main([__file__, "-v", "-m", "integration"]))

View File

@@ -192,9 +192,7 @@ https://mikro-orm.io/docs/defining-entities#formulas
# Verify converted URLs are valid
# In real scenario, these would be added to pending_urls and scraped
self.assertTrue(
len(converted_urls) > 0, "Should generate at least one URL to scrape"
)
self.assertTrue(len(converted_urls) > 0, "Should generate at least one URL to scrape")
# Verify no URLs would cause 404 (no anchors in middle of path)
for url in converted_urls:

View File

@@ -464,13 +464,15 @@ class TestValidateConfigTool(unittest.IsolatedAsyncioTestCase):
valid_config = {
"name": "valid-test",
"description": "Test configuration",
"sources": [{
"type": "documentation",
"base_url": "https://example.com/",
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"rate_limit": 0.5,
"max_pages": 100,
}],
"sources": [
{
"type": "documentation",
"base_url": "https://example.com/",
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"rate_limit": 0.5,
"max_pages": 100,
}
],
}
with open(config_path, "w") as f:
json.dump(valid_config, f)

View File

@@ -19,10 +19,7 @@ import json
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from skill_seekers.cli.multilang_support import (
LanguageDetector,
MultiLanguageManager
)
from skill_seekers.cli.multilang_support import LanguageDetector, MultiLanguageManager
def test_detect_english():
@@ -32,8 +29,8 @@ def test_detect_english():
text = "This is an English document. It contains common English words."
lang_info = detector.detect(text)
assert lang_info.code == 'en'
assert lang_info.name == 'English'
assert lang_info.code == "en"
assert lang_info.name == "English"
assert lang_info.confidence > 0.0
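# Hedged sketch: a stopword-frequency scorer consistent with the code and
# confidence fields asserted above; the real LanguageDetector also resolves
# a display name and script, and the word lists here are illustrative:
STOPWORDS_SKETCH = {
    "en": {"the", "is", "an", "it", "and", "of"},
    "es": {"es", "un", "en", "de", "la", "y"},
}


def detect_sketch(text: str) -> tuple:
    words = [w.strip(".,") for w in text.lower().split()]
    scores = {
        code: sum(w in stops for w in words) / max(len(words), 1)
        for code, stops in STOPWORDS_SKETCH.items()
    }
    best = max(scores, key=scores.get)
    return best, scores[best]  # (language code, confidence in [0, 1])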
@@ -44,8 +41,8 @@ def test_detect_spanish():
text = "Este es un documento en español. Contiene palabras comunes en español."
lang_info = detector.detect(text)
assert lang_info.code == 'es'
assert lang_info.name == 'Spanish'
assert lang_info.code == "es"
assert lang_info.name == "Spanish"
def test_detect_french():
@@ -55,8 +52,8 @@ def test_detect_french():
text = "Ceci est un document en français. Il contient des mots français communs."
lang_info = detector.detect(text)
assert lang_info.code == 'fr'
assert lang_info.name == 'French'
assert lang_info.code == "fr"
assert lang_info.name == "French"
def test_detect_german():
@@ -66,8 +63,8 @@ def test_detect_german():
text = "Dies ist ein deutsches Dokument. Es enthält übliche deutsche Wörter."
lang_info = detector.detect(text)
assert lang_info.code == 'de'
assert lang_info.name == 'German'
assert lang_info.code == "de"
assert lang_info.name == "German"
def test_detect_chinese():
@@ -77,33 +74,33 @@ def test_detect_chinese():
text = "这是一个中文文档。它包含常见的中文字符。"
lang_info = detector.detect(text)
assert lang_info.code == 'zh'
assert lang_info.name == 'Chinese'
assert lang_info.code == "zh"
assert lang_info.name == "Chinese"
def test_detect_from_filename_dot_pattern():
"""Test language detection from filename (file.en.md pattern)."""
detector = LanguageDetector()
assert detector.detect_from_filename("README.en.md") == 'en'
assert detector.detect_from_filename("guide.es.md") == 'es'
assert detector.detect_from_filename("doc.fr.md") == 'fr'
assert detector.detect_from_filename("README.en.md") == "en"
assert detector.detect_from_filename("guide.es.md") == "es"
assert detector.detect_from_filename("doc.fr.md") == "fr"
def test_detect_from_filename_underscore_pattern():
"""Test language detection from filename (file_en.md pattern)."""
detector = LanguageDetector()
assert detector.detect_from_filename("README_en.md") == 'en'
assert detector.detect_from_filename("guide_es.md") == 'es'
assert detector.detect_from_filename("README_en.md") == "en"
assert detector.detect_from_filename("guide_es.md") == "es"
def test_detect_from_filename_dash_pattern():
"""Test language detection from filename (file-en.md pattern)."""
detector = LanguageDetector()
assert detector.detect_from_filename("README-en.md") == 'en'
assert detector.detect_from_filename("guide-es.md") == 'es'
assert detector.detect_from_filename("README-en.md") == "en"
assert detector.detect_from_filename("guide-es.md") == "es"
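# Hedged sketch: one regex covers the dot, underscore, and dash filename
# patterns the three tests above exercise (the real implementation may
# differ in which suffixes it accepts):
import re

LANG_SUFFIX_SKETCH = re.compile(r"[._-]([a-z]{2})\.md$")


def detect_from_filename_sketch(filename: str):
    match = LANG_SUFFIX_SKETCH.search(filename)
    return match.group(1) if match else None  # e.g. "guide.es.md" -> "es"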
def test_detect_from_filename_no_match():
@@ -118,15 +115,11 @@ def test_add_document_single_language():
"""Test adding documents in single language."""
manager = MultiLanguageManager()
manager.add_document(
"README.md",
"This is an English document.",
{"category": "overview"}
)
manager.add_document("README.md", "This is an English document.", {"category": "overview"})
assert len(manager.get_languages()) == 1
assert 'en' in manager.get_languages()
assert manager.get_document_count('en') == 1
assert "en" in manager.get_languages()
assert manager.get_document_count("en") == 1
def test_add_document_multiple_languages():
@@ -138,9 +131,9 @@ def test_add_document_multiple_languages():
manager.add_document("README.fr.md", "Ceci est français.", {})
assert len(manager.get_languages()) == 3
assert 'en' in manager.get_languages()
assert 'es' in manager.get_languages()
assert 'fr' in manager.get_languages()
assert "en" in manager.get_languages()
assert "es" in manager.get_languages()
assert "fr" in manager.get_languages()
def test_force_language():
@@ -148,15 +141,10 @@ def test_force_language():
manager = MultiLanguageManager()
# Force Spanish despite English content
manager.add_document(
"file.md",
"This is actually English content.",
{},
force_language='es'
)
manager.add_document("file.md", "This is actually English content.", {}, force_language="es")
assert 'es' in manager.get_languages()
assert manager.get_document_count('es') == 1
assert "es" in manager.get_languages()
assert manager.get_document_count("es") == 1
def test_filename_language_priority():
@@ -164,14 +152,10 @@ def test_filename_language_priority():
manager = MultiLanguageManager()
# Filename says Spanish, but content is English
manager.add_document(
"guide.es.md",
"This is English content.",
{}
)
manager.add_document("guide.es.md", "This is English content.", {})
# Should use filename language
assert 'es' in manager.get_languages()
assert "es" in manager.get_languages()
def test_document_count_all():
@@ -183,8 +167,8 @@ def test_document_count_all():
manager.add_document("file3.es.md", "Spanish doc", {})
assert manager.get_document_count() == 3
assert manager.get_document_count('en') == 2
assert manager.get_document_count('es') == 1
assert manager.get_document_count("en") == 2
assert manager.get_document_count("es") == 1
def test_primary_language():
@@ -195,7 +179,7 @@ def test_primary_language():
manager.add_document("file2.es.md", "Spanish doc", {})
# Primary should be first added
assert manager.primary_language == 'en'
assert manager.primary_language == "en"
def test_translation_status():
@@ -208,9 +192,9 @@ def test_translation_status():
status = manager.get_translation_status()
assert status.source_language == 'en'
assert 'es' in status.translated_languages
assert 'fr' in status.translated_languages
assert status.source_language == "en"
assert "es" in status.translated_languages
assert "fr" in status.translated_languages
assert len(status.translated_languages) == 2
@@ -225,17 +209,17 @@ def test_export_by_language():
exports = manager.export_by_language(Path(tmpdir))
assert len(exports) == 2
assert 'en' in exports
assert 'es' in exports
assert "en" in exports
assert "es" in exports
# Check files exist
assert exports['en'].exists()
assert exports['es'].exists()
assert exports["en"].exists()
assert exports["es"].exists()
# Check content
en_data = json.loads(exports['en'].read_text())
assert en_data['language'] == 'en'
assert en_data['document_count'] == 1
en_data = json.loads(exports["en"].read_text())
assert en_data["language"] == "en"
assert en_data["document_count"] == 1
def test_translation_report_generation():
@@ -268,11 +252,11 @@ def test_script_detection():
# English uses Latin script
en_info = detector.detect("This is English")
assert en_info.script == 'Latin'
assert en_info.script == "Latin"
# Chinese uses Han script
zh_info = detector.detect("这是中文")
assert zh_info.script == 'Han'
assert zh_info.script == "Han"
def test_confidence_scoring():
@@ -283,7 +267,7 @@ def test_confidence_scoring():
strong_en = "The quick brown fox jumps over the lazy dog. This is clearly English."
lang_info = detector.detect(strong_en)
assert lang_info.code == 'en'
assert lang_info.code == "en"
assert lang_info.confidence > 0.3 # Should have decent confidence
@@ -294,9 +278,9 @@ def test_metadata_preservation():
metadata = {"category": "guide", "version": "1.0"}
manager.add_document("file.md", "English content", metadata)
docs = manager.documents['en']
docs = manager.documents["en"]
assert len(docs) == 1
assert docs[0]['metadata'] == metadata
assert docs[0]["metadata"] == metadata
if __name__ == "__main__":

View File

@@ -14,9 +14,9 @@ class TestPresetDefinitions:
def test_all_presets_defined(self):
"""Test that all expected presets are defined."""
assert 'quick' in PRESETS
assert 'standard' in PRESETS
assert 'comprehensive' in PRESETS
assert "quick" in PRESETS
assert "standard" in PRESETS
assert "comprehensive" in PRESETS
assert len(PRESETS) == 3
def test_preset_structure(self):
@@ -25,7 +25,7 @@ class TestPresetDefinitions:
assert isinstance(preset, AnalysisPreset)
assert preset.name
assert preset.description
assert preset.depth in ['surface', 'deep', 'full']
assert preset.depth in ["surface", "deep", "full"]
assert isinstance(preset.features, dict)
assert 0 <= preset.enhance_level <= 3
assert preset.estimated_time
@@ -33,45 +33,45 @@ class TestPresetDefinitions:
def test_quick_preset(self):
"""Test quick preset configuration."""
quick = PRESETS['quick']
assert quick.name == 'Quick'
assert quick.depth == 'surface'
quick = PRESETS["quick"]
assert quick.name == "Quick"
assert quick.depth == "surface"
assert quick.enhance_level == 0
assert quick.estimated_time == '1-2 minutes'
assert quick.icon == '⚡'
assert quick.estimated_time == "1-2 minutes"
assert quick.icon == ""
# Quick should disable slow features
assert quick.features['api_reference'] # Essential
assert not quick.features['dependency_graph'] # Slow
assert not quick.features['patterns'] # Slow
assert not quick.features['test_examples'] # Slow
assert not quick.features['how_to_guides'] # Requires AI
assert quick.features['docs'] # Essential
assert quick.features["api_reference"] # Essential
assert not quick.features["dependency_graph"] # Slow
assert not quick.features["patterns"] # Slow
assert not quick.features["test_examples"] # Slow
assert not quick.features["how_to_guides"] # Requires AI
assert quick.features["docs"] # Essential
def test_standard_preset(self):
"""Test standard preset configuration."""
standard = PRESETS['standard']
assert standard.name == 'Standard'
assert standard.depth == 'deep'
standard = PRESETS["standard"]
assert standard.name == "Standard"
assert standard.depth == "deep"
assert standard.enhance_level == 1
assert standard.estimated_time == '5-10 minutes'
assert standard.icon == '🎯'
assert standard.estimated_time == "5-10 minutes"
assert standard.icon == "🎯"
# Standard should enable core features
assert standard.features['api_reference']
assert standard.features['dependency_graph']
assert standard.features['patterns']
assert standard.features['test_examples']
assert not standard.features['how_to_guides'] # Slow
assert standard.features['config_patterns']
assert standard.features['docs']
assert standard.features["api_reference"]
assert standard.features["dependency_graph"]
assert standard.features["patterns"]
assert standard.features["test_examples"]
assert not standard.features["how_to_guides"] # Slow
assert standard.features["config_patterns"]
assert standard.features["docs"]
def test_comprehensive_preset(self):
"""Test comprehensive preset configuration."""
comprehensive = PRESETS['comprehensive']
assert comprehensive.name == 'Comprehensive'
assert comprehensive.depth == 'full'
comprehensive = PRESETS["comprehensive"]
assert comprehensive.name == "Comprehensive"
assert comprehensive.depth == "full"
assert comprehensive.enhance_level == 3
assert comprehensive.estimated_time == '20-60 minutes'
assert comprehensive.icon == '🚀'
assert comprehensive.estimated_time == "20-60 minutes"
assert comprehensive.icon == "🚀"
# Comprehensive should enable ALL features
assert all(comprehensive.features.values())
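# Hedged sketch: an AnalysisPreset shape consistent with the attributes
# asserted across these tests (field names inferred; the real definition is
# not part of this diff):
from dataclasses import dataclass


@dataclass
class AnalysisPresetSketch:
    name: str
    description: str
    depth: str  # "surface" | "deep" | "full"
    features: dict
    enhance_level: int  # 0..3
    estimated_time: str
    icon: str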
@@ -81,44 +81,44 @@ class TestPresetManager:
def test_get_preset(self):
"""Test PresetManager.get_preset()."""
quick = PresetManager.get_preset('quick')
quick = PresetManager.get_preset("quick")
assert quick is not None
assert quick.name == 'Quick'
assert quick.depth == 'surface'
assert quick.name == "Quick"
assert quick.depth == "surface"
# Case insensitive
standard = PresetManager.get_preset('STANDARD')
standard = PresetManager.get_preset("STANDARD")
assert standard is not None
assert standard.name == 'Standard'
assert standard.name == "Standard"
def test_get_preset_invalid(self):
"""Test PresetManager.get_preset() with invalid name."""
invalid = PresetManager.get_preset('nonexistent')
invalid = PresetManager.get_preset("nonexistent")
assert invalid is None
def test_list_presets(self):
"""Test PresetManager.list_presets()."""
presets = PresetManager.list_presets()
assert len(presets) == 3
assert 'quick' in presets
assert 'standard' in presets
assert 'comprehensive' in presets
assert "quick" in presets
assert "standard" in presets
assert "comprehensive" in presets
def test_format_preset_help(self):
"""Test PresetManager.format_preset_help()."""
help_text = PresetManager.format_preset_help()
assert 'Available presets:' in help_text
assert '⚡ quick' in help_text
assert '🎯 standard' in help_text
assert '🚀 comprehensive' in help_text
assert '1-2 minutes' in help_text
assert '5-10 minutes' in help_text
assert '20-60 minutes' in help_text
assert "Available presets:" in help_text
assert "⚡ quick" in help_text
assert "🎯 standard" in help_text
assert "🚀 comprehensive" in help_text
assert "1-2 minutes" in help_text
assert "5-10 minutes" in help_text
assert "20-60 minutes" in help_text
def test_get_default_preset(self):
"""Test PresetManager.get_default_preset()."""
default = PresetManager.get_default_preset()
assert default == 'standard'
assert default == "standard"
class TestPresetApplication:
@@ -126,85 +126,85 @@ class TestPresetApplication:
def test_apply_preset_quick(self):
"""Test applying quick preset."""
args = {'directory': '/tmp/test'}
updated = PresetManager.apply_preset('quick', args)
args = {"directory": "/tmp/test"}
updated = PresetManager.apply_preset("quick", args)
assert updated['depth'] == 'surface'
assert updated['enhance_level'] == 0
assert updated['skip_patterns'] # Quick disables patterns
assert updated['skip_dependency_graph'] # Quick disables dep graph
assert updated['skip_test_examples'] # Quick disables tests
assert updated['skip_how_to_guides'] # Quick disables guides
assert not updated['skip_api_reference'] # Quick enables API ref
assert not updated['skip_docs'] # Quick enables docs
assert updated["depth"] == "surface"
assert updated["enhance_level"] == 0
assert updated["skip_patterns"] # Quick disables patterns
assert updated["skip_dependency_graph"] # Quick disables dep graph
assert updated["skip_test_examples"] # Quick disables tests
assert updated["skip_how_to_guides"] # Quick disables guides
assert not updated["skip_api_reference"] # Quick enables API ref
assert not updated["skip_docs"] # Quick enables docs
def test_apply_preset_standard(self):
"""Test applying standard preset."""
args = {'directory': '/tmp/test'}
updated = PresetManager.apply_preset('standard', args)
args = {"directory": "/tmp/test"}
updated = PresetManager.apply_preset("standard", args)
assert updated['depth'] == 'deep'
assert updated['enhance_level'] == 1
assert not updated['skip_patterns'] # Standard enables patterns
assert not updated['skip_dependency_graph'] # Standard enables dep graph
assert not updated['skip_test_examples'] # Standard enables tests
assert updated['skip_how_to_guides'] # Standard disables guides (slow)
assert not updated['skip_api_reference'] # Standard enables API ref
assert not updated['skip_docs'] # Standard enables docs
assert updated["depth"] == "deep"
assert updated["enhance_level"] == 1
assert not updated["skip_patterns"] # Standard enables patterns
assert not updated["skip_dependency_graph"] # Standard enables dep graph
assert not updated["skip_test_examples"] # Standard enables tests
assert updated["skip_how_to_guides"] # Standard disables guides (slow)
assert not updated["skip_api_reference"] # Standard enables API ref
assert not updated["skip_docs"] # Standard enables docs
def test_apply_preset_comprehensive(self):
"""Test applying comprehensive preset."""
args = {'directory': '/tmp/test'}
updated = PresetManager.apply_preset('comprehensive', args)
args = {"directory": "/tmp/test"}
updated = PresetManager.apply_preset("comprehensive", args)
assert updated['depth'] == 'full'
assert updated['enhance_level'] == 3
assert updated["depth"] == "full"
assert updated["enhance_level"] == 3
# Comprehensive enables ALL features
assert not updated['skip_patterns']
assert not updated['skip_dependency_graph']
assert not updated['skip_test_examples']
assert not updated['skip_how_to_guides']
assert not updated['skip_api_reference']
assert not updated['skip_config_patterns']
assert not updated['skip_docs']
assert not updated["skip_patterns"]
assert not updated["skip_dependency_graph"]
assert not updated["skip_test_examples"]
assert not updated["skip_how_to_guides"]
assert not updated["skip_api_reference"]
assert not updated["skip_config_patterns"]
assert not updated["skip_docs"]
def test_cli_overrides_preset(self):
"""Test that CLI args override preset defaults."""
args = {
'directory': '/tmp/test',
'enhance_level': 2, # Override preset default
'skip_patterns': False # Override preset default
"directory": "/tmp/test",
"enhance_level": 2, # Override preset default
"skip_patterns": False, # Override preset default
}
updated = PresetManager.apply_preset('quick', args)
updated = PresetManager.apply_preset("quick", args)
# Preset says enhance_level=0, but CLI said 2
assert updated['enhance_level'] == 2 # CLI wins
assert updated["enhance_level"] == 2 # CLI wins
# Preset says skip_patterns=True (disabled), but CLI said False (enabled)
assert not updated['skip_patterns'] # CLI wins
assert not updated["skip_patterns"] # CLI wins
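# Hedged sketch of the precedence rule this test pins down: preset values
# fill in only what the CLI left unset, so explicit CLI args always win
# (the real apply_preset is not shown in this diff):
def apply_preset_sketch(preset_defaults: dict, cli_args: dict) -> dict:
    merged = dict(preset_defaults)
    merged.update({k: v for k, v in cli_args.items() if v is not None})
    return merged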
def test_apply_preset_preserves_args(self):
"""Test that apply_preset preserves existing args."""
args = {
'directory': '/tmp/test',
'output': 'custom_output/',
'languages': 'Python,JavaScript'
"directory": "/tmp/test",
"output": "custom_output/",
"languages": "Python,JavaScript",
}
updated = PresetManager.apply_preset('standard', args)
updated = PresetManager.apply_preset("standard", args)
# Existing args should be preserved
assert updated['directory'] == '/tmp/test'
assert updated['output'] == 'custom_output/'
assert updated['languages'] == 'Python,JavaScript'
assert updated["directory"] == "/tmp/test"
assert updated["output"] == "custom_output/"
assert updated["languages"] == "Python,JavaScript"
def test_apply_preset_invalid(self):
"""Test applying invalid preset raises error."""
args = {'directory': '/tmp/test'}
args = {"directory": "/tmp/test"}
with pytest.raises(ValueError, match="Unknown preset: nonexistent"):
PresetManager.apply_preset('nonexistent', args)
PresetManager.apply_preset("nonexistent", args)
class TestDeprecationWarnings:
@@ -215,12 +215,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=True,
comprehensive=False,
depth=None,
ai_mode='auto'
)
args = argparse.Namespace(quick=True, comprehensive=False, depth=None, ai_mode="auto")
_check_deprecated_flags(args)
@@ -235,12 +230,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=False,
comprehensive=True,
depth=None,
ai_mode='auto'
)
args = argparse.Namespace(quick=False, comprehensive=True, depth=None, ai_mode="auto")
_check_deprecated_flags(args)
@@ -255,12 +245,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=False,
comprehensive=False,
depth='full',
ai_mode='auto'
)
args = argparse.Namespace(quick=False, comprehensive=False, depth="full", ai_mode="auto")
_check_deprecated_flags(args)
@@ -275,12 +260,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=False,
comprehensive=False,
depth=None,
ai_mode='api'
)
args = argparse.Namespace(quick=False, comprehensive=False, depth=None, ai_mode="api")
_check_deprecated_flags(args)
@@ -295,12 +275,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=True,
comprehensive=False,
depth='surface',
ai_mode='local'
)
args = argparse.Namespace(quick=True, comprehensive=False, depth="surface", ai_mode="local")
_check_deprecated_flags(args)
@@ -317,12 +292,7 @@ class TestDeprecationWarnings:
from skill_seekers.cli.codebase_scraper import _check_deprecated_flags
import argparse
args = argparse.Namespace(
quick=False,
comprehensive=False,
depth=None,
ai_mode='auto'
)
args = argparse.Namespace(quick=False, comprehensive=False, depth=None, ai_mode="auto")
_check_deprecated_flags(args)
@@ -337,31 +307,31 @@ class TestBackwardCompatibility:
def test_old_flags_still_work(self):
"""Test that old flags still work (with warnings)."""
# --quick flag
args = {'quick': True}
updated = PresetManager.apply_preset('quick', args)
assert updated['depth'] == 'surface'
args = {"quick": True}
updated = PresetManager.apply_preset("quick", args)
assert updated["depth"] == "surface"
# --comprehensive flag
args = {'comprehensive': True}
updated = PresetManager.apply_preset('comprehensive', args)
assert updated['depth'] == 'full'
args = {"comprehensive": True}
updated = PresetManager.apply_preset("comprehensive", args)
assert updated["depth"] == "full"
def test_preset_flag_preferred(self):
"""Test that --preset flag is the recommended way."""
# Using --preset quick
args = {'preset': 'quick'}
updated = PresetManager.apply_preset('quick', args)
assert updated['depth'] == 'surface'
args = {"preset": "quick"}
updated = PresetManager.apply_preset("quick", args)
assert updated["depth"] == "surface"
# Using --preset standard
args = {'preset': 'standard'}
updated = PresetManager.apply_preset('standard', args)
assert updated['depth'] == 'deep'
args = {"preset": "standard"}
updated = PresetManager.apply_preset("standard", args)
assert updated["depth"] == "deep"
# Using --preset comprehensive
args = {'preset': 'comprehensive'}
updated = PresetManager.apply_preset('comprehensive', args)
assert updated['depth'] == 'full'
args = {"preset": "comprehensive"}
updated = PresetManager.apply_preset("comprehensive", args)
assert updated["depth"] == "full"
if __name__ == "__main__":

View File

@@ -19,10 +19,7 @@ import tempfile
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from skill_seekers.cli.quality_metrics import (
QualityAnalyzer,
MetricLevel
)
from skill_seekers.cli.quality_metrics import QualityAnalyzer, MetricLevel
@pytest.fixture
@@ -176,9 +173,9 @@ def test_calculate_statistics(complete_skill_dir):
analyzer = QualityAnalyzer(complete_skill_dir)
stats = analyzer.calculate_statistics()
assert stats['total_files'] > 0
assert stats['markdown_files'] > 0
assert stats['total_words'] > 0
assert stats["total_files"] > 0
assert stats["markdown_files"] > 0
assert stats["total_words"] > 0
def test_overall_score_calculation():
@@ -197,9 +194,7 @@ def test_overall_score_calculation():
coverage = 70.0
health = 85.0
overall = analyzer.calculate_overall_score(
completeness, accuracy, coverage, health
)
overall = analyzer.calculate_overall_score(completeness, accuracy, coverage, health)
assert overall.completeness == 80.0
assert overall.accuracy == 90.0
@@ -218,13 +213,13 @@ def test_grade_assignment():
# Test various scores
score_95 = analyzer.calculate_overall_score(95, 95, 95, 95)
assert score_95.grade == 'A+'
assert score_95.grade == "A+"
score_85 = analyzer.calculate_overall_score(85, 85, 85, 85)
assert score_85.grade in ['A-', 'B+']
assert score_85.grade in ["A-", "B+"]
score_70 = analyzer.calculate_overall_score(70, 70, 70, 70)
assert score_70.grade in ['B-', 'C+', 'C']
assert score_70.grade in ["B-", "C+", "C"]
def test_generate_recommendations():
@@ -240,7 +235,7 @@ def test_generate_recommendations():
recommendations = analyzer.generate_recommendations(score)
assert len(recommendations) > 0
assert any('completeness' in r.lower() for r in recommendations)
assert any("completeness" in r.lower() for r in recommendations)
def test_generate_report(complete_skill_dir):

View File

@@ -28,7 +28,7 @@ class TestRAGChunker:
chunk_overlap=100,
preserve_code_blocks=False,
preserve_paragraphs=False,
min_chunk_size=50
min_chunk_size=50,
)
assert chunker.chunk_size == 1024
@@ -180,13 +180,17 @@ class TestRAGChunker:
# Create SKILL.md
skill_md = skill_dir / "SKILL.md"
skill_md.write_text("# Main Skill\n\nThis is the main skill content.\n\nWith multiple paragraphs.")
skill_md.write_text(
"# Main Skill\n\nThis is the main skill content.\n\nWith multiple paragraphs."
)
# Create references directory with files
references_dir = skill_dir / "references"
references_dir.mkdir()
(references_dir / "getting_started.md").write_text("# Getting Started\n\nQuick start guide.")
(references_dir / "getting_started.md").write_text(
"# Getting Started\n\nQuick start guide."
)
(references_dir / "api.md").write_text("# API Reference\n\nAPI documentation.")
# Chunk skill
@@ -209,7 +213,7 @@ class TestRAGChunker:
{
"chunk_id": "test_0",
"page_content": "Test content",
"metadata": {"source": "test", "chunk_index": 0}
"metadata": {"source": "test", "chunk_index": 0},
}
]
@@ -340,7 +344,7 @@ class TestRAGChunker:
metadata = {
"source": "react-docs",
"category": "hooks",
"url": "https://react.dev/reference/react"
"url": "https://react.dev/reference/react",
}
chunks = chunker.chunk_document(text, metadata)
@@ -379,10 +383,7 @@ class TestRAGChunkerIntegration:
# Convert to LangChain Documents
docs = [
Document(
page_content=chunk["page_content"],
metadata=chunk["metadata"]
)
Document(page_content=chunk["page_content"], metadata=chunk["metadata"])
for chunk in chunks
]
@@ -407,11 +408,7 @@ class TestRAGChunkerIntegration:
# Convert to LlamaIndex TextNodes
nodes = [
TextNode(
text=chunk["page_content"],
metadata=chunk["metadata"],
id_=chunk["chunk_id"]
)
TextNode(text=chunk["page_content"], metadata=chunk["metadata"], id_=chunk["chunk_id"])
for chunk in chunks
]

View File

@@ -13,6 +13,7 @@ pytest.importorskip("mcp.server")
# Check if starlette is available
try:
from starlette.testclient import TestClient
STARLETTE_AVAILABLE = True
except ImportError:
STARLETTE_AVAILABLE = False
@@ -21,8 +22,7 @@ from skill_seekers.mcp.server_fastmcp import mcp
# Skip all tests if starlette is not installed
pytestmark = pytest.mark.skipif(
not STARLETTE_AVAILABLE,
reason="starlette not installed (pip install starlette httpx)"
not STARLETTE_AVAILABLE, reason="starlette not installed (pip install starlette httpx)"
)

View File

@@ -18,10 +18,7 @@ import tempfile
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from skill_seekers.cli.streaming_ingest import (
StreamingIngester,
IngestionProgress
)
from skill_seekers.cli.streaming_ingest import StreamingIngester, IngestionProgress
@pytest.fixture
@@ -158,11 +155,13 @@ def test_progress_tracking(temp_skill_dir):
progress_updates = []
def callback(progress: IngestionProgress):
progress_updates.append({
"processed_docs": progress.processed_documents,
"processed_chunks": progress.processed_chunks,
"percent": progress.progress_percent
})
progress_updates.append(
{
"processed_docs": progress.processed_documents,
"processed_chunks": progress.processed_chunks,
"percent": progress.progress_percent,
}
)
list(ingester.stream_skill_directory(temp_skill_dir, callback=callback))
@@ -171,7 +170,9 @@ def test_progress_tracking(temp_skill_dir):
# Progress should increase
for i in range(len(progress_updates) - 1):
assert progress_updates[i + 1]["processed_chunks"] >= progress_updates[i]["processed_chunks"]
assert (
progress_updates[i + 1]["processed_chunks"] >= progress_updates[i]["processed_chunks"]
)
def test_checkpoint_save_load():
@@ -189,7 +190,7 @@ def test_checkpoint_save_load():
processed_chunks=50,
failed_chunks=2,
bytes_processed=10000,
start_time=1234567890.0
start_time=1234567890.0,
)
# Save checkpoint
@@ -215,7 +216,7 @@ def test_format_progress():
processed_chunks=50,
failed_chunks=0,
bytes_processed=10000,
start_time=0.0
start_time=0.0,
)
progress_str = ingester.format_progress()
@@ -245,17 +246,19 @@ def test_chunk_size_validation():
# Small chunks
ingester_small = StreamingIngester(chunk_size=100, chunk_overlap=10)
chunks_small = list(ingester_small.chunk_document(
content,
{"source": "test", "file": "test.md", "category": "test"}
))
chunks_small = list(
ingester_small.chunk_document(
content, {"source": "test", "file": "test.md", "category": "test"}
)
)
# Large chunks
ingester_large = StreamingIngester(chunk_size=500, chunk_overlap=50)
chunks_large = list(ingester_large.chunk_document(
content,
{"source": "test", "file": "test.md", "category": "test"}
))
chunks_large = list(
ingester_large.chunk_document(
content, {"source": "test", "file": "test.md", "category": "test"}
)
)
# Smaller chunk size should create more chunks
assert len(chunks_small) > len(chunks_large)
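
The final assertion rests on simple stride arithmetic: with overlap, each successive chunk advances by chunk_size - chunk_overlap characters, so a smaller stride yields proportionally more chunks. A back-of-envelope check, assuming a plain character-window chunker and a hypothetical 4,000-character document:

import math

CONTENT_LEN = 4_000  # hypothetical document length in characters

def approx_chunk_count(chunk_size: int, chunk_overlap: int) -> int:
    stride = chunk_size - chunk_overlap
    # First chunk covers chunk_size chars; each later chunk adds one stride.
    if CONTENT_LEN <= chunk_size:
        return 1
    return math.ceil((CONTENT_LEN - chunk_size) / stride) + 1

print(approx_chunk_count(100, 10))  # stride 90  -> 45 chunks
print(approx_chunk_count(500, 50))  # stride 450 -> 9 chunks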

View File

@@ -21,9 +21,9 @@ def sample_chroma_package(tmp_path):
"metadatas": [
{"source": "test", "category": "overview", "file": "SKILL.md"},
{"source": "test", "category": "api", "file": "API.md"},
{"source": "test", "category": "guide", "file": "GUIDE.md"}
{"source": "test", "category": "guide", "file": "GUIDE.md"},
],
"ids": ["id1", "id2", "id3"]
"ids": ["id1", "id2", "id3"],
}
package_path = tmp_path / "test-chroma.json"
@@ -43,8 +43,8 @@ def sample_weaviate_package(tmp_path):
"properties": [
{"name": "content", "dataType": ["text"]},
{"name": "source", "dataType": ["string"]},
{"name": "category", "dataType": ["string"]}
]
{"name": "category", "dataType": ["string"]},
],
},
"objects": [
{
@@ -52,18 +52,14 @@ def sample_weaviate_package(tmp_path):
"properties": {
"content": "Test content 1",
"source": "test",
"category": "overview"
}
"category": "overview",
},
},
{
"id": "00000000-0000-0000-0000-000000000002",
"properties": {
"content": "Test content 2",
"source": "test",
"category": "api"
}
}
]
"properties": {"content": "Test content 2", "source": "test", "category": "api"},
},
],
}
package_path = tmp_path / "test-weaviate.json"
@@ -76,40 +72,41 @@ class TestChromaUploadBasics:
def test_chroma_adaptor_exists(self):
"""Test that ChromaDB adaptor can be loaded."""
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
assert adaptor is not None
assert adaptor.PLATFORM == 'chroma'
assert adaptor.PLATFORM == "chroma"
def test_chroma_upload_without_chromadb_installed(self, sample_chroma_package):
"""Test upload fails gracefully without chromadb installed."""
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
# Temporarily remove chromadb if it exists
import sys
chromadb_backup = sys.modules.get('chromadb')
if 'chromadb' in sys.modules:
del sys.modules['chromadb']
chromadb_backup = sys.modules.get("chromadb")
if "chromadb" in sys.modules:
del sys.modules["chromadb"]
try:
result = adaptor.upload(sample_chroma_package)
assert result['success'] is False
assert 'chromadb not installed' in result['message']
assert 'pip install chromadb' in result['message']
assert result["success"] is False
assert "chromadb not installed" in result["message"]
assert "pip install chromadb" in result["message"]
finally:
if chromadb_backup:
sys.modules['chromadb'] = chromadb_backup
sys.modules["chromadb"] = chromadb_backup
def test_chroma_upload_api_signature(self, sample_chroma_package):
"""Test ChromaDB upload has correct API signature."""
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
# Verify upload method exists and accepts kwargs
assert hasattr(adaptor, 'upload')
assert hasattr(adaptor, "upload")
assert callable(adaptor.upload)
# Verify adaptor methods exist
assert hasattr(adaptor, '_generate_openai_embeddings')
assert hasattr(adaptor, "_generate_openai_embeddings")
class TestWeaviateUploadBasics:
@@ -117,40 +114,41 @@ class TestWeaviateUploadBasics:
def test_weaviate_adaptor_exists(self):
"""Test that Weaviate adaptor can be loaded."""
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
assert adaptor is not None
assert adaptor.PLATFORM == 'weaviate'
assert adaptor.PLATFORM == "weaviate"
def test_weaviate_upload_without_weaviate_installed(self, sample_weaviate_package):
"""Test upload fails gracefully without weaviate-client installed."""
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
# Temporarily remove weaviate if it exists
import sys
weaviate_backup = sys.modules.get('weaviate')
if 'weaviate' in sys.modules:
del sys.modules['weaviate']
weaviate_backup = sys.modules.get("weaviate")
if "weaviate" in sys.modules:
del sys.modules["weaviate"]
try:
result = adaptor.upload(sample_weaviate_package)
assert result['success'] is False
assert 'weaviate-client not installed' in result['message']
assert 'pip install weaviate-client' in result['message']
assert result["success"] is False
assert "weaviate-client not installed" in result["message"]
assert "pip install weaviate-client" in result["message"]
finally:
if weaviate_backup:
sys.modules['weaviate'] = weaviate_backup
sys.modules["weaviate"] = weaviate_backup
def test_weaviate_upload_api_signature(self, sample_weaviate_package):
"""Test Weaviate upload has correct API signature."""
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
# Verify upload method exists and accepts kwargs
assert hasattr(adaptor, 'upload')
assert hasattr(adaptor, "upload")
assert callable(adaptor.upload)
# Verify adaptor methods exist
assert hasattr(adaptor, '_generate_openai_embeddings')
assert hasattr(adaptor, "_generate_openai_embeddings")
class TestPackageStructure:
@@ -161,30 +159,30 @@ class TestPackageStructure:
with open(sample_chroma_package) as f:
data = json.load(f)
assert 'collection_name' in data
assert 'documents' in data
assert 'metadatas' in data
assert 'ids' in data
assert len(data['documents']) == len(data['metadatas']) == len(data['ids'])
assert "collection_name" in data
assert "documents" in data
assert "metadatas" in data
assert "ids" in data
assert len(data["documents"]) == len(data["metadatas"]) == len(data["ids"])
def test_weaviate_package_structure(self, sample_weaviate_package):
"""Test Weaviate package has required fields."""
with open(sample_weaviate_package) as f:
data = json.load(f)
assert 'class_name' in data
assert 'schema' in data
assert 'objects' in data
assert len(data['objects']) == 2
assert "class_name" in data
assert "schema" in data
assert "objects" in data
assert len(data["objects"]) == 2
# Verify schema structure
assert 'class' in data['schema']
assert 'properties' in data['schema']
assert "class" in data["schema"]
assert "properties" in data["schema"]
# Verify object structure
for obj in data['objects']:
assert 'id' in obj
assert 'properties' in obj
for obj in data["objects"]:
assert "id" in obj
assert "properties" in obj
class TestUploadCommandIntegration:
@@ -199,25 +197,26 @@ class TestUploadCommandIntegration:
# Verify it accepts kwargs for vector DBs
import inspect
sig = inspect.signature(upload_skill_api)
params = list(sig.parameters.keys())
assert 'package_path' in params
assert 'target' in params
assert 'api_key' in params
assert 'kwargs' in params # For platform-specific options
assert "package_path" in params
assert "target" in params
assert "api_key" in params
assert "kwargs" in params # For platform-specific options
def test_upload_command_supports_chroma(self):
"""Test upload command recognizes chroma as target."""
# This should not raise ValueError
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
assert adaptor is not None
def test_upload_command_supports_weaviate(self):
"""Test upload command recognizes weaviate as target."""
# This should not raise ValueError
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
assert adaptor is not None
@@ -226,7 +225,7 @@ class TestErrorHandling:
def test_chroma_handles_missing_file(self, tmp_path):
"""Test ChromaDB upload handles missing files gracefully."""
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
missing_file = tmp_path / "nonexistent.json"
@@ -234,14 +233,14 @@ class TestErrorHandling:
try:
result = adaptor.upload(missing_file)
# If it returns a dict, it should indicate failure
assert result['success'] is False
assert result["success"] is False
except FileNotFoundError:
# This is also acceptable
pass
def test_weaviate_handles_missing_file(self, tmp_path):
"""Test Weaviate upload handles missing files gracefully."""
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
missing_file = tmp_path / "nonexistent.json"
@@ -249,14 +248,14 @@ class TestErrorHandling:
try:
result = adaptor.upload(missing_file)
# If it returns a dict, it should indicate failure
assert result['success'] is False
assert result["success"] is False
except FileNotFoundError:
# This is also acceptable
pass
def test_chroma_handles_invalid_json(self, tmp_path):
"""Test ChromaDB upload handles invalid JSON gracefully."""
adaptor = get_adaptor('chroma')
adaptor = get_adaptor("chroma")
invalid_file = tmp_path / "invalid.json"
invalid_file.write_text("not valid json{")
@@ -265,14 +264,14 @@ class TestErrorHandling:
try:
result = adaptor.upload(invalid_file)
# If it returns a dict, it should indicate failure
assert result['success'] is False
assert result["success"] is False
except json.JSONDecodeError:
# This is also acceptable
pass
def test_weaviate_handles_invalid_json(self, tmp_path):
"""Test Weaviate upload handles invalid JSON gracefully."""
adaptor = get_adaptor('weaviate')
adaptor = get_adaptor("weaviate")
invalid_file = tmp_path / "invalid.json"
invalid_file.write_text("not valid json{")
@@ -281,7 +280,7 @@ class TestErrorHandling:
try:
result = adaptor.upload(invalid_file)
# If it returns a dict, it should indicate failure
assert result['success'] is False
assert result["success"] is False
except json.JSONDecodeError:
# This is also acceptable
pass

View File

@@ -155,13 +155,9 @@ class TestConvertToMdUrls(unittest.TestCase):
# Should deduplicate to 3 unique base URLs
self.assertEqual(len(result), 3)
self.assertIn(
"https://mikro-orm.io/docs/quick-start/index.html.md", result
)
self.assertIn("https://mikro-orm.io/docs/quick-start/index.html.md", result)
self.assertIn("https://mikro-orm.io/docs/propagation/index.html.md", result)
self.assertIn(
"https://mikro-orm.io/docs/defining-entities/index.html.md", result
)
self.assertIn("https://mikro-orm.io/docs/defining-entities/index.html.md", result)
# Should NOT contain any URLs with anchor fragments
for url in result: