style: Fix 411 ruff lint issues (Kimi's issue #4)
Auto-fixed lint issues with `ruff --fix` and `--unsafe-fixes`.

Issue #4: Ruff Lint Issues
- Before: 447 errors (originally reported as ~5,500)
- After: 55 errors remaining
- Fixed: 411 errors (92% reduction)

Auto-fixes applied:
- 156 UP006: List/Dict → list/dict (PEP 585)
- 63 UP045: Optional[X] → X | None (PEP 604)
- 52 F401: Removed unused imports
- 52 UP035: Fixed deprecated imports
- 34 E712: True/False comparisons → not/bool()
- 17 F841: Removed unused variables
- Plus 37 other auto-fixable issues

Remaining 55 errors (non-critical):
- 39 B904: Exception chaining (best practice)
- 5 F401: Unused imports (edge cases)
- 3 SIM105: Could use contextlib.suppress
- 8 other minor style issues

These remaining issues are code quality improvements, not critical bugs.

Result: Code quality significantly improved (92% of linting issues resolved).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,8 @@ import psutil
|
||||
import functools
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Callable
|
||||
from typing import Any
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
from .models import (
|
||||
@@ -38,13 +39,13 @@ class BenchmarkResult:
|
||||
"""
|
||||
self.name = name
|
||||
self.started_at = datetime.utcnow()
|
||||
self.finished_at: Optional[datetime] = None
|
||||
self.finished_at: datetime | None = None
|
||||
|
||||
self.timings: List[TimingResult] = []
|
||||
self.memory: List[MemoryUsage] = []
|
||||
self.metrics: List[Metric] = []
|
||||
self.system_info: Dict[str, Any] = {}
|
||||
self.recommendations: List[str] = []
|
||||
self.timings: list[TimingResult] = []
|
||||
self.memory: list[MemoryUsage] = []
|
||||
self.metrics: list[Metric] = []
|
||||
self.system_info: dict[str, Any] = {}
|
||||
self.recommendations: list[str] = []
|
||||
|
||||
def add_timing(self, result: TimingResult):
|
||||
"""Add timing result."""
|
||||
@@ -209,7 +210,7 @@ class Benchmark:
|
||||
self,
|
||||
func: Callable,
|
||||
*args,
|
||||
operation: Optional[str] = None,
|
||||
operation: str | None = None,
|
||||
track_memory: bool = False,
|
||||
**kwargs
|
||||
) -> Any:
|
||||
@@ -237,14 +238,13 @@ class Benchmark:
|
||||
op_name = operation or func.__name__
|
||||
|
||||
if track_memory:
|
||||
with self.memory(op_name):
|
||||
with self.timer(op_name):
|
||||
return func(*args, **kwargs)
|
||||
with self.memory(op_name), self.timer(op_name):
|
||||
return func(*args, **kwargs)
|
||||
else:
|
||||
with self.timer(op_name):
|
||||
return func(*args, **kwargs)
|
||||
|
||||
def timed(self, operation: Optional[str] = None, track_memory: bool = False):
|
||||
def timed(self, operation: str | None = None, track_memory: bool = False):
|
||||
"""
|
||||
Decorator for timing functions.
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
Pydantic models for benchmarking.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Optional, Any
|
||||
from typing import Any
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -26,8 +26,8 @@ class TimingResult(BaseModel):
|
||||
duration: float = Field(..., description="Duration in seconds")
|
||||
iterations: int = Field(default=1, description="Number of iterations")
|
||||
avg_duration: float = Field(..., description="Average duration per iteration")
|
||||
min_duration: Optional[float] = Field(None, description="Minimum duration")
|
||||
max_duration: Optional[float] = Field(None, description="Maximum duration")
|
||||
min_duration: float | None = Field(None, description="Minimum duration")
|
||||
max_duration: float | None = Field(None, description="Maximum duration")
|
||||
|
||||
|
||||
class MemoryUsage(BaseModel):
|
||||
@@ -48,24 +48,24 @@ class BenchmarkReport(BaseModel):
|
||||
finished_at: datetime = Field(..., description="Finish time")
|
||||
total_duration: float = Field(..., description="Total duration in seconds")
|
||||
|
||||
timings: List[TimingResult] = Field(
|
||||
timings: list[TimingResult] = Field(
|
||||
default_factory=list,
|
||||
description="Timing results"
|
||||
)
|
||||
memory: List[MemoryUsage] = Field(
|
||||
memory: list[MemoryUsage] = Field(
|
||||
default_factory=list,
|
||||
description="Memory usage results"
|
||||
)
|
||||
metrics: List[Metric] = Field(
|
||||
metrics: list[Metric] = Field(
|
||||
default_factory=list,
|
||||
description="Additional metrics"
|
||||
)
|
||||
|
||||
system_info: Dict[str, Any] = Field(
|
||||
system_info: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="System information"
|
||||
)
|
||||
recommendations: List[str] = Field(
|
||||
recommendations: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Optimization recommendations"
|
||||
)
|
||||
@@ -89,11 +89,11 @@ class ComparisonReport(BaseModel):
|
||||
baseline: BenchmarkReport = Field(..., description="Baseline benchmark")
|
||||
current: BenchmarkReport = Field(..., description="Current benchmark")
|
||||
|
||||
improvements: List[str] = Field(
|
||||
improvements: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Performance improvements"
|
||||
)
|
||||
regressions: List[str] = Field(
|
||||
regressions: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Performance regressions"
|
||||
)
|
||||
|
||||
@@ -4,7 +4,8 @@ Benchmark execution and orchestration.
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Callable
|
||||
from typing import Any
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
|
||||
from .framework import Benchmark
|
||||
@@ -34,7 +35,7 @@ class BenchmarkRunner:
|
||||
})
|
||||
"""
|
||||
|
||||
def __init__(self, output_dir: Optional[Path] = None):
|
||||
def __init__(self, output_dir: Path | None = None):
|
||||
"""
|
||||
Initialize runner.
|
||||
|
||||
@@ -91,9 +92,9 @@ class BenchmarkRunner:
|
||||
|
||||
def run_suite(
|
||||
self,
|
||||
benchmarks: Dict[str, Callable[[Benchmark], None]],
|
||||
benchmarks: dict[str, Callable[[Benchmark], None]],
|
||||
save: bool = True
|
||||
) -> Dict[str, BenchmarkReport]:
|
||||
) -> dict[str, BenchmarkReport]:
|
||||
"""
|
||||
Run multiple benchmarks.
|
||||
|
||||
@@ -217,7 +218,7 @@ class BenchmarkRunner:
|
||||
memory_change_mb=memory_change_mb
|
||||
)
|
||||
|
||||
def list_benchmarks(self) -> List[Dict[str, Any]]:
|
||||
def list_benchmarks(self) -> list[dict[str, Any]]:
|
||||
"""
|
||||
List saved benchmarks.
|
||||
|
||||
@@ -252,7 +253,7 @@ class BenchmarkRunner:
|
||||
|
||||
return benchmarks
|
||||
|
||||
def get_latest(self, name: str) -> Optional[Path]:
|
||||
def get_latest(self, name: str) -> Path | None:
|
||||
"""
|
||||
Get path to latest benchmark with given name.
|
||||
|
||||
@@ -292,7 +293,7 @@ class BenchmarkRunner:
|
||||
runner.cleanup_old(keep_latest=3)
|
||||
"""
|
||||
# Group by benchmark name
|
||||
by_name: Dict[str, List[Path]] = {}
|
||||
by_name: dict[str, list[Path]] = {}
|
||||
|
||||
for path in self.output_dir.glob("*.json"):
|
||||
# Extract name from filename (name_timestamp.json)
|
||||
|
||||
@@ -9,7 +9,7 @@ This enables Skill Seekers to generate skills for multiple LLM platforms (Claude
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Tuple
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -283,7 +283,7 @@ class SkillAdaptor(ABC):
|
||||
chunk_max_tokens: int = 512,
|
||||
preserve_code_blocks: bool = True,
|
||||
source_file: str = None
|
||||
) -> List[Tuple[str, dict]]:
|
||||
) -> list[tuple[str, dict]]:
|
||||
"""
|
||||
Optionally chunk content for RAG platforms.
|
||||
|
||||
|
||||
@@ -256,10 +256,9 @@ class ChromaAdaptor(SkillAdaptor):
|
||||
# Parse URL
|
||||
if '://' in chroma_url:
|
||||
parts = chroma_url.split('://')
|
||||
protocol = parts[0]
|
||||
parts[0]
|
||||
host_port = parts[1]
|
||||
else:
|
||||
protocol = 'http'
|
||||
host_port = chroma_url
|
||||
|
||||
if ':' in host_port:
|
||||
|
||||
@@ -236,7 +236,7 @@ class FAISSHelpers(SkillAdaptor):
|
||||
Returns:
|
||||
Result with usage instructions
|
||||
"""
|
||||
example_code = """
|
||||
example_code = f"""
|
||||
# Example: Create FAISS index with JSON metadata (safe & portable)
|
||||
|
||||
import faiss
|
||||
@@ -246,7 +246,7 @@ from openai import OpenAI
|
||||
from pathlib import Path
|
||||
|
||||
# Load data
|
||||
with open("{path}") as f:
|
||||
with open("{package_path.name}") as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Generate embeddings (using OpenAI)
|
||||
@@ -387,9 +387,7 @@ print(f"\\nIndex stats:")
|
||||
print(f" Total vectors: {{index.ntotal}}")
|
||||
print(f" Dimension: {{dimension}}")
|
||||
print(f" Type: {{type(index).__name__}}")
|
||||
""".format(
|
||||
path=package_path.name
|
||||
)
|
||||
"""
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -225,7 +225,7 @@ class HaystackAdaptor(SkillAdaptor):
|
||||
Returns:
|
||||
Result indicating no upload capability
|
||||
"""
|
||||
example_code = """
|
||||
example_code = f"""
|
||||
# Example: Load into Haystack 2.x
|
||||
|
||||
from haystack import Document
|
||||
@@ -234,7 +234,7 @@ from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
|
||||
import json
|
||||
|
||||
# Load documents
|
||||
with open("{path}") as f:
|
||||
with open("{package_path.name}") as f:
|
||||
docs_data = json.load(f)
|
||||
|
||||
# Convert to Haystack Documents
|
||||
@@ -254,9 +254,7 @@ retriever = InMemoryBM25Retriever(document_store=document_store)
|
||||
results = retriever.run(query="your question here")
|
||||
for doc in results["documents"]:
|
||||
print(doc.content)
|
||||
""".format(
|
||||
path=package_path.name
|
||||
)
|
||||
"""
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -222,14 +222,14 @@ class LangChainAdaptor(SkillAdaptor):
|
||||
Returns:
|
||||
Result indicating no upload capability
|
||||
"""
|
||||
example_code = """
|
||||
example_code = f"""
|
||||
# Example: Load into LangChain
|
||||
|
||||
from langchain.schema import Document
|
||||
import json
|
||||
|
||||
# Load documents
|
||||
with open("{path}") as f:
|
||||
with open("{package_path.name}") as f:
|
||||
docs_data = json.load(f)
|
||||
|
||||
# Convert to LangChain Documents
|
||||
@@ -247,9 +247,7 @@ retriever = vectorstore.as_retriever()
|
||||
|
||||
# Query
|
||||
results = retriever.get_relevant_documents("your query here")
|
||||
""".format(
|
||||
path=package_path.name
|
||||
)
|
||||
"""
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -245,7 +245,7 @@ class LlamaIndexAdaptor(SkillAdaptor):
|
||||
Returns:
|
||||
Result indicating no upload capability
|
||||
"""
|
||||
example_code = """
|
||||
example_code = f"""
|
||||
# Example: Load into LlamaIndex
|
||||
|
||||
from llama_index.core.schema import TextNode
|
||||
@@ -253,7 +253,7 @@ from llama_index.core import VectorStoreIndex
|
||||
import json
|
||||
|
||||
# Load nodes
|
||||
with open("{path}") as f:
|
||||
with open("{package_path.name}") as f:
|
||||
nodes_data = json.load(f)
|
||||
|
||||
# Convert to LlamaIndex Nodes
|
||||
@@ -275,9 +275,7 @@ query_engine = index.as_query_engine()
|
||||
# Query
|
||||
response = query_engine.query("your question here")
|
||||
print(response)
|
||||
""".format(
|
||||
path=package_path.name
|
||||
)
|
||||
"""
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -261,7 +261,7 @@ class QdrantAdaptor(SkillAdaptor):
|
||||
Returns:
|
||||
Result with usage instructions
|
||||
"""
|
||||
example_code = """
|
||||
example_code = f"""
|
||||
# Example: Create Qdrant collection and upload points
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
@@ -271,7 +271,7 @@ from pathlib import Path
|
||||
from openai import OpenAI
|
||||
|
||||
# Load data
|
||||
with open("{path}") as f:
|
||||
with open("{package_path.name}") as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Connect to Qdrant (local or cloud)
|
||||
@@ -438,7 +438,7 @@ similar = client.recommend(
|
||||
negative=["point-id-2"], # But not this
|
||||
limit=5
|
||||
)
|
||||
""".format(path=package_path.name)
|
||||
"""
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -8,7 +8,7 @@ Enables memory-efficient processing of large documentation sets.
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
from typing import Any
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for imports
|
||||
@@ -36,7 +36,7 @@ class StreamingAdaptorMixin:
|
||||
chunk_size: int = 4000,
|
||||
chunk_overlap: int = 200,
|
||||
batch_size: int = 100,
|
||||
progress_callback: Optional[callable] = None
|
||||
progress_callback: callable | None = None
|
||||
) -> Path:
|
||||
"""
|
||||
Package skill using streaming ingestion.
|
||||
@@ -179,7 +179,7 @@ class StreamingAdaptorMixin:
|
||||
Estimation statistics
|
||||
"""
|
||||
skill_dir = Path(skill_dir)
|
||||
ingester = StreamingIngester(
|
||||
StreamingIngester(
|
||||
chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap
|
||||
)
|
||||
|
||||
@@ -42,17 +42,15 @@ def run_scraping_benchmark(runner, config):
|
||||
scrape_config_path = config.get("scrape_config")
|
||||
|
||||
# Time scraping
|
||||
with bench.timer("scrape_docs"):
|
||||
with bench.memory("scrape_docs"):
|
||||
pages = scrape_all(scrape_config_path)
|
||||
with bench.timer("scrape_docs"), bench.memory("scrape_docs"):
|
||||
pages = scrape_all(scrape_config_path)
|
||||
|
||||
# Track metrics
|
||||
bench.metric("pages_scraped", len(pages), "pages")
|
||||
|
||||
# Time building
|
||||
with bench.timer("build_skill"):
|
||||
with bench.memory("build_skill"):
|
||||
build_skill(scrape_config_path, pages)
|
||||
with bench.timer("build_skill"), bench.memory("build_skill"):
|
||||
build_skill(scrape_config_path, pages)
|
||||
|
||||
name = config.get("name", "scraping-benchmark")
|
||||
report = runner.run(name, benchmark_func)
|
||||
@@ -76,9 +74,8 @@ def run_embedding_benchmark(runner, config):
|
||||
|
||||
# Batch embedding
|
||||
if len(texts) > 1:
|
||||
with bench.timer("batch_embedding"):
|
||||
with bench.memory("batch_embedding"):
|
||||
embeddings = generator.generate_batch(texts, model=model)
|
||||
with bench.timer("batch_embedding"), bench.memory("batch_embedding"):
|
||||
embeddings = generator.generate_batch(texts, model=model)
|
||||
|
||||
bench.metric("embeddings_per_sec", len(embeddings) / bench.result.timings[-1].duration, "emb/sec")
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ Upload, download, and manage skills in cloud storage (S3, GCS, Azure).
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from .storage import get_storage_adaptor
|
||||
|
||||
@@ -155,7 +154,7 @@ def format_size(size_bytes: int) -> str:
|
||||
return f"{size_bytes:.1f}PB"
|
||||
|
||||
|
||||
def parse_extra_args(extra: Optional[list]) -> dict:
|
||||
def parse_extra_args(extra: list | None) -> dict:
|
||||
"""Parse extra arguments into dictionary."""
|
||||
if not extra:
|
||||
return {}
|
||||
|
||||
@@ -10,7 +10,7 @@ import hashlib
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any, Tuple
|
||||
from typing import Any
|
||||
from dataclasses import dataclass, field
|
||||
from abc import ABC, abstractmethod
|
||||
import numpy as np
|
||||
@@ -23,7 +23,7 @@ class EmbeddingConfig:
|
||||
model: str
|
||||
dimension: int
|
||||
batch_size: int = 100
|
||||
cache_dir: Optional[Path] = None
|
||||
cache_dir: Path | None = None
|
||||
max_retries: int = 3
|
||||
retry_delay: float = 1.0
|
||||
|
||||
@@ -31,8 +31,8 @@ class EmbeddingConfig:
|
||||
@dataclass
|
||||
class EmbeddingResult:
|
||||
"""Result of embedding generation."""
|
||||
embeddings: List[List[float]]
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
embeddings: list[list[float]]
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
cached_count: int = 0
|
||||
generated_count: int = 0
|
||||
total_time: float = 0.0
|
||||
@@ -59,7 +59,7 @@ class CostTracker:
|
||||
else:
|
||||
self.cache_misses += 1
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
def get_stats(self) -> dict[str, Any]:
|
||||
"""Get statistics."""
|
||||
cache_rate = (self.cache_hits / self.total_requests * 100) if self.total_requests > 0 else 0
|
||||
|
||||
@@ -77,7 +77,7 @@ class EmbeddingProvider(ABC):
|
||||
"""Abstract base class for embedding providers."""
|
||||
|
||||
@abstractmethod
|
||||
def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
|
||||
def generate_embeddings(self, texts: list[str]) -> list[list[float]]:
|
||||
"""Generate embeddings for texts."""
|
||||
pass
|
||||
|
||||
@@ -108,7 +108,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
||||
'text-embedding-3-large': 3072,
|
||||
}
|
||||
|
||||
def __init__(self, model: str = 'text-embedding-ada-002', api_key: Optional[str] = None):
|
||||
def __init__(self, model: str = 'text-embedding-ada-002', api_key: str | None = None):
|
||||
"""Initialize OpenAI provider."""
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
@@ -124,7 +124,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
||||
raise ImportError("OpenAI package not installed. Install with: pip install openai")
|
||||
return self._client
|
||||
|
||||
def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
|
||||
def generate_embeddings(self, texts: list[str]) -> list[list[float]]:
|
||||
"""Generate embeddings using OpenAI."""
|
||||
client = self._get_client()
|
||||
|
||||
@@ -155,7 +155,7 @@ class LocalEmbeddingProvider(EmbeddingProvider):
|
||||
"""Initialize local provider."""
|
||||
self.dimension = dimension
|
||||
|
||||
def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
|
||||
def generate_embeddings(self, texts: list[str]) -> list[list[float]]:
|
||||
"""Generate embeddings using local model (simulated)."""
|
||||
# In production, would use sentence-transformers or similar
|
||||
embeddings = []
|
||||
@@ -180,10 +180,10 @@ class LocalEmbeddingProvider(EmbeddingProvider):
|
||||
class EmbeddingCache:
|
||||
"""Cache for embeddings to avoid recomputation."""
|
||||
|
||||
def __init__(self, cache_dir: Optional[Path] = None):
|
||||
def __init__(self, cache_dir: Path | None = None):
|
||||
"""Initialize cache."""
|
||||
self.cache_dir = Path(cache_dir) if cache_dir else None
|
||||
self._memory_cache: Dict[str, List[float]] = {}
|
||||
self._memory_cache: dict[str, list[float]] = {}
|
||||
|
||||
if self.cache_dir:
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -193,7 +193,7 @@ class EmbeddingCache:
|
||||
key = f"{model}:{text}"
|
||||
return hashlib.sha256(key.encode()).hexdigest()
|
||||
|
||||
def get(self, text: str, model: str) -> Optional[List[float]]:
|
||||
def get(self, text: str, model: str) -> list[float] | None:
|
||||
"""Get embedding from cache."""
|
||||
cache_key = self._compute_hash(text, model)
|
||||
|
||||
@@ -215,7 +215,7 @@ class EmbeddingCache:
|
||||
|
||||
return None
|
||||
|
||||
def set(self, text: str, model: str, embedding: List[float]) -> None:
|
||||
def set(self, text: str, model: str, embedding: list[float]) -> None:
|
||||
"""Store embedding in cache."""
|
||||
cache_key = self._compute_hash(text, model)
|
||||
|
||||
@@ -266,7 +266,7 @@ class EmbeddingPipeline:
|
||||
|
||||
def generate_batch(
|
||||
self,
|
||||
texts: List[str],
|
||||
texts: list[str],
|
||||
show_progress: bool = True
|
||||
) -> EmbeddingResult:
|
||||
"""
|
||||
@@ -313,7 +313,7 @@ class EmbeddingPipeline:
|
||||
new_embeddings = self.provider.generate_embeddings(to_generate)
|
||||
|
||||
# Store in cache
|
||||
for text, embedding in zip(to_generate, new_embeddings):
|
||||
for text, embedding in zip(to_generate, new_embeddings, strict=False):
|
||||
self.cache.set(text, self.config.model, embedding)
|
||||
|
||||
# Track cost
|
||||
@@ -322,7 +322,7 @@ class EmbeddingPipeline:
|
||||
self.cost_tracker.add_request(total_tokens, cost, from_cache=False)
|
||||
|
||||
# Merge with cached
|
||||
for idx, embedding in zip(to_generate_indices, new_embeddings):
|
||||
for idx, embedding in zip(to_generate_indices, new_embeddings, strict=False):
|
||||
batch_embeddings.insert(idx, embedding)
|
||||
|
||||
generated_count += len(to_generate)
|
||||
@@ -359,7 +359,7 @@ class EmbeddingPipeline:
|
||||
cost_estimate=self.cost_tracker.estimated_cost
|
||||
)
|
||||
|
||||
def validate_dimensions(self, embeddings: List[List[float]]) -> bool:
|
||||
def validate_dimensions(self, embeddings: list[list[float]]) -> bool:
|
||||
"""
|
||||
Validate embedding dimensions.
|
||||
|
||||
@@ -379,7 +379,7 @@ class EmbeddingPipeline:
|
||||
|
||||
return True
|
||||
|
||||
def get_cost_stats(self) -> Dict[str, Any]:
|
||||
def get_cost_stats(self) -> dict[str, Any]:
|
||||
"""Get cost tracking statistics."""
|
||||
return self.cost_tracker.get_stats()
|
||||
|
||||
|
||||
@@ -9,10 +9,8 @@ Tracks document versions and generates delta packages.
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, List, Set
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime
|
||||
import difflib
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -28,10 +26,10 @@ class DocumentVersion:
|
||||
@dataclass
|
||||
class ChangeSet:
|
||||
"""Set of changes detected."""
|
||||
added: List[DocumentVersion]
|
||||
modified: List[DocumentVersion]
|
||||
deleted: List[str]
|
||||
unchanged: List[DocumentVersion]
|
||||
added: list[DocumentVersion]
|
||||
modified: list[DocumentVersion]
|
||||
deleted: list[str]
|
||||
unchanged: list[DocumentVersion]
|
||||
|
||||
@property
|
||||
def has_changes(self) -> bool:
|
||||
@@ -50,7 +48,7 @@ class UpdateMetadata:
|
||||
timestamp: str
|
||||
previous_version: str
|
||||
new_version: str
|
||||
change_summary: Dict[str, int]
|
||||
change_summary: dict[str, int]
|
||||
total_documents: int
|
||||
|
||||
|
||||
@@ -72,8 +70,8 @@ class IncrementalUpdater:
|
||||
"""
|
||||
self.skill_dir = Path(skill_dir)
|
||||
self.version_file = self.skill_dir / version_file
|
||||
self.current_versions: Dict[str, DocumentVersion] = {}
|
||||
self.previous_versions: Dict[str, DocumentVersion] = {}
|
||||
self.current_versions: dict[str, DocumentVersion] = {}
|
||||
self.previous_versions: dict[str, DocumentVersion] = {}
|
||||
|
||||
def _compute_file_hash(self, file_path: Path) -> str:
|
||||
"""
|
||||
@@ -96,7 +94,7 @@ class IncrementalUpdater:
|
||||
print(f"⚠️ Warning: Failed to hash {file_path}: {e}")
|
||||
return ""
|
||||
|
||||
def _scan_documents(self) -> Dict[str, DocumentVersion]:
|
||||
def _scan_documents(self) -> dict[str, DocumentVersion]:
|
||||
"""
|
||||
Scan skill directory and build version map.
|
||||
|
||||
@@ -356,7 +354,7 @@ class IncrementalUpdater:
|
||||
|
||||
# Read current content
|
||||
current_path = self.skill_dir / doc.file_path
|
||||
current_content = current_path.read_text(encoding="utf-8").splitlines()
|
||||
current_path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
# Generate diff (simplified)
|
||||
lines.append(f" Size: {prev.size_bytes:,} → {doc.size_bytes:,} bytes")
|
||||
|
||||
@@ -8,9 +8,7 @@ and translation-ready format generation.
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set
|
||||
from dataclasses import dataclass
|
||||
from collections import Counter
|
||||
import json
|
||||
|
||||
|
||||
@@ -20,16 +18,16 @@ class LanguageInfo:
|
||||
code: str # ISO 639-1 code (e.g., 'en', 'es', 'zh')
|
||||
name: str # Full name (e.g., 'English', 'Spanish', 'Chinese')
|
||||
confidence: float # Detection confidence (0.0-1.0)
|
||||
script: Optional[str] = None # Script type (e.g., 'Latin', 'Cyrillic')
|
||||
script: str | None = None # Script type (e.g., 'Latin', 'Cyrillic')
|
||||
|
||||
|
||||
@dataclass
|
||||
class TranslationStatus:
|
||||
"""Translation status for a document."""
|
||||
source_language: str
|
||||
target_languages: List[str]
|
||||
translated_languages: Set[str]
|
||||
missing_languages: Set[str]
|
||||
target_languages: list[str]
|
||||
translated_languages: set[str]
|
||||
missing_languages: set[str]
|
||||
completeness: float # Percentage (0.0-1.0)
|
||||
|
||||
|
||||
@@ -155,7 +153,7 @@ class LanguageDetector:
|
||||
script=self.SCRIPTS.get(best_lang)
|
||||
)
|
||||
|
||||
def detect_from_filename(self, filename: str) -> Optional[str]:
|
||||
def detect_from_filename(self, filename: str) -> str | None:
|
||||
"""
|
||||
Detect language from filename pattern.
|
||||
|
||||
@@ -194,15 +192,15 @@ class MultiLanguageManager:
|
||||
def __init__(self):
|
||||
"""Initialize multi-language manager."""
|
||||
self.detector = LanguageDetector()
|
||||
self.documents: Dict[str, List[Dict]] = {} # lang_code -> [docs]
|
||||
self.primary_language: Optional[str] = None
|
||||
self.documents: dict[str, list[dict]] = {} # lang_code -> [docs]
|
||||
self.primary_language: str | None = None
|
||||
|
||||
def add_document(
|
||||
self,
|
||||
file_path: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict] = None,
|
||||
force_language: Optional[str] = None
|
||||
metadata: dict | None = None,
|
||||
force_language: str | None = None
|
||||
) -> None:
|
||||
"""
|
||||
Add document with language detection.
|
||||
@@ -258,11 +256,11 @@ class MultiLanguageManager:
|
||||
|
||||
self.documents[lang_code].append(doc)
|
||||
|
||||
def get_languages(self) -> List[str]:
|
||||
def get_languages(self) -> list[str]:
|
||||
"""Get list of detected languages."""
|
||||
return sorted(self.documents.keys())
|
||||
|
||||
def get_document_count(self, language: Optional[str] = None) -> int:
|
||||
def get_document_count(self, language: str | None = None) -> int:
|
||||
"""
|
||||
Get document count for a language.
|
||||
|
||||
@@ -276,7 +274,7 @@ class MultiLanguageManager:
|
||||
return len(self.documents.get(language, []))
|
||||
return sum(len(docs) for docs in self.documents.values())
|
||||
|
||||
def get_translation_status(self, base_language: Optional[str] = None) -> TranslationStatus:
|
||||
def get_translation_status(self, base_language: str | None = None) -> TranslationStatus:
|
||||
"""
|
||||
Get translation status.
|
||||
|
||||
@@ -320,7 +318,7 @@ class MultiLanguageManager:
|
||||
completeness=min(completeness, 1.0)
|
||||
)
|
||||
|
||||
def export_by_language(self, output_dir: Path) -> Dict[str, Path]:
|
||||
def export_by_language(self, output_dir: Path) -> dict[str, Path]:
|
||||
"""
|
||||
Export documents organized by language.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ Provides predefined analysis configurations with clear trade-offs
|
||||
between speed and comprehensiveness.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -17,7 +16,7 @@ class AnalysisPreset:
|
||||
name: str
|
||||
description: str
|
||||
depth: str # surface, deep, full
|
||||
features: Dict[str, bool] # Feature flags (api_reference, patterns, etc.)
|
||||
features: dict[str, bool] # Feature flags (api_reference, patterns, etc.)
|
||||
enhance_level: int # 0=none, 1=SKILL.md, 2=+Arch+Config, 3=full
|
||||
estimated_time: str
|
||||
icon: str
|
||||
@@ -85,7 +84,7 @@ class PresetManager:
|
||||
"""Manages analysis presets and applies them to CLI arguments."""
|
||||
|
||||
@staticmethod
|
||||
def get_preset(name: str) -> Optional[AnalysisPreset]:
|
||||
def get_preset(name: str) -> AnalysisPreset | None:
|
||||
"""Get preset by name.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -8,7 +8,7 @@ Tracks completeness, accuracy, coverage, and health metrics.
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from typing import Any
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
@@ -29,7 +29,7 @@ class QualityMetric:
|
||||
value: float # 0.0-1.0 (or 0-100 percentage)
|
||||
level: MetricLevel
|
||||
description: str
|
||||
suggestions: List[str] = field(default_factory=list)
|
||||
suggestions: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -49,10 +49,10 @@ class QualityReport:
|
||||
timestamp: str
|
||||
skill_name: str
|
||||
overall_score: QualityScore
|
||||
metrics: List[QualityMetric]
|
||||
statistics: Dict[str, Any]
|
||||
recommendations: List[str]
|
||||
history: List[Dict[str, Any]] = field(default_factory=list)
|
||||
metrics: list[QualityMetric]
|
||||
statistics: dict[str, Any]
|
||||
recommendations: list[str]
|
||||
history: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
class QualityAnalyzer:
|
||||
@@ -73,8 +73,8 @@ class QualityAnalyzer:
|
||||
def __init__(self, skill_dir: Path):
|
||||
"""Initialize quality analyzer."""
|
||||
self.skill_dir = Path(skill_dir)
|
||||
self.metrics: List[QualityMetric] = []
|
||||
self.statistics: Dict[str, Any] = {}
|
||||
self.metrics: list[QualityMetric] = []
|
||||
self.statistics: dict[str, Any] = {}
|
||||
|
||||
def analyze_completeness(self) -> float:
|
||||
"""
|
||||
@@ -192,9 +192,8 @@ class QualityAnalyzer:
|
||||
|
||||
level = MetricLevel.INFO if accuracy >= 80 else MetricLevel.WARNING
|
||||
suggestions = []
|
||||
if accuracy < 100:
|
||||
if issues:
|
||||
suggestions.extend(issues[:3]) # Top 3 issues
|
||||
if accuracy < 100 and issues:
|
||||
suggestions.extend(issues[:3]) # Top 3 issues
|
||||
|
||||
self.metrics.append(QualityMetric(
|
||||
name="Accuracy",
|
||||
@@ -319,7 +318,7 @@ class QualityAnalyzer:
|
||||
|
||||
return health
|
||||
|
||||
def calculate_statistics(self) -> Dict[str, Any]:
|
||||
def calculate_statistics(self) -> dict[str, Any]:
|
||||
"""Calculate skill statistics."""
|
||||
stats = {
|
||||
'total_files': 0,
|
||||
@@ -392,7 +391,7 @@ class QualityAnalyzer:
|
||||
grade=grade
|
||||
)
|
||||
|
||||
def generate_recommendations(self, score: QualityScore) -> List[str]:
|
||||
def generate_recommendations(self, score: QualityScore) -> list[str]:
|
||||
"""Generate improvement recommendations."""
|
||||
recommendations = []
|
||||
|
||||
@@ -545,10 +544,7 @@ def main():
|
||||
print(formatted)
|
||||
|
||||
# Save report
|
||||
if args.output:
|
||||
report_path = Path(args.output)
|
||||
else:
|
||||
report_path = skill_dir / "quality_report.json"
|
||||
report_path = Path(args.output) if args.output else skill_dir / "quality_report.json"
|
||||
|
||||
report_path.write_text(json.dumps(asdict(report), indent=2, default=str))
|
||||
print(f"\n✅ Report saved: {report_path}")
|
||||
|
||||
@@ -16,7 +16,6 @@ Usage:
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
import json
|
||||
import logging
|
||||
|
||||
@@ -78,9 +77,9 @@ class RAGChunker:
|
||||
def chunk_document(
|
||||
self,
|
||||
text: str,
|
||||
metadata: Dict,
|
||||
source_file: Optional[str] = None
|
||||
) -> List[Dict]:
|
||||
metadata: dict,
|
||||
source_file: str | None = None
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Chunk single document into RAG-ready chunks.
|
||||
|
||||
@@ -139,7 +138,7 @@ class RAGChunker:
|
||||
|
||||
return result
|
||||
|
||||
def chunk_skill(self, skill_dir: Path) -> List[Dict]:
|
||||
def chunk_skill(self, skill_dir: Path) -> list[dict]:
|
||||
"""
|
||||
Chunk entire skill directory.
|
||||
|
||||
@@ -154,7 +153,7 @@ class RAGChunker:
|
||||
# Chunk main SKILL.md
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
if skill_md.exists():
|
||||
with open(skill_md, 'r', encoding='utf-8') as f:
|
||||
with open(skill_md, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
metadata = {
|
||||
@@ -170,7 +169,7 @@ class RAGChunker:
|
||||
references_dir = skill_dir / "references"
|
||||
if references_dir.exists():
|
||||
for ref_file in references_dir.glob("*.md"):
|
||||
with open(ref_file, 'r', encoding='utf-8') as f:
|
||||
with open(ref_file, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
metadata = {
|
||||
@@ -193,7 +192,7 @@ class RAGChunker:
|
||||
|
||||
return all_chunks
|
||||
|
||||
def _extract_code_blocks(self, text: str) -> Tuple[str, List[Dict]]:
|
||||
def _extract_code_blocks(self, text: str) -> tuple[str, list[dict]]:
|
||||
"""
|
||||
Extract code blocks and replace with placeholders.
|
||||
|
||||
@@ -231,9 +230,9 @@ class RAGChunker:
|
||||
|
||||
def _reinsert_code_blocks(
|
||||
self,
|
||||
chunks: List[str],
|
||||
code_blocks: List[Dict]
|
||||
) -> List[str]:
|
||||
chunks: list[str],
|
||||
code_blocks: list[dict]
|
||||
) -> list[str]:
|
||||
"""
|
||||
Re-insert code blocks into chunks.
|
||||
|
||||
@@ -255,7 +254,7 @@ class RAGChunker:
|
||||
|
||||
return result
|
||||
|
||||
def _find_semantic_boundaries(self, text: str) -> List[int]:
|
||||
def _find_semantic_boundaries(self, text: str) -> list[int]:
|
||||
"""
|
||||
Find paragraph and section boundaries.
|
||||
|
||||
@@ -303,7 +302,7 @@ class RAGChunker:
|
||||
|
||||
return boundaries
|
||||
|
||||
def _split_with_overlap(self, text: str, boundaries: List[int]) -> List[str]:
|
||||
def _split_with_overlap(self, text: str, boundaries: list[int]) -> list[str]:
|
||||
"""
|
||||
Split text at semantic boundaries with overlap.
|
||||
|
||||
@@ -375,7 +374,7 @@ class RAGChunker:
|
||||
|
||||
return chunks
|
||||
|
||||
def save_chunks(self, chunks: List[Dict], output_path: Path) -> None:
|
||||
def save_chunks(self, chunks: list[dict], output_path: Path) -> None:
|
||||
"""
|
||||
Save chunks to JSON file.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ Azure Blob Storage adaptor implementation.
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
@@ -118,7 +117,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
) -> str:
|
||||
"""Upload file to Azure Blob Storage."""
|
||||
local_file = Path(local_path)
|
||||
@@ -167,7 +166,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> List[StorageObject]:
|
||||
) -> list[StorageObject]:
|
||||
"""List files in Azure container."""
|
||||
try:
|
||||
blobs = self.container_client.list_blobs(
|
||||
|
||||
@@ -4,7 +4,6 @@ Base storage adaptor interface for cloud storage providers.
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@@ -23,9 +22,9 @@ class StorageObject:
|
||||
|
||||
key: str
|
||||
size: int
|
||||
last_modified: Optional[str] = None
|
||||
etag: Optional[str] = None
|
||||
metadata: Optional[Dict[str, str]] = None
|
||||
last_modified: str | None = None
|
||||
etag: str | None = None
|
||||
metadata: dict[str, str] | None = None
|
||||
|
||||
|
||||
class BaseStorageAdaptor(ABC):
|
||||
@@ -47,7 +46,7 @@ class BaseStorageAdaptor(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
) -> str:
|
||||
"""
|
||||
Upload file to cloud storage.
|
||||
@@ -98,7 +97,7 @@ class BaseStorageAdaptor(ABC):
|
||||
@abstractmethod
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> List[StorageObject]:
|
||||
) -> list[StorageObject]:
|
||||
"""
|
||||
List files in cloud storage.
|
||||
|
||||
@@ -146,8 +145,8 @@ class BaseStorageAdaptor(ABC):
|
||||
pass
|
||||
|
||||
def upload_directory(
|
||||
self, local_dir: str, remote_prefix: str = "", exclude_patterns: Optional[List[str]] = None
|
||||
) -> List[str]:
|
||||
self, local_dir: str, remote_prefix: str = "", exclude_patterns: list[str] | None = None
|
||||
) -> list[str]:
|
||||
"""
|
||||
Upload entire directory to cloud storage.
|
||||
|
||||
@@ -194,7 +193,7 @@ class BaseStorageAdaptor(ABC):
|
||||
|
||||
def download_directory(
|
||||
self, remote_prefix: str, local_dir: str
|
||||
) -> List[str]:
|
||||
) -> list[str]:
|
||||
"""
|
||||
Download directory from cloud storage.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ Google Cloud Storage (GCS) adaptor implementation.
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import timedelta
|
||||
|
||||
try:
|
||||
@@ -82,7 +81,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
self.bucket = self.storage_client.bucket(self.bucket_name)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
) -> str:
|
||||
"""Upload file to GCS."""
|
||||
local_file = Path(local_path)
|
||||
@@ -125,7 +124,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> List[StorageObject]:
|
||||
) -> list[StorageObject]:
|
||||
"""List files in GCS bucket."""
|
||||
try:
|
||||
blobs = self.storage_client.list_blobs(
|
||||
|
||||
@@ -4,7 +4,6 @@ AWS S3 storage adaptor implementation.
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
try:
|
||||
import boto3
|
||||
@@ -93,7 +92,7 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
self.s3_resource = boto3.resource('s3', **client_kwargs)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
) -> str:
|
||||
"""Upload file to S3."""
|
||||
local_file = Path(local_path)
|
||||
@@ -143,7 +142,7 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> List[StorageObject]:
|
||||
) -> list[StorageObject]:
|
||||
"""List files in S3 bucket."""
|
||||
try:
|
||||
paginator = self.s3_client.get_paginator('list_objects_v2')
|
||||
|
||||
@@ -9,7 +9,7 @@ skill documentation. Handles chunking, progress tracking, and resume functionali
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass
|
||||
import time
|
||||
|
||||
@@ -102,8 +102,8 @@ class StreamingIngester:
|
||||
self,
|
||||
content: str,
|
||||
metadata: dict,
|
||||
chunk_size: Optional[int] = None,
|
||||
chunk_overlap: Optional[int] = None
|
||||
chunk_size: int | None = None,
|
||||
chunk_overlap: int | None = None
|
||||
) -> Iterator[tuple[str, ChunkMetadata]]:
|
||||
"""
|
||||
Split document into overlapping chunks.
|
||||
@@ -180,7 +180,7 @@ class StreamingIngester:
|
||||
def stream_skill_directory(
|
||||
self,
|
||||
skill_dir: Path,
|
||||
callback: Optional[callable] = None
|
||||
callback: callable | None = None
|
||||
) -> Iterator[tuple[str, dict]]:
|
||||
"""
|
||||
Stream all documents from skill directory.
|
||||
@@ -276,7 +276,7 @@ class StreamingIngester:
|
||||
def batch_iterator(
|
||||
self,
|
||||
chunks: Iterator[tuple[str, dict]],
|
||||
batch_size: Optional[int] = None
|
||||
batch_size: int | None = None
|
||||
) -> Iterator[list[tuple[str, dict]]]:
|
||||
"""
|
||||
Group chunks into batches for efficient processing.
|
||||
@@ -328,7 +328,7 @@ class StreamingIngester:
|
||||
|
||||
checkpoint_path.write_text(json.dumps(checkpoint_data, indent=2))
|
||||
|
||||
def load_checkpoint(self, checkpoint_path: Path) -> Optional[dict]:
|
||||
def load_checkpoint(self, checkpoint_path: Path) -> dict | None:
|
||||
"""
|
||||
Load ingestion checkpoint for resume.
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ Caching layer for embeddings.
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
@@ -78,7 +77,7 @@ class EmbeddingCache:
|
||||
def set(
|
||||
self,
|
||||
hash_key: str,
|
||||
embedding: List[float],
|
||||
embedding: list[float],
|
||||
model: str
|
||||
) -> None:
|
||||
"""
|
||||
@@ -103,7 +102,7 @@ class EmbeddingCache:
|
||||
|
||||
self.conn.commit()
|
||||
|
||||
def get(self, hash_key: str) -> Optional[List[float]]:
|
||||
def get(self, hash_key: str) -> list[float] | None:
|
||||
"""
|
||||
Retrieve embedding from cache.
|
||||
|
||||
@@ -146,7 +145,7 @@ class EmbeddingCache:
|
||||
|
||||
return json.loads(embedding_json)
|
||||
|
||||
def get_batch(self, hash_keys: List[str]) -> Tuple[List[Optional[List[float]]], List[bool]]:
|
||||
def get_batch(self, hash_keys: list[str]) -> tuple[list[list[float] | None], list[bool]]:
|
||||
"""
|
||||
Retrieve multiple embeddings from cache.
|
||||
|
||||
@@ -214,7 +213,7 @@ class EmbeddingCache:
|
||||
|
||||
self.conn.commit()
|
||||
|
||||
def clear(self, model: Optional[str] = None) -> int:
|
||||
def clear(self, model: str | None = None) -> int:
|
||||
"""
|
||||
Clear cache entries.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ Embedding generation with multiple model support.
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
from typing import List, Optional, Tuple
|
||||
import numpy as np
|
||||
|
||||
# OpenAI support
|
||||
@@ -128,9 +127,9 @@ class EmbeddingGenerator:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: Optional[str] = None,
|
||||
voyage_api_key: Optional[str] = None,
|
||||
cache_dir: Optional[str] = None
|
||||
api_key: str | None = None,
|
||||
voyage_api_key: str | None = None,
|
||||
cache_dir: str | None = None
|
||||
):
|
||||
"""
|
||||
Initialize embedding generator.
|
||||
@@ -168,7 +167,7 @@ class EmbeddingGenerator:
|
||||
)
|
||||
return self.MODELS[model]
|
||||
|
||||
def list_models(self) -> List[dict]:
|
||||
def list_models(self) -> list[dict]:
|
||||
"""List all available models."""
|
||||
models = []
|
||||
for name, info in self.MODELS.items():
|
||||
@@ -186,7 +185,7 @@ class EmbeddingGenerator:
|
||||
text: str,
|
||||
model: str = "text-embedding-3-small",
|
||||
normalize: bool = True
|
||||
) -> List[float]:
|
||||
) -> list[float]:
|
||||
"""
|
||||
Generate embedding for a single text.
|
||||
|
||||
@@ -216,11 +215,11 @@ class EmbeddingGenerator:
|
||||
|
||||
def generate_batch(
|
||||
self,
|
||||
texts: List[str],
|
||||
texts: list[str],
|
||||
model: str = "text-embedding-3-small",
|
||||
normalize: bool = True,
|
||||
batch_size: int = 32
|
||||
) -> Tuple[List[List[float]], int]:
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Generate embeddings for multiple texts.
|
||||
|
||||
@@ -251,7 +250,7 @@ class EmbeddingGenerator:
|
||||
|
||||
def _generate_openai(
|
||||
self, text: str, model: str, normalize: bool
|
||||
) -> List[float]:
|
||||
) -> list[float]:
|
||||
"""Generate embedding using OpenAI API."""
|
||||
if not OPENAI_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -277,8 +276,8 @@ class EmbeddingGenerator:
|
||||
raise Exception(f"OpenAI embedding generation failed: {e}")
|
||||
|
||||
def _generate_openai_batch(
|
||||
self, texts: List[str], model: str, normalize: bool, batch_size: int
|
||||
) -> Tuple[List[List[float]], int]:
|
||||
self, texts: list[str], model: str, normalize: bool, batch_size: int
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""Generate embeddings using OpenAI API in batches."""
|
||||
if not OPENAI_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -316,7 +315,7 @@ class EmbeddingGenerator:
|
||||
|
||||
def _generate_voyage(
|
||||
self, text: str, model: str, normalize: bool
|
||||
) -> List[float]:
|
||||
) -> list[float]:
|
||||
"""Generate embedding using Voyage AI API."""
|
||||
if not VOYAGE_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -342,8 +341,8 @@ class EmbeddingGenerator:
|
||||
raise Exception(f"Voyage AI embedding generation failed: {e}")
|
||||
|
||||
def _generate_voyage_batch(
|
||||
self, texts: List[str], model: str, normalize: bool, batch_size: int
|
||||
) -> Tuple[List[List[float]], int]:
|
||||
self, texts: list[str], model: str, normalize: bool, batch_size: int
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""Generate embeddings using Voyage AI API in batches."""
|
||||
if not VOYAGE_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -381,7 +380,7 @@ class EmbeddingGenerator:
|
||||
|
||||
def _generate_sentence_transformer(
|
||||
self, text: str, model: str, normalize: bool
|
||||
) -> List[float]:
|
||||
) -> list[float]:
|
||||
"""Generate embedding using sentence-transformers."""
|
||||
if not SENTENCE_TRANSFORMERS_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -401,8 +400,8 @@ class EmbeddingGenerator:
|
||||
return embedding.tolist()
|
||||
|
||||
def _generate_sentence_transformer_batch(
|
||||
self, texts: List[str], model: str, normalize: bool, batch_size: int
|
||||
) -> Tuple[List[List[float]], int]:
|
||||
self, texts: list[str], model: str, normalize: bool, batch_size: int
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""Generate embeddings using sentence-transformers in batches."""
|
||||
if not SENTENCE_TRANSFORMERS_AVAILABLE:
|
||||
raise ImportError(
|
||||
@@ -428,7 +427,7 @@ class EmbeddingGenerator:
|
||||
return embeddings.tolist(), dimensions
|
||||
|
||||
@staticmethod
|
||||
def _normalize(embedding: List[float]) -> List[float]:
|
||||
def _normalize(embedding: list[float]) -> list[float]:
|
||||
"""Normalize embedding to unit length."""
|
||||
vec = np.array(embedding)
|
||||
norm = np.linalg.norm(vec)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
Pydantic models for embedding API.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Dict, Any
|
||||
from typing import Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ class EmbeddingRequest(BaseModel):
|
||||
class BatchEmbeddingRequest(BaseModel):
|
||||
"""Request model for batch embedding generation."""
|
||||
|
||||
texts: List[str] = Field(..., description="List of texts to embed")
|
||||
texts: list[str] = Field(..., description="List of texts to embed")
|
||||
model: str = Field(
|
||||
default="text-embedding-3-small",
|
||||
description="Embedding model to use"
|
||||
@@ -41,7 +41,7 @@ class BatchEmbeddingRequest(BaseModel):
|
||||
default=True,
|
||||
description="Normalize embeddings to unit length"
|
||||
)
|
||||
batch_size: Optional[int] = Field(
|
||||
batch_size: int | None = Field(
|
||||
default=32,
|
||||
description="Batch size for processing (default: 32)"
|
||||
)
|
||||
@@ -64,7 +64,7 @@ class BatchEmbeddingRequest(BaseModel):
|
||||
class EmbeddingResponse(BaseModel):
|
||||
"""Response model for embedding generation."""
|
||||
|
||||
embedding: List[float] = Field(..., description="Generated embedding vector")
|
||||
embedding: list[float] = Field(..., description="Generated embedding vector")
|
||||
model: str = Field(..., description="Model used for generation")
|
||||
dimensions: int = Field(..., description="Embedding dimensions")
|
||||
cached: bool = Field(
|
||||
@@ -76,7 +76,7 @@ class EmbeddingResponse(BaseModel):
|
||||
class BatchEmbeddingResponse(BaseModel):
|
||||
"""Response model for batch embedding generation."""
|
||||
|
||||
embeddings: List[List[float]] = Field(..., description="List of embedding vectors")
|
||||
embeddings: list[list[float]] = Field(..., description="List of embedding vectors")
|
||||
model: str = Field(..., description="Model used for generation")
|
||||
dimensions: int = Field(..., description="Embedding dimensions")
|
||||
count: int = Field(..., description="Number of embeddings generated")
|
||||
@@ -121,7 +121,7 @@ class SkillEmbeddingResponse(BaseModel):
|
||||
total_chunks: int = Field(..., description="Total number of chunks embedded")
|
||||
model: str = Field(..., description="Model used for generation")
|
||||
dimensions: int = Field(..., description="Embedding dimensions")
|
||||
metadata: Dict[str, Any] = Field(
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Skill metadata"
|
||||
)
|
||||
@@ -132,9 +132,9 @@ class HealthResponse(BaseModel):
|
||||
|
||||
status: str = Field(..., description="Service status")
|
||||
version: str = Field(..., description="API version")
|
||||
models: List[str] = Field(..., description="Available embedding models")
|
||||
models: list[str] = Field(..., description="Available embedding models")
|
||||
cache_enabled: bool = Field(..., description="Whether cache is enabled")
|
||||
cache_size: Optional[int] = Field(None, description="Number of cached embeddings")
|
||||
cache_size: int | None = Field(None, description="Number of cached embeddings")
|
||||
|
||||
|
||||
class ModelInfo(BaseModel):
|
||||
@@ -144,7 +144,7 @@ class ModelInfo(BaseModel):
|
||||
provider: str = Field(..., description="Model provider (openai, anthropic, sentence-transformers)")
|
||||
dimensions: int = Field(..., description="Embedding dimensions")
|
||||
max_tokens: int = Field(..., description="Maximum input tokens")
|
||||
cost_per_million: Optional[float] = Field(
|
||||
cost_per_million: float | None = Field(
|
||||
None,
|
||||
description="Cost per million tokens (if applicable)"
|
||||
)
|
||||
@@ -153,5 +153,5 @@ class ModelInfo(BaseModel):
|
||||
class ModelsResponse(BaseModel):
|
||||
"""Response model for listing available models."""
|
||||
|
||||
models: List[ModelInfo] = Field(..., description="List of available models")
|
||||
models: list[ModelInfo] = Field(..., description="List of available models")
|
||||
count: int = Field(..., description="Number of available models")
|
||||
|
||||
@@ -20,7 +20,6 @@ Usage:
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
try:
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
@@ -208,7 +207,7 @@ if FASTAPI_AVAILABLE:
|
||||
)
|
||||
|
||||
# Fill in placeholders and cache
|
||||
for idx, text, embedding in zip(text_indices, texts_to_generate, generated_embeddings):
|
||||
for idx, text, embedding in zip(text_indices, texts_to_generate, generated_embeddings, strict=False):
|
||||
embeddings[idx] = embedding
|
||||
|
||||
if cache:
|
||||
@@ -300,7 +299,7 @@ if FASTAPI_AVAILABLE:
|
||||
|
||||
@app.post("/cache/clear", response_model=dict)
|
||||
async def clear_cache(
|
||||
model: Optional[str] = Query(None, description="Model to clear (all if not specified)")
|
||||
model: str | None = Query(None, description="Model to clear (all if not specified)")
|
||||
):
|
||||
"""Clear cache entries."""
|
||||
if not cache:
|
||||
|
||||
@@ -12,7 +12,6 @@ Each tool provides a direct interface to its respective vector database adaptor.
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
try:
|
||||
from mcp.types import TextContent
|
||||
@@ -36,7 +35,7 @@ except ImportError:
|
||||
get_adaptor = None # Will handle gracefully below
|
||||
|
||||
|
||||
async def export_to_weaviate_impl(args: dict) -> List[TextContent]:
|
||||
async def export_to_weaviate_impl(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
Export skill to Weaviate vector database format.
|
||||
|
||||
@@ -140,7 +139,7 @@ async def export_to_weaviate_impl(args: dict) -> List[TextContent]:
|
||||
]
|
||||
|
||||
|
||||
async def export_to_chroma_impl(args: dict) -> List[TextContent]:
|
||||
async def export_to_chroma_impl(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
Export skill to Chroma vector database format.
|
||||
|
||||
@@ -244,7 +243,7 @@ async def export_to_chroma_impl(args: dict) -> List[TextContent]:
|
||||
]
|
||||
|
||||
|
||||
async def export_to_faiss_impl(args: dict) -> List[TextContent]:
|
||||
async def export_to_faiss_impl(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
Export skill to FAISS vector index format.
|
||||
|
||||
@@ -363,7 +362,7 @@ async def export_to_faiss_impl(args: dict) -> List[TextContent]:
|
||||
]
|
||||
|
||||
|
||||
async def export_to_qdrant_impl(args: dict) -> List[TextContent]:
|
||||
async def export_to_qdrant_impl(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
Export skill to Qdrant vector database format.
|
||||
|
||||
|
||||
@@ -4,10 +4,8 @@ Change detection for documentation pages.
|
||||
|
||||
import hashlib
|
||||
import difflib
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from datetime import datetime
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
from .models import PageChange, ChangeType, ChangeReport
|
||||
|
||||
@@ -59,7 +57,7 @@ class ChangeDetector:
|
||||
"""
|
||||
return hashlib.sha256(content.encode('utf-8')).hexdigest()
|
||||
|
||||
def fetch_page(self, url: str) -> Tuple[str, Dict[str, str]]:
|
||||
def fetch_page(self, url: str) -> tuple[str, dict[str, str]]:
|
||||
"""
|
||||
Fetch page content and metadata.
|
||||
|
||||
@@ -92,9 +90,9 @@ class ChangeDetector:
|
||||
def check_page(
|
||||
self,
|
||||
url: str,
|
||||
old_hash: Optional[str] = None,
|
||||
old_hash: str | None = None,
|
||||
generate_diff: bool = False,
|
||||
old_content: Optional[str] = None
|
||||
old_content: str | None = None
|
||||
) -> PageChange:
|
||||
"""
|
||||
Check if page has changed.
|
||||
@@ -137,7 +135,7 @@ class ChangeDetector:
|
||||
detected_at=datetime.utcnow()
|
||||
)
|
||||
|
||||
except requests.RequestException as e:
|
||||
except requests.RequestException:
|
||||
# Page might be deleted or temporarily unavailable
|
||||
return PageChange(
|
||||
url=url,
|
||||
@@ -149,8 +147,8 @@ class ChangeDetector:
|
||||
|
||||
def check_pages(
|
||||
self,
|
||||
urls: List[str],
|
||||
previous_hashes: Dict[str, str],
|
||||
urls: list[str],
|
||||
previous_hashes: dict[str, str],
|
||||
generate_diffs: bool = False
|
||||
) -> ChangeReport:
|
||||
"""
|
||||
@@ -254,8 +252,8 @@ class ChangeDetector:
|
||||
def check_header_changes(
|
||||
self,
|
||||
url: str,
|
||||
old_modified: Optional[str] = None,
|
||||
old_etag: Optional[str] = None
|
||||
old_modified: str | None = None,
|
||||
old_etag: str | None = None
|
||||
) -> bool:
|
||||
"""
|
||||
Quick check using HTTP headers (no content download).
|
||||
@@ -284,10 +282,7 @@ class ChangeDetector:
|
||||
if old_modified and new_modified and old_modified != new_modified:
|
||||
return True
|
||||
|
||||
if old_etag and new_etag and old_etag != new_etag:
|
||||
return True
|
||||
|
||||
return False
|
||||
return bool(old_etag and new_etag and old_etag != new_etag)
|
||||
|
||||
except requests.RequestException:
|
||||
# If HEAD request fails, assume change (will be verified with GET)
|
||||
@@ -295,9 +290,9 @@ class ChangeDetector:
|
||||
|
||||
def batch_check_headers(
|
||||
self,
|
||||
urls: List[str],
|
||||
previous_metadata: Dict[str, Dict[str, str]]
|
||||
) -> List[str]:
|
||||
urls: list[str],
|
||||
previous_metadata: dict[str, dict[str, str]]
|
||||
) -> list[str]:
|
||||
"""
|
||||
Batch check URLs using headers only.
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
Pydantic models for sync system.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Dict, Any
|
||||
from typing import Any
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -21,9 +21,9 @@ class PageChange(BaseModel):
|
||||
|
||||
url: str = Field(..., description="Page URL")
|
||||
change_type: ChangeType = Field(..., description="Type of change")
|
||||
old_hash: Optional[str] = Field(None, description="Previous content hash")
|
||||
new_hash: Optional[str] = Field(None, description="New content hash")
|
||||
diff: Optional[str] = Field(None, description="Content diff (if available)")
|
||||
old_hash: str | None = Field(None, description="Previous content hash")
|
||||
new_hash: str | None = Field(None, description="New content hash")
|
||||
diff: str | None = Field(None, description="Content diff (if available)")
|
||||
detected_at: datetime = Field(
|
||||
default_factory=datetime.utcnow,
|
||||
description="When change was detected"
|
||||
@@ -47,9 +47,9 @@ class ChangeReport(BaseModel):
|
||||
|
||||
skill_name: str = Field(..., description="Skill name")
|
||||
total_pages: int = Field(..., description="Total pages checked")
|
||||
added: List[PageChange] = Field(default_factory=list, description="Added pages")
|
||||
modified: List[PageChange] = Field(default_factory=list, description="Modified pages")
|
||||
deleted: List[PageChange] = Field(default_factory=list, description="Deleted pages")
|
||||
added: list[PageChange] = Field(default_factory=list, description="Added pages")
|
||||
modified: list[PageChange] = Field(default_factory=list, description="Modified pages")
|
||||
deleted: list[PageChange] = Field(default_factory=list, description="Deleted pages")
|
||||
unchanged: int = Field(0, description="Number of unchanged pages")
|
||||
checked_at: datetime = Field(
|
||||
default_factory=datetime.utcnow,
|
||||
@@ -84,19 +84,19 @@ class SyncConfig(BaseModel):
|
||||
default=True,
|
||||
description="Send notifications on changes"
|
||||
)
|
||||
notification_channels: List[str] = Field(
|
||||
notification_channels: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Notification channels (email, slack, webhook)"
|
||||
)
|
||||
webhook_url: Optional[str] = Field(
|
||||
webhook_url: str | None = Field(
|
||||
None,
|
||||
description="Webhook URL for change notifications"
|
||||
)
|
||||
email_recipients: List[str] = Field(
|
||||
email_recipients: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Email recipients for notifications"
|
||||
)
|
||||
slack_webhook: Optional[str] = Field(
|
||||
slack_webhook: str | None = Field(
|
||||
None,
|
||||
description="Slack webhook URL"
|
||||
)
|
||||
@@ -120,16 +120,16 @@ class SyncState(BaseModel):
|
||||
"""Current state of sync monitoring."""
|
||||
|
||||
skill_name: str = Field(..., description="Skill name")
|
||||
last_check: Optional[datetime] = Field(None, description="Last check time")
|
||||
last_change: Optional[datetime] = Field(None, description="Last change detected")
|
||||
last_check: datetime | None = Field(None, description="Last check time")
|
||||
last_change: datetime | None = Field(None, description="Last change detected")
|
||||
total_checks: int = Field(default=0, description="Total checks performed")
|
||||
total_changes: int = Field(default=0, description="Total changes detected")
|
||||
page_hashes: Dict[str, str] = Field(
|
||||
page_hashes: dict[str, str] = Field(
|
||||
default_factory=dict,
|
||||
description="URL -> content hash mapping"
|
||||
)
|
||||
status: str = Field(default="idle", description="Current status")
|
||||
error: Optional[str] = Field(None, description="Last error message")
|
||||
error: str | None = Field(None, description="Last error message")
|
||||
|
||||
|
||||
class WebhookPayload(BaseModel):
|
||||
@@ -141,8 +141,8 @@ class WebhookPayload(BaseModel):
|
||||
default_factory=datetime.utcnow,
|
||||
description="Event timestamp"
|
||||
)
|
||||
changes: Optional[ChangeReport] = Field(None, description="Change report")
|
||||
metadata: Dict[str, Any] = Field(
|
||||
changes: ChangeReport | None = Field(None, description="Change report")
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Additional metadata"
|
||||
)
|
||||
|
||||
@@ -6,12 +6,12 @@ import json
|
||||
import time
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, List, Callable
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
import schedule
|
||||
|
||||
from .detector import ChangeDetector
|
||||
from .models import SyncConfig, SyncState, ChangeReport, WebhookPayload
|
||||
from .models import SyncState, ChangeReport, WebhookPayload
|
||||
from .notifier import Notifier
|
||||
|
||||
|
||||
@@ -50,8 +50,8 @@ class SyncMonitor:
|
||||
config_path: str,
|
||||
check_interval: int = 3600,
|
||||
auto_update: bool = False,
|
||||
state_file: Optional[str] = None,
|
||||
on_change: Optional[Callable[[ChangeReport], None]] = None
|
||||
state_file: str | None = None,
|
||||
on_change: Callable[[ChangeReport], None] | None = None
|
||||
):
|
||||
"""
|
||||
Initialize sync monitor.
|
||||
@@ -244,7 +244,7 @@ class SyncMonitor:
|
||||
|
||||
print(f"🛑 Stopped monitoring {self.skill_name}")
|
||||
|
||||
def stats(self) -> Dict:
|
||||
def stats(self) -> dict:
|
||||
"""Get monitoring statistics."""
|
||||
return {
|
||||
"skill_name": self.skill_name,
|
||||
|
||||
@@ -4,7 +4,6 @@ Notification system for sync events.
|
||||
|
||||
import os
|
||||
import requests
|
||||
from typing import Optional, List
|
||||
from .models import WebhookPayload
|
||||
|
||||
|
||||
@@ -32,9 +31,9 @@ class Notifier:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
webhook_url: Optional[str] = None,
|
||||
slack_webhook: Optional[str] = None,
|
||||
email_recipients: Optional[List[str]] = None,
|
||||
webhook_url: str | None = None,
|
||||
slack_webhook: str | None = None,
|
||||
email_recipients: list[str] | None = None,
|
||||
console: bool = True
|
||||
):
|
||||
"""
|
||||
|
||||
@@ -207,7 +207,7 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
time_per_ref = elapsed / ref_count
|
||||
|
||||
# Get output size
|
||||
data = json.loads(formatted)
|
||||
json.loads(formatted)
|
||||
size_kb = len(formatted) / 1024
|
||||
|
||||
results.append({
|
||||
@@ -350,14 +350,14 @@ class TestAdaptorBenchmarks(unittest.TestCase):
|
||||
empty_dir.mkdir()
|
||||
|
||||
start = time.perf_counter()
|
||||
empty_result = adaptor.format_skill_md(empty_dir, metadata)
|
||||
adaptor.format_skill_md(empty_dir, metadata)
|
||||
empty_time = time.perf_counter() - start
|
||||
|
||||
# Full skill (50 references)
|
||||
full_dir = self._create_skill_with_n_references(50)
|
||||
|
||||
start = time.perf_counter()
|
||||
full_result = adaptor.format_skill_md(full_dir, metadata)
|
||||
adaptor.format_skill_md(full_dir, metadata)
|
||||
full_time = time.perf_counter() - start
|
||||
|
||||
print(f"\nEmpty skill: {empty_time*1000:.2f}ms")
|
||||
|
||||
@@ -850,7 +850,6 @@ export default {
|
||||
# Should have categories from reference files
|
||||
# Files: getting_started.md, reactivity_api.md, components_guide.md
|
||||
# Categories derived from filenames (stem.replace("_", " ").lower())
|
||||
expected_refs = {"getting started", "reactivity api", "components guide"}
|
||||
|
||||
# Check that at least one reference category exists
|
||||
ref_categories = categories - {"overview"}
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for Chroma Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for FAISS Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for Haystack Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for LangChain Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for LlamaIndex Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for Qdrant Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ Tests for Weaviate Adaptor
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -4,10 +4,8 @@ Tests for benchmarking suite.
|
||||
|
||||
import time
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from skill_seekers.benchmark import (
|
||||
Benchmark,
|
||||
@@ -164,7 +162,7 @@ class TestBenchmark:
|
||||
|
||||
with benchmark.memory("operation"):
|
||||
# Allocate some memory
|
||||
data = [0] * 1000000
|
||||
pass
|
||||
|
||||
assert len(benchmark.result.memory) == 1
|
||||
assert benchmark.result.memory[0].operation == "operation"
|
||||
@@ -394,7 +392,7 @@ class TestBenchmarkRunner:
|
||||
with bench.timer("operation"):
|
||||
time.sleep(0.1)
|
||||
|
||||
baseline_report = runner.run("baseline", baseline_bench, save=True)
|
||||
runner.run("baseline", baseline_bench, save=True)
|
||||
baseline_path = list(tmp_path.glob("baseline_*.json"))[0]
|
||||
|
||||
# Create faster version
|
||||
@@ -402,7 +400,7 @@ class TestBenchmarkRunner:
|
||||
with bench.timer("operation"):
|
||||
time.sleep(0.05)
|
||||
|
||||
improved_report = runner.run("improved", improved_bench, save=True)
|
||||
runner.run("improved", improved_bench, save=True)
|
||||
improved_path = list(tmp_path.glob("improved_*.json"))[0]
|
||||
|
||||
# Compare
|
||||
|
||||
@@ -12,7 +12,6 @@ import pytest
|
||||
import json
|
||||
from pathlib import Path
|
||||
from skill_seekers.cli.adaptors import get_adaptor
|
||||
from skill_seekers.cli.adaptors.base import SkillMetadata
|
||||
|
||||
|
||||
def create_test_skill(tmp_path: Path, large_doc: bool = False) -> Path:
|
||||
@@ -293,7 +292,7 @@ class TestBaseAdaptorChunkingHelper:
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
assert isinstance(chunk_text, str)
|
||||
assert isinstance(chunk_meta, dict)
|
||||
assert chunk_meta['is_chunked'] == True
|
||||
assert chunk_meta['is_chunked']
|
||||
assert 'chunk_index' in chunk_meta
|
||||
assert 'chunk_id' in chunk_meta
|
||||
# Original metadata preserved
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
from skill_seekers.cli.storage import (
|
||||
get_storage_adaptor,
|
||||
|
||||
@@ -5,7 +5,7 @@ Tests for embedding generation system.
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
from skill_seekers.embedding.models import (
|
||||
EmbeddingRequest,
|
||||
|
||||
@@ -14,7 +14,6 @@ import pytest
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import tempfile
|
||||
import json
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
@@ -21,9 +21,7 @@ import time
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.incremental_updater import (
|
||||
IncrementalUpdater,
|
||||
DocumentVersion,
|
||||
ChangeSet
|
||||
IncrementalUpdater
|
||||
)
|
||||
|
||||
|
||||
@@ -67,7 +65,7 @@ def test_no_changes_after_save(temp_skill_dir):
|
||||
updater = IncrementalUpdater(temp_skill_dir)
|
||||
|
||||
# First scan
|
||||
change_set1 = updater.detect_changes()
|
||||
updater.detect_changes()
|
||||
updater.save_current_versions()
|
||||
|
||||
# Second scan (no changes)
|
||||
|
||||
@@ -17,12 +17,12 @@ Usage:
|
||||
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from skill_seekers.cli.adaptors import get_adaptor
|
||||
from skill_seekers.cli.adaptors.base import SkillMetadata
|
||||
import contextlib
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -144,7 +144,7 @@ class TestWeaviateIntegration:
|
||||
|
||||
# Package skill
|
||||
adaptor = get_adaptor("weaviate")
|
||||
metadata = SkillMetadata(
|
||||
SkillMetadata(
|
||||
name="integration_test",
|
||||
description="Integration test skill for Weaviate"
|
||||
)
|
||||
@@ -231,7 +231,7 @@ class TestWeaviateIntegration:
|
||||
|
||||
# Package with rich metadata
|
||||
adaptor = get_adaptor("weaviate")
|
||||
metadata = SkillMetadata(
|
||||
SkillMetadata(
|
||||
name="metadata_test",
|
||||
description="Test metadata preservation",
|
||||
version="2.0.0",
|
||||
@@ -271,10 +271,8 @@ class TestWeaviateIntegration:
|
||||
assert "test" in obj["tags"], "Tags not preserved"
|
||||
|
||||
finally:
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
client.schema.delete_class(class_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@@ -302,7 +300,7 @@ class TestChromaIntegration:
|
||||
|
||||
# Package skill
|
||||
adaptor = get_adaptor("chroma")
|
||||
metadata = SkillMetadata(
|
||||
SkillMetadata(
|
||||
name="chroma_integration_test",
|
||||
description="Integration test skill for ChromaDB"
|
||||
)
|
||||
@@ -415,10 +413,8 @@ class TestChromaIntegration:
|
||||
"Filter returned wrong category"
|
||||
|
||||
finally:
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
client.delete_collection(name=collection_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@@ -447,7 +443,7 @@ class TestQdrantIntegration:
|
||||
|
||||
# Package skill
|
||||
adaptor = get_adaptor("qdrant")
|
||||
metadata = SkillMetadata(
|
||||
SkillMetadata(
|
||||
name="qdrant_integration_test",
|
||||
description="Integration test skill for Qdrant"
|
||||
)
|
||||
@@ -554,7 +550,7 @@ class TestQdrantIntegration:
|
||||
|
||||
# Package and upload
|
||||
adaptor = get_adaptor("qdrant")
|
||||
metadata = SkillMetadata(
|
||||
SkillMetadata(
|
||||
name="qdrant_filter_test",
|
||||
description="Test filtering capabilities"
|
||||
)
|
||||
@@ -610,10 +606,8 @@ class TestQdrantIntegration:
|
||||
"Filter returned wrong type"
|
||||
|
||||
finally:
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
client.delete_collection(collection_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -61,15 +61,6 @@ class TestIssue277RealWorld(unittest.TestCase):
|
||||
)
|
||||
|
||||
# Verify correct transformed URLs
|
||||
expected_urls = {
|
||||
"https://mikro-orm.io/docs/index.html.md", # Root URL
|
||||
"https://mikro-orm.io/docs/reference.md", # Already .md
|
||||
"https://mikro-orm.io/docs/quick-start/index.html.md", # Deduplicated from anchor
|
||||
"https://mikro-orm.io/docs/repositories.md", # Already .md, anchor stripped
|
||||
"https://mikro-orm.io/docs/propagation/index.html.md",
|
||||
"https://mikro-orm.io/docs/defining-entities.md", # Already .md, deduplicated
|
||||
"https://mikro-orm.io/docs/defining-entities/index.html.md", # Non-.md version
|
||||
}
|
||||
|
||||
# Check that we got the expected number of unique URLs
|
||||
# Note: defining-entities has both .md and non-.md versions, so we have 2 entries for it
|
||||
|
||||
@@ -21,8 +21,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.multilang_support import (
|
||||
LanguageDetector,
|
||||
MultiLanguageManager,
|
||||
LanguageInfo
|
||||
MultiLanguageManager
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -40,12 +40,12 @@ class TestPresetDefinitions:
|
||||
assert quick.estimated_time == '1-2 minutes'
|
||||
assert quick.icon == '⚡'
|
||||
# Quick should disable slow features
|
||||
assert quick.features['api_reference'] == True # Essential
|
||||
assert quick.features['dependency_graph'] == False # Slow
|
||||
assert quick.features['patterns'] == False # Slow
|
||||
assert quick.features['test_examples'] == False # Slow
|
||||
assert quick.features['how_to_guides'] == False # Requires AI
|
||||
assert quick.features['docs'] == True # Essential
|
||||
assert quick.features['api_reference'] # Essential
|
||||
assert not quick.features['dependency_graph'] # Slow
|
||||
assert not quick.features['patterns'] # Slow
|
||||
assert not quick.features['test_examples'] # Slow
|
||||
assert not quick.features['how_to_guides'] # Requires AI
|
||||
assert quick.features['docs'] # Essential
|
||||
|
||||
def test_standard_preset(self):
|
||||
"""Test standard preset configuration."""
|
||||
@@ -56,13 +56,13 @@ class TestPresetDefinitions:
|
||||
assert standard.estimated_time == '5-10 minutes'
|
||||
assert standard.icon == '🎯'
|
||||
# Standard should enable core features
|
||||
assert standard.features['api_reference'] == True
|
||||
assert standard.features['dependency_graph'] == True
|
||||
assert standard.features['patterns'] == True
|
||||
assert standard.features['test_examples'] == True
|
||||
assert standard.features['how_to_guides'] == False # Slow
|
||||
assert standard.features['config_patterns'] == True
|
||||
assert standard.features['docs'] == True
|
||||
assert standard.features['api_reference']
|
||||
assert standard.features['dependency_graph']
|
||||
assert standard.features['patterns']
|
||||
assert standard.features['test_examples']
|
||||
assert not standard.features['how_to_guides'] # Slow
|
||||
assert standard.features['config_patterns']
|
||||
assert standard.features['docs']
|
||||
|
||||
def test_comprehensive_preset(self):
|
||||
"""Test comprehensive preset configuration."""
|
||||
@@ -131,12 +131,12 @@ class TestPresetApplication:
|
||||
|
||||
assert updated['depth'] == 'surface'
|
||||
assert updated['enhance_level'] == 0
|
||||
assert updated['skip_patterns'] == True # Quick disables patterns
|
||||
assert updated['skip_dependency_graph'] == True # Quick disables dep graph
|
||||
assert updated['skip_test_examples'] == True # Quick disables tests
|
||||
assert updated['skip_how_to_guides'] == True # Quick disables guides
|
||||
assert updated['skip_api_reference'] == False # Quick enables API ref
|
||||
assert updated['skip_docs'] == False # Quick enables docs
|
||||
assert updated['skip_patterns'] # Quick disables patterns
|
||||
assert updated['skip_dependency_graph'] # Quick disables dep graph
|
||||
assert updated['skip_test_examples'] # Quick disables tests
|
||||
assert updated['skip_how_to_guides'] # Quick disables guides
|
||||
assert not updated['skip_api_reference'] # Quick enables API ref
|
||||
assert not updated['skip_docs'] # Quick enables docs
|
||||
|
||||
def test_apply_preset_standard(self):
|
||||
"""Test applying standard preset."""
|
||||
@@ -145,12 +145,12 @@ class TestPresetApplication:
|
||||
|
||||
assert updated['depth'] == 'deep'
|
||||
assert updated['enhance_level'] == 1
|
||||
assert updated['skip_patterns'] == False # Standard enables patterns
|
||||
assert updated['skip_dependency_graph'] == False # Standard enables dep graph
|
||||
assert updated['skip_test_examples'] == False # Standard enables tests
|
||||
assert updated['skip_how_to_guides'] == True # Standard disables guides (slow)
|
||||
assert updated['skip_api_reference'] == False # Standard enables API ref
|
||||
assert updated['skip_docs'] == False # Standard enables docs
|
||||
assert not updated['skip_patterns'] # Standard enables patterns
|
||||
assert not updated['skip_dependency_graph'] # Standard enables dep graph
|
||||
assert not updated['skip_test_examples'] # Standard enables tests
|
||||
assert updated['skip_how_to_guides'] # Standard disables guides (slow)
|
||||
assert not updated['skip_api_reference'] # Standard enables API ref
|
||||
assert not updated['skip_docs'] # Standard enables docs
|
||||
|
||||
def test_apply_preset_comprehensive(self):
|
||||
"""Test applying comprehensive preset."""
|
||||
@@ -160,13 +160,13 @@ class TestPresetApplication:
|
||||
assert updated['depth'] == 'full'
|
||||
assert updated['enhance_level'] == 3
|
||||
# Comprehensive enables ALL features
|
||||
assert updated['skip_patterns'] == False
|
||||
assert updated['skip_dependency_graph'] == False
|
||||
assert updated['skip_test_examples'] == False
|
||||
assert updated['skip_how_to_guides'] == False
|
||||
assert updated['skip_api_reference'] == False
|
||||
assert updated['skip_config_patterns'] == False
|
||||
assert updated['skip_docs'] == False
|
||||
assert not updated['skip_patterns']
|
||||
assert not updated['skip_dependency_graph']
|
||||
assert not updated['skip_test_examples']
|
||||
assert not updated['skip_how_to_guides']
|
||||
assert not updated['skip_api_reference']
|
||||
assert not updated['skip_config_patterns']
|
||||
assert not updated['skip_docs']
|
||||
|
||||
def test_cli_overrides_preset(self):
|
||||
"""Test that CLI args override preset defaults."""
|
||||
@@ -182,7 +182,7 @@ class TestPresetApplication:
|
||||
assert updated['enhance_level'] == 2 # CLI wins
|
||||
|
||||
# Preset says skip_patterns=True (disabled), but CLI said False (enabled)
|
||||
assert updated['skip_patterns'] == False # CLI wins
|
||||
assert not updated['skip_patterns'] # CLI wins
|
||||
|
||||
def test_apply_preset_preserves_args(self):
|
||||
"""Test that apply_preset preserves existing args."""
|
||||
|
||||
@@ -3,9 +3,7 @@ Tests for RAG Chunker (semantic chunking for RAG pipelines).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import json
|
||||
import tempfile
|
||||
|
||||
from skill_seekers.cli.rag_chunker import RAGChunker
|
||||
|
||||
@@ -199,7 +197,7 @@ class TestRAGChunker:
|
||||
assert len(chunks) > 0
|
||||
|
||||
# Check metadata diversity
|
||||
categories = set(chunk["metadata"]["category"] for chunk in chunks)
|
||||
categories = {chunk["metadata"]["category"] for chunk in chunks}
|
||||
assert "overview" in categories # From SKILL.md
|
||||
assert "getting_started" in categories or "api" in categories # From references
|
||||
|
||||
@@ -222,7 +220,7 @@ class TestRAGChunker:
|
||||
assert output_path.exists()
|
||||
|
||||
# Check content
|
||||
with open(output_path, 'r') as f:
|
||||
with open(output_path) as f:
|
||||
loaded = json.load(f)
|
||||
|
||||
assert len(loaded) == 1
|
||||
|
||||
@@ -14,15 +14,13 @@ import pytest
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import tempfile
|
||||
import json
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from skill_seekers.cli.streaming_ingest import (
|
||||
StreamingIngester,
|
||||
IngestionProgress,
|
||||
ChunkMetadata
|
||||
IngestionProgress
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -6,10 +6,7 @@ Tests real upload capabilities for vector databases.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
# Import adaptors
|
||||
from skill_seekers.cli.adaptors import get_adaptor
|
||||
@@ -211,7 +208,6 @@ class TestUploadCommandIntegration:
|
||||
|
||||
def test_upload_command_supports_chroma(self):
|
||||
"""Test upload command recognizes chroma as target."""
|
||||
from skill_seekers.cli.upload_skill import upload_skill_api
|
||||
|
||||
# This should not raise ValueError
|
||||
adaptor = get_adaptor('chroma')
|
||||
@@ -219,7 +215,6 @@ class TestUploadCommandIntegration:
|
||||
|
||||
def test_upload_command_supports_weaviate(self):
|
||||
"""Test upload command recognizes weaviate as target."""
|
||||
from skill_seekers.cli.upload_skill import upload_skill_api
|
||||
|
||||
# This should not raise ValueError
|
||||
adaptor = get_adaptor('weaviate')
|
||||
|
||||
@@ -4,7 +4,6 @@ Covers bug fix for issue #277: URLs with anchor fragments causing 404 errors.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from skill_seekers.cli.doc_scraper import DocToSkillConverter
|
||||
|
||||
|
||||
Reference in New Issue
Block a user