style: Format all Python files with ruff

- Formatted 103 files to comply with ruff format requirements
- No code logic changes, only formatting/whitespace
- Fixes CI formatting check failures
This commit is contained in:
yusyus
2026-02-08 14:42:27 +03:00
parent 6e4f623b9d
commit 0265de5816
103 changed files with 2241 additions and 2627 deletions

View File

@@ -9,6 +9,7 @@ import numpy as np
# OpenAI support
try:
from openai import OpenAI
OPENAI_AVAILABLE = True
except ImportError:
OPENAI_AVAILABLE = False
@@ -16,6 +17,7 @@ except ImportError:
# Sentence transformers support
try:
from sentence_transformers import SentenceTransformer
SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
SENTENCE_TRANSFORMERS_AVAILABLE = False
@@ -23,6 +25,7 @@ except ImportError:
# Voyage AI support (recommended by Anthropic for embeddings)
try:
import voyageai
VOYAGE_AVAILABLE = True
except ImportError:
VOYAGE_AVAILABLE = False
@@ -129,7 +132,7 @@ class EmbeddingGenerator:
self,
api_key: str | None = None,
voyage_api_key: str | None = None,
cache_dir: str | None = None
cache_dir: str | None = None,
):
"""
Initialize embedding generator.
@@ -162,8 +165,7 @@ class EmbeddingGenerator:
"""Get information about a model."""
if model not in self.MODELS:
raise ValueError(
f"Unknown model: {model}. "
f"Available models: {', '.join(self.MODELS.keys())}"
f"Unknown model: {model}. Available models: {', '.join(self.MODELS.keys())}"
)
return self.MODELS[model]
@@ -171,20 +173,19 @@ class EmbeddingGenerator:
"""List all available models."""
models = []
for name, info in self.MODELS.items():
models.append({
"name": name,
"provider": info["provider"],
"dimensions": info["dimensions"],
"max_tokens": info["max_tokens"],
"cost_per_million": info.get("cost_per_million", 0.0),
})
models.append(
{
"name": name,
"provider": info["provider"],
"dimensions": info["dimensions"],
"max_tokens": info["max_tokens"],
"cost_per_million": info.get("cost_per_million", 0.0),
}
)
return models
def generate(
self,
text: str,
model: str = "text-embedding-3-small",
normalize: bool = True
self, text: str, model: str = "text-embedding-3-small", normalize: bool = True
) -> list[float]:
"""
Generate embedding for a single text.
@@ -218,7 +219,7 @@ class EmbeddingGenerator:
texts: list[str],
model: str = "text-embedding-3-small",
normalize: bool = True,
batch_size: int = 32
batch_size: int = 32,
) -> tuple[list[list[float]], int]:
"""
Generate embeddings for multiple texts.
@@ -248,24 +249,18 @@ class EmbeddingGenerator:
else:
raise ValueError(f"Unsupported provider: {provider}")
def _generate_openai(
self, text: str, model: str, normalize: bool
) -> list[float]:
def _generate_openai(self, text: str, model: str, normalize: bool) -> list[float]:
"""Generate embedding using OpenAI API."""
if not OPENAI_AVAILABLE:
raise ImportError(
"OpenAI is required for OpenAI embeddings. "
"Install with: pip install openai"
"OpenAI is required for OpenAI embeddings. Install with: pip install openai"
)
if not self.openai_client:
raise ValueError("OpenAI API key not provided")
try:
response = self.openai_client.embeddings.create(
input=text,
model=model
)
response = self.openai_client.embeddings.create(input=text, model=model)
embedding = response.data[0].embedding
if normalize:
@@ -281,8 +276,7 @@ class EmbeddingGenerator:
"""Generate embeddings using OpenAI API in batches."""
if not OPENAI_AVAILABLE:
raise ImportError(
"OpenAI is required for OpenAI embeddings. "
"Install with: pip install openai"
"OpenAI is required for OpenAI embeddings. Install with: pip install openai"
)
if not self.openai_client:
@@ -292,13 +286,10 @@ class EmbeddingGenerator:
# Process in batches
for i in range(0, len(texts), batch_size):
batch = texts[i:i + batch_size]
batch = texts[i : i + batch_size]
try:
response = self.openai_client.embeddings.create(
input=batch,
model=model
)
response = self.openai_client.embeddings.create(input=batch, model=model)
batch_embeddings = [item.embedding for item in response.data]
@@ -313,24 +304,18 @@ class EmbeddingGenerator:
dimensions = len(all_embeddings[0]) if all_embeddings else 0
return all_embeddings, dimensions
def _generate_voyage(
self, text: str, model: str, normalize: bool
) -> list[float]:
def _generate_voyage(self, text: str, model: str, normalize: bool) -> list[float]:
"""Generate embedding using Voyage AI API."""
if not VOYAGE_AVAILABLE:
raise ImportError(
"voyageai is required for Voyage AI embeddings. "
"Install with: pip install voyageai"
"voyageai is required for Voyage AI embeddings. Install with: pip install voyageai"
)
if not self.voyage_client:
raise ValueError("Voyage API key not provided")
try:
result = self.voyage_client.embed(
texts=[text],
model=model
)
result = self.voyage_client.embed(texts=[text], model=model)
embedding = result.embeddings[0]
if normalize:
@@ -346,8 +331,7 @@ class EmbeddingGenerator:
"""Generate embeddings using Voyage AI API in batches."""
if not VOYAGE_AVAILABLE:
raise ImportError(
"voyageai is required for Voyage AI embeddings. "
"Install with: pip install voyageai"
"voyageai is required for Voyage AI embeddings. Install with: pip install voyageai"
)
if not self.voyage_client:
@@ -357,13 +341,10 @@ class EmbeddingGenerator:
# Process in batches (Voyage AI supports up to 128 texts per request)
for i in range(0, len(texts), batch_size):
batch = texts[i:i + batch_size]
batch = texts[i : i + batch_size]
try:
result = self.voyage_client.embed(
texts=batch,
model=model
)
result = self.voyage_client.embed(texts=batch, model=model)
batch_embeddings = result.embeddings
@@ -378,9 +359,7 @@ class EmbeddingGenerator:
dimensions = len(all_embeddings[0]) if all_embeddings else 0
return all_embeddings, dimensions
def _generate_sentence_transformer(
self, text: str, model: str, normalize: bool
) -> list[float]:
def _generate_sentence_transformer(self, text: str, model: str, normalize: bool) -> list[float]:
"""Generate embedding using sentence-transformers."""
if not SENTENCE_TRANSFORMERS_AVAILABLE:
raise ImportError(
@@ -417,10 +396,7 @@ class EmbeddingGenerator:
# Generate embeddings in batches
embeddings = st_model.encode(
texts,
batch_size=batch_size,
normalize_embeddings=normalize,
show_progress_bar=False
texts, batch_size=batch_size, normalize_embeddings=normalize, show_progress_bar=False
)
dimensions = len(embeddings[0]) if len(embeddings) > 0 else 0