style: ruff format remaining 14 files
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -371,7 +371,10 @@ class SkillAdaptor(ABC):
|
||||
# If overlap is at the default value but chunk size was customized,
|
||||
# scale overlap proportionally (10% of chunk size, but never below DEFAULT_CHUNK_OVERLAP_TOKENS)
|
||||
effective_overlap = chunk_overlap_tokens
|
||||
if chunk_overlap_tokens == DEFAULT_CHUNK_OVERLAP_TOKENS and chunk_max_tokens != DEFAULT_CHUNK_TOKENS:
|
||||
if (
|
||||
chunk_overlap_tokens == DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
and chunk_max_tokens != DEFAULT_CHUNK_TOKENS
|
||||
):
|
||||
effective_overlap = max(DEFAULT_CHUNK_OVERLAP_TOKENS, chunk_max_tokens // 10)
|
||||
|
||||
chunker = RAGChunker(
|
||||
@@ -518,9 +521,7 @@ class SkillAdaptor(ABC):
|
||||
for i in range(0, len(documents), batch_size):
|
||||
batch = documents[i : i + batch_size]
|
||||
try:
|
||||
response = client.embeddings.create(
|
||||
input=batch, model="text-embedding-3-small"
|
||||
)
|
||||
response = client.embeddings.create(input=batch, model="text-embedding-3-small")
|
||||
embeddings.extend([item.embedding for item in response.data])
|
||||
print(f" ✓ Embedded {min(i + batch_size, len(documents))}/{len(documents)}")
|
||||
except Exception as e:
|
||||
|
||||
@@ -91,7 +91,9 @@ class ChromaAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
@@ -123,7 +125,9 @@ class ChromaAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
|
||||
@@ -93,7 +93,9 @@ class FAISSHelpers(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
@@ -124,7 +126,9 @@ class FAISSHelpers(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
|
||||
@@ -74,7 +74,9 @@ class HaystackAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as documents
|
||||
@@ -109,7 +111,9 @@ class HaystackAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as documents
|
||||
|
||||
@@ -74,7 +74,9 @@ class LangChainAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to documents
|
||||
@@ -104,7 +106,9 @@ class LangChainAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to documents
|
||||
|
||||
@@ -89,7 +89,9 @@ class LlamaIndexAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as nodes
|
||||
@@ -126,7 +128,9 @@ class LlamaIndexAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as nodes
|
||||
|
||||
@@ -38,7 +38,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
"""Generate deterministic ID from content and metadata."""
|
||||
return self._generate_deterministic_id(content, metadata, format="hex")
|
||||
|
||||
def _truncate_text_for_metadata(self, text: str, max_bytes: int = PINECONE_METADATA_BYTES_LIMIT) -> str:
|
||||
def _truncate_text_for_metadata(
|
||||
self, text: str, max_bytes: int = PINECONE_METADATA_BYTES_LIMIT
|
||||
) -> str:
|
||||
"""Truncate text to fit within Pinecone's metadata byte limit.
|
||||
|
||||
Pinecone limits metadata to 40KB per vector. This truncates
|
||||
@@ -120,7 +122,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
@@ -155,7 +159,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
@@ -281,10 +287,7 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
if not api_key:
|
||||
return {
|
||||
"success": False,
|
||||
"message": (
|
||||
"PINECONE_API_KEY not set. "
|
||||
"Set via env var or pass api_key parameter."
|
||||
),
|
||||
"message": ("PINECONE_API_KEY not set. Set via env var or pass api_key parameter."),
|
||||
}
|
||||
|
||||
# Load package
|
||||
@@ -332,7 +335,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
# Create index if it doesn't exist
|
||||
existing_indexes = [idx.name for idx in pc.list_indexes()]
|
||||
if index_name not in existing_indexes:
|
||||
print(f"🔧 Creating Pinecone index: {index_name} (dimension={dimension}, metric={metric})")
|
||||
print(
|
||||
f"🔧 Creating Pinecone index: {index_name} (dimension={dimension}, metric={metric})"
|
||||
)
|
||||
pc.create_index(
|
||||
name=index_name,
|
||||
dimension=dimension,
|
||||
|
||||
@@ -88,7 +88,9 @@ class QdrantAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as points
|
||||
@@ -139,7 +141,9 @@ class QdrantAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as points
|
||||
|
||||
@@ -156,7 +156,9 @@ class WeaviateAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as objects
|
||||
@@ -199,7 +201,9 @@ class WeaviateAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as objects
|
||||
|
||||
Reference in New Issue
Block a user