style: ruff format remaining 14 files
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -371,7 +371,10 @@ class SkillAdaptor(ABC):
|
||||
# If overlap is at the default value but chunk size was customized,
|
||||
# scale overlap proportionally (10% of chunk size, min DEFAULT_CHUNK_OVERLAP_TOKENS)
|
||||
effective_overlap = chunk_overlap_tokens
|
||||
if chunk_overlap_tokens == DEFAULT_CHUNK_OVERLAP_TOKENS and chunk_max_tokens != DEFAULT_CHUNK_TOKENS:
|
||||
if (
|
||||
chunk_overlap_tokens == DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
and chunk_max_tokens != DEFAULT_CHUNK_TOKENS
|
||||
):
|
||||
effective_overlap = max(DEFAULT_CHUNK_OVERLAP_TOKENS, chunk_max_tokens // 10)
|
||||
|
||||
chunker = RAGChunker(
|
||||
@@ -518,9 +521,7 @@ class SkillAdaptor(ABC):
|
||||
for i in range(0, len(documents), batch_size):
|
||||
batch = documents[i : i + batch_size]
|
||||
try:
|
||||
response = client.embeddings.create(
|
||||
input=batch, model="text-embedding-3-small"
|
||||
)
|
||||
response = client.embeddings.create(input=batch, model="text-embedding-3-small")
|
||||
embeddings.extend([item.embedding for item in response.data])
|
||||
print(f" ✓ Embedded {min(i + batch_size, len(documents))}/{len(documents)}")
|
||||
except Exception as e:
|
||||
|
||||
@@ -91,7 +91,9 @@ class ChromaAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
@@ -123,7 +125,9 @@ class ChromaAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
|
||||
@@ -93,7 +93,9 @@ class FAISSHelpers(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
@@ -124,7 +126,9 @@ class FAISSHelpers(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to parallel arrays
|
||||
|
||||
@@ -74,7 +74,9 @@ class HaystackAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as documents
|
||||
@@ -109,7 +111,9 @@ class HaystackAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as documents
|
||||
|
||||
@@ -74,7 +74,9 @@ class LangChainAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to documents
|
||||
@@ -104,7 +106,9 @@ class LangChainAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks to documents
|
||||
|
||||
@@ -89,7 +89,9 @@ class LlamaIndexAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as nodes
|
||||
@@ -126,7 +128,9 @@ class LlamaIndexAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as nodes
|
||||
|
||||
@@ -38,7 +38,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
"""Generate deterministic ID from content and metadata."""
|
||||
return self._generate_deterministic_id(content, metadata, format="hex")
|
||||
|
||||
def _truncate_text_for_metadata(self, text: str, max_bytes: int = PINECONE_METADATA_BYTES_LIMIT) -> str:
|
||||
def _truncate_text_for_metadata(
|
||||
self, text: str, max_bytes: int = PINECONE_METADATA_BYTES_LIMIT
|
||||
) -> str:
|
||||
"""Truncate text to fit within Pinecone's metadata byte limit.
|
||||
|
||||
Pinecone limits metadata to 40KB per vector. This truncates
|
||||
@@ -120,7 +122,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
@@ -155,7 +159,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
for chunk_text, chunk_meta in chunks:
|
||||
@@ -281,10 +287,7 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
if not api_key:
|
||||
return {
|
||||
"success": False,
|
||||
"message": (
|
||||
"PINECONE_API_KEY not set. "
|
||||
"Set via env var or pass api_key parameter."
|
||||
),
|
||||
"message": ("PINECONE_API_KEY not set. Set via env var or pass api_key parameter."),
|
||||
}
|
||||
|
||||
# Load package
|
||||
@@ -332,7 +335,9 @@ class PineconeAdaptor(SkillAdaptor):
|
||||
# Create index if it doesn't exist
|
||||
existing_indexes = [idx.name for idx in pc.list_indexes()]
|
||||
if index_name not in existing_indexes:
|
||||
print(f"🔧 Creating Pinecone index: {index_name} (dimension={dimension}, metric={metric})")
|
||||
print(
|
||||
f"🔧 Creating Pinecone index: {index_name} (dimension={dimension}, metric={metric})"
|
||||
)
|
||||
pc.create_index(
|
||||
name=index_name,
|
||||
dimension=dimension,
|
||||
|
||||
@@ -88,7 +88,9 @@ class QdrantAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as points
|
||||
@@ -139,7 +141,9 @@ class QdrantAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as points
|
||||
|
||||
@@ -156,7 +156,9 @@ class WeaviateAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file="SKILL.md",
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as objects
|
||||
@@ -199,7 +201,9 @@ class WeaviateAdaptor(SkillAdaptor):
|
||||
chunk_max_tokens=kwargs.get("chunk_max_tokens", DEFAULT_CHUNK_TOKENS),
|
||||
preserve_code_blocks=kwargs.get("preserve_code_blocks", True),
|
||||
source_file=ref_file.name,
|
||||
chunk_overlap_tokens=kwargs.get("chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS),
|
||||
chunk_overlap_tokens=kwargs.get(
|
||||
"chunk_overlap_tokens", DEFAULT_CHUNK_OVERLAP_TOKENS
|
||||
),
|
||||
)
|
||||
|
||||
# Add all chunks as objects
|
||||
|
||||
@@ -161,7 +161,10 @@ class CreateCommand:
|
||||
# RAG arguments (web scraper only)
|
||||
if getattr(self.args, "chunk_for_rag", False):
|
||||
argv.append("--chunk-for-rag")
|
||||
if getattr(self.args, "chunk_tokens", None) and self.args.chunk_tokens != DEFAULT_CHUNK_TOKENS:
|
||||
if (
|
||||
getattr(self.args, "chunk_tokens", None)
|
||||
and self.args.chunk_tokens != DEFAULT_CHUNK_TOKENS
|
||||
):
|
||||
argv.extend(["--chunk-tokens", str(self.args.chunk_tokens)])
|
||||
if (
|
||||
getattr(self.args, "chunk_overlap_tokens", None)
|
||||
|
||||
@@ -1009,7 +1009,7 @@ Use this skill when you need to:
|
||||
skill_content += f"- **Homepage:** {repo_info.get('homepage') or 'N/A'}\n"
|
||||
skill_content += f"- **Topics:** {', '.join(repo_info.get('topics', []))}\n"
|
||||
skill_content += f"- **Open Issues:** {repo_info.get('open_issues', 0)}\n"
|
||||
updated_at = repo_info.get('updated_at') or 'N/A'
|
||||
updated_at = repo_info.get("updated_at") or "N/A"
|
||||
skill_content += f"- **Last Updated:** {updated_at[:10]}\n\n"
|
||||
|
||||
# Languages
|
||||
@@ -1105,11 +1105,9 @@ Use this skill when you need to:
|
||||
|
||||
lines = []
|
||||
for release in releases[:3]:
|
||||
published_at = release.get('published_at') or 'N/A'
|
||||
release_name = release.get('name') or release['tag_name']
|
||||
lines.append(
|
||||
f"- **{release['tag_name']}** ({published_at[:10]}): {release_name}"
|
||||
)
|
||||
published_at = release.get("published_at") or "N/A"
|
||||
release_name = release.get("name") or release["tag_name"]
|
||||
lines.append(f"- **{release['tag_name']}** ({published_at[:10]}): {release_name}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@@ -1304,7 +1302,7 @@ Use this skill when you need to:
|
||||
content += f"## Open Issues ({len(open_issues)})\n\n"
|
||||
for issue in open_issues:
|
||||
labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
|
||||
created_at = issue.get('created_at') or 'N/A'
|
||||
created_at = issue.get("created_at") or "N/A"
|
||||
content += f"### #{issue['number']}: {issue['title']}\n"
|
||||
content += f"**Labels:** {labels} | **Created:** {created_at[:10]}\n"
|
||||
content += f"[View on GitHub]({issue['url']})\n\n"
|
||||
@@ -1312,7 +1310,7 @@ Use this skill when you need to:
|
||||
content += f"\n## Recently Closed Issues ({len(closed_issues)})\n\n"
|
||||
for issue in closed_issues:
|
||||
labels = ", ".join(issue["labels"]) if issue["labels"] else "No labels"
|
||||
closed_at = issue.get('closed_at') or 'N/A'
|
||||
closed_at = issue.get("closed_at") or "N/A"
|
||||
content += f"### #{issue['number']}: {issue['title']}\n"
|
||||
content += f"**Labels:** {labels} | **Closed:** {closed_at[:10]}\n"
|
||||
content += f"[View on GitHub]({issue['url']})\n\n"
|
||||
@@ -1331,9 +1329,9 @@ Use this skill when you need to:
|
||||
)
|
||||
|
||||
for release in releases:
|
||||
published_at = release.get('published_at') or 'N/A'
|
||||
release_name = release.get('name') or release['tag_name']
|
||||
release_body = release.get('body') or ''
|
||||
published_at = release.get("published_at") or "N/A"
|
||||
release_name = release.get("name") or release["tag_name"]
|
||||
release_body = release.get("body") or ""
|
||||
content += f"## {release['tag_name']}: {release_name}\n"
|
||||
content += f"**Published:** {published_at[:10]}\n"
|
||||
if release["prerelease"]:
|
||||
|
||||
@@ -385,9 +385,14 @@ def main():
|
||||
)
|
||||
parser.add_argument("skill_dir", type=Path, help="Path to skill directory")
|
||||
parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
|
||||
parser.add_argument("--chunk-tokens", type=int, default=DEFAULT_CHUNK_TOKENS, help="Target chunk size in tokens")
|
||||
parser.add_argument(
|
||||
"--chunk-overlap-tokens", type=int, default=DEFAULT_CHUNK_OVERLAP_TOKENS, help="Overlap size in tokens"
|
||||
"--chunk-tokens", type=int, default=DEFAULT_CHUNK_TOKENS, help="Target chunk size in tokens"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--chunk-overlap-tokens",
|
||||
type=int,
|
||||
default=DEFAULT_CHUNK_OVERLAP_TOKENS,
|
||||
help="Overlap size in tokens",
|
||||
)
|
||||
parser.add_argument("--no-code-blocks", action="store_true", help="Don't preserve code blocks")
|
||||
parser.add_argument("--no-paragraphs", action="store_true", help="Don't preserve paragraphs")
|
||||
|
||||
@@ -1296,7 +1296,9 @@ This skill combines knowledge from multiple sources:
|
||||
f.write(f"- **File**: `{ex.get('file_path', 'N/A')}`\n")
|
||||
if ex.get("code_snippet"):
|
||||
lang = ex.get("language", "text")
|
||||
f.write(f"\n```{lang}\n{ex['code_snippet']}\n```\n") # Full code, no truncation
|
||||
f.write(
|
||||
f"\n```{lang}\n{ex['code_snippet']}\n```\n"
|
||||
) # Full code, no truncation
|
||||
f.write("\n")
|
||||
|
||||
logger.info(f" ✓ Test examples: {total} total, {high_value} high-value")
|
||||
|
||||
Reference in New Issue
Block a user