skill-seekers-reference/src/skill_seekers/cli/adaptors/langchain.py
yusyus d84e5878a1 refactor: Adopt helper methods across 7 RAG adaptors to eliminate duplication
Refactored all RAG adaptors (LangChain, LlamaIndex, Haystack, Weaviate, Chroma,
FAISS, Qdrant) to use existing helper methods from base.py, removing ~215 lines
of duplicate code (26% reduction).

Key improvements:
- All adaptors now use _format_output_path() for consistent path handling
- All adaptors now use _iterate_references() for reference file iteration
- Added _generate_deterministic_id() helper with 3 formats (hex, uuid, uuid5)
- 5 adaptors refactored to use unified ID generation
- Removed 6 unused imports (hashlib, uuid)

Benefits:
- DRY principles enforced across all RAG adaptors
- Single source of truth for common logic
- Easier maintenance and testing
- Consistent behavior across platforms

All 159 adaptor tests passing. Zero regressions.

Phase 1 of optional enhancements (Phases 2-5 pending).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-07 22:31:10 +03:00
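For context, here is a minimal, hypothetical sketch of the base.py helpers this commit refers to. base.py is not shown on this page, so the shapes below are inferred from their call sites in langchain.py (`_iterate_references()` yielding `(path, content)` pairs and `_format_output_path()` taking a filename suffix); `_generate_deterministic_id()` and its three formats (hex, uuid, uuid5) are an assumption based only on the commit description above, not on the actual implementation.

```python
# Hypothetical sketch of the base.py helpers named in this commit.
# Signatures inferred from call sites in langchain.py; not the real code.
import hashlib
import uuid
from pathlib import Path
from typing import Iterator


class SkillAdaptor:
    def _iterate_references(self, skill_dir: Path) -> Iterator[tuple[Path, str]]:
        """Yield (path, content) for each references/*.md file."""
        refs_dir = skill_dir / "references"
        if refs_dir.is_dir():
            for ref_file in sorted(refs_dir.glob("*.md")):
                yield ref_file, ref_file.read_text(encoding="utf-8")

    def _format_output_path(self, skill_dir: Path, output_path: Path, suffix: str) -> Path:
        """Resolve the output file name, appending the platform suffix if none given."""
        if output_path.suffix:
            return output_path
        return output_path / f"{skill_dir.name}{suffix}"

    def _generate_deterministic_id(self, text: str, fmt: str = "hex") -> str:
        """Stable ID for a chunk of text in one of three formats (hex, uuid, uuid5)."""
        if fmt == "uuid5":
            return str(uuid.uuid5(uuid.NAMESPACE_URL, text))
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        if fmt == "uuid":
            return str(uuid.UUID(digest[:32]))
        return digest
```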


#!/usr/bin/env python3
"""
LangChain Adaptor

Implements LangChain Document format for RAG pipelines.
Converts Skill Seekers documentation into LangChain-compatible Document objects.
"""

import json
from pathlib import Path
from typing import Any

from .base import SkillAdaptor, SkillMetadata


class LangChainAdaptor(SkillAdaptor):
    """
    LangChain platform adaptor.

    Handles:
    - LangChain Document format (page_content + metadata)
    - JSON packaging with array of documents
    - No upload (users import directly into code)
    - Optimized for RAG/vector store ingestion
    """

    PLATFORM = "langchain"
    PLATFORM_NAME = "LangChain (RAG Framework)"
    DEFAULT_API_ENDPOINT = None  # No upload endpoint
    def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str:
        """
        Format skill as JSON array of LangChain Documents.

        Converts SKILL.md and all references/*.md into LangChain Document format:
        {
            "page_content": "...",
            "metadata": {"source": "...", "category": "...", ...}
        }

        Args:
            skill_dir: Path to skill directory
            metadata: Skill metadata

        Returns:
            JSON string containing array of LangChain Documents
        """
        documents = []

        # Convert SKILL.md (main documentation)
        skill_md_path = skill_dir / "SKILL.md"
        if skill_md_path.exists():
            content = self._read_existing_content(skill_dir)
            if content.strip():
                documents.append(
                    {
                        "page_content": content,
                        "metadata": {
                            "source": metadata.name,
                            "category": "overview",
                            "file": "SKILL.md",
                            "type": "documentation",
                            "version": metadata.version,
                        },
                    }
                )

        # Convert all reference files using base helper method
        for ref_file, ref_content in self._iterate_references(skill_dir):
            if ref_content.strip():
                # Derive category from filename
                category = ref_file.stem.replace("_", " ").lower()
                documents.append(
                    {
                        "page_content": ref_content,
                        "metadata": {
                            "source": metadata.name,
                            "category": category,
                            "file": ref_file.name,
                            "type": "reference",
                            "version": metadata.version,
                        },
                    }
                )

        # Return as formatted JSON
        return json.dumps(documents, indent=2, ensure_ascii=False)
    def package(self, skill_dir: Path, output_path: Path) -> Path:
        """
        Package skill into JSON file for LangChain.

        Creates a JSON file containing an array of LangChain Documents ready
        for ingestion into vector stores (Chroma, Pinecone, etc.)

        Args:
            skill_dir: Path to skill directory
            output_path: Output path/filename for JSON file

        Returns:
            Path to created JSON file
        """
        skill_dir = Path(skill_dir)

        # Determine output filename using base helper method
        output_path = self._format_output_path(skill_dir, Path(output_path), "-langchain.json")
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Read metadata
        metadata = SkillMetadata(
            name=skill_dir.name,
            description=f"LangChain documents for {skill_dir.name}",
            version="1.0.0",
        )

        # Generate LangChain documents
        documents_json = self.format_skill_md(skill_dir, metadata)

        # Write to file
        output_path.write_text(documents_json, encoding="utf-8")

        print("\n✅ LangChain documents packaged successfully!")
        print(f"📦 Output: {output_path}")

        # Parse and show stats
        documents = json.loads(documents_json)
        print(f"📊 Total documents: {len(documents)}")

        # Show category breakdown
        categories = {}
        for doc in documents:
            cat = doc["metadata"].get("category", "unknown")
            categories[cat] = categories.get(cat, 0) + 1
        print("📁 Categories:")
        for cat, count in sorted(categories.items()):
            print(f" - {cat}: {count}")

        return output_path
    def upload(self, package_path: Path, _api_key: str, **_kwargs) -> dict[str, Any]:
        """
        LangChain format does not support direct upload.

        Users should import the JSON file into their LangChain code:

        ```python
        from langchain.schema import Document
        import json

        # Load documents
        with open("skill-langchain.json") as f:
            docs_data = json.load(f)

        # Convert to LangChain Documents
        documents = [
            Document(page_content=doc["page_content"], metadata=doc["metadata"])
            for doc in docs_data
        ]

        # Use with vector store
        from langchain.vectorstores import Chroma
        from langchain.embeddings import OpenAIEmbeddings

        vectorstore = Chroma.from_documents(documents, OpenAIEmbeddings())
        ```

        Args:
            package_path: Path to JSON file
            api_key: Not used
            **kwargs: Not used

        Returns:
            Result indicating no upload capability
        """
        example_code = """
# Example: Load into LangChain
from langchain.schema import Document
import json

# Load documents
with open("{path}") as f:
    docs_data = json.load(f)

# Convert to LangChain Documents
documents = [
    Document(page_content=doc["page_content"], metadata=doc["metadata"])
    for doc in docs_data
]

# Use with vector store
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

vectorstore = Chroma.from_documents(documents, OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

# Query
results = retriever.get_relevant_documents("your query here")
""".format(
            path=package_path.name
        )

        return {
            "success": False,
            "skill_id": None,
            "url": str(package_path.absolute()),
            "message": (
                f"LangChain documents packaged at: {package_path.absolute()}\n\n"
                "Load into your code:\n"
                f"{example_code}"
            ),
        }
    def validate_api_key(self, _api_key: str) -> bool:
        """
        LangChain format doesn't use API keys for packaging.

        Args:
            api_key: Not used

        Returns:
            Always False (no API needed for packaging)
        """
        return False

    def get_env_var_name(self) -> str:
        """
        No API key needed for LangChain packaging.

        Returns:
            Empty string
        """
        return ""

    def supports_enhancement(self) -> bool:
        """
        LangChain format doesn't support AI enhancement.

        Enhancement should be done before conversion using:
        skill-seekers enhance output/skill/ --mode LOCAL

        Returns:
            False
        """
        return False

    def enhance(self, _skill_dir: Path, _api_key: str) -> bool:
        """
        LangChain format doesn't support enhancement.

        Args:
            skill_dir: Not used
            api_key: Not used

        Returns:
            False
        """
        print("❌ LangChain format does not support enhancement")
        print(" Enhance before packaging:")
        print(" skill-seekers enhance output/skill/ --mode LOCAL")
        print(" skill-seekers package output/skill/ --target langchain")
        return False
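
A quick end-to-end usage sketch, assuming the adaptor can be instantiated directly with no constructor arguments (normally it runs via `skill-seekers package output/skill/ --target langchain`). The import path follows this file's location; the skill directory layout (SKILL.md plus references/*.md) follows the docstrings above; the exact output file name depends on `_format_output_path()` in base.py, which is not shown here.

```python
# Hypothetical direct usage of the adaptor outside the skill-seekers CLI.
from pathlib import Path

from skill_seekers.cli.adaptors.langchain import LangChainAdaptor

adaptor = LangChainAdaptor()

# Package SKILL.md + references/*.md into a single JSON array of documents.
json_path = adaptor.package(Path("output/my-skill"), Path("dist"))

# upload() never uploads for this platform; it returns the local path plus
# a code snippet for loading the documents into a LangChain vector store.
result = adaptor.upload(json_path, "")
print(result["message"])
```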