refactor: Adopt helper methods across 7 RAG adaptors to eliminate duplication

Refactored all RAG adaptors (LangChain, LlamaIndex, Haystack, Weaviate, Chroma, FAISS, Qdrant) to use existing helper methods from base.py, removing ~215 lines of duplicate code (26% reduction).

Key improvements:
- All adaptors now use _format_output_path() for consistent path handling
- All adaptors now use _iterate_references() for reference file iteration
- Added _generate_deterministic_id() helper with 3 formats (hex, uuid, uuid5)
- 5 adaptors refactored to use unified ID generation
- Removed 6 unused imports (hashlib, uuid)

Benefits:
- DRY principles enforced across all RAG adaptors
- Single source of truth for common logic
- Easier maintenance and testing
- Consistent behavior across platforms

All 159 adaptor tests passing. Zero regressions.

Phase 1 of optional enhancements (Phases 2-5 pending).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-07 22:31:10 +03:00
parent ffe8fc4de2
commit d84e5878a1
9 changed files with 489 additions and 304 deletions
--- a/src/skill_seekers/cli/adaptors/qdrant.py
+++ b/src/skill_seekers/cli/adaptors/qdrant.py
@@ -9,8 +9,6 @@ Qdrant stores vectors and metadata together in collections with points.
 import json
 from pathlib import Path
 from typing import Any
-import hashlib
-import uuid

 from .base import SkillAdaptor, SkillMetadata

@@ -43,10 +41,7 @@ class QdrantAdaptor(SkillAdaptor):
        Returns:
            UUID string (version 5, deterministic)
        """
-        # Use content hash + source for deterministic UUID
-        namespace = uuid.UUID("00000000-0000-0000-0000-000000000000")
-        id_string = f"{metadata.get('source', '')}-{metadata.get('file', '')}-{content[:100]}"
-        return str(uuid.uuid5(namespace, id_string))
+        return self._generate_deterministic_id(content, metadata, format="uuid5")

    def format_skill_md(self, skill_dir: Path, metadata: SkillMetadata) -> str:
        """
@@ -89,36 +84,28 @@ class QdrantAdaptor(SkillAdaptor):
                    }
                })

-        # Convert all reference files
-        refs_dir = skill_dir / "references"
-        if refs_dir.exists():
-            for ref_file in sorted(refs_dir.glob("*.md")):
-                if ref_file.is_file() and not ref_file.name.startswith("."):
-                    try:
-                        ref_content = ref_file.read_text(encoding="utf-8")
-                        if ref_content.strip():
-                            category = ref_file.stem.replace("_", " ").lower()
+        # Convert all reference files using base helper method
+        for ref_file, ref_content in self._iterate_references(skill_dir):
+            if ref_content.strip():
+                category = ref_file.stem.replace("_", " ").lower()

-                            point_id = self._generate_point_id(ref_content, {
-                                "source": metadata.name,
-                                "file": ref_file.name
-                            })
+                point_id = self._generate_point_id(ref_content, {
+                    "source": metadata.name,
+                    "file": ref_file.name
+                })

-                            points.append({
-                                "id": point_id,
-                                "vector": None,  # User will generate embeddings
-                                "payload": {
-                                    "content": ref_content,
-                                    "source": metadata.name,
-                                    "category": category,
-                                    "file": ref_file.name,
-                                    "type": "reference",
-                                    "version": metadata.version,
-                                }
-                            })
-                    except Exception as e:
-                        print(f"⚠️  Warning: Could not read {ref_file.name}: {e}")
-                        continue
+                points.append({
+                    "id": point_id,
+                    "vector": None,  # User will generate embeddings
+                    "payload": {
+                        "content": ref_content,
+                        "source": metadata.name,
+                        "category": category,
+                        "file": ref_file.name,
+                        "type": "reference",
+                        "version": metadata.version,
+                    }
+                })

        # Qdrant configuration
        config = {
@@ -158,18 +145,8 @@ class QdrantAdaptor(SkillAdaptor):
        """
        skill_dir = Path(skill_dir)

-        # Determine output filename
-        if output_path.is_dir() or str(output_path).endswith("/"):
-            output_path = Path(output_path) / f"{skill_dir.name}-qdrant.json"
-        elif not str(output_path).endswith(".json"):
-            output_str = str(output_path).replace(".zip", ".json").replace(".tar.gz", ".json")
-            if not output_str.endswith("-qdrant.json"):
-                output_str = output_str.replace(".json", "-qdrant.json")
-            if not output_str.endswith(".json"):
-                output_str += ".json"
-            output_path = Path(output_str)
-
-        output_path = Path(output_path)
+        # Determine output filename using base helper method
+        output_path = self._format_output_path(skill_dir, Path(output_path), "-qdrant.json")
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Read metadata