- Filter out chunks smaller than min_chunk_size (default: 100 tokens)
- Exception: keep all chunks if the entire document is smaller than the target size
- All 15 tests passing (100% pass rate)

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were being created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
50 lines
1.6 KiB
YAML
50 lines
1.6 KiB
YAML
{{- /*
Deployment for the optional Chroma vector database.

Rendered only when .Values.vectorDatabases.chroma.enabled is truthy.

Values read by this template (all under .Values.vectorDatabases.chroma):
  enabled               - gates the whole manifest
  replicaCount          - spec.replicas
  image.repository/tag  - container image reference
  image.pullPolicy      - container imagePullPolicy
  resources             - emitted verbatim via toYaml
  persistence.enabled   - PVC-backed volume when truthy, emptyDir otherwise

Named templates used: skill-seekers.fullname, skill-seekers.labels,
skill-seekers.selectorLabels (defined outside this file).
*/ -}}
{{- if .Values.vectorDatabases.chroma.enabled -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "skill-seekers.fullname" . }}-chroma
  labels:
    {{- include "skill-seekers.labels" . | nindent 4 }}
    app.kubernetes.io/component: chroma
spec:
  # NOTE(review): with persistence enabled every replica mounts the same
  # claim; confirm the claim's access mode allows that before raising this.
  replicas: {{ .Values.vectorDatabases.chroma.replicaCount }}
  selector:
    matchLabels:
      {{- include "skill-seekers.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: chroma
  template:
    metadata:
      # Pod labels mirror spec.selector.matchLabels above so the Deployment
      # selects the pods it creates.
      labels:
        {{- include "skill-seekers.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: chroma
    spec:
      containers:
        - name: chroma
          image: "{{ .Values.vectorDatabases.chroma.image.repository }}:{{ .Values.vectorDatabases.chroma.image.tag }}"
          imagePullPolicy: {{ .Values.vectorDatabases.chroma.image.pullPolicy }}
          ports:
            - name: http
              containerPort: 8000
              protocol: TCP
          # Values are quoted so they reach the container as strings rather
          # than being parsed as YAML booleans.
          env:
            - name: IS_PERSISTENT
              value: "TRUE"
            # Same path as the "data" volumeMount below.
            - name: PERSIST_DIRECTORY
              value: "/chroma/chroma"
            - name: ANONYMIZED_TELEMETRY
              value: "FALSE"
          resources:
            {{- toYaml .Values.vectorDatabases.chroma.resources | nindent 12 }}
          volumeMounts:
            - name: data
              mountPath: /chroma/chroma
      volumes:
        - name: data
          # The claim named below is not created by this template; presumably
          # a sibling template in the chart defines it - TODO confirm.
          {{- if .Values.vectorDatabases.chroma.persistence.enabled }}
          persistentVolumeClaim:
            claimName: {{ include "skill-seekers.fullname" . }}-chroma-data
          {{- else }}
          emptyDir: {}
          {{- end }}
{{- end }}