fix: Enforce min_chunk_size in RAG chunker

- Filter out chunks smaller than min_chunk_size (default 100 tokens)
- Exception: Keep all chunks if the entire document is smaller than the target size
- All 15 tests passing (100% pass rate)

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were
being created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
This commit is contained in:
yusyus
2026-02-07 20:59:03 +03:00
parent 3a769a27cd
commit 8b3f31409e
65 changed files with 16133 additions and 7 deletions

56
Dockerfile.mcp Normal file
View File

@@ -0,0 +1,56 @@
# Skill Seekers MCP Server - Docker Image
# Optimized for MCP server deployment (stdio + HTTP modes)
FROM python:3.12-slim

LABEL maintainer="Skill Seekers <noreply@skillseekers.dev>"
LABEL description="Skill Seekers MCP Server - 25 tools for AI skills generation"
LABEL version="2.9.0"

WORKDIR /app

# Install runtime dependencies.
# curl is required by the HEALTHCHECK below; git is presumably used by the
# application at runtime (TODO confirm). The apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user (fixed UID 1000) and hand it ownership of every
# directory the server writes to. This must run before the VOLUME instruction
# further down, because build-time changes made to a path after it has been
# declared a volume are discarded.
RUN useradd -m -u 1000 -s /bin/bash mcp && \
    mkdir -p /app /data /configs /output && \
    chown -R mcp:mcp /app /data /configs /output

# Copy application files (paths are relative to WORKDIR, i.e. /app/...)
COPY --chown=mcp:mcp src/ src/
COPY --chown=mcp:mcp configs/ configs/
COPY --chown=mcp:mcp pyproject.toml README.md ./

# Install dependencies.
# Runs before "USER mcp", i.e. still as the base image's default user, so the
# packages go into the system site-packages. The project itself is installed
# in editable mode from /app.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e ".[all-llms]" && \
    pip install --no-cache-dir mcp

# Switch to non-root user for everything that happens at runtime
USER mcp

# Environment variables
# MCP_PORT is consumed by both the HEALTHCHECK and the default CMD below, so
# overriding it at "docker run" time moves the server and the probe together.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    MCP_TRANSPORT=http \
    MCP_PORT=8765 \
    SKILL_SEEKERS_HOME=/data \
    SKILL_SEEKERS_OUTPUT=/output

# Health check for HTTP mode
# Shell form, so ${MCP_PORT} is expanded inside the container at probe time.
# -f fails on HTTP errors, -sS hides the progress meter but keeps error text.
# NOTE(review): this probe only makes sense in HTTP mode; when the container is
# started in stdio mode it will presumably fail - consider --no-healthcheck.
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD curl -fsS "http://localhost:${MCP_PORT}/health" || exit 1

# Volumes
VOLUME ["/data", "/configs", "/output"]

# Expose MCP server port (documentation only; matches the MCP_PORT default)
EXPOSE 8765

# Start MCP server in HTTP mode by default
# Use --transport stdio for stdio mode (override the whole command to do so).
# Exec-form CMD performs no variable expansion, so a small "sh -c" wrapper is
# used to pass ${MCP_PORT} through; "exec" replaces the shell so that python
# runs as PID 1 and receives SIGTERM from "docker stop". With the default
# MCP_PORT=8765 this is the same command line as before.
CMD ["sh", "-c", "exec python -m skill_seekers.mcp.server_fastmcp --transport http --port \"${MCP_PORT}\""]