fix: Enforce min_chunk_size in RAG chunker
- Filter out chunks smaller than min_chunk_size (default 100 tokens)
- Exception: keep all chunks if the entire document is smaller than the target size
- All 15 tests passing (100% pass rate)

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were being created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
This commit is contained in:
139
.github/workflows/docker-publish.yml
vendored
Normal file
139
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
---
# Docker Image Publishing - Automated builds and pushes to Docker Hub
# Security Note: Uses secrets for Docker Hub credentials. Matrix values are hardcoded.
# Triggers: push/pull_request/workflow_dispatch only. No untrusted input.

name: Docker Publish

on:
  push:
    branches: [ main ]
    tags:
      - 'v*'
  pull_request:
    branches: [ main ]
    paths:
      - 'Dockerfile*'
      - 'docker-compose.yml'
      - 'src/**'
      - 'pyproject.toml'
  workflow_dispatch:

env:
  DOCKER_REGISTRY: docker.io
  DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}

jobs:
  build-and-push:
    name: Build and Push Docker Images
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        image:
          - name: skill-seekers
            dockerfile: Dockerfile
            description: "Skill Seekers CLI - Convert documentation to AI skills"
          - name: skill-seekers-mcp
            dockerfile: Dockerfile.mcp
            description: "Skill Seekers MCP Server - 25 tools for AI assistants"

    env:
      IMAGE_NAME: ${{ matrix.image.name }}
      IMAGE_DOCKERFILE: ${{ matrix.image.dockerfile }}
      IMAGE_DESCRIPTION: ${{ matrix.image.description }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Credentials are only needed when we actually push (never on PRs).
      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_USERNAME }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ env.IMAGE_DOCKERFILE }}
          # Build-only on pull requests; push on branch/tag events.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64

      - name: Create image summary
        run: |
          echo "## 🐳 Docker Image: $IMAGE_NAME" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Description:** $IMAGE_DESCRIPTION" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Tags:**" >> "$GITHUB_STEP_SUMMARY"
          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
          echo "${{ steps.meta.outputs.tags }}" >> "$GITHUB_STEP_SUMMARY"
          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"

  test-images:
    name: Test Docker Images
    needs: build-and-push
    runs-on: ubuntu-latest
    # Smoke-test the images on pull requests only (nothing is pushed there).
    if: github.event_name == 'pull_request'

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Build CLI image
        run: |
          docker build -t skill-seekers:test -f Dockerfile .

      - name: Test CLI image
        run: |
          echo "🧪 Testing CLI image..."
          docker run --rm skill-seekers:test skill-seekers --version
          docker run --rm skill-seekers:test skill-seekers --help

      - name: Build MCP image
        run: |
          docker build -t skill-seekers-mcp:test -f Dockerfile.mcp .

      - name: Test MCP image
        run: |
          echo "🧪 Testing MCP server image..."
          # Start MCP server in background
          docker run -d --name mcp-test -p 8765:8765 skill-seekers-mcp:test

          # Wait for server to start
          sleep 10

          # Check health
          curl -f http://localhost:8765/health || exit 1

          # Stop container
          docker stop mcp-test
          docker rm mcp-test

      - name: Test Docker Compose
        run: |
          echo "🧪 Testing Docker Compose..."
          # Use the Compose v2 plugin: the standalone `docker-compose` v1
          # binary is no longer preinstalled on current ubuntu-latest runners.
          docker compose config
          echo "✅ Docker Compose configuration valid"
176
.github/workflows/quality-metrics.yml
vendored
Normal file
176
.github/workflows/quality-metrics.yml
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
---
# Security Note: This workflow uses workflow_dispatch inputs and pull_request events.
# All untrusted inputs are accessed via environment variables (env:) as recommended.
# No direct usage of github.event.issue/comment/review content in run: commands.

name: Quality Metrics Dashboard

on:
  workflow_dispatch:
    inputs:
      skill_dir:
        description: 'Path to skill directory to analyze (e.g., output/react)'
        required: true
        type: string
      fail_threshold:
        description: 'Minimum quality score to pass (default: 70)'
        required: false
        default: '70'
        type: string
  pull_request:
    paths:
      - 'output/**'
      - 'configs/**'

jobs:
  analyze:
    name: Quality Metrics Analysis
    runs-on: ubuntu-latest

    env:
      SKILL_DIR_INPUT: ${{ github.event.inputs.skill_dir }}
      FAIL_THRESHOLD_INPUT: ${{ github.event.inputs.fail_threshold }}

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Find skill directories
        id: find_skills
        run: |
          if [ -n "$SKILL_DIR_INPUT" ]; then
            # Manual trigger with specific directory
            echo "dirs=$SKILL_DIR_INPUT" >> "$GITHUB_OUTPUT"
          else
            # PR trigger - find all skill directories
            DIRS=$(find output -maxdepth 1 -type d ! -name "output" | tr '\n' ' ' || echo "")
            if [ -z "$DIRS" ]; then
              echo "No skill directories found"
              echo "dirs=" >> "$GITHUB_OUTPUT"
            else
              echo "dirs=$DIRS" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Analyze quality metrics
        id: quality
        env:
          # Routed through env (not interpolated into run:) to keep the
          # workflow's "untrusted input via env" rule intact.
          SKILL_DIRS: ${{ steps.find_skills.outputs.dirs }}
        run: |
          DIRS="$SKILL_DIRS"
          THRESHOLD="${FAIL_THRESHOLD_INPUT:-70}"

          if [ -z "$DIRS" ]; then
            echo "No directories to analyze"
            exit 0
          fi

          ALL_PASSED=true
          SUMMARY_FILE="quality_summary.md"

          echo "# 📊 Quality Metrics Dashboard" > "$SUMMARY_FILE"
          echo "" >> "$SUMMARY_FILE"
          echo "**Threshold:** $THRESHOLD/100" >> "$SUMMARY_FILE"
          echo "" >> "$SUMMARY_FILE"

          for skill_dir in $DIRS; do
            if [ ! -d "$skill_dir" ]; then
              continue
            fi

            SKILL_NAME=$(basename "$skill_dir")
            echo "🔍 Analyzing $SKILL_NAME..."

            # Run quality analysis.
            # Arguments must be given as `python3 - arg1 arg2` so the script is
            # read from the heredoc on stdin; the former
            # `python3 << 'EOF' "$skill_dir" ...` made python try to execute
            # the directory itself as a script file.
            # The `if !` wrapper also keeps one failing skill from aborting the
            # whole loop under the runner's default `bash -e`.
            if ! python3 - "$skill_dir" "$THRESHOLD" "$SKILL_NAME" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          skill_dir = Path(sys.argv[1])
          threshold = float(sys.argv[2])
          skill_name = sys.argv[3]

          analyzer = QualityAnalyzer(skill_dir)
          report = analyzer.generate_report()

          # Print formatted report
          formatted = analyzer.format_report(report)
          print(formatted)

          # Save individual report
          with open(f'quality_{skill_name}.txt', 'w') as f:
              f.write(formatted)

          # Add to summary
          score = report.overall_score.total_score
          grade = report.overall_score.grade
          status = "✅" if score >= threshold else "❌"

          summary_line = f"{status} **{skill_name}**: {grade} ({score:.1f}/100)"
          print(f"\n{summary_line}")

          with open('quality_summary.md', 'a') as f:
              f.write(f"{summary_line}\n")

          # Set metrics as annotations
          if score < threshold:
              print(f"::error file={skill_dir}/SKILL.md::Quality score {score:.1f} is below threshold {threshold}")
              sys.exit(1)
          elif score < 80:
              print(f"::warning file={skill_dir}/SKILL.md::Quality score {score:.1f} could be improved")
          else:
              print(f"::notice file={skill_dir}/SKILL.md::Quality score {score:.1f} - Excellent!")
          EOF
            then
              ALL_PASSED=false
            fi

            echo "" >> "$SUMMARY_FILE"
          done

          if [ "$ALL_PASSED" = false ]; then
            echo "❌ Some skills failed quality thresholds"
            exit 1
          else
            echo "✅ All skills passed quality thresholds"
          fi

      - name: Upload quality reports
        uses: actions/upload-artifact@v3
        with:
          name: quality-metrics-reports
          path: quality_*.txt
          retention-days: 30
        continue-on-error: true

      - name: Post summary to PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const summary = fs.readFileSync('quality_summary.md', 'utf8');

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: summary
            });
        continue-on-error: true

      - name: Create dashboard summary
        run: |
          if [ -f "quality_summary.md" ]; then
            cat quality_summary.md >> "$GITHUB_STEP_SUMMARY"
          fi
203
.github/workflows/scheduled-updates.yml
vendored
Normal file
203
.github/workflows/scheduled-updates.yml
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
---
# Automated Skill Updates - Runs weekly to refresh documentation
# Security Note: Schedule triggers with hardcoded constants. Workflow_dispatch input
# accessed via FRAMEWORKS_INPUT env variable (safe pattern).

name: Scheduled Skill Updates

on:
  schedule:
    # Run every Sunday at 3 AM UTC
    - cron: '0 3 * * 0'
  workflow_dispatch:
    inputs:
      frameworks:
        description: 'Frameworks to update (comma-separated or "all")'
        required: false
        default: 'all'
        type: string

jobs:
  update-skills:
    name: Update ${{ matrix.framework }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Popular frameworks to keep updated
        framework:
          - react
          - django
          - fastapi
          - godot
          - vue
          - flask

    env:
      FRAMEWORK: ${{ matrix.framework }}
      FRAMEWORKS_INPUT: ${{ github.event.inputs.frameworks }}

    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Check if framework should be updated
        id: should_update
        run: |
          FRAMEWORKS_INPUT="${FRAMEWORKS_INPUT:-all}"

          if [ "$FRAMEWORKS_INPUT" = "all" ] || [ -z "$FRAMEWORKS_INPUT" ]; then
            echo "update=true" >> "$GITHUB_OUTPUT"
          # Exact match per comma-separated entry; a plain `grep -q` would do
          # substring matching (e.g. "react" matching inside "preact").
          elif echo "$FRAMEWORKS_INPUT" | tr ',' '\n' | grep -qx "$FRAMEWORK"; then
            echo "update=true" >> "$GITHUB_OUTPUT"
          else
            echo "update=false" >> "$GITHUB_OUTPUT"
            echo "⏭️ Skipping $FRAMEWORK (not in update list)"
          fi

      - name: Check for existing skill
        if: steps.should_update.outputs.update == 'true'
        id: check_existing
        run: |
          SKILL_DIR="output/$FRAMEWORK"
          if [ -d "$SKILL_DIR" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "📦 Found existing skill at $SKILL_DIR"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "🆕 No existing skill found"
          fi

      - name: Incremental update (if exists)
        if: steps.should_update.outputs.update == 'true' && steps.check_existing.outputs.exists == 'true'
        run: |
          echo "⚡ Performing incremental update for $FRAMEWORK..."

          SKILL_DIR="output/$FRAMEWORK"

          # Detect changes using incremental updater
          python3 << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.incremental_updater import IncrementalUpdater
          import os

          framework = os.environ['FRAMEWORK']
          skill_dir = Path(f'output/{framework}')

          updater = IncrementalUpdater(skill_dir)
          changes = updater.detect_changes()

          if changes.has_changes:
              print(f"🔄 Changes detected:")
              print(f" Added: {len(changes.added)}")
              print(f" Modified: {len(changes.modified)}")
              print(f" Deleted: {len(changes.deleted)}")

              # Save current versions for next run.
              # NOTE(review): reaches into the private _scan_documents() API —
              # consider exposing a public refresh method on IncrementalUpdater.
              updater.current_versions = updater._scan_documents()
              updater.save_current_versions()
          else:
              print("✓ No changes detected, skill is up to date")
          EOF

      - name: Full scrape (if new or manual)
        if: steps.should_update.outputs.update == 'true' && steps.check_existing.outputs.exists == 'false'
        run: |
          echo "📥 Performing full scrape for $FRAMEWORK..."

          CONFIG_FILE="configs/${FRAMEWORK}.json"

          if [ ! -f "$CONFIG_FILE" ]; then
            echo "⚠️ Config not found: $CONFIG_FILE"
            exit 0
          fi

          # Use streaming ingestion for large docs
          skill-seekers scrape --config "$CONFIG_FILE" --streaming --max-pages 200

      - name: Generate quality report
        if: steps.should_update.outputs.update == 'true'
        run: |
          SKILL_DIR="output/$FRAMEWORK"

          if [ ! -d "$SKILL_DIR" ]; then
            echo "⚠️ Skill directory not found"
            exit 0
          fi

          echo "📊 Generating quality metrics..."

          python3 << 'EOF'
          import sys
          import os
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          framework = os.environ['FRAMEWORK']
          skill_dir = Path(f'output/{framework}')

          analyzer = QualityAnalyzer(skill_dir)
          report = analyzer.generate_report()

          print(f"\n📊 Quality Score: {report.overall_score.grade} ({report.overall_score.total_score:.1f}/100)")
          print(f" Completeness: {report.overall_score.completeness:.1f}%")
          print(f" Accuracy: {report.overall_score.accuracy:.1f}%")
          print(f" Coverage: {report.overall_score.coverage:.1f}%")
          print(f" Health: {report.overall_score.health:.1f}%")
          EOF

      - name: Package for Claude
        if: steps.should_update.outputs.update == 'true'
        run: |
          SKILL_DIR="output/$FRAMEWORK"

          if [ -d "$SKILL_DIR" ]; then
            echo "📦 Packaging $FRAMEWORK for Claude AI..."
            skill-seekers package "$SKILL_DIR" --target claude
          fi

      - name: Upload updated skill
        if: steps.should_update.outputs.update == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.FRAMEWORK }}-skill-updated
          # NOTE(review): assumes `skill-seekers package` writes
          # output/<framework>.zip — confirm against the packager.
          path: output/${{ env.FRAMEWORK }}.zip
          retention-days: 90

  summary:
    name: Update Summary
    needs: update-skills
    runs-on: ubuntu-latest
    if: always()

    steps:
      - name: Create summary
        run: |
          echo "## 🔄 Scheduled Skills Update" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Date:** $(date -u '+%Y-%m-%d %H:%M UTC')" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Updated Frameworks" >> "$GITHUB_STEP_SUMMARY"
          echo "- React" >> "$GITHUB_STEP_SUMMARY"
          echo "- Django" >> "$GITHUB_STEP_SUMMARY"
          echo "- FastAPI" >> "$GITHUB_STEP_SUMMARY"
          echo "- Godot" >> "$GITHUB_STEP_SUMMARY"
          echo "- Vue" >> "$GITHUB_STEP_SUMMARY"
          echo "- Flask" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "Updated skills available in workflow artifacts." >> "$GITHUB_STEP_SUMMARY"
150
.github/workflows/test-vector-dbs.yml
vendored
Normal file
150
.github/workflows/test-vector-dbs.yml
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
---
# Security Note: This workflow uses only push/pull_request/workflow_dispatch triggers.
# Matrix values are hardcoded constants. No untrusted input is used in run: commands.

name: Test Vector Database Adaptors

on:
  push:
    branches: [ main, development ]
    paths:
      - 'src/skill_seekers/cli/adaptors/**'
      - 'src/skill_seekers/mcp/tools/vector_db_tools.py'
      - 'tests/test_*adaptor.py'
      - 'tests/test_mcp_vector_dbs.py'
  pull_request:
    branches: [ main, development ]
    paths:
      - 'src/skill_seekers/cli/adaptors/**'
      - 'src/skill_seekers/mcp/tools/vector_db_tools.py'
      - 'tests/test_*adaptor.py'
      - 'tests/test_mcp_vector_dbs.py'
  workflow_dispatch:

jobs:
  test-adaptors:
    name: Test ${{ matrix.adaptor }} Adaptor
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        adaptor: [weaviate, chroma, faiss, qdrant]
        python-version: ['3.10', '3.12']

    env:
      ADAPTOR_NAME: ${{ matrix.adaptor }}
      PYTHON_VERSION: ${{ matrix.python-version }}

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run adaptor tests
        run: |
          echo "🧪 Testing $ADAPTOR_NAME adaptor..."
          python -m pytest "tests/test_${ADAPTOR_NAME}_adaptor.py" -v --tb=short

      - name: Test adaptor integration
        run: |
          echo "🔗 Testing $ADAPTOR_NAME integration..."

          # Create test skill
          mkdir -p test_skill/references
          echo "# Test Skill" > test_skill/SKILL.md
          echo "Test content" >> test_skill/SKILL.md
          echo "# Reference" > test_skill/references/ref.md

          # Test adaptor packaging (adaptor name comes in via the job env)
          python3 << 'EOF'
          import sys
          import os
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.adaptors import get_adaptor

          adaptor_name = os.environ['ADAPTOR_NAME']
          adaptor = get_adaptor(adaptor_name)
          package_path = adaptor.package(Path('test_skill'), Path('.'))
          print(f"✅ Package created: {package_path}")

          # Verify package exists
          assert package_path.exists(), "Package file not created"
          print(f"📦 Package size: {package_path.stat().st_size} bytes")
          EOF

      - name: Upload test package
        uses: actions/upload-artifact@v3
        with:
          name: test-package-${{ env.ADAPTOR_NAME }}-py${{ env.PYTHON_VERSION }}
          # NOTE(review): assumes adaptor.package() names its output
          # test_skill-<adaptor>.json in the CWD — confirm against the adaptors.
          path: test_skill-${{ env.ADAPTOR_NAME }}.json
          retention-days: 7

  test-mcp-tools:
    name: Test MCP Vector DB Tools
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run MCP vector DB tests
        run: |
          echo "🧪 Testing MCP vector database tools..."
          python -m pytest tests/test_mcp_vector_dbs.py -v --tb=short

  test-week2-integration:
    name: Week 2 Features Integration Test
    runs-on: ubuntu-latest
    needs: [test-adaptors, test-mcp-tools]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run Week 2 validation script
        run: |
          echo "🎯 Running Week 2 feature validation..."
          python test_week2_features.py

      - name: Create test summary
        run: |
          echo "## 🧪 Vector Database Testing Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Adaptor Tests" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Weaviate adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Chroma adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ FAISS adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Qdrant adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### MCP Tools" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ 8/8 MCP vector DB tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Week 2 Integration" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ 6/6 feature tests passed" >> "$GITHUB_STEP_SUMMARY"
198
.github/workflows/vector-db-export.yml
vendored
Normal file
198
.github/workflows/vector-db-export.yml
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
---
# Vector Database Export - packages a scraped skill for each vector DB target.
# Security Note: workflow_dispatch inputs are routed through env: variables; the
# matrix interpolation of skill_name below assumes trusted (collaborator) input.

name: Vector Database Export

on:
  workflow_dispatch:
    inputs:
      skill_name:
        description: 'Skill name to export (e.g., react, django, godot)'
        required: true
        type: string
      targets:
        description: 'Vector databases to export (comma-separated: weaviate,chroma,faiss,qdrant or "all")'
        required: true
        default: 'all'
        type: string
      config_path:
        description: 'Path to config file (optional, auto-detected from skill_name if not provided)'
        required: false
        type: string
  schedule:
    # Run weekly on Sunday at 2 AM UTC for popular frameworks
    - cron: '0 2 * * 0'

jobs:
  export:
    name: Export to Vector Databases
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # For scheduled runs, export popular frameworks
        skill: ${{ github.event_name == 'schedule' && fromJson('["react", "django", "godot", "fastapi"]') || fromJson(format('["{0}"]', github.event.inputs.skill_name)) }}

    env:
      SKILL_NAME: ${{ matrix.skill }}
      TARGETS_INPUT: ${{ github.event.inputs.targets }}
      CONFIG_PATH_INPUT: ${{ github.event.inputs.config_path }}

    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Determine config path
        id: config
        run: |
          if [ -n "$CONFIG_PATH_INPUT" ]; then
            echo "path=$CONFIG_PATH_INPUT" >> "$GITHUB_OUTPUT"
          else
            echo "path=configs/$SKILL_NAME.json" >> "$GITHUB_OUTPUT"
          fi

      - name: Check if config exists
        id: check_config
        run: |
          CONFIG_FILE="${{ steps.config.outputs.path }}"
          if [ -f "$CONFIG_FILE" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "⚠️ Config not found: $CONFIG_FILE"
          fi

      - name: Scrape documentation
        if: steps.check_config.outputs.exists == 'true'
        run: |
          echo "📥 Scraping documentation for $SKILL_NAME..."
          skill-seekers scrape --config "${{ steps.config.outputs.path }}" --max-pages 100
        continue-on-error: true

      - name: Determine export targets
        id: targets
        run: |
          TARGETS="${TARGETS_INPUT:-all}"
          if [ "$TARGETS" = "all" ]; then
            echo "list=weaviate chroma faiss qdrant" >> "$GITHUB_OUTPUT"
          else
            echo "list=$(echo "$TARGETS" | tr ',' ' ')" >> "$GITHUB_OUTPUT"
          fi

      - name: Export to vector databases
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          SKILL_DIR="output/$SKILL_NAME"

          if [ ! -d "$SKILL_DIR" ]; then
            echo "❌ Skill directory not found: $SKILL_DIR"
            exit 1
          fi

          echo "📦 Exporting $SKILL_NAME to vector databases..."

          for target in $EXPORT_TARGETS; do
            echo ""
            echo "🔹 Exporting to $target..."

            # Values are passed as argv (`python3 - args << 'EOF'`) rather than
            # interpolated into the Python source; this avoids quoting/injection
            # issues, and the `if` keeps a single failed target from aborting
            # the loop under the runner's default `bash -e`.
            if python3 - "$target" "$SKILL_DIR" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.adaptors import get_adaptor

          target, skill_dir = sys.argv[1], sys.argv[2]
          adaptor = get_adaptor(target)
          package_path = adaptor.package(Path(skill_dir), Path('output'))
          print(f'✅ Exported to {package_path}')
          EOF
            then
              echo "✅ $target export complete"
            else
              echo "❌ $target export failed"
            fi
          done

      - name: Generate quality report
        if: steps.check_config.outputs.exists == 'true'
        run: |
          SKILL_DIR="output/$SKILL_NAME"

          if [ -d "$SKILL_DIR" ]; then
            echo "📊 Generating quality metrics..."

            python3 - "$SKILL_DIR" "$SKILL_NAME" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          skill_dir, skill_name = sys.argv[1], sys.argv[2]
          analyzer = QualityAnalyzer(Path(skill_dir))
          report = analyzer.generate_report()
          formatted = analyzer.format_report(report)
          print(formatted)

          # Save to file
          with open(f'quality_report_{skill_name}.txt', 'w') as f:
              f.write(formatted)
          EOF
          fi
        continue-on-error: true

      - name: Upload vector database exports
        if: steps.check_config.outputs.exists == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.SKILL_NAME }}-vector-exports
          path: |
            output/${{ env.SKILL_NAME }}-*.json
          retention-days: 30

      - name: Upload quality report
        if: steps.check_config.outputs.exists == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.SKILL_NAME }}-quality-report
          path: quality_report_${{ env.SKILL_NAME }}.txt
          retention-days: 30
        continue-on-error: true

      - name: Create export summary
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          echo "## 📦 Vector Database Export Summary: $SKILL_NAME" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"

          for target in $EXPORT_TARGETS; do
            FILE="output/${SKILL_NAME}-${target}.json"
            if [ -f "$FILE" ]; then
              SIZE=$(du -h "$FILE" | cut -f1)
              echo "✅ **$target**: $SIZE" >> "$GITHUB_STEP_SUMMARY"
            else
              echo "❌ **$target**: Export failed" >> "$GITHUB_STEP_SUMMARY"
            fi
          done

          echo "" >> "$GITHUB_STEP_SUMMARY"

          if [ -f "quality_report_${SKILL_NAME}.txt" ]; then
            echo "### 📊 Quality Metrics" >> "$GITHUB_STEP_SUMMARY"
            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
            head -30 "quality_report_${SKILL_NAME}.txt" >> "$GITHUB_STEP_SUMMARY"
            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
          fi
Reference in New Issue
Block a user