Files
skill-seekers-reference/.github/workflows/vector-db-export.yml
yusyus 00c72ea4a3 fix: resolve CI failures across all GitHub Actions workflows
- Fix ruff format issue in doc_scraper.py
- Add pytest skip markers for browser renderer tests when Playwright is
  not installed in CI
- Replace broken Python heredocs in 4 workflow YAML files
  (scheduled-updates, vector-db-export, quality-metrics, test-vector-dbs)
  with python3 -c calls to fix YAML parsing errors

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 20:40:45 +03:00

189 lines
5.9 KiB
YAML

name: Vector Database Export

# Exports scraped skill documentation to one or more vector databases
# (weaviate, chroma, faiss, qdrant) and uploads the packages as artifacts.
on:
  workflow_dispatch:
    inputs:
      skill_name:
        description: 'Skill name to export (e.g., react, django, godot)'
        required: true
        type: string
      targets:
        description: 'Vector databases to export (comma-separated: weaviate,chroma,faiss,qdrant or "all")'
        required: true
        default: 'all'
        type: string
      config_path:
        description: 'Path to config file (optional, auto-detected from skill_name if not provided)'
        required: false
        type: string
  schedule:
    # Run weekly on Sunday at 2 AM UTC for popular frameworks
    - cron: '0 2 * * 0'

jobs:
  export:
    name: Export to Vector Databases
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # For scheduled runs, export popular frameworks; for manual runs,
        # wrap the single requested skill in a one-element matrix.
        skill: ${{ github.event_name == 'schedule' && fromJson('["react", "django", "godot", "fastapi"]') || fromJson(format('["{0}"]', github.event.inputs.skill_name)) }}
    env:
      SKILL_NAME: ${{ matrix.skill }}
      # Both inputs are empty on scheduled runs; downstream steps default them.
      TARGETS_INPUT: ${{ github.event.inputs.targets }}
      CONFIG_PATH_INPUT: ${{ github.event.inputs.config_path }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Determine config path
        id: config
        run: |
          # Explicit config_path input wins; otherwise derive from skill name.
          if [ -n "$CONFIG_PATH_INPUT" ]; then
            echo "path=$CONFIG_PATH_INPUT" >> $GITHUB_OUTPUT
          else
            echo "path=configs/$SKILL_NAME.json" >> $GITHUB_OUTPUT
          fi

      - name: Check if config exists
        id: check_config
        run: |
          CONFIG_FILE="${{ steps.config.outputs.path }}"
          if [ -f "$CONFIG_FILE" ]; then
            echo "exists=true" >> $GITHUB_OUTPUT
          else
            echo "exists=false" >> $GITHUB_OUTPUT
            echo "⚠️ Config not found: $CONFIG_FILE"
          fi

      - name: Scrape documentation
        if: steps.check_config.outputs.exists == 'true'
        run: |
          echo "📥 Scraping documentation for $SKILL_NAME..."
          skill-seekers scrape --config "${{ steps.config.outputs.path }}" --max-pages 100
        continue-on-error: true

      - name: Determine export targets
        id: targets
        run: |
          # Default to "all" for scheduled runs where the input is empty.
          TARGETS="${TARGETS_INPUT:-all}"
          if [ "$TARGETS" = "all" ]; then
            echo "list=weaviate chroma faiss qdrant" >> $GITHUB_OUTPUT
          else
            echo "list=$(echo "$TARGETS" | tr ',' ' ')" >> $GITHUB_OUTPUT
          fi

      - name: Export to vector databases
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          SKILL_DIR="output/$SKILL_NAME"
          if [ ! -d "$SKILL_DIR" ]; then
            echo "❌ Skill directory not found: $SKILL_DIR"
            exit 1
          fi
          echo "📦 Exporting $SKILL_NAME to vector databases..."
          for target in $EXPORT_TARGETS; do
            echo ""
            echo "🔹 Exporting to $target..."
            # Single-line `python3 -c` avoids Python indentation problems
            # inside the quoted snippet. Testing the command directly in
            # `if` (instead of checking $? afterwards) is required: the
            # default `bash -e` step shell would abort the step on failure
            # before a separate $? check could ever run.
            if python3 -c "from pathlib import Path; from skill_seekers.cli.adaptors import get_adaptor; print('Exported to', get_adaptor('$target').package(Path('$SKILL_DIR'), Path('output')))"; then
              echo "✅ $target export complete"
            else
              echo "❌ $target export failed"
            fi
          done

      - name: Generate quality report
        if: steps.check_config.outputs.exists == 'true'
        run: |
          SKILL_DIR="output/$SKILL_NAME"
          if [ -d "$SKILL_DIR" ]; then
            echo "📊 Generating quality metrics..."
            # Prints the report to the log and persists it for the
            # artifact-upload and summary steps below.
            python3 -c "from pathlib import Path; from skill_seekers.cli.quality_metrics import QualityAnalyzer; analyzer = QualityAnalyzer(Path('$SKILL_DIR')); formatted = analyzer.format_report(analyzer.generate_report()); print(formatted); Path('quality_report_${SKILL_NAME}.txt').write_text(formatted)"
          fi
        continue-on-error: true

      - name: Upload vector database exports
        if: steps.check_config.outputs.exists == 'true'
        # v3 of upload-artifact is deprecated/disabled; v4 requires unique
        # artifact names, which the skill-name prefix guarantees per matrix leg.
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.SKILL_NAME }}-vector-exports
          path: |
            output/${{ env.SKILL_NAME }}-*.json
          retention-days: 30

      - name: Upload quality report
        if: steps.check_config.outputs.exists == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.SKILL_NAME }}-quality-report
          path: quality_report_${{ env.SKILL_NAME }}.txt
          retention-days: 30
        continue-on-error: true

      - name: Create export summary
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          echo "## 📦 Vector Database Export Summary: $SKILL_NAME" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          for target in $EXPORT_TARGETS; do
            FILE="output/${SKILL_NAME}-${target}.json"
            if [ -f "$FILE" ]; then
              SIZE=$(du -h "$FILE" | cut -f1)
              echo "✅ **$target**: $SIZE" >> $GITHUB_STEP_SUMMARY
            else
              echo "❌ **$target**: Export failed" >> $GITHUB_STEP_SUMMARY
            fi
          done
          echo "" >> $GITHUB_STEP_SUMMARY
          if [ -f "quality_report_${SKILL_NAME}.txt" ]; then
            echo "### 📊 Quality Metrics" >> $GITHUB_STEP_SUMMARY
            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
            head -30 "quality_report_${SKILL_NAME}.txt" >> $GITHUB_STEP_SUMMARY
            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
          fi