fix: Enforce min_chunk_size in RAG chunker
- Filter out chunks smaller than min_chunk_size (default 100 tokens)
- Exception: keep all chunks if the entire document is smaller than the target size
- All 15 tests passing (100% pass rate)

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were being created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
This commit is contained in:
139
.github/workflows/docker-publish.yml
vendored
Normal file
139
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
---
# Docker Image Publishing - Automated builds and pushes to Docker Hub
# Security Note: Uses secrets for Docker Hub credentials. Matrix values are hardcoded.
# Triggers: push/pull_request/workflow_dispatch only. No untrusted input.

name: Docker Publish

on:
  push:
    branches: [ main ]
    tags:
      - 'v*'
  pull_request:
    branches: [ main ]
    paths:
      - 'Dockerfile*'
      - 'docker-compose.yml'
      - 'src/**'
      - 'pyproject.toml'
  workflow_dispatch:

env:
  DOCKER_REGISTRY: docker.io
  DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}

jobs:
  build-and-push:
    name: Build and Push Docker Images
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        image:
          - name: skill-seekers
            dockerfile: Dockerfile
            description: "Skill Seekers CLI - Convert documentation to AI skills"
          - name: skill-seekers-mcp
            dockerfile: Dockerfile.mcp
            description: "Skill Seekers MCP Server - 25 tools for AI assistants"

    env:
      IMAGE_NAME: ${{ matrix.image.name }}
      IMAGE_DOCKERFILE: ${{ matrix.image.dockerfile }}
      IMAGE_DESCRIPTION: ${{ matrix.image.description }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Credentials are only needed when we actually push (never on PRs).
      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_USERNAME }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ env.IMAGE_DOCKERFILE }}
          # Build-only on pull requests; push on branch/tag events.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64

      - name: Create image summary
        run: |
          echo "## 🐳 Docker Image: $IMAGE_NAME" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Description:** $IMAGE_DESCRIPTION" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Tags:**" >> "$GITHUB_STEP_SUMMARY"
          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
          echo "${{ steps.meta.outputs.tags }}" >> "$GITHUB_STEP_SUMMARY"
          echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"

  test-images:
    name: Test Docker Images
    needs: build-and-push
    runs-on: ubuntu-latest
    # Smoke-test the images on pull requests only (nothing is pushed there).
    if: github.event_name == 'pull_request'

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Build CLI image
        run: |
          docker build -t skill-seekers:test -f Dockerfile .

      - name: Test CLI image
        run: |
          echo "🧪 Testing CLI image..."
          docker run --rm skill-seekers:test skill-seekers --version
          docker run --rm skill-seekers:test skill-seekers --help

      - name: Build MCP image
        run: |
          docker build -t skill-seekers-mcp:test -f Dockerfile.mcp .

      - name: Test MCP image
        run: |
          echo "🧪 Testing MCP server image..."
          # Start MCP server in background
          docker run -d --name mcp-test -p 8765:8765 skill-seekers-mcp:test

          # Wait for server to start
          sleep 10

          # Check health
          curl -f http://localhost:8765/health || exit 1

          # Stop container
          docker stop mcp-test
          docker rm mcp-test

      - name: Test Docker Compose
        run: |
          echo "🧪 Testing Docker Compose..."
          # Use the Compose v2 plugin: the standalone `docker-compose` v1
          # binary is no longer preinstalled on current ubuntu-latest runners.
          docker compose config
          echo "✅ Docker Compose configuration valid"
176
.github/workflows/quality-metrics.yml
vendored
Normal file
176
.github/workflows/quality-metrics.yml
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
---
# Security Note: This workflow uses workflow_dispatch inputs and pull_request events.
# All untrusted inputs are accessed via environment variables (env:) as recommended.
# No direct usage of github.event.issue/comment/review content in run: commands.

name: Quality Metrics Dashboard

on:
  workflow_dispatch:
    inputs:
      skill_dir:
        description: 'Path to skill directory to analyze (e.g., output/react)'
        required: true
        type: string
      fail_threshold:
        description: 'Minimum quality score to pass (default: 70)'
        required: false
        default: '70'
        type: string
  pull_request:
    paths:
      - 'output/**'
      - 'configs/**'

jobs:
  analyze:
    name: Quality Metrics Analysis
    runs-on: ubuntu-latest

    env:
      SKILL_DIR_INPUT: ${{ github.event.inputs.skill_dir }}
      FAIL_THRESHOLD_INPUT: ${{ github.event.inputs.fail_threshold }}

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Find skill directories
        id: find_skills
        run: |
          if [ -n "$SKILL_DIR_INPUT" ]; then
            # Manual trigger with specific directory
            echo "dirs=$SKILL_DIR_INPUT" >> "$GITHUB_OUTPUT"
          else
            # PR trigger - find all skill directories
            DIRS=$(find output -maxdepth 1 -type d ! -name "output" | tr '\n' ' ' || echo "")
            if [ -z "$DIRS" ]; then
              echo "No skill directories found"
              echo "dirs=" >> "$GITHUB_OUTPUT"
            else
              echo "dirs=$DIRS" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Analyze quality metrics
        id: quality
        env:
          # Routed through env (not interpolated into run:) to keep the
          # workflow's "untrusted input via env" rule intact.
          SKILL_DIRS: ${{ steps.find_skills.outputs.dirs }}
        run: |
          DIRS="$SKILL_DIRS"
          THRESHOLD="${FAIL_THRESHOLD_INPUT:-70}"

          if [ -z "$DIRS" ]; then
            echo "No directories to analyze"
            exit 0
          fi

          ALL_PASSED=true
          SUMMARY_FILE="quality_summary.md"

          echo "# 📊 Quality Metrics Dashboard" > "$SUMMARY_FILE"
          echo "" >> "$SUMMARY_FILE"
          echo "**Threshold:** $THRESHOLD/100" >> "$SUMMARY_FILE"
          echo "" >> "$SUMMARY_FILE"

          for skill_dir in $DIRS; do
            if [ ! -d "$skill_dir" ]; then
              continue
            fi

            SKILL_NAME=$(basename "$skill_dir")
            echo "🔍 Analyzing $SKILL_NAME..."

            # Run quality analysis.
            # Arguments must be given as `python3 - arg1 arg2` so the script is
            # read from the heredoc on stdin; the former
            # `python3 << 'EOF' "$skill_dir" ...` made python try to execute
            # the directory itself as a script file.
            # The `if !` wrapper also keeps one failing skill from aborting the
            # whole loop under the runner's default `bash -e`.
            if ! python3 - "$skill_dir" "$THRESHOLD" "$SKILL_NAME" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          skill_dir = Path(sys.argv[1])
          threshold = float(sys.argv[2])
          skill_name = sys.argv[3]

          analyzer = QualityAnalyzer(skill_dir)
          report = analyzer.generate_report()

          # Print formatted report
          formatted = analyzer.format_report(report)
          print(formatted)

          # Save individual report
          with open(f'quality_{skill_name}.txt', 'w') as f:
              f.write(formatted)

          # Add to summary
          score = report.overall_score.total_score
          grade = report.overall_score.grade
          status = "✅" if score >= threshold else "❌"

          summary_line = f"{status} **{skill_name}**: {grade} ({score:.1f}/100)"
          print(f"\n{summary_line}")

          with open('quality_summary.md', 'a') as f:
              f.write(f"{summary_line}\n")

          # Set metrics as annotations
          if score < threshold:
              print(f"::error file={skill_dir}/SKILL.md::Quality score {score:.1f} is below threshold {threshold}")
              sys.exit(1)
          elif score < 80:
              print(f"::warning file={skill_dir}/SKILL.md::Quality score {score:.1f} could be improved")
          else:
              print(f"::notice file={skill_dir}/SKILL.md::Quality score {score:.1f} - Excellent!")
          EOF
            then
              ALL_PASSED=false
            fi

            echo "" >> "$SUMMARY_FILE"
          done

          if [ "$ALL_PASSED" = false ]; then
            echo "❌ Some skills failed quality thresholds"
            exit 1
          else
            echo "✅ All skills passed quality thresholds"
          fi

      - name: Upload quality reports
        uses: actions/upload-artifact@v3
        with:
          name: quality-metrics-reports
          path: quality_*.txt
          retention-days: 30
        continue-on-error: true

      - name: Post summary to PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const summary = fs.readFileSync('quality_summary.md', 'utf8');

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: summary
            });
        continue-on-error: true

      - name: Create dashboard summary
        run: |
          if [ -f "quality_summary.md" ]; then
            cat quality_summary.md >> "$GITHUB_STEP_SUMMARY"
          fi
203
.github/workflows/scheduled-updates.yml
vendored
Normal file
203
.github/workflows/scheduled-updates.yml
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
---
# Automated Skill Updates - Runs weekly to refresh documentation
# Security Note: Schedule triggers with hardcoded constants. Workflow_dispatch input
# accessed via FRAMEWORKS_INPUT env variable (safe pattern).

name: Scheduled Skill Updates

on:
  schedule:
    # Run every Sunday at 3 AM UTC
    - cron: '0 3 * * 0'
  workflow_dispatch:
    inputs:
      frameworks:
        description: 'Frameworks to update (comma-separated or "all")'
        required: false
        default: 'all'
        type: string

jobs:
  update-skills:
    name: Update ${{ matrix.framework }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Popular frameworks to keep updated
        framework:
          - react
          - django
          - fastapi
          - godot
          - vue
          - flask

    env:
      FRAMEWORK: ${{ matrix.framework }}
      FRAMEWORKS_INPUT: ${{ github.event.inputs.frameworks }}

    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Check if framework should be updated
        id: should_update
        run: |
          FRAMEWORKS_INPUT="${FRAMEWORKS_INPUT:-all}"

          if [ "$FRAMEWORKS_INPUT" = "all" ] || [ -z "$FRAMEWORKS_INPUT" ]; then
            echo "update=true" >> "$GITHUB_OUTPUT"
          # Exact match per comma-separated entry; a plain `grep -q` would do
          # substring matching (e.g. "react" matching inside "preact").
          elif echo "$FRAMEWORKS_INPUT" | tr ',' '\n' | grep -qx "$FRAMEWORK"; then
            echo "update=true" >> "$GITHUB_OUTPUT"
          else
            echo "update=false" >> "$GITHUB_OUTPUT"
            echo "⏭️ Skipping $FRAMEWORK (not in update list)"
          fi

      - name: Check for existing skill
        if: steps.should_update.outputs.update == 'true'
        id: check_existing
        run: |
          SKILL_DIR="output/$FRAMEWORK"
          if [ -d "$SKILL_DIR" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "📦 Found existing skill at $SKILL_DIR"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "🆕 No existing skill found"
          fi

      - name: Incremental update (if exists)
        if: steps.should_update.outputs.update == 'true' && steps.check_existing.outputs.exists == 'true'
        run: |
          echo "⚡ Performing incremental update for $FRAMEWORK..."

          SKILL_DIR="output/$FRAMEWORK"

          # Detect changes using incremental updater
          python3 << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.incremental_updater import IncrementalUpdater
          import os

          framework = os.environ['FRAMEWORK']
          skill_dir = Path(f'output/{framework}')

          updater = IncrementalUpdater(skill_dir)
          changes = updater.detect_changes()

          if changes.has_changes:
              print(f"🔄 Changes detected:")
              print(f" Added: {len(changes.added)}")
              print(f" Modified: {len(changes.modified)}")
              print(f" Deleted: {len(changes.deleted)}")

              # Save current versions for next run.
              # NOTE(review): reaches into the private _scan_documents() API —
              # consider exposing a public refresh method on IncrementalUpdater.
              updater.current_versions = updater._scan_documents()
              updater.save_current_versions()
          else:
              print("✓ No changes detected, skill is up to date")
          EOF

      - name: Full scrape (if new or manual)
        if: steps.should_update.outputs.update == 'true' && steps.check_existing.outputs.exists == 'false'
        run: |
          echo "📥 Performing full scrape for $FRAMEWORK..."

          CONFIG_FILE="configs/${FRAMEWORK}.json"

          if [ ! -f "$CONFIG_FILE" ]; then
            echo "⚠️ Config not found: $CONFIG_FILE"
            exit 0
          fi

          # Use streaming ingestion for large docs
          skill-seekers scrape --config "$CONFIG_FILE" --streaming --max-pages 200

      - name: Generate quality report
        if: steps.should_update.outputs.update == 'true'
        run: |
          SKILL_DIR="output/$FRAMEWORK"

          if [ ! -d "$SKILL_DIR" ]; then
            echo "⚠️ Skill directory not found"
            exit 0
          fi

          echo "📊 Generating quality metrics..."

          python3 << 'EOF'
          import sys
          import os
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          framework = os.environ['FRAMEWORK']
          skill_dir = Path(f'output/{framework}')

          analyzer = QualityAnalyzer(skill_dir)
          report = analyzer.generate_report()

          print(f"\n📊 Quality Score: {report.overall_score.grade} ({report.overall_score.total_score:.1f}/100)")
          print(f" Completeness: {report.overall_score.completeness:.1f}%")
          print(f" Accuracy: {report.overall_score.accuracy:.1f}%")
          print(f" Coverage: {report.overall_score.coverage:.1f}%")
          print(f" Health: {report.overall_score.health:.1f}%")
          EOF

      - name: Package for Claude
        if: steps.should_update.outputs.update == 'true'
        run: |
          SKILL_DIR="output/$FRAMEWORK"

          if [ -d "$SKILL_DIR" ]; then
            echo "📦 Packaging $FRAMEWORK for Claude AI..."
            skill-seekers package "$SKILL_DIR" --target claude
          fi

      - name: Upload updated skill
        if: steps.should_update.outputs.update == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.FRAMEWORK }}-skill-updated
          # NOTE(review): assumes `skill-seekers package` writes
          # output/<framework>.zip — confirm against the packager.
          path: output/${{ env.FRAMEWORK }}.zip
          retention-days: 90

  summary:
    name: Update Summary
    needs: update-skills
    runs-on: ubuntu-latest
    if: always()

    steps:
      - name: Create summary
        run: |
          echo "## 🔄 Scheduled Skills Update" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Date:** $(date -u '+%Y-%m-%d %H:%M UTC')" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Updated Frameworks" >> "$GITHUB_STEP_SUMMARY"
          echo "- React" >> "$GITHUB_STEP_SUMMARY"
          echo "- Django" >> "$GITHUB_STEP_SUMMARY"
          echo "- FastAPI" >> "$GITHUB_STEP_SUMMARY"
          echo "- Godot" >> "$GITHUB_STEP_SUMMARY"
          echo "- Vue" >> "$GITHUB_STEP_SUMMARY"
          echo "- Flask" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "Updated skills available in workflow artifacts." >> "$GITHUB_STEP_SUMMARY"
150
.github/workflows/test-vector-dbs.yml
vendored
Normal file
150
.github/workflows/test-vector-dbs.yml
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
---
# Security Note: This workflow uses only push/pull_request/workflow_dispatch triggers.
# Matrix values are hardcoded constants. No untrusted input is used in run: commands.

name: Test Vector Database Adaptors

on:
  push:
    branches: [ main, development ]
    paths:
      - 'src/skill_seekers/cli/adaptors/**'
      - 'src/skill_seekers/mcp/tools/vector_db_tools.py'
      - 'tests/test_*adaptor.py'
      - 'tests/test_mcp_vector_dbs.py'
  pull_request:
    branches: [ main, development ]
    paths:
      - 'src/skill_seekers/cli/adaptors/**'
      - 'src/skill_seekers/mcp/tools/vector_db_tools.py'
      - 'tests/test_*adaptor.py'
      - 'tests/test_mcp_vector_dbs.py'
  workflow_dispatch:

jobs:
  test-adaptors:
    name: Test ${{ matrix.adaptor }} Adaptor
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        adaptor: [weaviate, chroma, faiss, qdrant]
        python-version: ['3.10', '3.12']

    env:
      ADAPTOR_NAME: ${{ matrix.adaptor }}
      PYTHON_VERSION: ${{ matrix.python-version }}

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run adaptor tests
        run: |
          echo "🧪 Testing $ADAPTOR_NAME adaptor..."
          python -m pytest "tests/test_${ADAPTOR_NAME}_adaptor.py" -v --tb=short

      - name: Test adaptor integration
        run: |
          echo "🔗 Testing $ADAPTOR_NAME integration..."

          # Create test skill
          mkdir -p test_skill/references
          echo "# Test Skill" > test_skill/SKILL.md
          echo "Test content" >> test_skill/SKILL.md
          echo "# Reference" > test_skill/references/ref.md

          # Test adaptor packaging (adaptor name comes in via the job env)
          python3 << 'EOF'
          import sys
          import os
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.adaptors import get_adaptor

          adaptor_name = os.environ['ADAPTOR_NAME']
          adaptor = get_adaptor(adaptor_name)
          package_path = adaptor.package(Path('test_skill'), Path('.'))
          print(f"✅ Package created: {package_path}")

          # Verify package exists
          assert package_path.exists(), "Package file not created"
          print(f"📦 Package size: {package_path.stat().st_size} bytes")
          EOF

      - name: Upload test package
        uses: actions/upload-artifact@v3
        with:
          name: test-package-${{ env.ADAPTOR_NAME }}-py${{ env.PYTHON_VERSION }}
          # NOTE(review): assumes adaptor.package() names its output
          # test_skill-<adaptor>.json in the CWD — confirm against the adaptors.
          path: test_skill-${{ env.ADAPTOR_NAME }}.json
          retention-days: 7

  test-mcp-tools:
    name: Test MCP Vector DB Tools
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run MCP vector DB tests
        run: |
          echo "🧪 Testing MCP vector database tools..."
          python -m pytest tests/test_mcp_vector_dbs.py -v --tb=short

  test-week2-integration:
    name: Week 2 Features Integration Test
    runs-on: ubuntu-latest
    needs: [test-adaptors, test-mcp-tools]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Run Week 2 validation script
        run: |
          echo "🎯 Running Week 2 feature validation..."
          python test_week2_features.py

      - name: Create test summary
        run: |
          echo "## 🧪 Vector Database Testing Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Adaptor Tests" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Weaviate adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Chroma adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ FAISS adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ Qdrant adaptor - All tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### MCP Tools" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ 8/8 MCP vector DB tests passed" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "### Week 2 Integration" >> "$GITHUB_STEP_SUMMARY"
          echo "✅ 6/6 feature tests passed" >> "$GITHUB_STEP_SUMMARY"
198
.github/workflows/vector-db-export.yml
vendored
Normal file
198
.github/workflows/vector-db-export.yml
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
---
# Vector Database Export - packages a scraped skill for each vector DB target.
# Security Note: workflow_dispatch inputs are routed through env: variables; the
# matrix interpolation of skill_name below assumes trusted (collaborator) input.

name: Vector Database Export

on:
  workflow_dispatch:
    inputs:
      skill_name:
        description: 'Skill name to export (e.g., react, django, godot)'
        required: true
        type: string
      targets:
        description: 'Vector databases to export (comma-separated: weaviate,chroma,faiss,qdrant or "all")'
        required: true
        default: 'all'
        type: string
      config_path:
        description: 'Path to config file (optional, auto-detected from skill_name if not provided)'
        required: false
        type: string
  schedule:
    # Run weekly on Sunday at 2 AM UTC for popular frameworks
    - cron: '0 2 * * 0'

jobs:
  export:
    name: Export to Vector Databases
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # For scheduled runs, export popular frameworks
        skill: ${{ github.event_name == 'schedule' && fromJson('["react", "django", "godot", "fastapi"]') || fromJson(format('["{0}"]', github.event.inputs.skill_name)) }}

    env:
      SKILL_NAME: ${{ matrix.skill }}
      TARGETS_INPUT: ${{ github.event.inputs.targets }}
      CONFIG_PATH_INPUT: ${{ github.event.inputs.config_path }}

    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .

      - name: Determine config path
        id: config
        run: |
          if [ -n "$CONFIG_PATH_INPUT" ]; then
            echo "path=$CONFIG_PATH_INPUT" >> "$GITHUB_OUTPUT"
          else
            echo "path=configs/$SKILL_NAME.json" >> "$GITHUB_OUTPUT"
          fi

      - name: Check if config exists
        id: check_config
        run: |
          CONFIG_FILE="${{ steps.config.outputs.path }}"
          if [ -f "$CONFIG_FILE" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "⚠️ Config not found: $CONFIG_FILE"
          fi

      - name: Scrape documentation
        if: steps.check_config.outputs.exists == 'true'
        run: |
          echo "📥 Scraping documentation for $SKILL_NAME..."
          skill-seekers scrape --config "${{ steps.config.outputs.path }}" --max-pages 100
        continue-on-error: true

      - name: Determine export targets
        id: targets
        run: |
          TARGETS="${TARGETS_INPUT:-all}"
          if [ "$TARGETS" = "all" ]; then
            echo "list=weaviate chroma faiss qdrant" >> "$GITHUB_OUTPUT"
          else
            echo "list=$(echo "$TARGETS" | tr ',' ' ')" >> "$GITHUB_OUTPUT"
          fi

      - name: Export to vector databases
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          SKILL_DIR="output/$SKILL_NAME"

          if [ ! -d "$SKILL_DIR" ]; then
            echo "❌ Skill directory not found: $SKILL_DIR"
            exit 1
          fi

          echo "📦 Exporting $SKILL_NAME to vector databases..."

          for target in $EXPORT_TARGETS; do
            echo ""
            echo "🔹 Exporting to $target..."

            # Values are passed as argv (`python3 - args << 'EOF'`) rather than
            # interpolated into the Python source; this avoids quoting/injection
            # issues, and the `if` keeps a single failed target from aborting
            # the loop under the runner's default `bash -e`.
            if python3 - "$target" "$SKILL_DIR" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.adaptors import get_adaptor

          target, skill_dir = sys.argv[1], sys.argv[2]
          adaptor = get_adaptor(target)
          package_path = adaptor.package(Path(skill_dir), Path('output'))
          print(f'✅ Exported to {package_path}')
          EOF
            then
              echo "✅ $target export complete"
            else
              echo "❌ $target export failed"
            fi
          done

      - name: Generate quality report
        if: steps.check_config.outputs.exists == 'true'
        run: |
          SKILL_DIR="output/$SKILL_NAME"

          if [ -d "$SKILL_DIR" ]; then
            echo "📊 Generating quality metrics..."

            python3 - "$SKILL_DIR" "$SKILL_NAME" << 'EOF'
          import sys
          from pathlib import Path
          sys.path.insert(0, 'src')

          from skill_seekers.cli.quality_metrics import QualityAnalyzer

          skill_dir, skill_name = sys.argv[1], sys.argv[2]
          analyzer = QualityAnalyzer(Path(skill_dir))
          report = analyzer.generate_report()
          formatted = analyzer.format_report(report)
          print(formatted)

          # Save to file
          with open(f'quality_report_{skill_name}.txt', 'w') as f:
              f.write(formatted)
          EOF
          fi
        continue-on-error: true

      - name: Upload vector database exports
        if: steps.check_config.outputs.exists == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.SKILL_NAME }}-vector-exports
          path: |
            output/${{ env.SKILL_NAME }}-*.json
          retention-days: 30

      - name: Upload quality report
        if: steps.check_config.outputs.exists == 'true'
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.SKILL_NAME }}-quality-report
          path: quality_report_${{ env.SKILL_NAME }}.txt
          retention-days: 30
        continue-on-error: true

      - name: Create export summary
        if: steps.check_config.outputs.exists == 'true'
        env:
          EXPORT_TARGETS: ${{ steps.targets.outputs.list }}
        run: |
          echo "## 📦 Vector Database Export Summary: $SKILL_NAME" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"

          for target in $EXPORT_TARGETS; do
            FILE="output/${SKILL_NAME}-${target}.json"
            if [ -f "$FILE" ]; then
              SIZE=$(du -h "$FILE" | cut -f1)
              echo "✅ **$target**: $SIZE" >> "$GITHUB_STEP_SUMMARY"
            else
              echo "❌ **$target**: Export failed" >> "$GITHUB_STEP_SUMMARY"
            fi
          done

          echo "" >> "$GITHUB_STEP_SUMMARY"

          if [ -f "quality_report_${SKILL_NAME}.txt" ]; then
            echo "### 📊 Quality Metrics" >> "$GITHUB_STEP_SUMMARY"
            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
            head -30 "quality_report_${SKILL_NAME}.txt" >> "$GITHUB_STEP_SUMMARY"
            echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
          fi
Reference in New Issue
Block a user