revert: remove promptfoo eval pipeline
Switching to native skill-creator eval workflow instead. No external API key dependency needed. Removes: eval/ directory, skill-eval.yml workflow. No other files affected.
This commit is contained in: (branch/tag list not captured)

Changed file: .github/workflows/skill-eval.yml (vendored) — 235 lines removed
@@ -1,235 +0,0 @@
|
||||
---
|
||||
name: Skill Quality Eval (promptfoo)
|
||||
|
||||
'on':
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- '**/SKILL.md'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
skill:
|
||||
description: 'Specific skill eval config to run (e.g. copywriting)'
|
||||
required: false
|
||||
|
||||
concurrency:
|
||||
group: skill-eval-${{ github.event.pull_request.number || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
detect-changes:
|
||||
name: Detect changed skills
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
skills: ${{ steps.find-evals.outputs.skills }}
|
||||
has_evals: ${{ steps.find-evals.outputs.has_evals }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Find eval configs for changed skills
|
||||
id: find-evals
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.skill }}" ]]; then
|
||||
SKILL="${{ github.event.inputs.skill }}"
|
||||
if [[ -f "eval/skills/${SKILL}.yaml" ]]; then
|
||||
echo "skills=[\"${SKILL}\"]" >> "$GITHUB_OUTPUT"
|
||||
echo "has_evals=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "No eval config found for: ${SKILL}"
|
||||
echo "has_evals=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Get changed SKILL.md files in this PR
|
||||
CHANGED=$(git diff --name-only origin/${{ github.base_ref }}...HEAD -- '**/SKILL.md' | grep -v '.gemini/' | grep -v '.codex/' | grep -v 'sample')
|
||||
|
||||
if [[ -z "$CHANGED" ]]; then
|
||||
echo "No SKILL.md files changed."
|
||||
echo "has_evals=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Changed SKILL.md files:"
|
||||
echo "$CHANGED"
|
||||
|
||||
# Map changed skills to eval configs
|
||||
EVALS="[]"
|
||||
for skill_path in $CHANGED; do
|
||||
# Extract skill name from path (e.g. marketing-skill/copywriting/SKILL.md -> copywriting)
|
||||
skill_name=$(basename $(dirname "$skill_path"))
|
||||
eval_config="eval/skills/${skill_name}.yaml"
|
||||
|
||||
if [[ -f "$eval_config" ]]; then
|
||||
EVALS=$(echo "$EVALS" | python3 -c "
|
||||
import json, sys
|
||||
arr = json.load(sys.stdin)
|
||||
name = '$skill_name'
|
||||
if name not in arr:
|
||||
arr.append(name)
|
||||
print(json.dumps(arr))
|
||||
")
|
||||
echo " ✅ $skill_name → $eval_config"
|
||||
else
|
||||
echo " ⏭️ $skill_name → no eval config (skipping)"
|
||||
fi
|
||||
done
|
||||
|
||||
echo "skills=$EVALS" >> "$GITHUB_OUTPUT"
|
||||
if [[ "$EVALS" == "[]" ]]; then
|
||||
echo "has_evals=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "has_evals=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
eval:
|
||||
name: "Eval: ${{ matrix.skill }}"
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.has_evals == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
timeout-minutes: 15
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
skill: ${{ fromJson(needs.detect-changes.outputs.skills) }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Run promptfoo eval
|
||||
id: eval
|
||||
continue-on-error: true
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
run: |
|
||||
npx promptfoo@latest eval \
|
||||
-c "eval/skills/${{ matrix.skill }}.yaml" \
|
||||
--no-cache \
|
||||
--output "/tmp/${{ matrix.skill }}-results.json" \
|
||||
--output-format json \
|
||||
2>&1 | tee /tmp/eval-output.log
|
||||
|
||||
echo "exit_code=$?" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Parse results
|
||||
id: parse
|
||||
if: always()
|
||||
run: |
|
||||
RESULTS_FILE="/tmp/${{ matrix.skill }}-results.json"
|
||||
if [[ ! -f "$RESULTS_FILE" ]]; then
|
||||
echo "summary=⚠️ No results file generated" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
python3 << 'PYEOF'
|
||||
import json, os
|
||||
|
||||
with open(os.environ.get("RESULTS_FILE", f"/tmp/${{ matrix.skill }}-results.json")) as f:
|
||||
data = json.load(f)
|
||||
|
||||
results = data.get("results", data.get("evalResults", []))
|
||||
total = len(results)
|
||||
passed = 0
|
||||
failed = 0
|
||||
details = []
|
||||
|
||||
for r in results:
|
||||
test_pass = r.get("success", False)
|
||||
if test_pass:
|
||||
passed += 1
|
||||
else:
|
||||
failed += 1
|
||||
|
||||
prompt_vars = r.get("vars", {})
|
||||
task = prompt_vars.get("task", "unknown")[:80]
|
||||
|
||||
assertions = r.get("gradingResult", {}).get("componentResults", [])
|
||||
for a in assertions:
|
||||
status = "✅" if a.get("pass", False) else "❌"
|
||||
reason = a.get("reason", a.get("assertion", {}).get("value", ""))[:100]
|
||||
details.append(f" {status} {reason}")
|
||||
|
||||
rate = (passed / total * 100) if total > 0 else 0
|
||||
icon = "✅" if rate >= 80 else "⚠️" if rate >= 50 else "❌"
|
||||
|
||||
summary = f"{icon} **${{ matrix.skill }}**: {passed}/{total} tests passed ({rate:.0f}%)"
|
||||
|
||||
# Write to file for comment step
|
||||
with open("/tmp/eval-summary.md", "w") as f:
|
||||
f.write(f"### {summary}\n\n")
|
||||
if details:
|
||||
f.write("<details><summary>Assertion details</summary>\n\n")
|
||||
f.write("\n".join(details))
|
||||
f.write("\n\n</details>\n")
|
||||
|
||||
# Output for workflow
|
||||
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
|
||||
f.write(f"summary={summary}\n")
|
||||
f.write(f"pass_rate={rate:.0f}\n")
|
||||
PYEOF
|
||||
|
||||
env:
|
||||
RESULTS_FILE: "/tmp/${{ matrix.skill }}-results.json"
|
||||
|
||||
- name: Comment on PR
|
||||
if: github.event_name == 'pull_request' && always()
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
let body = '### 🧪 Skill Eval: `${{ matrix.skill }}`\n\n';
|
||||
|
||||
try {
|
||||
const summary = fs.readFileSync('/tmp/eval-summary.md', 'utf8');
|
||||
body += summary;
|
||||
} catch {
|
||||
body += '⚠️ Eval did not produce results. Check the workflow logs.\n';
|
||||
}
|
||||
|
||||
body += '\n\n---\n*Powered by [promptfoo](https://promptfoo.dev) · [eval config](eval/skills/${{ matrix.skill }}.yaml)*';
|
||||
|
||||
// Find existing comment to update
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
|
||||
const marker = `Skill Eval: \`${{ matrix.skill }}\``;
|
||||
const existing = comments.find(c => c.body.includes(marker));
|
||||
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body,
|
||||
});
|
||||
}
|
||||
|
||||
- name: Upload results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: eval-results-${{ matrix.skill }}
|
||||
path: /tmp/${{ matrix.skill }}-results.json
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
Reference in New Issue
Block a user