- Add eval/ directory with 10 pilot skill eval configs
- Add GitHub Action (skill-eval.yml) for automated eval on PR
- Add generate-eval-config.py script for bootstrapping new evals
- Add reusable assertion helpers (skill-quality.js)
- Add eval README with setup and usage docs

Skills covered: copywriting, cto-advisor, seo-audit, content-strategy, aws-solution-architect, agile-product-owner, senior-frontend, senior-security, mcp-server-builder, launch-strategy

CI integration:
- Triggers on PR to dev when SKILL.md files change
- Detects which skills changed and runs only those evals
- Posts results as PR comments (non-blocking)
- Uploads full results as artifacts

No existing files modified.
33 lines
856 B
YAML
# Promptfoo Master Config — claude-skills
# Run all pilot skill evals: npx promptfoo@latest eval -c eval/promptfooconfig.yaml
# Run a single skill: npx promptfoo@latest eval -c eval/skills/copywriting.yaml

description: "claude-skills quality evaluation — pilot batch"

prompts:
  - |
    You are an expert AI assistant. You have the following skill loaded that guides your behavior:

    ---BEGIN SKILL---
    {{skill_content}}
    ---END SKILL---

    Now complete this task:

    {{task}}

providers:
  - id: anthropic:messages:claude-sonnet-4-6
    config:
      max_tokens: 4096
      temperature: 0.7

defaultTest:
  assert:
    - type: javascript
      value: "output.length > 200"
    - type: llm-rubric
      value: "The response demonstrates domain expertise relevant to the task, not generic advice"

# Import per-skill test suites
tests: []
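The empty `tests: []` is a placeholder for the per-skill suites the comment refers to. Promptfoo can import external test files by reference, so one plausible way to wire them in (the file names below are assumptions based on the single-skill example in the header comments) is:

```yaml
# Hypothetical sketch — assumes per-skill suites live under eval/skills/
tests:
  - file://skills/copywriting.yaml
  - file://skills/seo-audit.yaml
```

This keeps the master config stable while each skill's test cases evolve independently, which matches the CI behavior of running only the evals whose skills changed.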