# claude-skills-reference/eval/skills/senior-security.yaml

# Eval: senior-security
# Source: engineering-team/senior-security/SKILL.md

# Human-readable label for this eval suite.
description: 'Evaluate senior security engineer skill'

# Single prompt template shared by every test case below.
# The {{skill_content}} and {{task}} placeholders correspond to the keys each
# test supplies under `vars`. The text inside the literal block (`|`) is sent
# as-is, so do not add comments inside it: they would become part of the prompt.
prompts:
  - |
    You are an expert AI assistant. You have the following skill loaded:

    ---BEGIN SKILL---
    {{skill_content}}
    ---END SKILL---

    Now complete this task: {{task}}

# Model under evaluation and its sampling settings.
providers:
  - id: anthropic:messages:claude-sonnet-4-6
    config:
      # Non-zero temperature: outputs can vary from run to run.
      temperature: 0.7
      # Upper bound on the length of each generated response.
      max_tokens: 4096

# Test cases. Each one supplies the skill file and a task for the prompt
# template, then grades the model's answer against llm-rubric assertions.
tests:
  # Case 1: review an Express.js handler that interpolates request input
  # directly into a SQL string.
  - vars:
      skill_content: file://../../engineering-team/senior-security/SKILL.md
      # Folded scalar (`>-`): the lines below are joined with single spaces
      # and the trailing newline is stripped, yielding one single-line string.
      task: >-
        Perform a security review of this Express.js API endpoint pattern:
        app.post('/api/users', (req, res) => {
        const query = `SELECT * FROM users WHERE email = '${req.body.email}'`;
        db.query(query).then(user => res.json(user)); })
    assert:
      - type: llm-rubric
        value: "Response identifies SQL injection vulnerability as the primary critical issue"
      - type: llm-rubric
        value: "Response provides a fixed code example using parameterized queries"
      - type: llm-rubric
        value: "Response identifies additional issues beyond SQL injection (input validation, error handling, etc.)"

  # Case 2: production hardening checklist for a stack handling PII and
  # payment data.
  - vars:
      skill_content: file://../../engineering-team/senior-security/SKILL.md
      task: >-
        Create a security hardening checklist for a new Node.js API going to
        production. We handle user PII and payment data. Stack: Express,
        PostgreSQL, Redis, deployed on AWS ECS.
    assert:
      - type: llm-rubric
        value: "Checklist covers OWASP Top 10 categories relevant to the stack"
      - type: llm-rubric
        value: "Response includes PII and payment-specific requirements (encryption at rest, PCI considerations)"