# claude-skills-reference/eval/skills/cto-advisor.yaml

# Eval: cto-advisor
# Source: c-level-advisor/cto-advisor/SKILL.md
# Run: npx promptfoo@latest eval -c eval/skills/cto-advisor.yaml

# Human-readable label for this evaluation suite.
description: 'Evaluate CTO advisor skill — technical leadership guidance'

# Single prompt template used for the test cases below. `{{skill_content}}` and
# `{{task}}` are placeholders filled in from each test's `vars`.
prompts:
  # Literal block scalar (`|`): line breaks inside the prompt are kept as written.
  - |
    You are an expert AI assistant. You have the following skill loaded:

    ---BEGIN SKILL---
    {{skill_content}}
    ---END SKILL---

    Now complete this task: {{task}}

# Model under evaluation, together with its generation settings.
providers:
  - id: 'anthropic:messages:claude-sonnet-4-6'
    config:
      # NOTE(review): a non-zero temperature means sampled output; rubric
      # results may differ between runs.
      temperature: 0.7
      # Upper bound on tokens generated per response.
      max_tokens: 4096

# Variables shared by every test case. The skill file is declared once here
# instead of being repeated in each test; a test can still override it by
# setting `skill_content` in its own `vars`.
defaultTest:
  vars:
    # Relative to this config file's directory; points at the SKILL.md named
    # in the header comment.
    skill_content: file://../../c-level-advisor/cto-advisor/SKILL.md

# Each test supplies one CTO-level scenario as `task` and grades the model's
# answer with model-graded rubrics (`llm-rubric`).
tests:
  - description: "Architecture decision: monolith vs. microservices"
    vars:
      task: "We're a 15-person startup with a monolithic Django app serving 50K users. Response times are growing. Should we move to microservices or optimize the monolith? We have 4 backend engineers."
    assert:
      - type: llm-rubric
        value: "Response provides a clear recommendation with reasoning, not just listing pros and cons"
      - type: llm-rubric
        value: "Response considers team size (4 engineers) as a factor in the architecture decision"
      - type: llm-rubric
        value: "Response includes concrete next steps or an action plan"

  - description: "Making the tech debt case to the board"
    vars:
      task: "Our tech debt is slowing us down. Engineering velocity dropped 30% over 6 months. The CEO wants new features but we can barely maintain what we have. How do I make the case for a tech debt sprint to the board?"
    assert:
      - type: llm-rubric
        value: "Response frames tech debt in business terms the board would understand, not just technical jargon"
      - type: llm-rubric
        value: "Response includes a strategy for balancing tech debt work with feature delivery"
      - type: llm-rubric
        value: "Response provides specific metrics or frameworks to measure tech debt impact"

  - description: "Hiring a first VP of Engineering as a technical founder"
    vars:
      task: "I'm hiring my first VP of Engineering. I'm a technical founder who has been CTO and lead dev. What should I look for, and how do I avoid hiring someone who will clash with me?"
    assert:
      - type: llm-rubric
        value: "Response addresses the founder-VP dynamic specifically, not generic hiring advice"
      - type: llm-rubric
        value: "Response includes qualities to look for and red flags to watch for"