# claude-skills-reference/engineering/runbook-generator/scripts/runbook_generator.py

#!/usr/bin/env python3
"""Generate an operational runbook skeleton for a service."""

from __future__ import annotations

import argparse
from datetime import date
from pathlib import Path


def build_runbook(service: str, owner: str, environment: str) -> str:
    """Render the markdown runbook skeleton for one service.

    The document is assembled from blocks (title, bullet lists, headings,
    numbered steps, commented-out bash examples) that are separated by a
    single blank line, and it ends with exactly one trailing newline.

    Args:
        service: Service name; appears in the title, the metadata list, and
            the example commands.
        owner: Owner label; appears in the metadata list and in the L2
            escalation entry.
        environment: Environment name shown in the metadata list.

    Returns:
        The complete runbook as a single markdown string. The
        "Last verified" entry carries today's date in ISO format.
    """

    def heading(title: str) -> str:
        return f"## {title}"

    def bullets(*entries: str) -> str:
        return "\n".join(f"- {entry}" for entry in entries)

    def steps(*actions: str) -> str:
        return "\n".join(
            f"{number}. {action}" for number, action in enumerate(actions, start=1)
        )

    def bash_example(command: str) -> str:
        # The command is emitted as a shell comment inside the fenced block.
        return "\n".join(("```bash", "# Example", f"# {command}", "```"))

    verified_on = date.today().isoformat()

    blocks = (
        f"# Runbook - {service}",
        bullets(
            f"Service: {service}",
            f"Owner: {owner}",
            f"Environment: {environment}",
            f"Last verified: {verified_on}",
        ),
        heading("Overview"),
        "Describe the service purpose, dependencies, and critical user impact.",
        heading("Preconditions"),
        bullets(
            "Access to deployment platform",
            "Access to logs/metrics",
            "Access to secret/config manager",
        ),
        heading("Start Procedure"),
        steps(
            "Pull latest config/secrets.",
            "Start service process.",
            "Confirm process is healthy.",
        ),
        bash_example(f"systemctl start {service}"),
        heading("Stop Procedure"),
        steps(
            "Drain traffic if applicable.",
            "Stop service process.",
            "Confirm no active workers remain.",
        ),
        bash_example(f"systemctl stop {service}"),
        heading("Health Checks"),
        bullets(
            "HTTP health endpoint",
            "Dependency connectivity checks",
            "Error-rate and latency checks",
        ),
        bash_example(f"curl -sf https://{service}.example.com/health"),
        heading("Deployment Checklist"),
        steps(
            "Verify CI status and artifact integrity.",
            "Apply migrations (if required) in safe order.",
            "Deploy service revision.",
            "Run smoke checks.",
            "Observe metrics for 10-15 minutes.",
        ),
        heading("Rollback"),
        steps(
            "Identify last known good release.",
            "Re-deploy previous version.",
            "Re-run health checks.",
            "Communicate rollback status to stakeholders.",
        ),
        bash_example(f"deployctl rollback --service {service}"),
        heading("Incident Response"),
        steps(
            "Classify severity.",
            "Contain user impact.",
            "Triage likely failing component.",
            "Escalate if SLA risk is high.",
        ),
        heading("Escalation"),
        bullets(
            "L1: On-call engineer",
            f"L2: Service owner ({owner})",
            "L3: Platform/Engineering leadership",
        ),
        heading("Post-Incident"),
        steps(
            "Write timeline and root cause.",
            "Define corrective actions with owners.",
            "Update this runbook with missing steps.",
        ),
    )
    # Blocks are separated by one blank line; the document ends with a newline.
    return "\n\n".join(blocks) + "\n"


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Generate a markdown runbook skeleton.")
    parser.add_argument("service", help="Service name")
    parser.add_argument("--owner", default="platform-team", help="Service owner label")
    parser.add_argument("--environment", default="production", help="Primary environment")
    parser.add_argument("--output", help="Optional output path (prints to stdout if omitted)")
    return parser.parse_args()


def main() -> int:
    """Run the CLI: render the runbook and emit it to a file or stdout.

    With ``--output`` the runbook is written to that path as UTF-8, creating
    any missing parent directories, and a confirmation line is printed.
    Without it the runbook itself is printed to stdout.

    Returns:
        0, used by the caller as the process exit status.
    """
    args = parse_args()
    markdown = build_runbook(args.service, owner=args.owner, environment=args.environment)

    if args.output:
        path = Path(args.output)
        # Create missing parent directories so nested output paths work.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(markdown, encoding="utf-8")
        print(f"Wrote runbook skeleton to {path}")
    else:
        # The rendered text already ends with a newline; suppress print's own
        # so stdout does not gain an extra trailing blank line compared with
        # the text written by --output.
        print(markdown, end="")
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())