Files

129 lines
2.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""Generate an operational runbook skeleton for a service."""
from __future__ import annotations
import argparse
from datetime import date
from pathlib import Path
def build_runbook(service: str, owner: str, environment: str) -> str:
    """Render the markdown runbook skeleton for one service.

    Args:
        service: Service name; used in the title, the metadata list and the
            example shell commands.
        owner: Owner label; shown in the metadata list and as the L2
            escalation contact.
        environment: Environment name shown in the metadata list.

    Returns:
        The runbook as markdown text, stamped with today's date in ISO
        format as the "Last verified" value.
    """
    verified_on = date.today().isoformat()
    return f"""# Runbook - {service}
- Service: {service}
- Owner: {owner}
- Environment: {environment}
- Last verified: {verified_on}
## Overview
Describe the service purpose, dependencies, and critical user impact.
## Preconditions
- Access to deployment platform
- Access to logs/metrics
- Access to secret/config manager
## Start Procedure
1. Pull latest config/secrets.
2. Start service process.
3. Confirm process is healthy.
```bash
# Example
# systemctl start {service}
```
## Stop Procedure
1. Drain traffic if applicable.
2. Stop service process.
3. Confirm no active workers remain.
```bash
# Example
# systemctl stop {service}
```
## Health Checks
- HTTP health endpoint
- Dependency connectivity checks
- Error-rate and latency checks
```bash
# Example
# curl -sf https://{service}.example.com/health
```
## Deployment Checklist
1. Verify CI status and artifact integrity.
2. Apply migrations (if required) in safe order.
3. Deploy service revision.
4. Run smoke checks.
5. Observe metrics for 10-15 minutes.
## Rollback
1. Identify last known good release.
2. Re-deploy previous version.
3. Re-run health checks.
4. Communicate rollback status to stakeholders.
```bash
# Example
# deployctl rollback --service {service}
```
## Incident Response
1. Classify severity.
2. Contain user impact.
3. Triage likely failing component.
4. Escalate if SLA risk is high.
## Escalation
- L1: On-call engineer
- L2: Service owner ({owner})
- L3: Platform/Engineering leadership
## Post-Incident
1. Write timeline and root cause.
2. Define corrective actions with owners.
3. Update this runbook with missing steps.
"""
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Generate a markdown runbook skeleton.")
parser.add_argument("service", help="Service name")
parser.add_argument("--owner", default="platform-team", help="Service owner label")
parser.add_argument("--environment", default="production", help="Primary environment")
parser.add_argument("--output", help="Optional output path (prints to stdout if omitted)")
return parser.parse_args()
def main() -> int:
    """Generate the runbook and emit it to a file or to stdout.

    Returns:
        Process exit status; 0 once the runbook has been emitted.
    """
    args = parse_args()
    markdown = build_runbook(args.service, owner=args.owner, environment=args.environment)
    if args.output:
        path = Path(args.output)
        # Create missing parent directories so a nested --output path works.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(markdown, encoding="utf-8")
        print(f"Wrote runbook skeleton to {path}")
    else:
        # The runbook text already ends with a newline; suppress print()'s
        # own so stdout carries the same text that --output would write.
        print(markdown, end="")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)