Files
claude-skills-reference/engineering-team/incident-commander/assets/sample_timeline_events.json
Leo daace78954 feat: Add comprehensive incident-commander skill
- Add SKILL.md with 300+ lines of incident response playbook
- Implement incident_classifier.py: severity classification and response recommendations
- Implement timeline_reconstructor.py: event timeline reconstruction with phase analysis
- Implement pir_generator.py: comprehensive PIR generation with multiple RCA frameworks
- Add reference documentation: severity matrix, RCA frameworks, communication templates
- Add sample data files and expected outputs for testing
- All scripts are standalone with zero external dependencies
- Dual output formats: JSON + human-readable text
- Professional, opinionated defaults based on SRE best practices

This POWERFUL-tier skill provides end-to-end incident response capabilities from
detection through post-incident review.
2026-02-16 12:43:38 +00:00

263 lines
7.6 KiB
JSON

[
{
"timestamp": "2024-03-15T14:30:00Z",
"source": "datadog",
"type": "alert",
"message": "High error rate detected on payment-api: 45% error rate (threshold: 5%)",
"severity": "critical",
"actor": "monitoring-system",
"metadata": {
"alert_id": "ALT-001",
"metric_value": "45%",
"threshold": "5%"
}
},
{
"timestamp": "2024-03-15T14:32:00Z",
"source": "pagerduty",
"type": "escalation",
"message": "Paged on-call engineer Sarah Chen for payment-api alerts",
"severity": "high",
"actor": "pagerduty-system",
"metadata": {
"incident_id": "PD-12345",
"responder": "sarah.chen@company.com"
}
},
{
"timestamp": "2024-03-15T14:35:00Z",
"source": "slack",
"type": "communication",
"message": "Sarah Chen acknowledged the alert and is investigating payment-api issues",
"severity": "medium",
"actor": "sarah.chen",
"metadata": {
"channel": "#incidents",
"message_id": "1234567890.123456"
}
},
{
"timestamp": "2024-03-15T14:38:00Z",
"source": "application_logs",
"type": "log",
"message": "Database connection pool exhausted: 200/200 connections active, unable to acquire new connections",
"severity": "critical",
"actor": "payment-api",
"metadata": {
"log_level": "ERROR",
"component": "database_pool",
"connection_count": 200,
"max_connections": 200
}
},
{
"timestamp": "2024-03-15T14:40:00Z",
"source": "slack",
"type": "escalation",
"message": "Sarah Chen: Escalating to incident commander - database connection pool exhausted, need database team",
"severity": "high",
"actor": "sarah.chen",
"metadata": {
"channel": "#incidents",
"escalation_reason": "database_expertise_needed"
}
},
{
"timestamp": "2024-03-15T14:42:00Z",
"source": "pagerduty",
"type": "escalation",
"message": "Incident commander Mike Rodriguez assigned to incident PD-12345",
"severity": "high",
"actor": "pagerduty-system",
"metadata": {
"incident_commander": "mike.rodriguez@company.com",
"role": "incident_commander"
}
},
{
"timestamp": "2024-03-15T14:45:00Z",
"source": "slack",
"type": "communication",
"message": "Mike Rodriguez: War room established in #war-room-payment-api. Engaging database team.",
"severity": "high",
"actor": "mike.rodriguez",
"metadata": {
"channel": "#incidents",
"war_room": "#war-room-payment-api"
}
},
{
"timestamp": "2024-03-15T14:47:00Z",
"source": "pagerduty",
"type": "escalation",
"message": "Database team engineers paged: Tom Wilson, Lisa Park",
"severity": "medium",
"actor": "pagerduty-system",
"metadata": {
"team": "database-team",
"responders": ["tom.wilson@company.com", "lisa.park@company.com"]
}
},
{
"timestamp": "2024-03-15T14:50:00Z",
"source": "statuspage",
"type": "communication",
"message": "Status page updated: Investigating payment processing issues",
"severity": "medium",
"actor": "mike.rodriguez",
"metadata": {
"status": "investigating",
"affected_systems": ["payment-api"]
}
},
{
"timestamp": "2024-03-15T14:52:00Z",
"source": "slack",
"type": "communication",
"message": "Tom Wilson: Joining war room. Looking at database metrics now. Seeing unusual query patterns from recent deployment.",
"severity": "medium",
"actor": "tom.wilson",
"metadata": {
"channel": "#war-room-payment-api",
"investigation_focus": "database_metrics"
}
},
{
"timestamp": "2024-03-15T14:55:00Z",
"source": "database_monitoring",
"type": "log",
"message": "Identified slow query introduced in deployment v2.3.1: payment validation taking 15s per request",
"severity": "critical",
"actor": "database-monitor",
"metadata": {
"deployment_version": "v2.3.1",
"query_time": "15s",
"normal_query_time": "0.1s"
}
},
{
"timestamp": "2024-03-15T15:00:00Z",
"source": "slack",
"type": "communication",
"message": "Tom Wilson: Root cause identified - inefficient query in v2.3.1 deployment. Recommending immediate rollback.",
"severity": "high",
"actor": "tom.wilson",
"metadata": {
"channel": "#war-room-payment-api",
"root_cause": "inefficient_query",
"recommendation": "rollback"
}
},
{
"timestamp": "2024-03-15T15:02:00Z",
"source": "slack",
"type": "communication",
"message": "Mike Rodriguez: Approved rollback to v2.2.9. Sarah initiating rollback procedure.",
"severity": "high",
"actor": "mike.rodriguez",
"metadata": {
"channel": "#war-room-payment-api",
"decision": "rollback_approved",
"target_version": "v2.2.9"
}
},
{
"timestamp": "2024-03-15T15:05:00Z",
"source": "deployment_system",
"type": "action",
"message": "Rollback initiated: payment-api v2.3.1 → v2.2.9",
"severity": "medium",
"actor": "sarah.chen",
"metadata": {
"from_version": "v2.3.1",
"to_version": "v2.2.9",
"deployment_type": "rollback"
}
},
{
"timestamp": "2024-03-15T15:12:00Z",
"source": "deployment_system",
"type": "action",
"message": "Rollback completed successfully: payment-api now running v2.2.9 across all regions",
"severity": "medium",
"actor": "deployment-system",
"metadata": {
"deployment_status": "completed",
"regions": ["us-west", "us-east", "eu-west"]
}
},
{
"timestamp": "2024-03-15T15:15:00Z",
"source": "datadog",
"type": "log",
"message": "Error rate decreasing: payment-api error rate dropped to 8% and continuing to decline",
"severity": "medium",
"actor": "monitoring-system",
"metadata": {
"error_rate": "8%",
"trend": "decreasing"
}
},
{
"timestamp": "2024-03-15T15:18:00Z",
"source": "database_monitoring",
"type": "log",
"message": "Connection pool utilization normalizing: 45/200 connections active",
"severity": "low",
"actor": "database-monitor",
"metadata": {
"connection_count": 45,
"max_connections": 200,
"utilization": "22.5%"
}
},
{
"timestamp": "2024-03-15T15:25:00Z",
"source": "datadog",
"type": "log",
"message": "Error rate returned to normal: payment-api error rate now 0.2% (within normal range)",
"severity": "low",
"actor": "monitoring-system",
"metadata": {
"error_rate": "0.2%",
"status": "normal"
}
},
{
"timestamp": "2024-03-15T15:30:00Z",
"source": "slack",
"type": "communication",
"message": "Mike Rodriguez: All metrics returned to normal. Declaring incident resolved. Thanks to all responders.",
"severity": "low",
"actor": "mike.rodriguez",
"metadata": {
"channel": "#war-room-payment-api",
"status": "resolved"
}
},
{
"timestamp": "2024-03-15T15:35:00Z",
"source": "statuspage",
"type": "communication",
"message": "Status page updated: Payment processing issues resolved. All systems operational.",
"severity": "low",
"actor": "mike.rodriguez",
"metadata": {
"status": "resolved",
"duration": "65 minutes"
}
},
{
"timestamp": "2024-03-15T15:40:00Z",
"source": "slack",
"type": "communication",
"message": "Mike Rodriguez: PIR scheduled for tomorrow 10am. Action item: fix the inefficient query in v2.3.2",
"severity": "low",
"actor": "mike.rodriguez",
"metadata": {
"channel": "#incidents",
"pir_time": "2024-03-16T10:00:00Z",
"action_item": "fix_query_v2.3.2"
}
}
]