---
# Prometheus alerting rules for the "taskdaemon" rule group.
#
# Each rule fires once its `expr` has been continuously true for the `for`
# duration. `labels.severity` is attached to the resulting alert and
# `annotations.summary` carries the human-readable text.
groups:
  - name: taskdaemon
    rules:
      # Queue backlog: taskdaemon_queue_size has stayed above 1000 for 5m.
      - alert: HighQueueDepth
        expr: taskdaemon_queue_size > 1000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "TaskDaemon queue depth is high"

      # Failure ratio: per-second rate of failed tasks divided by the
      # per-second rate of queued tasks (both over a 5m window) is above 0.1.
      # NOTE(review): the division only pairs series whose label sets are
      # identical on both sides -- confirm both counters carry the same labels,
      # otherwise this expression yields no result and never fires.
      - alert: HighErrorRate
        expr: >-
          rate(taskdaemon_tasks_failed_total[5m])
          / rate(taskdaemon_tasks_queued_total[5m])
          > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "TaskDaemon error rate above 10%"

      # Latency: 0.95 quantile of the task-duration histogram (bucket rates
      # over 5m) is above 30. The `_seconds` metric name and the summary text
      # indicate the unit is seconds.
      # NOTE(review): there is no `sum by (le)` aggregation, so the quantile is
      # evaluated separately for every label set -- confirm that is intended.
      - alert: SlowTasks
        expr: >-
          histogram_quantile(0.95,
          rate(taskdaemon_task_duration_seconds_bucket[5m])) > 30
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "95th percentile task duration above 30s"