Skip to content

Instantly share code, notes, and snippets.

@MAS150MD200
Forked from mjf/prometheus-rules.yml
Created July 6, 2018 10:31
Show Gist options
  • Save MAS150MD200/12bd497644b210d70b9da5c3b637637c to your computer and use it in GitHub Desktop.
Save MAS150MD200/12bd497644b210d70b9da5c3b637637c to your computer and use it in GitHub Desktop.
Prometheus Recording and Alert Rules Collection
# Prometheus Recording and Alert Rules Collection
# Copyright (C) 2017 Matous Jan Fialka, <http://mjf.cz/>
# Released under the terms of The MIT License
# Fires when the busy share of CPU time (everything except idle and iowait)
# of an instance stays above 95% for 5 minutes.
#
# node_cpu is a cumulative per-mode seconds counter, so every selector is
# wrapped in rate(...[5m]). Dividing the raw counter sums would give the
# average utilisation since boot, which hardly reacts to a current spike.
ALERT processor_usage_too_high
  IF (
       (sum(rate(node_cpu{mode=~"user|nice|system|irq|softirq|steal|idle|iowait"}[5m])) by (instance, job))
       -
       (sum(rate(node_cpu{mode=~"idle|iowait"}[5m])) by (instance, job))
     )
     /
     (sum(rate(node_cpu{mode=~"user|nice|system|irq|softirq|steal|idle|iowait"}[5m])) by (instance, job))
     * 100 > 95
  FOR 5m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has processor above 95% (current value: {{ printf \"%.2f\" $value }}%) for over 5 minutes",
    summary="Processor usage above 95%"
  }
# Fires when used swap, as a percentage of total swap, stays above 20% for
# one hour. The threshold matches the alert name and the annotation texts;
# the expression previously compared against 50.
ALERT swap_usage_above_20_percent
  IF (((node_memory_SwapTotal - node_memory_SwapFree) / node_memory_SwapTotal) * 100) > 20
  FOR 1h
  LABELS {severity="moderate"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has swap usage above 20% (current value: {{ printf \"%.2f\" $value }}%) for over 1 hour",
    summary="Swap usage above 20%"
  }
# Fires when memory in use (total minus free minus page cache), as a
# percentage of total memory, stays above 90% for 5 minutes. The threshold
# matches the alert name and the annotation texts; the expression previously
# compared against 95.
ALERT memory_usage_above_90_percent
  IF (((node_memory_MemTotal - node_memory_MemFree - node_memory_Cached) / (node_memory_MemTotal) * 100)) > 90
  FOR 5m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has memory usage above 90% (current value: {{ printf \"%.2f\" $value }}%) for over 5 minutes",
    summary="Memory usage above 90%"
  }
# Recording rule: ratio of open to maximum file descriptors
# (process_open_fds divided by process_max_fds).
instance:fd_utilization = process_open_fds / process_max_fds
# Extrapolates the last hour of instance:fd_utilization linearly 14400
# seconds (4 hours) ahead and fires when the predicted ratio exceeds 1 for
# 10 minutes.
ALERT file_descriptors_exhausted_in_4_hours
  IF predict_linear(instance:fd_utilization[1h], 14400) > 1
  FOR 10m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} will have file descriptors exhausted in 4 hours",
    summary="File descriptors will be exhausted soon"
  }
# Extrapolates the last hour of node_filesystem_free linearly 8 hours
# (8 * 60 * 60 seconds) ahead and fires when the predicted value is below
# zero for 20 minutes.
ALERT disk_space_exhausted_in_8_hours
  IF predict_linear(node_filesystem_free[1h], 8 * 60 * 60) < 0
  FOR 20m
  LABELS {severity="moderate"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} will have disk space exhausted in 8 hours",
    summary="Disk space will be exhausted soon"
  }
# Fires when available space, as a percentage of filesystem size, has been
# at or below 10% for 15 minutes.
ALERT disk_space_almost_exhausted
  IF (node_filesystem_avail / node_filesystem_size) * 100 <= 10
  FOR 15m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has disk space less than 10% (current value: {{ printf \"%.2f\" $value }}%) for 15 minutes",
    summary="Disk space almost exhausted"
  }
# Fires when the `up` series of a scrape target has been 0 for 1 minute.
ALERT node_down
  IF up == 0
  FOR 1m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has been down for over 1 minute",
    summary="Node down"
  }
# Fires when any series whose metric name matches "[^_]+_up" (a single
# underscore-free prefix followed by "_up") has been 0 for 3 minutes.
ALERT service_down
  IF {__name__=~"[^_]+_up"} == 0
  FOR 3m
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="Instance {{ $labels.instance }} of job {{ $labels.job }} has been down for over 3 minutes",
    summary="Service down"
  }
# Fires when the per-second rate of mysql_global_status_innodb_log_waits,
# computed over a 5-minute window, exceeds 10.
# NOTE(review): there is no FOR clause, so the alert fires as soon as the
# windowed rate crosses the threshold, although the description says "for
# over 5 minutes" -- confirm which is intended.
ALERT mysql_innodb_log_waits
  IF rate(mysql_global_status_innodb_log_waits[5m]) > 10
  LABELS {severity="critical"}
  ANNOTATIONS {
    description="The MySQL InnoDB logs are waiting for disk at a rate of {{ printf \"%.2f\" $value }} per second for over 5 minutes",
    summary="MySQL InnoDB log waits"
  }
# Critical tier: fires when the magnitude of the NTP drift has exceeded
# 0.05 s for 1 minute. abs() is applied so that an offset in the negative
# direction also triggers the alert; the bare comparison only caught
# positive values.
ALERT ntp_drifting
  IF abs(node_ntp_drift_seconds) > 0.05
  FOR 1m
  LABELS {severity = "critical"}
  ANNOTATIONS {
    description="The NTP drifting has been too high for over 1 minute",
    summary="NTP drifting too high"
  }
# Moderate tier: fires when the magnitude of the NTP drift has exceeded
# 0.01 s for 1 minute. abs() is applied so that an offset in the negative
# direction also triggers the alert; the bare comparison only caught
# positive values.
ALERT ntp_drifting
  IF abs(node_ntp_drift_seconds) > 0.01
  FOR 1m
  LABELS {severity = "moderate"}
  ANNOTATIONS {
    description="The NTP has been drifting for over 1 minute",
    summary="NTP drift"
  }
# vi:ft=prometheus:nowrap:tw=10000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment