/etc/prometheus/alert_healthchecks.yml > Selfmonitoring
|
Labels |
State |
Active Since |
Value |
alertname="SelfMonitoringAlwaysFiring"
application="leonard_healthchecks"
severity="info"
|
firing |
2025-05-27 17:27:35.330119324 +0000 UTC |
2 |
|
/etc/prometheus/alert_loadbalancing.yml > lowpref
|
Labels |
State |
Active Since |
Value |
alertname="LowGatewayPreference"
instance="gw01n03"
job="json_gwpref"
segment="1"
severity="page"
|
firing |
2025-05-27 17:29:33.417307786 +0000 UTC |
-9 |
Annotations |
- summary
- has low gateway preference (-9)
|
|
/etc/prometheus/alerts/blackbox-exporter.yml > BlackboxExporter
|
|
|
|
|
|
|
|
/etc/prometheus/alerts/general.yml > probe_success
|
|
/etc/prometheus/alerts/general.yml > reload_success
|
|
|
/etc/prometheus/alerts/general.yml > up_success
|
|
/etc/prometheus/alerts/node-exporter.yml > NodeExporter
|
|
|
|
|
|
|
|
|
# Fires with severity "critical" once node_filesystem_device_error == 1 has
# held continuously for 2 minutes.
alert: HostFilesystemDeviceError
expr: node_filesystem_device_error == 1
for: 2m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, trailing newline stripped.
  description: |-
    {{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Host filesystem device error (instance {{ $labels.instance }})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/alerts/smartctl-exporter.yml > SmartctlExporter
|
# Fires with severity "critical" once smartctl_device_critical_warning > 0 has
# held continuously for 15 minutes.
alert: SmartCriticalWarning
expr: smartctl_device_critical_warning > 0
for: 15m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, trailing newline stripped.
  description: |-
    device has critical warning (instance {{ $labels.instance }})
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Smart critical warning (instance {{ $labels.instance }})
|
# Fires with severity "critical" once smartctl_device_temperature > 80 has
# held continuously for 2 minutes.
alert: SmartDeviceTemperatureCritical
expr: smartctl_device_temperature > 80
for: 2m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, trailing newline stripped.
  description: |-
    Device temperature critical (instance {{ $labels.instance }})
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Smart device temperature critical (instance {{ $labels.instance }})
|
# Fires with severity "warning" once smartctl_device_temperature > 60 has
# held continuously for 2 minutes.
# NOTE(review): the expression has no upper bound, so it also matches every
# value above 80 that SmartDeviceTemperatureCritical matches; confirm that
# both alerts firing together is intended or handled by inhibition.
alert: SmartDeviceTemperatureWarning
expr: smartctl_device_temperature > 60
for: 2m
labels:
  severity: warning
annotations:
  # Literal block scalar (|-): line breaks are kept, trailing newline stripped.
  description: |-
    Device temperature warning (instance {{ $labels.instance }})
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Smart device temperature warning (instance {{ $labels.instance }})
|
# Fires with severity "critical" once smartctl_device_media_errors > 0 has
# held continuously for 15 minutes.
alert: SmartMediaErrors
expr: smartctl_device_media_errors > 0
for: 15m
labels:
  severity: critical
annotations:
  # Literal block scalar (|-): line breaks are kept, trailing newline stripped.
  description: |-
    device has media errors (instance {{ $labels.instance }})
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Smart media errors (instance {{ $labels.instance }})
|
|