k3s-cluster/cluster/apps/monitoring/zfs-exporter/alerts.yaml

49 lines
2.4 KiB
YAML

---
# PrometheusRule defining alerts on the per-disk ZFS error metrics
# (zfs_read_errors, zfs_write_errors, zfs_checksum_errors).
# Every expression filters on device_type="disk".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: zfs-exporter-rules
  labels:
    # NOTE(review): presumably the label the kube-prometheus-stack Prometheus
    # instance uses to select rules - confirm against its ruleSelector.
    release: kube-prometheus-stack
spec:
  groups:
    - name: zfsexporter.rules
      rules:
        # Critical: the sum of read + write + checksum errors on one disk
        # exceeds 1000.
        - alert: DriveErrorsCriticalAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 1000
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: critical

        # Warning: the same sum is non-zero. Any total above 1000 is also
        # above 0, so this fires together with DriveErrorsCriticalAlert.
        - alert: DriveAnyErrorsAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 0
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one read error.
        - alert: DriveReadErrorsAlert
          expr: 'zfs_read_errors{device_type="disk"} > 0'
          annotations:
            summary: 'A drive encountered a read error (drive {{ $labels.device_name }})'
            description: "A drive encountered a read error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one write error.
        - alert: DriveWriteErrorsAlert
          expr: 'zfs_write_errors{device_type="disk"} > 0'
          annotations:
            summary: 'A drive encountered a write error (drive {{ $labels.device_name }})'
            description: "A drive encountered a write error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one checksum error.
        - alert: DriveChecksumErrorsAlert
          expr: 'zfs_checksum_errors{device_type="disk"} > 0'
          annotations:
            summary: 'A drive encountered a checksum error (drive {{ $labels.device_name }})'
            description: "A drive encountered a checksum error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning