feat(zfs-exporter): create some alerts

This commit is contained in:
SeanOMik 2023-08-22 14:46:37 -04:00
parent a7ff1e305f
commit c536fe7b0f
Signed by: SeanOMik
GPG Key ID: 568F326C7EB33ACB
1 changed files with 49 additions and 0 deletions

View File

@ -0,0 +1,49 @@
---
# PrometheusRule with alerts on the per-disk ZFS error metrics
# zfs_read_errors, zfs_write_errors and zfs_checksum_errors.
# Every rule selects device_type="disk" and reports the device_name, vdev and
# pool labels together with the current value of the expression.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: zfs-exporter-rules
  labels:
    # NOTE(review): presumably the label the kube-prometheus-stack Prometheus
    # uses in its ruleSelector to discover this object - confirm.
    release: kube-prometheus-stack
spec:
  groups:
    - name: zfsexporter.rules
      rules:
        # Critical: the summed read + write + checksum error count of a disk
        # exceeds 1000. PromQL applies `+` before `>`, so the threshold is
        # compared against the sum of all three series.
        - alert: DriveErrorsCriticalAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 1000
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: critical

        # Warning: the same summed error count is above zero. A disk over the
        # critical threshold also satisfies this expression, so both alerts
        # fire for it unless one is inhibited elsewhere.
        - alert: DriveAnyErrorsAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 0
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one read error.
        # The annotations previously said "checksum error" (copied from
        # DriveChecksumErrorsAlert); they now name the error type that fired.
        - alert: DriveReadErrorsAlert
          expr: zfs_read_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a read error (drive {{ $labels.device_name }})'
            description: "A drive encountered a read error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one write error.
        # The annotations previously said "checksum error" (copied from
        # DriveChecksumErrorsAlert); they now name the error type that fired.
        - alert: DriveWriteErrorsAlert
          expr: zfs_write_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a write error (drive {{ $labels.device_name }})'
            description: "A drive encountered a write error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: a disk reports at least one checksum error.
        - alert: DriveChecksumErrorsAlert
          expr: zfs_checksum_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a checksum error (drive {{ $labels.device_name }})'
            description: "A drive encountered a checksum error! drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }} in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning