# PrometheusRule defining alerts on per-disk ZFS error counters
# (zfs_read_errors, zfs_write_errors, zfs_checksum_errors).
# Every rule restricts its series to device_type="disk" and reports the
# device_name, vdev and pool labels of the offending series in its annotations.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: zfs-exporter-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably this label is what the Prometheus instance's
    # rule selector matches on — confirm against the Prometheus CR.
    release: kube-prometheus-stack
spec:
  groups:
    - name: zfsexporter.rules
      rules:
        # Critical: the sum of read + write + checksum errors on one disk
        # exceeds 1000.
        - alert: DriveErrorsCriticalAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 1000
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors!
              drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }}
              in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: critical

        # Warning: the summed error count on one disk is non-zero.
        # This condition is also true whenever the critical threshold above is
        # exceeded, so both alerts can be active for the same disk.
        - alert: DriveAnyErrorsAlert
          expr: >-
            zfs_read_errors{device_type="disk"}
            + zfs_write_errors{device_type="disk"}
            + zfs_checksum_errors{device_type="disk"}
            > 0
          annotations:
            summary: 'A drive encountered some errors (drive {{ $labels.device_name }})'
            description: "A drive encountered some errors!
              drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }}
              in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: at least one read error on a disk.
        # The annotation text names the read error class so it agrees with the
        # metric this rule evaluates.
        - alert: DriveReadErrorsAlert
          expr: zfs_read_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a read error (drive {{ $labels.device_name }})'
            description: "A drive encountered a read error!
              drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }}
              in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: at least one write error on a disk.
        # The annotation text names the write error class so it agrees with the
        # metric this rule evaluates.
        - alert: DriveWriteErrorsAlert
          expr: zfs_write_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a write error (drive {{ $labels.device_name }})'
            description: "A drive encountered a write error!
              drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }}
              in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning

        # Warning: at least one checksum error on a disk.
        - alert: DriveChecksumErrorsAlert
          expr: zfs_checksum_errors{device_type="disk"} > 0
          annotations:
            summary: 'A drive encountered a checksum error (drive {{ $labels.device_name }})'
            description: "A drive encountered a checksum error!
              drive: {{ $labels.device_name }} in vdev {{ $labels.vdev }}
              in pool {{ $labels.pool }} \n error count: {{ $value }}"
          labels:
            severity: warning