feat(longhorn): add alerts and service monitor

This commit is contained in:
SeanOMik 2024-01-23 18:51:53 -05:00
parent c55cbcdefa
commit 52b51c3966
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
4 changed files with 42 additions and 1 deletion

View File

@ -129,6 +129,11 @@ spec:
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Victoria
# Longhorn dashboard entry; wired to the same "Victoria" datasource as the
# cert-manager entry above.
# NOTE(review): gnetId/revision presumably identify a grafana.com dashboard
# and its pinned revision — confirm against the chart's values schema.
longhorn:
# renovate: depName="Longhorn"
gnetId: 16888
revision: 8
datasource: Victoria
spegel:
# renovate: depName="Spegel"
gnetId: 18089

View File

@ -0,0 +1,21 @@
---
# PrometheusRule holding the Longhorn storage alerts for the longhorn-system
# namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-longhorn-rules
  namespace: longhorn-system
  labels:
    # NOTE(review): presumably the label the monitoring stack's rule selector
    # matches on — confirm against the operator's ruleSelector.
    release: kube-prometheus-stack
spec:
  groups:
    - name: longhorn.rules
      rules:
        # Fires once a volume has stayed above 90% of its capacity for
        # 5 minutes.
        - alert: LonghornVolumeUsageCritical
          annotations:
            # Folded scalar: renders as a single line, identical to the
            # previous plain multi-line value.
            description: >-
              Longhorn volume {{$labels.volume}} on {{$labels.node}} is at
              {{$value}}% used for more than 5 minutes.
            summary: Longhorn volume capacity is over 90% used.
          # Uses longhorn_volume_actual_size_bytes: the older
          # longhorn_volume_usage_bytes name is not exported by current
          # Longhorn releases, so the ratio would be empty and the alert
          # could never fire.
          # NOTE(review): confirm the metric name against the /metrics output
          # of the deployed longhorn-manager version.
          expr: 100 * (longhorn_volume_actual_size_bytes / longhorn_volume_capacity_bytes) > 90
          for: 5m
          labels:
            issue: Longhorn volume {{$labels.volume}} usage on {{$labels.node}} is critical.
            severity: critical

View File

@ -4,3 +4,5 @@ resources:
- ./namespace.yaml
- ./helm-repository.yaml
- ./helm-release.yaml
# Monitoring manifests (presumably the Longhorn PrometheusRule and
# ServiceMonitor — confirm against the referenced files).
- ./alerts.yaml
- ./service-monitor.yaml

View File

@ -0,0 +1,13 @@
---
# ServiceMonitor for Longhorn: selects Services labelled
# `app: longhorn-manager` and scrapes their port named `manager`.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: longhorn
  namespace: longhorn-system
  labels:
    # NOTE(review): presumably the label the monitoring stack's
    # serviceMonitorSelector matches on — confirm.
    release: kube-prometheus-stack
spec:
  # No namespaceSelector is set, so only Services in this object's own
  # namespace are considered.
  selector:
    matchLabels:
      app: longhorn-manager
  endpoints:
    - port: manager