feat(longhorn): add alerts and service monitor
This commit is contained in:
parent
c55cbcdefa
commit
52b51c3966
|
@ -129,6 +129,11 @@ spec:
|
|||
cert-manager:
|
||||
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
|
||||
datasource: Victoria
|
||||
longhorn:
|
||||
# renovate: depName="Longhorn"
|
||||
gnetId: 16888
|
||||
revision: 8
|
||||
datasource: Victoria
|
||||
spegel:
|
||||
# renovate: depName="Spegel"
|
||||
gnetId: 18089
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
---
# PrometheusRule that defines the Longhorn volume-usage alert.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-longhorn-rules
  namespace: longhorn-system
  labels:
    # NOTE(review): presumably matched by the rule selector of the
    # kube-prometheus-stack release - confirm against that release's values.
    release: kube-prometheus-stack
spec:
  groups:
    - name: longhorn.rules
      rules:
        # Fires once usage / capacity has stayed above 90% for 5 minutes.
        - alert: LonghornVolumeUsageCritical
          expr: 100 * (longhorn_volume_usage_bytes / longhorn_volume_capacity_bytes) > 90
          for: 5m
          labels:
            severity: critical
            # Folded scalar: renders as a single line, no trailing newline.
            issue: >-
              Longhorn volume {{$labels.volume}} usage on {{$labels.node}}
              is critical.
          annotations:
            summary: Longhorn volume capacity is over 90% used.
            # Folded scalar: same single-line text as a wrapped plain scalar.
            description: >-
              Longhorn volume {{$labels.volume}} on {{$labels.node}} is at
              {{$value}}% used for more than 5 minutes.
|
|
@ -4,3 +4,5 @@ resources:
|
|||
- ./namespace.yaml
|
||||
- ./helm-repository.yaml
|
||||
- ./helm-release.yaml
|
||||
- ./alerts.yaml
|
||||
- ./service-monitor.yaml
|
|
@ -0,0 +1,13 @@
|
|||
---
# ServiceMonitor that scrapes the "manager" port of every Service labelled
# app=longhorn-manager.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: longhorn
  namespace: longhorn-system
  labels:
    # NOTE(review): presumably matched by the ServiceMonitor selector of the
    # kube-prometheus-stack release - confirm against that release's values.
    release: kube-prometheus-stack
spec:
  # Scrape the Service port named "manager".
  endpoints:
    - port: manager
  # No namespaceSelector is set, so only Services in this object's own
  # namespace (longhorn-system) are considered.
  selector:
    matchLabels:
      app: longhorn-manager
|
Loading…
Reference in New Issue