Compare commits

...

2 Commits

5 changed files with 48 additions and 2 deletions

View File

@ -129,6 +129,11 @@ spec:
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Victoria
longhorn:
# renovate: depName="Longhorn"
gnetId: 16888
revision: 8
datasource: Victoria
spegel:
# renovate: depName="Spegel"
gnetId: 18089

View File

@ -25,4 +25,9 @@ spec:
podDnsConfig:
nameservers:
- "1.1.1.1"
- "9.9.9.9"
- "9.9.9.9"
prometheus:
servicemonitor:
enabled: true
labels:
release: kube-prometheus-stack

View File

@ -0,0 +1,21 @@
---
# Alerting rules for Longhorn volumes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-longhorn-rules
  namespace: longhorn-system
  labels:
    # NOTE(review): presumably the label the kube-prometheus-stack rule
    # selector matches on - confirm against the cluster's ruleSelector.
    release: kube-prometheus-stack
spec:
  groups:
    - name: longhorn.rules
      rules:
        # Fires when a volume has stayed above 90% of its capacity for 5m.
        - alert: LonghornVolumeUsageCritical
          annotations:
            # Folded scalar: the two source lines are joined with a single
            # space, giving one-line text in the rendered alert.
            description: >-
              Longhorn volume {{$labels.volume}} on {{$labels.node}} is at
              {{$value}}% used for more than 5 minutes.
            summary: Longhorn volume capacity is over 90% used.
          # Used space as a percentage of volume capacity. Longhorn exports the
          # consumed size as longhorn_volume_actual_size_bytes; it has no
          # longhorn_volume_usage_bytes series, and an expression over a
          # missing series returns no samples, so the alert would never fire.
          expr: 100 * (longhorn_volume_actual_size_bytes / longhorn_volume_capacity_bytes) > 90
          for: 5m
          labels:
            issue: Longhorn volume {{$labels.volume}} usage on {{$labels.node}} is critical.
            severity: critical

View File

@ -3,4 +3,6 @@ kind: Kustomization
resources:
- ./namespace.yaml
- ./helm-repository.yaml
- ./helm-release.yaml
- ./helm-release.yaml
- ./alerts.yaml
- ./service-monitor.yaml

View File

@ -0,0 +1,13 @@
---
# ServiceMonitor describing how to scrape Longhorn manager metrics.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: longhorn
  namespace: longhorn-system
  labels:
    release: kube-prometheus-stack
spec:
  # Scrape the Service port named "manager".
  endpoints:
    - port: manager
  # Select Services labelled app=longhorn-manager.
  selector:
    matchLabels:
      app: longhorn-manager