feat: add alerts for flux

This commit is contained in:
SeanOMik 2024-05-22 20:44:27 -04:00
parent 0294c45903
commit f9132bcc01
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
4 changed files with 71 additions and 1 deletions

View File

@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./podmonitor.yaml
- ./rules.yaml

View File

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/podmonitor_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: flux-system
namespace: flux-system
labels:
app.kubernetes.io/part-of: flux
app.kubernetes.io/component: monitoring
spec:
namespaceSelector:
matchNames:
- flux-system
selector:
matchExpressions:
- key: app
operator: In
values:
- helm-controller
- source-controller
- kustomize-controller
- notification-controller
- image-automation-controller
- image-reflector-controller
podMetricsEndpoints:
- port: http-prom
relabelings:
# Ref: https://github.com/prometheus-operator/prometheus-operator/issues/4816
- sourceLabels: [__meta_kubernetes_pod_phase]
action: keep
regex: Running

View File

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: flux-rules
namespace: monitoring
spec:
groups:
- name: flux.rules
rules:
- alert: FluxComponentAbsent
annotations:
summary: Flux component has disappeared from Prometheus target discovery.
expr: |
absent(up{job=~".*flux-system.*"} == 1)
for: 15m
labels:
severity: critical
- alert: FluxReconciliationFailure
annotations:
summary: >-
{{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
has been failing for more than 15 minutes.
expr: |
max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
+
on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
by (namespace, name, kind)) * 2 == 1
for: 15m
labels:
severity: critical

View File

@ -11,3 +11,4 @@ resources:
- ./varken
- ./proxmoxve-exporter
- ./external-monitors
- ./flux