feat: add alerts for flux
This commit is contained in:
parent
0294c45903
commit
f9132bcc01
|
@@ -0,0 +1,5 @@
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- ./podmonitor.yaml
|
||||||
|
- ./rules.yaml
|
|
@@ -0,0 +1,32 @@
|
||||||
|
---
|
||||||
|
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/podmonitor_v1.json
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PodMonitor
|
||||||
|
metadata:
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flux
|
||||||
|
app.kubernetes.io/component: monitoring
|
||||||
|
spec:
|
||||||
|
namespaceSelector:
|
||||||
|
matchNames:
|
||||||
|
- flux-system
|
||||||
|
selector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app
|
||||||
|
operator: In
|
||||||
|
values:
|
||||||
|
- helm-controller
|
||||||
|
- source-controller
|
||||||
|
- kustomize-controller
|
||||||
|
- notification-controller
|
||||||
|
- image-automation-controller
|
||||||
|
- image-reflector-controller
|
||||||
|
podMetricsEndpoints:
|
||||||
|
- port: http-prom
|
||||||
|
relabelings:
|
||||||
|
# Ref: https://github.com/prometheus-operator/prometheus-operator/issues/4816
|
||||||
|
- sourceLabels: [__meta_kubernetes_pod_phase]
|
||||||
|
action: keep
|
||||||
|
regex: Running
|
|
@@ -0,0 +1,32 @@
|
||||||
|
---
|
||||||
|
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
name: flux-rules
|
||||||
|
namespace: monitoring
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: flux.rules
|
||||||
|
rules:
|
||||||
|
- alert: FluxComponentAbsent
|
||||||
|
annotations:
|
||||||
|
summary: Flux component has disappeared from Prometheus target discovery.
|
||||||
|
expr: |
|
||||||
|
absent(up{job=~".*flux-system.*"} == 1)
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: FluxReconciliationFailure
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
{{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
|
||||||
|
has been failing for more than 15 minutes.
|
||||||
|
expr: |
|
||||||
|
max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
|
||||||
|
+
|
||||||
|
on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
|
||||||
|
by (namespace, name, kind)) * 2 == 1
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
|
@@ -11,3 +11,4 @@ resources:
|
||||||
- ./varken
|
- ./varken
|
||||||
- ./proxmoxve-exporter
|
- ./proxmoxve-exporter
|
||||||
- ./external-monitors
|
- ./external-monitors
|
||||||
|
- ./flux
|
Loading…
Reference in New Issue