Add Flux monitoring resources under cluster/apps/monitoring/flux and register
them in the monitoring kustomization.

  * flux/kustomization.yaml       - lists podmonitor.yaml and rules.yaml.
  * flux/podmonitor.yaml          - PodMonitor in flux-system selecting pods
                                    whose `app` label is one of six Flux
                                    controllers; scrapes port http-prom and
                                    keeps only targets whose pod phase is
                                    Running.
  * flux/rules.yaml               - PrometheusRule in namespace monitoring with
                                    two critical alerts (FluxComponentAbsent,
                                    FluxReconciliationFailure), each `for: 15m`.
  * monitoring/kustomization.yaml - appends ./flux to `resources`.

NOTE(review): every file touched here ends without a trailing newline (see the
"\ No newline at end of file" markers below).
NOTE(review): FluxComponentAbsent wraps the whole `up{...} == 1` selector in
absent(), so it appears to fire only when no matching target is up at all, not
when a single controller disappears - confirm this is intended.

diff --git a/cluster/apps/monitoring/flux/kustomization.yaml b/cluster/apps/monitoring/flux/kustomization.yaml
new file mode 100644
index 00000000..03d903fb
--- /dev/null
+++ b/cluster/apps/monitoring/flux/kustomization.yaml
@@ -0,0 +1,5 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- ./podmonitor.yaml
+- ./rules.yaml
\ No newline at end of file
diff --git a/cluster/apps/monitoring/flux/podmonitor.yaml b/cluster/apps/monitoring/flux/podmonitor.yaml
new file mode 100644
index 00000000..489b4e35
--- /dev/null
+++ b/cluster/apps/monitoring/flux/podmonitor.yaml
@@ -0,0 +1,32 @@
+---
+# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/podmonitor_v1.json
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: flux-system
+  namespace: flux-system
+  labels:
+    app.kubernetes.io/part-of: flux
+    app.kubernetes.io/component: monitoring
+spec:
+  namespaceSelector:
+    matchNames:
+      - flux-system
+  selector:
+    matchExpressions:
+      - key: app
+        operator: In
+        values:
+          - helm-controller
+          - source-controller
+          - kustomize-controller
+          - notification-controller
+          - image-automation-controller
+          - image-reflector-controller
+  podMetricsEndpoints:
+    - port: http-prom
+      relabelings:
+        # Ref: https://github.com/prometheus-operator/prometheus-operator/issues/4816
+        - sourceLabels: [__meta_kubernetes_pod_phase]
+          action: keep
+          regex: Running
\ No newline at end of file
diff --git a/cluster/apps/monitoring/flux/rules.yaml b/cluster/apps/monitoring/flux/rules.yaml
new file mode 100644
index 00000000..8f106767
--- /dev/null
+++ b/cluster/apps/monitoring/flux/rules.yaml
@@ -0,0 +1,32 @@
+---
+# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: flux-rules
+  namespace: monitoring
+spec:
+  groups:
+    - name: flux.rules
+      rules:
+        - alert: FluxComponentAbsent
+          annotations:
+            summary: Flux component has disappeared from Prometheus target discovery.
+          expr: |
+            absent(up{job=~".*flux-system.*"} == 1)
+          for: 15m
+          labels:
+            severity: critical
+        - alert: FluxReconciliationFailure
+          annotations:
+            summary: >-
+              {{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
+              has been failing for more than 15 minutes.
+          expr: |
+            max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
+              +
+            on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
+              by (namespace, name, kind)) * 2 == 1
+          for: 15m
+          labels:
+            severity: critical
\ No newline at end of file
diff --git a/cluster/apps/monitoring/kustomization.yaml b/cluster/apps/monitoring/kustomization.yaml
index 7464cad6..ee9537bd 100644
--- a/cluster/apps/monitoring/kustomization.yaml
+++ b/cluster/apps/monitoring/kustomization.yaml
@@ -10,4 +10,5 @@ resources:
 - ./victoria-metrics
 - ./varken
 - ./proxmoxve-exporter
-- ./external-monitors
\ No newline at end of file
+- ./external-monitors
+- ./flux
\ No newline at end of file