k3s-cluster/cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml

159 lines
6.9 KiB
YAML

---
# Flux HelmRelease that installs the kube-prometheus-stack chart into the
# "monitoring" namespace. Grafana is disabled, Prometheus keeps one day of
# local data and also forwards samples through remoteWrite, and per-component
# metric relabelings restrict which metric families are stored.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  # Reconciliation interval for this release.
  interval: 5m
  chart:
    spec:
      chart: kube-prometheus-stack
      # Quoted so the version is always read as a string.
      version: "60.2.0"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
  values:
    namespaceOverride: "monitoring"

    alertmanager:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          # Anchor lets the TLS section below reuse the same hostname.
          - &alertmanager-host alertmanager.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        # NOTE(review): no secretName is set on this TLS entry; confirm how
        # the certificate for this host is provided.
        tls:
          - hosts:
              - *alertmanager-host
      alertmanagerSpec:
        # Alertmanager configuration is taken from the object named below
        # rather than from inline chart values.
        alertmanagerConfiguration:
          name: alertmanager-config

    grafana:
      enabled: false
      # Uncomment to deploy the dashboards even though Grafana is disabled.
      # forceDeployDashboards: true
      # defaultDashboardsTimezone: America/New_York

    prometheus:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          # Anchor lets the TLS section below reuse the same hostname.
          - &prometheus-host metrics.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        # NOTE(review): no secretName is set on this TLS entry; confirm how
        # the certificate for this host is provided.
        tls:
          - hosts:
              - *prometheus-host
      prometheusSpec:
        enableAdminAPI: false
        # Only one day is kept locally; samples are also sent to the
        # remoteWrite endpoint below.
        retention: 1d
        remoteWrite:
          - url: http://victoria-metrics-server.monitoring.svc:8428/api/v1/write
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: mainpool-hostpath
              resources:
                requests:
                  storage: 40Gi

    # The chart's values key is "defaultRules" (plural). The previous spelling
    # "defaultRule" is not a chart value, so these settings were most likely
    # ignored and the default rules stayed enabled.
    defaultRules:
      create: false
      rules:
        general: false

    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Allow-list: keep only metrics whose name is one of these prefixes
          # followed by "_<suffix>"; every other metric from this target is
          # dropped (original intent: remove duplicate metrics).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
            action: keep
          # Copy the "node" label value into the "instance" label.
          - sourceLabels: ["node"]
            targetLabel: instance
            action: replace

    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Allow-list: keep only metrics whose name is one of these prefixes
          # followed by "_<suffix>"; every other metric from this target is
          # dropped (original intent: remove duplicate metrics).
          - sourceLabels: ["__name__"]
            regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)"
            action: keep
          # Drop high-cardinality histogram buckets that passed the
          # allow-list above.
          - sourceLabels: ["__name__"]
            regex: "(apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket"
            action: drop
          - sourceLabels: ["__name__"]
            regex: "(apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)"
            action: drop

    kubeControllerManager:
      enabled: true
      # Scraped at a fixed address instead of being discovered.
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10257
        targetPort: 10257
      serviceMonitor:
        enabled: true
        # Scrape over HTTPS without verifying the serving certificate.
        https: true
        insecureSkipVerify: true
        metricRelabelings:
          # Allow-list: keep only metrics whose name is one of these prefixes
          # followed by "_<suffix>"; every other metric from this target is
          # dropped (original intent: remove duplicate metrics).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)"
            action: keep

    kubeScheduler:
      enabled: true
      # Scraped at a fixed address instead of being discovered.
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10259
        targetPort: 10259
      serviceMonitor:
        enabled: true
        # Scrape over HTTPS without verifying the serving certificate.
        https: true
        insecureSkipVerify: true
        metricRelabelings:
          # Allow-list: keep only metrics whose name is one of these prefixes
          # followed by "_<suffix>"; every other metric from this target is
          # dropped (original intent: remove duplicate metrics).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)"
            action: keep

    kubeProxy:
      enabled: true
      # Scraped at a fixed address instead of being discovered.
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10249
        targetPort: 10249
      serviceMonitor:
        enabled: true
        # This target is scraped over plain HTTP.
        https: false

    kubeEtcd:
      enabled: false