apiVersion: helm.toolkit.fluxcd.io/v2 kind: HelmRelease metadata: name: kube-prometheus-stack namespace: monitoring spec: interval: 5m chart: spec: chart: kube-prometheus-stack version: "62.3.1" sourceRef: kind: HelmRepository name: prometheus-community-charts namespace: flux-system values: namespaceOverride: "monitoring" alertmanager: ingress: enabled: true annotations: cert-manager.io/cluster-issuer: letsencrypt-production traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd hosts: - &alertmanager-host alertmanager.${SECRET_NEW_DOMAIN} paths: - "/" tls: - hosts: - *alertmanager-host alertmanagerSpec: alertmanagerConfiguration: name: alertmanager-config grafana: enabled: false # enabled dashboards even with grafana being disabled #forceDeployDashboards: true #defaultDashboardsTimezone: America/New_York prometheus: ingress: enabled: true annotations: cert-manager.io/cluster-issuer: letsencrypt-production traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd hosts: - &prometheus-host metrics.${SECRET_NEW_DOMAIN} paths: - "/" tls: - hosts: - *prometheus-host prometheusSpec: enableAdminAPI: false retention: 1d remoteWrite: - url: http://victoria-metrics-server.monitoring.svc:8428/api/v1/write storageSpec: volumeClaimTemplate: spec: storageClassName: mainpool-hostpath resources: requests: storage: 40Gi defaultRule: create: false rules: general: false kubelet: enabled: true serviceMonitor: metricRelabelings: # Remove duplicate metrics - sourceLabels: ["__name__"] regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)" action: keep - sourceLabels: ["node"] targetLabel: instance action: replace kubeApiServer: enabled: true serviceMonitor: metricRelabelings: # Remove duplicate metrics - sourceLabels: ["__name__"] regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)" action: keep # Remove high cardinality metrics - sourceLabels: ["__name__"] regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket action: drop - sourceLabels: ["__name__"] regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket) action: drop kubeControllerManager: enabled: true endpoints: - 192.168.10.40 service: enabled: true port: 10257 targetPort: 10257 serviceMonitor: enabled: true https: true insecureSkipVerify: true metricRelabelings: # Remove duplicate metrics - sourceLabels: ["__name__"] regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)" action: keep kubeScheduler: enabled: true endpoints: - 192.168.10.40 service: enabled: true port: 10259 targetPort: 10259 serviceMonitor: enabled: true https: true insecureSkipVerify: true metricRelabelings: # Remove duplicate metrics - sourceLabels: ["__name__"] regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)" action: keep kubeProxy: enabled: true endpoints: - 192.168.10.40 service: enabled: true port: 10249 targetPort: 10249 serviceMonitor: enabled: true https: false kubeEtcd: enabled: false