apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: "66.6.0"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system

  values:
    namespaceOverride: "monitoring"

    alertmanager:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          - &alertmanager-host alertmanager.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        tls:
          - hosts:
              - *alertmanager-host

      alertmanagerSpec:
        alertmanagerConfiguration:
          name: alertmanager-config
                
    grafana:
      enabled: false
      # enabled dashboards even with grafana being disabled
      #forceDeployDashboards: true
      #defaultDashboardsTimezone: America/New_York

    prometheus:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          - &prometheus-host metrics.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        tls:
          - hosts:
              - *prometheus-host

      prometheusSpec:
        enableAdminAPI: false
        retention: 1d
        remoteWrite:
        - url: http://victoria-metrics-server.monitoring.svc:8428/api/v1/write
        
        # select everything
        podMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        ruleSelectorNilUsesHelmValues: false
        ruleSelector: {}
        probeSelectorNilUsesHelmValues: false
        probeSelector: {}
        scrapeConfigSelectorNilUsesHelmValues: false
        scrapeConfigSelector: {}

        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: mainpool-hostpath
              resources:
                requests:
                  storage: 40Gi

    defaultRule:
      create: false
      rules:
        general: false

    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
            action: keep
          - sourceLabels: ["node"]
            targetLabel: instance
            action: replace

    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)"
            action: keep
          # Remove high cardinality metrics
          - sourceLabels: ["__name__"]
            regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
            action: drop
          - sourceLabels: ["__name__"]
            regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
            action: drop

    kubeControllerManager:
      enabled: true
      endpoints:
      - 192.168.10.40
      service:
        enabled: true
        port: 10257
        targetPort: 10257
      serviceMonitor:
        enabled: true
        https: true
        insecureSkipVerify: true
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)"
            action: keep

    kubeScheduler:
      enabled: true
      endpoints:
      - 192.168.10.40
      service:
        enabled: true
        port: 10259
        targetPort: 10259
      serviceMonitor:
        enabled: true
        https: true
        insecureSkipVerify: true
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)"
            action: keep

    kubeProxy:
      enabled: true
      endpoints:
      - 192.168.10.40
      service:
        enabled: true
        port: 10249
        targetPort: 10249
      serviceMonitor:
        enabled: true
        https: false

    kubeEtcd:
      enabled: false