---
# Flux HelmRelease that installs the kube-prometheus-stack chart (pinned to
# 58.6.0) from the `prometheus-community-charts` HelmRepository into the
# `monitoring` namespace, reconciled every 5 minutes.
#
# `${SECRET_NEW_DOMAIN}` is a placeholder that is not defined in this file;
# presumably it is filled in by Flux post-build variable substitution --
# TODO confirm against the owning Kustomization.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      chart: kube-prometheus-stack
      # Quoted so the version is always read as a string.
      version: "58.6.0"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system

  values:
    namespaceOverride: "monitoring"

    alertmanager:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          # Anchor the hostname once so the TLS entry below cannot drift from it.
          - &alertmanager-host alertmanager.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        tls:
          # NOTE(review): no `secretName` is set here even though a
          # cert-manager issuer annotation is present -- confirm that the
          # certificate for this host is provisioned as intended.
          - hosts:
              - *alertmanager-host

      alertmanagerSpec:
        alertmanagerConfiguration:
          name: alertmanager-config

    grafana:
      enabled: false
      # Enable the dashboards even when Grafana itself is disabled.
      # forceDeployDashboards: true
      # defaultDashboardsTimezone: America/New_York

    prometheus:
      ingress:
        enabled: true
        annotations:
          cert-manager.io/cluster-issuer: letsencrypt-production
          traefik.ingress.kubernetes.io/router.entrypoints: websecure
          traefik.ingress.kubernetes.io/router.middlewares: traefik-authentik@kubernetescrd
        hosts:
          # Anchor the hostname once so the TLS entry below cannot drift from it.
          - &prometheus-host metrics.${SECRET_NEW_DOMAIN}
        paths:
          - "/"
        tls:
          # NOTE(review): no `secretName` is set here either -- see the
          # cert-manager question on the Alertmanager ingress.
          - hosts:
              - *prometheus-host

      prometheusSpec:
        enableAdminAPI: false

        # Local retention is only one day; samples are also shipped to the
        # remote-write endpoint below (presumably the long-term store --
        # TODO confirm).
        retention: 1d

        remoteWrite:
          - url: http://victoria-metrics-server.monitoring.svc:8428/api/v1/write

        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: openebs-hostpath
              resources:
                requests:
                  storage: 40Gi

    # The chart's key is `defaultRules` (plural). The previous spelling
    # `defaultRule` is not a key the chart reads, so `general: false` was
    # silently ignored.
    defaultRules:
      create: true
      rules:
        general: false

    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Allowlist: keep only series whose metric name starts with one of
          # these prefixes; everything else from this target is discarded
          # (this is how duplicate metrics are removed).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
            action: keep
          # Copy the value of the `node` label into `instance`.
          - sourceLabels: ["node"]
            targetLabel: instance
            action: replace

    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Allowlist: keep only series whose metric name starts with one of
          # these prefixes (this is how duplicate metrics are removed).
          - sourceLabels: ["__name__"]
            regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)"
            action: keep
          # Remove high-cardinality histogram buckets.
          - sourceLabels: ["__name__"]
            regex: "(apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket"
            action: drop
          - sourceLabels: ["__name__"]
            regex: "(apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)"
            action: drop

    kubeControllerManager:
      enabled: true
      # Scrape target is a fixed IP (presumably the control-plane node --
      # TODO confirm) rather than a label selector.
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10257
        targetPort: 10257
      serviceMonitor:
        enabled: true
        https: true
        # Certificate verification is skipped for this endpoint.
        insecureSkipVerify: true
        metricRelabelings:
          # Allowlist: keep only series whose metric name starts with one of
          # these prefixes (this is how duplicate metrics are removed).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)"
            action: keep

    kubeScheduler:
      enabled: true
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10259
        targetPort: 10259
      serviceMonitor:
        enabled: true
        https: true
        # Certificate verification is skipped for this endpoint.
        insecureSkipVerify: true
        metricRelabelings:
          # Allowlist: keep only series whose metric name starts with one of
          # these prefixes (this is how duplicate metrics are removed).
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)"
            action: keep

    kubeProxy:
      enabled: true
      endpoints:
        - 192.168.10.40
      service:
        enabled: true
        port: 10249
        targetPort: 10249
      serviceMonitor:
        enabled: true
        # Unlike the controller-manager and scheduler, this is scraped over
        # plain HTTP.
        https: false

    kubeEtcd:
      enabled: false