feat: add grafana and kube-prometheus-stack for kubernetes and node exporters

This commit is contained in:
SeanOMik 2024-09-27 16:00:51 -04:00
parent a64e2fed75
commit bb16a89a25
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
10 changed files with 393 additions and 65 deletions

View File

@ -1,54 +0,0 @@
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPClusterConfig
metadata:
name: cilium-bgp
spec:
nodeSelector:
matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: DoesNotExist
bgpInstances:
- name: "bgp-public"
localASN: 65552
peers:
- name: "router"
peerASN: 65551
peerAddress: 192.168.1.1
peerConfigRef:
name: "cilium-peer-router"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeerConfig
metadata:
name: cilium-peer-router
spec:
timers:
holdTimeSeconds: 9
keepAliveTimeSeconds: 3
gracefulRestart:
enabled: true
restartTimeSeconds: 15
families:
- afi: ipv4
safi: unicast
advertisements:
matchLabels:
advertise: "bgp-public"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPAdvertisement
metadata:
name: bgp-public-ad
labels:
advertise: "bgp-public"
spec:
advertisements:
- advertisementType: "Service"
service:
addresses:
#- ClusterIP
- ExternalIP
- LoadBalancerIP
selector:
matchLabels:
bgp/service-type: public

View File

@ -1,8 +0,0 @@
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: "main-pool"
spec:
blocks:
- start: "192.168.2.50"
stop: "192.168.2.59"

View File

@ -0,0 +1,168 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: grafana
namespace: monitoring
spec:
interval: 5m
chart:
spec:
chart: grafana
version: "8.5.1"
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
values:
ingress:
enabled: true
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
traefik.ingress.kubernetes.io/router.entrypoints: websecure
className: internal
hosts:
- &grafana-host grafana.internal.${SECRET_NEW_DOMAIN}
path: "/"
tls:
- hosts:
- *grafana-host
grafana.ini:
server:
root_url: https://grafana.internal.${SECRET_NEW_DOMAIN}
auth:
disable_login_form: true
oauth_auto_login: true
auth.generic_oauth:
enabled: true
allow_sign_up: true # creates new users after authentik login
auto_login: true
name: Authentik
client_id: $__file{/etc/secrets/auth_generic_oauth/client_id}
client_secret: $__file{/etc/secrets/auth_generic_oauth/client_secret}
scopes: openid profile email offline_access
auth_url: https://auth.${SECRET_BASE_DOMAIN}/application/o/authorize/
token_url: https://auth.${SECRET_BASE_DOMAIN}/application/o/token/
api_url: https://auth.${SECRET_BASE_DOMAIN}/application/o/userinfo/
role_attribute_path: contains(groups[*], 'authentik Admins') && 'GrafanaAdmin' #|| contains(info.groups[*], 'editor') && 'Editor' || 'Viewer'
groups_attribute_path: groups
name_attribute_path: preferred_username
login_attribute_path: email
allow_assign_grafana_admin: true
use_pkce: true
use_refresh_token: true
# Provide oauth creds
extraSecretMounts:
- name: grafana-secrets-mount
secretName: grafana-oauth
defaultMode: 0440
mountPath: /etc/secrets/auth_generic_oauth
readOnly: true
# Add Victoria Metrics as the default datasource
datasources:
victoria.yaml:
apiVersion: 1
datasources:
- name: Victoria
type: prometheus
jsonData:
tlsSkipVerify: true
url: http://vmsingle-primary.monitoring.svc:8429
editable: false
isDefault: true
# datasources:
# - name: Victoria
# uid: victoria-metrics-server
# type: prometheus
# jsonData:
# tlsSkipVerify: "true"
# editable: false"
# url: http://victoria-metrics-server.monitoring.svc:8428
# version: "1"
# isDefault: "true"
sidecar:
dashboards:
enabled: true
label: grafana_dashboard
labelValue: "1"
folderAnnotation: grafana_folder
provider:
foldersFromFilesStructure: true
serviceMonitor:
enabled: true
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default
- name: kubernetes
orgId: 1
folder: Kubernetes
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes
dashboards:
default:
node-exporter-full:
# renovate: depName="Node Exporter Full"
gnetId: 1860
revision: 33
datasource: Victoria
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Victoria
# minio:
# # renovate: depName="MinIO Dashboard"
# gnetId: 13502
# revision: 24
# datasource:
# - { name: DS_PROMETHEUS, value: Victoria }
kubernetes:
kubernetes-api-server:
# renovate: depName="Kubernetes / System / API Server"
gnetId: 15761
revision: 16
datasource: Victoria
kubernetes-coredns:
# renovate: depName="Kubernetes / System / CoreDNS"
gnetId: 15762
revision: 17
datasource: Victoria
kubernetes-global:
# renovate: depName="Kubernetes / Views / Global"
gnetId: 15757
revision: 36
datasource: Victoria
kubernetes-namespaces:
# renovate: depName="Kubernetes / Views / Namespaces"
gnetId: 15758
revision: 32
datasource: Victoria
kubernetes-nodes:
# renovate: depName="Kubernetes / Views / Nodes"
gnetId: 15759
revision: 28
datasource: Victoria
kubernetes-pods:
# renovate: depName="Kubernetes / Views / Pods"
gnetId: 15760
revision: 21
datasource: Prometheus

View File

@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./secret.sops.yaml
- ./helm-release.yaml

View File

@ -0,0 +1,72 @@
apiVersion: v1
kind: Secret
metadata:
name: grafana-oauth
namespace: monitoring
stringData:
client_id: ENC[AES256_GCM,data:bmrhYD+cLeKC7sPsGYlLSXBDkqr6capZwP3CY/2T2ca9su46TnrZPg==,iv:BC7SS6B390c2ZazTpQVLuzo93YqNYMneTox0kjOIsfs=,tag:um7jM/endC6e86NfJsXpow==,type:str]
client_secret: ENC[AES256_GCM,data:H/nqW0f8tE8MfQ1wtloU7XWOmBnjImcXyP4GJVO7KJ6Q5JrUZbFAcS1khqe6nBUJCf+/9M+8BEhxPhFR5/IiuzV7xk7a71jw4jMEZs4YizkpUhw4JZ5+YhGcpcGFqB1egsd4sQLWIdRpYy2E45EiuUvldNs2qvBtYrCO2o2MnL8=,iv:UEhm5zywHwkTQO12qg0/tOkfqpOndlKIy75cydMcmvA=,tag:EkzTHSXKYpEutGUa7lZQ5Q==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age: []
lastmodified: "2024-09-27T19:58:26Z"
mac: ENC[AES256_GCM,data:ftw1bowXQNeyQy55BugSr31VakDVp9aGnq8nNBIvpdMxS0vd/l2DVjhCy9BF/iWQU7T1PR4TErEDCfeJaMpXOt8HzDlDy7IxIuHSSMHK9rFR/L+CwWu0aIwxGB8RpcWlt5JDCWkuk3nCtv61jV6KuZ7fFX/8YXuIVx6Ev6HW7yI=,iv:t5cuWCr53rO7g6BiyLoISmQNE+TBPpdaBebY4Jy+7tQ=,tag:CQEPser8jQbcMlbXN82ANg==,type:str]
pgp:
- created_at: "2024-09-27T19:58:26Z"
enc: |-
-----BEGIN PGP MESSAGE-----
hQIMAwAAAAAAAAAAAQ/+PDDA2u/3tQA5D7RBGPBHcKCuraO13Ogb32gxt5ZvrBgA
SpABi32A/zZ9FB8kg+T/VWHXh1NK7aDB8TvAQN6W9l/d+OIRvvqd3EdZLax9EhXu
DFSOXP0ZnqzMSLILP6e7ahEHdlvno+gaCAGJnMyPjNYhvd5dFiVKZYufsYIVXUq4
6UsFxWTzs5cDtphZfuGNgZeHWxyalbrl+XagIY0KX9+pFN5muufo3tnSbqjDOui/
UAhpEE2V/6gZPXJLWggSfKQq/QTO24bxl5Csbb1GIOnLnz48eVpU5oZpiwXWbZo8
mTS9QKzs/Z+rDooTLE/R73agxEq1EvfGOXlHg2RqV7wBPu3PoHWmId3mM+PykvDD
HyzLzIHtzd1d7kDqH4j/SMi3xP3vZN3H7P2SVBoPX9d0lPmNtILX0N1Rh+sTrF9A
sRSOmHNw8eNLoQojF+VcglA9LiQSuzUn1XKu/QjLkHYMZeRNXfF78XZNj/X6CNf5
U5i8AuhIZTlRRYKFD+Yw5zMu/UNe0R04z/sBpn7Kdaf8vszihRB/0DLXi33bCltG
788DZVc3Sx9lFqb9avq6c79Fg6GxIM/FIkpiRm9inC/06c1TwqeZI8VI+s4RwDCb
yVBleNKBAB9hDaO1U/yEOwm9WH8TBnh3sOBwmMaRiTL5z3/PjFFuhXMiVqEAY6+F
AgwDAAAAAAAAAAABD/0QueC44PlfxiFEu+3bj4Lwe3RD03FxEealrb/eZsN/eOZR
gKCZzk3TXXg/2Uc4S7p3ebiNCgs+9MYlN5PvxcheNtWv2rz/n3EdVeZIqZ/YDnv4
C/eWgyp0YFVthcZtm/ZAetzK17TFG44vpayh+v0GlsLTzdgnhuwDSARcHOZgiCC0
vcerM9+r+l3UZ+xmhnphoWAaYBfeRHFSGUZUjjQnmBjheB5/ru/mltPMrTX6nlEM
Ngw8cgOj9WL2JNfGIutE7nfDB9Lapyng8eFL2IlAMralFgq6psO2ZdeApLdTy5zV
38+zYbGRQLTACfKVej6dRdH0dHSxzP0uJe2GVXZE6V7C8CfrDTPDrA9fwdpJLWmR
t148KzFUMQ5twva9CBzfrGNS4Fs41EV1BjhwqlhqWNjPrr/BnK45wKrmPbna+tyJ
EUsu1Ztrxn/J055ugN7vaDuRvSjdSsSyTbhdZPFhgk95Btn8uqNgpp/bk7l/i01w
e/u6qqzqbNez4EpNLReqfSbI6dMxsXqgY1a8WTbGtgzp7d0sZAFzMGdT4aUdAkV4
SWkexxsZiIqxv26reBtdfq7aKGyg+hQuOVe7D20cRjjuB9Enbbs+OHGbITpt15Oj
X1I2UgfOW1fzMHLmeg1qIF7q2Kd4vLyQ3BiBQPClhRdq+MdZPLPl2bKSFD0ZntRm
AQkCEHAlUhkXndiJatN+uW5/N5xZ9GQ7FEszGVwbdGMlF60VGPfkXqDC45J42wLL
0zHM02DFd0T8stj3SD0PRJVQWJXwE2qrHdzO4MPBqb0W2vjO+9xBdj0uVYq8vcHI
JaVyt6TK
=3Jlu
-----END PGP MESSAGE-----
fp: BD1AAF9D8170F4BEE437365FF6F0933799CFEBCD
- created_at: "2024-09-27T19:58:26Z"
enc: |-
-----BEGIN PGP MESSAGE-----
hQIMAwAAAAAAAAAAAQ//Wu7UO5G0QcWMKZ+0vwEiMSW5HAuUoGJkbrthRtw6yzuu
XCBEbQKZRhuAm3BS7dx0LXYYiE7bEQX4s5ZnqwuwWpRzNCzbfOSxSgibpCqPMocM
RrRKHDi+yma96PY3Uruj3X8kWlL3I+8N9R1Fd7UiTqnm4pti/DaKfeWLIp8by6FE
E2N+BY58GZo8NnkHM1Muud8JaecH8CJxtkHEpFjT/EMu+AgUacmM/OYpRBJyR8Ma
r1gc64Nem2PTwevQj79iGuM3OJWcybeWUh3ssl8QypdOLiMG1ywUCfuwJ4Qqo81q
G+zSbY8AqfaX7ykx329vaWtz3GZrqQAeRSPtVpDd3lFdG4dGTvOUW89OTDvbBgXa
Hi38y18N3gBMspCe+AlH7a2WT8zz1GegMLvaIL7sFHWcmPfBj57wAyoosi85Wy83
adkgOB7DNO/LMX11XWPha+5Pab1ofqn0XUN+xklfkZ1766pMWCnBCKpK6LrAvxWk
6kChg8lW+flq6T3MtoloQmWiKY875pd7ZuEihAthgCDjdbOoPV24dPkj0ICFMATN
pZVXd0TzT5efhIb16oxe/+OPdphNIkKl0KCdM4/sSiG8h+FKKjPYNRNsrkPdx/gl
uCIxA7AF0lH3tvi4yUIgZEd+23RTxNd+5IzMVFwntG7Kj9IeCQ0N5fIeoz4BvB7U
ZgEJAhBCtL8btKWlS9/RUTUDBUhdHZ7lxwQFnM2f6ki4RqO5cbb9QphNriEAxTvm
BXhne3sMAVi9KKf/ZsmJ5EOvBicf4qQeN2NfIGJ+HxiQAIyHj9PhVJNJK5OP+t+Q
HnIbXMw9fQ==
=Ls8l
-----END PGP MESSAGE-----
fp: 687802D4DFD8AA82EA55666CF7DADAC782D7663D
encrypted_regex: ^(data|stringData)$
version: 3.9.0

View File

@ -0,0 +1,30 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: grafana
namespace: flux-system
spec:
targetNamespace: monitoring
timeout: 5m
interval: 10m
path: ./kubernetes/thin/apps/monitoring/grafana/app
prune: true
sourceRef:
kind: GitRepository
name: home-cluster
decryption:
provider: sops
secretRef:
name: sops-gpg
dependsOn:
- name: victoria-metrics-deploys
- name: kube-prometheus-stack
postBuild:
substitute: {}
substituteFrom:
- kind: ConfigMap
name: cluster-settings
- kind: Secret
name: cluster-secrets

View File

@ -0,0 +1,85 @@
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: kube-prometheus-stack
namespace: monitoring
spec:
interval: 5m
chart:
spec:
chart: kube-prometheus-stack
version: "62.7.0"
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
values:
alertmanager:
enabled: false
grafana:
enabled: false
# enabled dashboards even with grafana being disabled
#forceDeployDashboards: true
#defaultDashboardsTimezone: America/New_York
prometheus:
enabled: false
defaultRule:
create: false
rules:
general: false
kubelet:
enabled: true
serviceMonitor:
metricRelabelings:
# Remove duplicate metrics
- sourceLabels: ["__name__"]
regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
action: keep
- sourceLabels: ["node"]
targetLabel: instance
action: replace
kubeApiServer:
serviceMonitor:
selector:
k8s-app: kube-apiserver
kubeScheduler:
service:
selector:
k8s-app: kube-scheduler
kubeControllerManager: &kubeControllerManager
service:
selector:
k8s-app: kube-controller-manager
kubeEtcd:
<<: *kubeControllerManager # etcd runs on control plane nodes
prometheus-node-exporter:
fullnameOverride: node-exporter
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels: ["__meta_kubernetes_pod_node_name"]
targetLabel: kubernetes_node
kube-state-metrics:
fullnameOverride: kube-state-metrics
metricLabelsAllowlist:
- pods=[*]
- deployments=[*]
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels: ["__meta_kubernetes_pod_node_name"]
targetLabel: kubernetes_node

View File

@ -1,5 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ./main-ip-pool.yaml - ./helm-release.yaml
- ./bgp.yaml

View File

@ -0,0 +1,29 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: kube-prometheus-stack
namespace: flux-system
spec:
targetNamespace: monitoring
timeout: 5m
interval: 10m
path: ./kubernetes/thin/apps/monitoring/kube-prometheus-stack/app
prune: true
sourceRef:
kind: GitRepository
name: home-cluster
decryption:
provider: sops
secretRef:
name: sops-gpg
dependsOn:
- name: victoria-metrics-deploys
postBuild:
substitute: {}
substituteFrom:
- kind: ConfigMap
name: cluster-settings
- kind: Secret
name: cluster-secrets

View File

@ -2,3 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ./victoria-metrics/ks.yaml - ./victoria-metrics/ks.yaml
- ./kube-prometheus-stack/ks.yaml
- ./grafana/ks.yaml