Add VictoriaMetrics for long-term metrics storage

This commit is contained in:
SeanOMik 2023-05-03 00:52:24 -04:00
parent 49ad416dfc
commit d5c7d7d28d
14 changed files with 1467 additions and 253 deletions

View File

@ -6,4 +6,4 @@ resources:
- ./postgresql
- ./redis
- ./minio
- ./mariadb
#- ./mariadb

View File

@ -51,6 +51,23 @@ spec:
- *grafana-host
secretName: wildcard-main-tls
sidecar:
datasources:
defaultDatasourceEnabled: false
isDefaultDatasource: false
# Add Victoria Metrics as the default datasource
additionalDataSources:
- name: Victoria
uid: victoria-metrics-server
type: prometheus
jsonData:
tlsSkipVerify: true
editable: false
url: http://victoria-metrics-server.monitoring.svc:8428
version: 1
isDefault: true
prometheus:
ingress:
enabled: true
@ -70,6 +87,22 @@ spec:
prometheusSpec:
enableAdminAPI: false
retention: 1d
remoteWrite:
- url: http://victoria-metrics-server.monitoring.svc:8428/api/v1/write
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: hostpath
selector:
matchLabels:
app.kubernetes.io/name: kube-prometheus-stack-pv
resources:
requests:
storage: 30Gi
kubeControllerManager:
enabled: true
endpoints:

View File

@ -1,6 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./prom-pv.yaml
- ./helm-repository.yaml
- ./helm-release.yaml
- ./alertmanager-config.yaml

View File

@ -0,0 +1,16 @@
# Statically provisioned hostPath volume for the kube-prometheus-stack
# Prometheus server. A claim binds to it by selecting the
# app.kubernetes.io/name label set below.
apiVersion: v1
kind: PersistentVolume
metadata:
  # PersistentVolumes are cluster-scoped, so no namespace is set here.
  name: prometheus-pv
  labels:
    app.kubernetes.io/name: kube-prometheus-stack-pv
spec:
  storageClassName: hostpath
  # Retain: the data on disk is kept when the bound claim is deleted.
  persistentVolumeReclaimPolicy: Retain
  capacity:
    storage: 30Gi
  accessModes:
    - ReadWriteOnce
  # NOTE(review): hostPath is node-local and no nodeAffinity is declared.
  # Presumably this is a single-node cluster; on multiple nodes the pod
  # could land on a node without this directory. Confirm.
  hostPath:
    path: "/mnt/MainPool/Kubernetes/prometheus"

View File

@ -5,4 +5,5 @@ resources:
- ./network_policy.yaml
- ./kube-prometheus-stack
- ./zfs-exporter
- ./alertmanager-gotify-bridge
- ./alertmanager-gotify-bridge
- ./victoria-metrics

View File

@ -1,188 +0,0 @@
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: mimir
namespace: monitoring
spec:
interval: 5m
chart:
spec:
chart: mimir-distributed
version: 2.8.x
sourceRef:
kind: HelmRepository
name: grafana-charts
namespace: flux-system
values:
global:
extraEnvFrom:
- secretRef:
name: mimir-secret
mimir:
structuredConfig:
common:
storage:
backend: s3
s3:
endpoint: minio.database:9000
access_key_id: $${S3_ACCESS_KEY}
secret_access_key: $${S3_SECRET_KEY}
insecure: true
bucket_name: mimir
compactor:
persistentVolume:
size: 20Gi
resources:
limits:
memory: 2.1Gi
requests:
cpu: 1
memory: 1.5Gi
distributor:
replicas: 2
resources:
limits:
memory: 5.7Gi
requests:
cpu: 2
memory: 4Gi
ingester:
persistentVolume:
size: 50Gi
replicas: 3
resources:
limits:
memory: 12Gi
requests:
cpu: 3.5
memory: 8Gi
topologySpreadConstraints: {}
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: target # support for enterprise.legacyLabels
operator: In
values:
- ingester
topologyKey: 'kubernetes.io/hostname'
- labelSelector:
matchExpressions:
- key: app.kubernetes.io/component
operator: In
values:
- ingester
topologyKey: 'kubernetes.io/hostname'
zoneAwareReplication:
topologyKey: 'kubernetes.io/hostname'
admin-cache:
enabled: true
replicas: 2
chunks-cache:
enabled: true
replicas: 2
index-cache:
enabled: true
replicas: 3
metadata-cache:
enabled: true
results-cache:
enabled: true
replicas: 2
minio:
enabled: false
# Deployed by kube-prometheus-stack
alertmanager:
enabled: false
overrides_exporter:
replicas: 1
resources:
limits:
memory: 128Mi
requests:
cpu: 100m
memory: 128Mi
querier:
replicas: 1
resources:
limits:
memory: 5.6Gi
requests:
cpu: 2
memory: 4Gi
query_frontend:
replicas: 1
resources:
limits:
memory: 2.8Gi
requests:
cpu: 2
memory: 2Gi
ruler:
replicas: 1
resources:
limits:
memory: 2.8Gi
requests:
cpu: 1
memory: 2Gi
store_gateway:
persistentVolume:
size: 10Gi
replicas: 3
resources:
limits:
memory: 2.1Gi
requests:
cpu: 1
memory: 1.5Gi
topologySpreadConstraints: {}
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: target # support for enterprise.legacyLabels
operator: In
values:
- store-gateway
topologyKey: 'kubernetes.io/hostname'
- labelSelector:
matchExpressions:
- key: app.kubernetes.io/component
operator: In
values:
- store-gateway
topologyKey: 'kubernetes.io/hostname'
zoneAwareReplication:
topologyKey: 'kubernetes.io/hostname'
nginx:
replicas: 1
resources:
limits:
memory: 731Mi
requests:
cpu: 1
memory: 512Mi

View File

@ -1,61 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: mimir-secret
namespace: monitoring
stringData:
S3_ACCESS_KEY: ENC[AES256_GCM,data:jfnHq3DE,iv:Ft3d/tbvCKuTDHmCXZJgYl5xVBOwIj0Zkc9+JgILDAI=,tag:5bcZBsODsA9Pi2vf5OGsHg==,type:str]
S3_SECRET_KEY: ENC[AES256_GCM,data:3WpNKx1d,iv:M5xewbvJm+U8td7kIpkPImd2gDIFfVTGVIR5BJtfoB8=,tag:X78jSBvcHbSIu6S8W8yZNA==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-05-01T22:12:24Z"
mac: ENC[AES256_GCM,data:SywFZE0Kj1lx1X1f5chgW7qycPwQvHkRz/35F/hKBLjr0UXI1T9D3IIQeNZlTrxJwSiCvm/+FxMxbF4hJBfZ61Z2jfgwDINghPkoNJothgV0dlPtFTfApgK2BfNqWffhPc3Qj4cmuQZV6kG0h05CbKL4PN89DQ/aEDPPbKI01lo=,iv:x1ZGglUJM/PT5gZgvxRR411pSFmlDkEADrd3arCqFdY=,tag:0xlalnODXYns3CpuDxt9vQ==,type:str]
pgp:
- created_at: "2023-04-07T01:57:22Z"
enc: |
-----BEGIN PGP MESSAGE-----
hQIMAzKleRwoSoixAQ/9Hi4VyrUXV7LvbCFiLbyfv314lMGwrAf+2po/4Lr1hANe
KiwpfthiNheAjNaGCG6v2C1rx2Wrr5G3+rMik/1TLWbg2u9zZU4mWO8bwJUGXKDo
/T1nl47f09UPDtQ6KiG0nPf3M0Ovmk3d63R3zpY4Q7uE4uhLNDr0KD9mp7MmRCbZ
PO++tdiZa67z9owNDh/NSnQr9Y6JwjlxlkJl5SJ76vaK/SaOi/j86mOm9CV6SQmk
cLOwiO7JxV8I4gD9jlLdYEPS+nqztX5eHLRoaXsAQrX4DdWNnOF0C2sk9nMHwQTb
W8/SVmg7TiVVL6qVCXgUCgFRXllrlGlXlfv+W6ruuZIBv2MAA1V+afl5A3/KVvE6
FDq9YrJ4XfZPCD2ZByM2386L8MiUwkfF/3uge38MT/WDU2DTT+g7jV3UQs+Awi8f
N4YBVBcp5jGTkMD0347GPfPF7kdiN/YFZ/Ws1jf/EsS6vOpKNlPn64fVJfTSfdie
rvNxksi8Y4vpwEngy38t7JRfpJniDo9iK9EwhXMChYXnWkiz/B3vMoii496B7TzO
9gKd4v7kFA6iXI+wqbYrZfOGeLZlMI99pwTatNL4fo9ABJ7JScISzTvS7p/xB6Ae
JPdlA0Tf8wP4RYz8YYRcNlfEQPZYb4kHj5r9Ei59InHzwKfq9GyKKvluS0/k3NHU
aAEJAhCVkPuIHluRLHsjVEbKbFzSJUG8p/hSSmQnfk3CT36/dJhgv3jzoL+1/Sx1
o8OwWPmNq8TuX9SaXfhfy/EGMulWgRaztxt9D+0+wgc8IOAPp+0SYUsaOa0T9+Pl
pjU1GRaK5AlT
=mItp
-----END PGP MESSAGE-----
fp: 2CC2B3631D5C3393901335DB68F95C5D753EE1E5
- created_at: "2023-04-07T01:57:22Z"
enc: |
-----BEGIN PGP MESSAGE-----
hQIMA4WLYkVpP8xtAQ/9FQGyKS1wEodU9ZVZ8kxijp6aFtMCmL/I5HBEhbSLj0P9
TVD0QwnUPZqf7zlWrAh6TspyLQdRMt9JAYZCPyLgu//FdKfBJNYeU3+aWj/lMtJ4
Twgs7NPtGbRJcpF+a4NmAOIqzKfJI+h714BLFoWrGtUmTE9/dBHh2yxADSgprY1o
/4J8aHQfaqg5JwijP3PhtRMxla4YQfhqf0JRAcmQPKUDuxT2QG/wp59Fq/665aaO
JFWiCOPBqTtEhY4ML4EYNUV+Cd7UT7LOXC+Xzuj1eEGMV1Pmqd1u1UyQKvHOOXhT
AfGeCub+ZONGfmcDcY5gEMnbSCGcQEvipA3dBIIFklgnxM00jmcJ1Ojo1+MYynpl
E1XLOaolRWinlDNXA62k8iWG33hcxHGSzkHrsQjtqrrD2PdHS1RmTJ8Hn+iuRUn6
/fGk8ZQJ7oMPsZNyfiM0OdwSXxJ4rQUtGkHHd727S4K6nXC6OLxXCzl7lYG7QKcP
RVrbFMNv01aToyNGhLmcSxUYdQ4oc+nv65rNZDsdbi34T+dlULboJDkwV6JrJ5dz
hlu3ySgijZuRD5bfpfKB2RScu2ixEijOIyk1oXBB2Dhyh1ezc3qnAw8xkGr9W2SE
roBuu95mZsIZEtfMS5hxwGyWzSCENnbkSukQhUoIjRXryly7MQgNZ5FMX+f5n3DU
aAEJAhBJcIEidIhFVqDkezzMcofKl3MlXWqkfTUV3vsjz6EpN1FwhpZ3prTexUcM
9XCx9Wq1kMpjkphWETh2lSAafyIz6R/d4zWV5IWIeDh+USYT9z0Rprp4URka4Wjx
fux0T5xDbgq5
=eiXM
-----END PGP MESSAGE-----
fp: 8DF31C9F48A24F525FFB1815FC96C52B59328E95
encrypted_regex: ^(data|stringData)$
version: 3.7.3

View File

@ -0,0 +1,53 @@
# Flux HelmRelease that installs single-node VictoriaMetrics, used as the
# long-term metrics store.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: victoria-metrics
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      chart: victoria-metrics-single
      version: 0.8.59
      sourceRef:
        kind: HelmRepository
        name: victoria-metrics-charts
        namespace: flux-system
  values:
    server:
      retentionPeriod: 6  # months
      # Other manifests address the server as
      # victoria-metrics-server.monitoring.svc:8428; presumably this override
      # is what produces that Service name. Keep them in sync.
      fullnameOverride: victoria-metrics-server
      persistentVolume:
        enabled: true
        storageClass: "hostpath"
        # Bind only to the pre-created PersistentVolume carrying this label.
        matchLabels:
          app.kubernetes.io/name: victoria-metrics-pv
        size: 32Gi
      # resources:
      #   limits:
      #     cpu: 500m
      #     memory: 512Mi
      #   requests:
      #     cpu: 500m
      #     memory: 512Mi
      # TODO: enable once Prometheus should scrape VictoriaMetrics itself.
      serviceMonitor:
        # Deploys a Prometheus-operator ServiceMonitor for the server.
        enabled: false
        # Extra labels and annotations for the ServiceMonitor object.
        extraLabels: {}
        annotations: {}
        # Scrape interval and per-scrape timeout.
        interval: 15s
        scrapeTimeout: 5s
        # The server is reached over plain HTTP on port 8428 elsewhere in
        # this repository, so scrape with http and no TLS settings.
        scheme: http
        # ServiceMonitor relabelings.
        relabelings: []

View File

@ -0,0 +1,8 @@
# Flux source for the VictoriaMetrics Helm charts. HelmReleases refer to it
# as victoria-metrics-charts in the flux-system namespace.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  namespace: flux-system
  name: victoria-metrics-charts
spec:
  url: https://victoriametrics.github.io/helm-charts
  # How often Flux re-fetches the repository index.
  interval: 1m

View File

@ -1,5 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./mimir.sops.yaml
- ./victoria-pv.yaml
- ./helm-repository.yaml
- ./helm-release.yaml

View File

@ -0,0 +1,16 @@
# Statically provisioned hostPath volume for the VictoriaMetrics server.
# A claim binds to it by selecting the app.kubernetes.io/name label set below.
apiVersion: v1
kind: PersistentVolume
metadata:
  # PersistentVolumes are cluster-scoped, so no namespace is set here.
  name: victoria-pv
  labels:
    app.kubernetes.io/name: victoria-metrics-pv
spec:
  storageClassName: hostpath
  # Retain: the data on disk is kept when the bound claim is deleted.
  persistentVolumeReclaimPolicy: Retain
  capacity:
    storage: 32Gi
  accessModes:
    - ReadWriteOnce
  # NOTE(review): hostPath is node-local and no nodeAffinity is declared.
  # Presumably this is a single-node cluster; on multiple nodes the pod
  # could land on a node without this directory. Confirm.
  hostPath:
    path: "/mnt/MainPool/Kubernetes/victoria-metrics"

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helm-release.yaml
- ./helm-release.yaml
- ./dashboard.yaml

View File

@ -15,6 +15,9 @@ TODO:
- [ ] Grafana Loki
- [ ] uptime-kuma
- [x] gotify
- [ ] Services to monitor
- [ ] Authentik
- [ ]
- [x] IRC
- [ ] Harbor
- [x] CDN (nginx filelist)