Tobias Brunner
0108ac6084
All checks were successful
continuous-integration/drone/push Build is passing
50 lines
1.7 KiB
Plaintext
50 lines
1.7 KiB
Plaintext
{
  // Mixin configuration. `+::` merges into a `_config` supplied by whatever
  // this object is combined with, and keeps the field hidden so it is not
  // emitted when the object is manifested.
  _config+:: {
    // Label matcher interpolated into alert expressions to exclude series
    // whose `job` label matches kube-dns or coredns.
    notKubeDnsCoreDnsSelector: 'job!~"kube-dns|coredns"',
  },

  // Alerting rules for the 'kubernetes-system' group. `groups+:` appends to
  // the groups contributed by other mixed-in objects instead of replacing them.
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kubernetes-system',
        rules: [
          {
            alert: 'KubeVersionMismatch',
            // label_replace rewrites gitVersion to the part captured by
            // "(v[0-9]*.[0-9]*)", i.e. the leading "v<major>.<minor>"-style
            // prefix, so builds that differ only in the remainder of the
            // version string are counted as one version. The alert fires when
            // more than one distinct prefix is present.
            expr: |||
              count(count by (gitVersion) (label_replace(kubernetes_build_info{%(notKubeDnsCoreDnsSelector)s},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
            ||| % $._config,
            // `for` is a Jsonnet keyword, so the field name has to be quoted.
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              description: 'There are {{ $value }} different semantic versions of Kubernetes components running.',
              summary: 'Different semantic versions of Kubernetes components running.',
            },
          },
          {
            alert: 'KubeClientErrors',
            // Only responses with a 5xx status code are counted as errors.
            // Many clients issue GET requests just to check whether an object
            // exists; the resulting non-5xx "errors" (such as not-found
            // responses) are normal and expected, so they are deliberately
            // left out of the numerator.
            // Fires when more than 1% of a client's requests (per instance/job)
            // fail with 5xx over the 5m rate window.
            expr: |||
              (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
                /
              sum(rate(rest_client_requests_total[5m])) by (instance, job))
              > 0.01
            |||,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              // Double-quoted because the text itself contains single quotes
              // around the job/instance pair.
              description: "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.",
              summary: 'Kubernetes API server client is experiencing errors.',
            },
          },
        ],
      },
    ],
  },
}
|