This repository has been archived on 2023-04-02. You can view files and clone it, but cannot push or open issues or pull requests.
gitops-tbrnt/monitoring/vendor/github.com/kubernetes-monitoring/kubernetes-mixin/alerts/kube_apiserver.libsonnet

111 lines
4.2 KiB
Plaintext

local utils = import 'utils.libsonnet';
{
_config+:: {
kubeApiserverSelector: error 'must provide selector for kube-apiserver',
kubeAPILatencyWarningSeconds: 1,
certExpirationWarningSeconds: 7 * 24 * 3600,
certExpirationCriticalSeconds: 1 * 24 * 3600,
},
prometheusAlerts+:: {
groups+: [
{
name: 'kube-apiserver-slos',
rules: [
{
alert: 'KubeAPIErrorBudgetBurn',
expr: |||
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
and
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
||| % [
w.long,
w.factor,
(1 - $._config.SLOs.apiserver.target),
w.short,
w.factor,
(1 - $._config.SLOs.apiserver.target),
],
labels: {
severity: w.severity,
short: '%(short)s' % w,
long: '%(long)s' % w,
},
annotations: {
description: 'The API server is burning too much error budget.',
summary: 'The API server is burning too much error budget.',
},
'for': '%(for)s' % w,
}
for w in $._config.SLOs.apiserver.windows
],
},
{
name: 'kubernetes-system-apiserver',
rules: [
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'A client certificate used to authenticate to the apiserver is expiring in less than %s.' % (utils.humanizeSeconds($._config.certExpirationWarningSeconds)),
summary: 'Client certificate is about to expire.',
},
},
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
||| % $._config,
labels: {
severity: 'critical',
},
annotations: {
description: 'A client certificate used to authenticate to the apiserver is expiring in less than %s.' % (utils.humanizeSeconds($._config.certExpirationCriticalSeconds)),
summary: 'Client certificate is about to expire.',
},
},
{
alert: 'AggregatedAPIErrors',
expr: |||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.',
summary: 'An aggregated API has reported errors.',
},
},
{
alert: 'AggregatedAPIDown',
expr: |||
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
description: 'An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.',
summary: 'An aggregated API is down.',
},
},
(import '../lib/absent_alert.libsonnet') {
componentName:: 'KubeAPI',
selector:: $._config.kubeApiserverSelector,
},
],
},
],
},
}