update deps

This commit is contained in:
Tobias Brunner 2020-02-16 14:19:34 +01:00
parent 8d6fc06973
commit 2c1fb2424a
9 changed files with 209 additions and 151 deletions

View file

@ -14,7 +14,7 @@
"name": "prometheus-pushgateway",
"source": {
"git": {
"remote": "https://github.com/latchmihay/kube-prometheus-pushgateway",
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
"subdir": "prometheus-pushgateway"
}
},

View file

@ -8,7 +8,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "c94782cd55fb44df43574505db9ac1c1b7d49c00",
"version": "f0faa5501d936cd8c9f561bb9d1baca70eb67ab1",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
@ -74,7 +74,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "30c152b805781b5571ed25b914cc66c615964ffb",
"version": "9216f8bb1530aeca21849d987f6475e57831d825",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
@ -85,7 +85,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "30c152b805781b5571ed25b914cc66c615964ffb",
"version": "9216f8bb1530aeca21849d987f6475e57831d825",
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
},
{
@ -96,8 +96,8 @@
"subdir": ""
}
},
"version": "b2d7f762bd22be3ba5e7d54a1fcecfe1092f214b",
"sum": "NqrJQnQnRDzkCbrHg7L1zX8XPAzfoE4DS2XBEj6WC8g="
"version": "cd35e336d85e144afac7edd7fc19622653d0fd77",
"sum": "LbY7vUNOhxqZY5LAF+C5/k6Na45i+YUG+uuo8NMmUAk="
},
{
"name": "node-mixin",
@ -107,7 +107,7 @@
"subdir": "docs/node-mixin"
}
},
"version": "92ea3c6a3f0ea2d1d55de168e65037e2313f9940",
"version": "dcfd6104332b22d3de1afa5425b6316b7a2952c6",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
},
{
@ -118,8 +118,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "40dd13b07420a044cc1b0ca57f639c572583d9c1",
"sum": "u1YS9CVuBTcw2vks0PZbLb1gtlI/7bVGDVBZsjWFLTw="
"version": "384cba98fec550052292dcc3095ed16fbf197087",
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
},
{
"name": "prometheus-operator",
@ -136,12 +136,12 @@
"name": "prometheus-pushgateway",
"source": {
"git": {
"remote": "https://github.com/latchmihay/kube-prometheus-pushgateway",
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
"subdir": "prometheus-pushgateway"
}
},
"version": "77534af823ee6e7e889f83c04930f28666c7ab14",
"sum": "Xahez0mgGdUIUt823gC3CEELInzGBsamCvml/+AJcmg="
"version": "15e01d5677fc55b77a584f7d3e587fe9d7475a28",
"sum": "8Zwq/4wN2+ue2Vl4a+OwElAPfi5o6sD/0Wc2yWCS1eg="
},
{
"name": "promgrafonnet",
@ -151,7 +151,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "b2d7f762bd22be3ba5e7d54a1fcecfe1092f214b",
"version": "cd35e336d85e144afac7edd7fc19622653d0fd77",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{

View file

@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:

View file

@ -65,6 +65,120 @@ spec:
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate1m
- name: kube-apiserver-error
rules:
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[5m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate5m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[30m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate30m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[2h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate2h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[6h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate6h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1d
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[3d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate3d
- expr: |
sum(status_class:apiserver_request_total:rate5m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate5m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate5m
- expr: |
sum(status_class:apiserver_request_total:rate30m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate30m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate30m
- expr: |
sum(status_class:apiserver_request_total:rate1h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1h
- expr: |
sum(status_class:apiserver_request_total:rate2h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate2h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate2h
- expr: |
sum(status_class:apiserver_request_total:rate6h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate6h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate6h
- expr: |
sum(status_class:apiserver_request_total:rate1d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1d
- expr: |
sum(status_class:apiserver_request_total:rate3d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate3d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate3d
- name: kube-apiserver.rules
rules:
- expr: |
@ -656,7 +770,7 @@ spec:
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum)
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
/
sum(kube_node_status_allocatable_cpu_cores)
>
@ -670,7 +784,7 @@ spec:
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum)
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
/
sum(kube_node_status_allocatable_memory_bytes)
>
@ -799,7 +913,7 @@ spec:
for: 15m
labels:
severity: warning
- name: kube-apiserver-error
- name: kube-apiserver-error-alerts
rules:
- alert: ErrorBudgetBurn
annotations:
@ -837,118 +951,6 @@ spec:
labels:
job: apiserver
severity: warning
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[5m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate5m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[30m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate30m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[2h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate2h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[6h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate6h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1d
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[3d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate3d
- expr: |
sum(status_class:apiserver_request_total:rate5m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate5m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate5m
- expr: |
sum(status_class:apiserver_request_total:rate30m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate30m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate30m
- expr: |
sum(status_class:apiserver_request_total:rate1h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1h
- expr: |
sum(status_class:apiserver_request_total:rate2h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate2h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate2h
- expr: |
sum(status_class:apiserver_request_total:rate6h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate6h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate6h
- expr: |
sum(status_class:apiserver_request_total:rate1d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1d
- expr: |
sum(status_class:apiserver_request_total:rate3d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate3d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate3d
- name: kubernetes-system-apiserver
rules:
- alert: KubeAPILatencyHigh
@ -1053,6 +1055,27 @@ spec:
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:
severity: critical
- alert: AggregatedAPIErrors
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has
reported errors. The number of errors have increased for it in the past
five minutes. High values indicate that the availability of the service
changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
labels:
severity: warning
- alert: AggregatedAPIDown
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down.
It has not been available at least for the past five minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
expr: |
sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
for: 5m
labels:
severity: warning
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
@ -1253,7 +1276,8 @@ spec:
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
{{ printf "%.1f" $value }}% of the samples to queue {{$labels.queue}}.
{{ printf "%.1f" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue
}}{{ else }}{{ $labels.url }}{{ end }}.
summary: Prometheus fails to send samples to remote storage.
expr: |
(
@ -1273,7 +1297,8 @@ spec:
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
is {{ printf "%.1f" $value }}s behind for queue {{$labels.queue}}.
is {{ printf "%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue
}}{{ else }}{{ $labels.url }}{{ end }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see

View file

@ -14,10 +14,9 @@ local utils = import 'utils.libsonnet';
prometheusAlerts+:: {
groups+: [
{
name: 'kube-apiserver-error',
name: 'kube-apiserver-error-alerts',
rules:
$._config.SLOs.apiserver.errors.alerts +
$._config.SLOs.apiserver.errors.recordingrules,
$._config.SLOs.apiserver.errors.alerts,
},
{
name: 'kubernetes-system-apiserver',
@ -146,6 +145,31 @@ local utils = import 'utils.libsonnet';
message: 'A client certificate used to authenticate to the apiserver is expiring in less than %s.' % (utils.humanizeSeconds($._config.certExpirationCriticalSeconds)),
},
},
{
alert: 'AggregatedAPIErrors',
expr: |||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.',
},
},
{
alert: 'AggregatedAPIDown',
expr: |||
sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
message: 'An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.',
},
},
(import '../lib/absent_alert.libsonnet') {
componentName:: 'KubeAPI',
selector:: $._config.kubeApiserverSelector,

View file

@ -11,6 +11,10 @@
namespaceOvercommitFactor: 1.5,
cpuThrottlingPercent: 25,
cpuThrottlingSelector: '',
// Set this selector to select the namespaces that contain resources used for overprovisioning.
// See https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler
// for more details.
ignoringOverprovisionedWorkloadSelector: '',
},
prometheusAlerts+:: {
@ -21,7 +25,7 @@
{
alert: 'KubeCPUOvercommit',
expr: |||
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum)
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{%(ignoringOverprovisionedWorkloadSelector)s})
/
sum(kube_node_status_allocatable_cpu_cores)
>
@ -38,7 +42,7 @@
{
alert: 'KubeMemOvercommit',
expr: |||
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum)
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{%(ignoringOverprovisionedWorkloadSelector)s})
/
sum(kube_node_status_allocatable_memory_bytes)
>

View file

@ -6,6 +6,11 @@
prometheusRules+:: {
groups+: [
{
name: 'kube-apiserver-error',
rules:
$._config.SLOs.apiserver.errors.recordingrules,
},
{
name: 'kube-apiserver.rules',
rules: [

View file

@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
namespace: 'pushgateway',
versions+:: {
pushgateway: 'v0.8.0',
pushgateway: 'v1.1.0',
},
imageRepos+:: {
@ -13,17 +13,17 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
},
pushgateway+:: {
name: "prometheus-pushgateway",
port: 9091,
labels: { app: $._config.pushgateway.name},
cpu: "50m",
memory: "100Mi"
}
name: 'prometheus-pushgateway',
port: 9091,
labels: { app: $._config.pushgateway.name },
cpu: '50m',
memory: '100Mi',
},
},
pushgateway+:: {
deployment:
local deployment = k.apps.v1beta2.deployment;
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
@ -34,12 +34,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.mixin.resources.withRequests({ cpu: $._config.pushgateway.cpu, memory: $._config.pushgateway.memory }) +
container.mixin.resources.withLimits({ cpu: $._config.pushgateway.cpu, memory: $._config.pushgateway.memory }) +
container.mixin.livenessProbe.withInitialDelaySeconds(10) +
container.mixin.livenessProbe.withTimeoutSeconds(10)+
container.mixin.livenessProbe.httpGet.withPath("/#/status") +
container.mixin.livenessProbe.withTimeoutSeconds(10) +
container.mixin.livenessProbe.httpGet.withPath('/#/status') +
container.mixin.livenessProbe.httpGet.withPort($._config.pushgateway.port) +
container.mixin.readinessProbe.withInitialDelaySeconds(10) +
container.mixin.readinessProbe.withTimeoutSeconds(10)+
container.mixin.readinessProbe.httpGet.withPath("/#/status") +
container.mixin.readinessProbe.withTimeoutSeconds(10) +
container.mixin.readinessProbe.httpGet.withPath('/#/status') +
container.mixin.readinessProbe.httpGet.withPort($._config.pushgateway.port);
deployment.new($._config.pushgateway.name, 1, c, $._config.pushgateway.labels) +
@ -66,7 +66,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
name: $._config.pushgateway.name,
namespace: $._config.namespace,
labels: {
'prometheus': 'k8s',
prometheus: 'k8s',
},
},
spec: {

View file

@ -187,7 +187,7 @@
},
annotations: {
summary: 'Prometheus fails to send samples to remote storage.',
description: 'Prometheus %(prometheusName)s failed to send {{ printf "%%.1f" $value }}%% of the samples to queue {{$labels.queue}}.' % $._config,
description: 'Prometheus %(prometheusName)s failed to send {{ printf "%%.1f" $value }}%% of the samples to {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.' % $._config,
},
},
{
@ -208,7 +208,7 @@
},
annotations: {
summary: 'Prometheus remote write is behind.',
description: 'Prometheus %(prometheusName)s remote write is {{ printf "%%.1f" $value }}s behind for queue {{$labels.queue}}.' % $._config,
description: 'Prometheus %(prometheusName)s remote write is {{ printf "%%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.' % $._config,
},
},
{