update libs
This commit is contained in:
parent
4eca792b49
commit
0b4bd3c36e
|
@ -74,7 +74,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "9216f8bb1530aeca21849d987f6475e57831d825",
|
||||
"version": "392572e1e789fc5f866fbeb6466173531a659bcc",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
},
|
||||
{
|
||||
|
@ -85,7 +85,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "9216f8bb1530aeca21849d987f6475e57831d825",
|
||||
"version": "392572e1e789fc5f866fbeb6466173531a659bcc",
|
||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||
},
|
||||
{
|
||||
|
@ -96,8 +96,8 @@
|
|||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "cd35e336d85e144afac7edd7fc19622653d0fd77",
|
||||
"sum": "LbY7vUNOhxqZY5LAF+C5/k6Na45i+YUG+uuo8NMmUAk="
|
||||
"version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881",
|
||||
"sum": "vQ1u8c5WNl7S7jmYyPk8HayvEPdIiZwKx5Sk6jdtOAE="
|
||||
},
|
||||
{
|
||||
"name": "node-mixin",
|
||||
|
@ -118,7 +118,7 @@
|
|||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "384cba98fec550052292dcc3095ed16fbf197087",
|
||||
"version": "489a9aa7b9478022c3b9c5952b8f9c70ddae5bdb",
|
||||
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
||||
},
|
||||
{
|
||||
|
@ -151,7 +151,7 @@
|
|||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "cd35e336d85e144afac7edd7fc19622653d0fd77",
|
||||
"version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
},
|
||||
{
|
||||
|
|
|
@ -358,6 +358,23 @@ spec:
|
|||
)
|
||||
) by (cluster)
|
||||
record: :node_memory_MemAvailable_bytes:sum
|
||||
- name: kubelet.rules
|
||||
rules:
|
||||
- expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- expr: |
|
||||
histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- expr: |
|
||||
histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: kube-prometheus-node-recording.rules
|
||||
rules:
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
|
||||
|
@ -1115,6 +1132,26 @@ spec:
|
|||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeReadinessFlapping
|
||||
annotations:
|
||||
message: The readiness status of node {{ $labels.node }} has changed {{ $value
|
||||
}} times in the last 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
|
||||
expr: |
|
||||
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletPlegDurationHigh
|
||||
annotations:
|
||||
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
|
||||
of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
|
||||
expr: |
|
||||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletDown
|
||||
annotations:
|
||||
message: Kubelet has disappeared from Prometheus target discovery.
|
||||
|
|
|
@ -1,13 +1,11 @@
|
|||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local pvc = k.core.v1.persistentVolumeClaim;
|
||||
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'prometheus-pushgateway/pushgateway.libsonnet') +
|
||||
(import 'k3s.libsonnet')
|
||||
// Uncomment the following imports to enable its patches
|
||||
// (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
|
||||
// (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
|
||||
// (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
|
||||
// (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
|
||||
// (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') +
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
@ -18,6 +16,16 @@ local kp =
|
|||
names: 'k8s',
|
||||
replicas: 1,
|
||||
namespaces+: ['k8up'],
|
||||
spec+: {
|
||||
retention: '7d',
|
||||
storage: {
|
||||
volumeClaimTemplate:
|
||||
pvc.new() +
|
||||
pvc.mixin.spec.withAccessModes('ReadWriteOnce') +
|
||||
pvc.mixin.spec.resources.withRequests({ storage: '10Gi' }) +
|
||||
pvc.mixin.spec.withStorageClassName('local-path'),
|
||||
},
|
||||
},
|
||||
},
|
||||
alertmanager+:: {
|
||||
replicas: 1,
|
||||
|
|
|
@ -48,6 +48,32 @@
|
|||
message: "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
},
|
||||
},
|
||||
// Alert rule: fires when a node's Ready condition keeps changing.
// The expression counts changes of the kube_node_status_condition series
// (status="true", condition="Ready") over a 15m window, summed per node,
// and triggers when that count is greater than 2.
{
|
||||
alert: 'KubeNodeReadinessFlapping',
|
||||
// The text block below contains no %(...)s placeholders, so formatting it
// against $._config leaves the expression text unchanged.
expr: |||
|
||||
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
|
||||
||| % $._config,
|
||||
// 'for' is quoted because it is a Jsonnet keyword.
'for': '15m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
// {{ ... }} are alert-template expressions, passed through verbatim.
message: 'The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.',
|
||||
},
|
||||
},
|
||||
// Alert rule: fires when the 0.99-quantile series
// node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
// is at or above 10 for the duration given in 'for'.
{
|
||||
alert: 'KubeletPlegDurationHigh',
|
||||
// The text block below contains no %(...)s placeholders, so formatting it
// against $._config leaves the expression text unchanged.
expr: |||
|
||||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
||| % $._config,
|
||||
// 'for' is quoted because it is a Jsonnet keyword.
'for': '5m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
// {{ ... }} are alert-template expressions, passed through verbatim.
message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
|
||||
},
|
||||
},
|
||||
(import '../lib/absent_alert.libsonnet') {
|
||||
componentName:: 'Kubelet',
|
||||
selector:: $._config.kubeletSelector,
|
||||
|
|
25
monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet
vendored
Normal file
25
monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Mixin fragment that contributes a 'kubelet.rules' recording-rule group.
// Both top-level fields use `+::`, so they are hidden fields merged into
// whatever `_config` / `prometheusRules` the object this is added to defines.
{
|
||||
// Default label selector substituted into the kubelet_node_name matcher below.
_config+:: {
|
||||
kubeletSelector: 'job="kubelet"',
|
||||
},
|
||||
|
||||
prometheusRules+:: {
|
||||
// `groups+` appends to any existing list of rule groups.
groups+: [
|
||||
{
|
||||
name: 'kubelet.rules',
|
||||
// Array comprehension: one recording rule is produced per entry in the
// quantile list at the end of this array.
rules: [
|
||||
{
|
||||
record: 'node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile',
|
||||
// %(quantile)s comes from the loop variable and %(kubeletSelector)s from
// $._config. The query takes the 5m rate of the PLEG relist duration
// buckets summed by (instance, le), joins on instance with
// kubelet_node_name using group_left(node), and computes the quantile.
expr: |||
|
||||
histogram_quantile(%(quantile)s, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{%(kubeletSelector)s})
|
||||
||| % ({ quantile: quantile } + $._config),
|
||||
// The same quantile string is attached as a label; all generated rules
// share one record name and differ only in this label and the expr.
labels: {
|
||||
quantile: quantile,
|
||||
},
|
||||
}
|
||||
// Quantiles are strings so they can serve as both format argument and label value.
for quantile in ['0.99', '0.9', '0.5']
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
(import 'kube_apiserver.libsonnet') +
|
||||
(import 'apps.libsonnet') +
|
||||
(import 'kube_scheduler.libsonnet') +
|
||||
(import 'node.libsonnet')
|
||||
(import 'node.libsonnet') +
|
||||
(import 'kubelet.libsonnet')
|
||||
|
|
Reference in a new issue