From 0b4bd3c36e3d1ae86c816a409e1a17793489fbae Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Mon, 17 Feb 2020 20:57:21 +0100 Subject: [PATCH] update libs --- monitoring/jsonnetfile.lock.json | 12 +++--- monitoring/manifests/prometheus-rules.yaml | 37 +++++++++++++++++++ monitoring/monitoring.jsonnet | 20 +++++++--- .../kubernetes-mixin/alerts/kubelet.libsonnet | 26 +++++++++++++ .../kubernetes-mixin/rules/kubelet.libsonnet | 25 +++++++++++++ .../kubernetes-mixin/rules/rules.libsonnet | 3 +- 6 files changed, 110 insertions(+), 13 deletions(-) create mode 100644 monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet diff --git a/monitoring/jsonnetfile.lock.json b/monitoring/jsonnetfile.lock.json index f3848ac..1075b8b 100644 --- a/monitoring/jsonnetfile.lock.json +++ b/monitoring/jsonnetfile.lock.json @@ -74,7 +74,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "9216f8bb1530aeca21849d987f6475e57831d825", + "version": "392572e1e789fc5f866fbeb6466173531a659bcc", "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA=" }, { @@ -85,7 +85,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "9216f8bb1530aeca21849d987f6475e57831d825", + "version": "392572e1e789fc5f866fbeb6466173531a659bcc", "sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU=" }, { @@ -96,8 +96,8 @@ "subdir": "" } }, - "version": "cd35e336d85e144afac7edd7fc19622653d0fd77", - "sum": "LbY7vUNOhxqZY5LAF+C5/k6Na45i+YUG+uuo8NMmUAk=" + "version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881", + "sum": "vQ1u8c5WNl7S7jmYyPk8HayvEPdIiZwKx5Sk6jdtOAE=" }, { "name": "node-mixin", @@ -118,7 +118,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "384cba98fec550052292dcc3095ed16fbf197087", + "version": "489a9aa7b9478022c3b9c5952b8f9c70ddae5bdb", "sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc=" }, { @@ -151,7 +151,7 @@ "subdir": "lib/promgrafonnet" } }, - "version": "cd35e336d85e144afac7edd7fc19622653d0fd77", + "version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881", "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" }, { diff --git a/monitoring/manifests/prometheus-rules.yaml b/monitoring/manifests/prometheus-rules.yaml index ca36639..c37db55 100644 --- a/monitoring/manifests/prometheus-rules.yaml +++ b/monitoring/manifests/prometheus-rules.yaml @@ -358,6 +358,23 @@ spec: ) ) by (cluster) record: :node_memory_MemAvailable_bytes:sum + - name: kubelet.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: "0.99" + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: "0.9" + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: "0.5" + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - name: kube-prometheus-node-recording.rules rules: - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY @@ -1115,6 +1132,26 @@ spec: for: 15m labels: severity: warning + - alert: KubeNodeReadinessFlapping + annotations: + message: The readiness status of node {{ $labels.node }} has changed {{ $value + }} times in the last 15 minutes. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping + expr: | + sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 + for: 15m + labels: + severity: warning + - alert: KubeletPlegDurationHigh + annotations: + message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration + of {{ $value }} seconds on node {{ $labels.node }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh + expr: | + node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + for: 5m + labels: + severity: warning - alert: KubeletDown annotations: message: Kubelet has disappeared from Prometheus target discovery. diff --git a/monitoring/monitoring.jsonnet b/monitoring/monitoring.jsonnet index b15208e..b0ee157 100644 --- a/monitoring/monitoring.jsonnet +++ b/monitoring/monitoring.jsonnet @@ -1,13 +1,11 @@ +local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; +local pvc = k.core.v1.persistentVolumeClaim; + local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + (import 'prometheus-pushgateway/pushgateway.libsonnet') + (import 'k3s.libsonnet') - // Uncomment the following imports to enable its patches - // (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') + - // (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') + - // (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') + - // (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + - // (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') + + { _config+:: { namespace: 'monitoring', @@ -18,6 +16,16 @@ local kp = names: 'k8s', replicas: 1, namespaces+: ['k8up'], + spec+: { + retention: '7d', + storage: { + volumeClaimTemplate: + pvc.new() + + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + + pvc.mixin.spec.resources.withRequests({ storage: '10Gi' }) + + pvc.mixin.spec.withStorageClassName('local-path'), + }, + }, }, alertmanager+:: { replicas: 1, diff --git a/monitoring/vendor/kubernetes-mixin/alerts/kubelet.libsonnet b/monitoring/vendor/kubernetes-mixin/alerts/kubelet.libsonnet index 8d3842e..58532e9 100644 --- a/monitoring/vendor/kubernetes-mixin/alerts/kubelet.libsonnet +++ b/monitoring/vendor/kubernetes-mixin/alerts/kubelet.libsonnet @@ -48,6 +48,32 @@ message: "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.", }, }, + { + alert: 'KubeNodeReadinessFlapping', + expr: ||| + sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 + ||| % $._config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: 'The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.', + }, + }, + { + alert: 'KubeletPlegDurationHigh', + expr: ||| + node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.', + }, + }, (import '../lib/absent_alert.libsonnet') { componentName:: 'Kubelet', selector:: $._config.kubeletSelector, diff --git a/monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet b/monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet new file mode 100644 index 0000000..25d1479 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/rules/kubelet.libsonnet @@ -0,0 +1,25 @@ +{ + _config+:: { + kubeletSelector: 'job="kubelet"', + }, + + prometheusRules+:: { + groups+: [ + { + name: 'kubelet.rules', + rules: [ + { + record: 'node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile', + expr: ||| + histogram_quantile(%(quantile)s, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{%(kubeletSelector)s}) + ||| % ({ quantile: quantile } + $._config), + labels: { + quantile: quantile, + }, + } + for quantile in ['0.99', '0.9', '0.5'] + ], + }, + ], + }, +} diff --git a/monitoring/vendor/kubernetes-mixin/rules/rules.libsonnet b/monitoring/vendor/kubernetes-mixin/rules/rules.libsonnet index 874b423..4e116d4 100644 --- a/monitoring/vendor/kubernetes-mixin/rules/rules.libsonnet +++ b/monitoring/vendor/kubernetes-mixin/rules/rules.libsonnet @@ -1,4 +1,5 @@ (import 'kube_apiserver.libsonnet') + (import 'apps.libsonnet') + (import 'kube_scheduler.libsonnet') + -(import 'node.libsonnet') +(import 'node.libsonnet') + +(import 'kubelet.libsonnet')