From ca446b428a1206a424bff119aeaee59a07cac1a7 Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Tue, 25 Feb 2020 22:34:26 +0100 Subject: [PATCH] update deps --- monitoring/Makefile | 4 + monitoring/jsonnetfile.lock.json | 28 +- .../manifests/alertmanager-alertmanager.yaml | 2 +- monitoring/manifests/alertmanager-secret.yaml | 2 + .../grafana-dashboardDefinitions.yaml | 1574 ----------------- monitoring/manifests/grafana-deployment.yaml | 15 +- .../manifests/prometheus-prometheus.yaml | 2 +- monitoring/manifests/prometheus-rules.yaml | 24 +- monitoring/vendor/grafana/grafana.libsonnet | 18 +- monitoring/vendor/grafana/jsonnetfile.json | 43 +- .../vendor/grafonnet/graph_panel.libsonnet | 2 + .../vendor/grafonnet/heatmap_panel.libsonnet | 2 + .../alertmanager/alertmanager.libsonnet | 6 +- .../kube-prometheus-thanos-sidecar.libsonnet | 2 +- .../prometheus/prometheus.libsonnet | 2 +- .../alerts/apps_alerts.libsonnet | 24 +- .../dashboards/dashboards.libsonnet | 2 - .../dashboards/pods.libsonnet | 195 -- .../dashboards/resources.libsonnet | 1359 +------------- .../dashboards/resources/cluster.libsonnet | 274 +++ .../resources/multi-cluster.libsonnet | 107 ++ .../dashboards/resources/namespace.libsonnet | 213 +++ .../dashboards/resources/node.libsonnet | 107 ++ .../dashboards/resources/pod.libsonnet | 169 ++ .../resources/workload-namespace.libsonnet | 338 ++++ .../dashboards/resources/workload.libsonnet | 310 ++++ .../dashboards/statefulset.libsonnet | 160 -- 27 files changed, 1628 insertions(+), 3356 deletions(-) delete mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/pods.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet create 
mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet create mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet delete mode 100644 monitoring/vendor/kubernetes-mixin/dashboards/statefulset.libsonnet diff --git a/monitoring/Makefile b/monitoring/Makefile index e33dcf1..3174ac4 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -1,3 +1,7 @@ build: docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet .PHONY: build + +update: + docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci jb update +.PHONY: update diff --git a/monitoring/jsonnetfile.lock.json b/monitoring/jsonnetfile.lock.json index 1075b8b..750c3eb 100644 --- a/monitoring/jsonnetfile.lock.json +++ b/monitoring/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "f0faa5501d936cd8c9f561bb9d1baca70eb67ab1", + "version": "52fba431b686f6a5c30d60a0bbaf9fafc14bae35", "sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0=" }, { @@ -19,8 +19,8 @@ "subdir": "grafana" } }, - "version": "539a90dbf63c812ad0194d8078dd776868a11c81", - "sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE=" + "version": "1b07a802b663f77e36fe1e518cef552ef9fbdb82", + "sum": "GliiVmOLUPmBNjvsx332UOvZj0o9VVxLFLp9u4QmmNk=" }, { "name": "grafana-builder", @@ -41,8 +41,8 @@ "subdir": "grafonnet" } }, - "version": "c459106d2d2b583dd3a83f6c75eb52abee3af764", - "sum": "CeM3LRgUCUJTolTdMnerfMPGYmhClx7gX5ajrQVEY2Y=" + "version": "db36a706bd1c87056759eacb686102133eb4740c", + "sum": "g1aMw5iYEP/Dkw1wo1pcfe7q4LIpXc4wqDZsjaCpsRc=" }, { "name": "ksonnet", @@ -63,8 +63,8 @@ "subdir": "jsonnet/kube-prometheus" } }, - "version": "8b0b0bc51435a5f7742307c86235273ab568dffe", - "sum": "NJN0f7veWXOJyM3PNDM6vJQEzpkDxOchU9EVnoSRe6E=" + "version": 
"953c5464f72594b7fde2e534b207b211f7454ec7", + "sum": "+9Clkrsv9C637n1P7pPoKXTMJTbJGgt2bhv1/1ySTuc=" }, { "name": "kube-state-metrics", @@ -74,7 +74,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "392572e1e789fc5f866fbeb6466173531a659bcc", + "version": "22d195f20a20b51cf14b5ff01bb4a200c65196da", "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA=" }, { @@ -85,7 +85,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "392572e1e789fc5f866fbeb6466173531a659bcc", + "version": "22d195f20a20b51cf14b5ff01bb4a200c65196da", "sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU=" }, { @@ -96,8 +96,8 @@ "subdir": "" } }, - "version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881", - "sum": "vQ1u8c5WNl7S7jmYyPk8HayvEPdIiZwKx5Sk6jdtOAE=" + "version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9", + "sum": "CydKHxWA9LG9w1+sjlqREHXPQTdbiTwy40rnyXfHfGE=" }, { "name": "node-mixin", @@ -107,7 +107,7 @@ "subdir": "docs/node-mixin" } }, - "version": "dcfd6104332b22d3de1afa5425b6316b7a2952c6", + "version": "ef7c05816adcb0e8923defe34e97f6afcce0a939", "sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg=" }, { @@ -118,7 +118,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "489a9aa7b9478022c3b9c5952b8f9c70ddae5bdb", + "version": "65a19421a42c69e16241eec24c66b98e4c8fa5da", "sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc=" }, { @@ -151,7 +151,7 @@ "subdir": "lib/promgrafonnet" } }, - "version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881", + "version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9", "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" }, { diff --git a/monitoring/manifests/alertmanager-alertmanager.yaml b/monitoring/manifests/alertmanager-alertmanager.yaml index bef6a0d..6df6e2b 100644 --- a/monitoring/manifests/alertmanager-alertmanager.yaml +++ b/monitoring/manifests/alertmanager-alertmanager.yaml @@ -6,7 +6,7 @@ metadata: name: main namespace: monitoring spec: - baseImage: quay.io/prometheus/alertmanager + image: 
quay.io/prometheus/alertmanager:v0.20.0 nodeSelector: kubernetes.io/os: linux replicas: 1 diff --git a/monitoring/manifests/alertmanager-secret.yaml b/monitoring/manifests/alertmanager-secret.yaml index ecd30d3..e019922 100644 --- a/monitoring/manifests/alertmanager-secret.yaml +++ b/monitoring/manifests/alertmanager-secret.yaml @@ -10,12 +10,14 @@ stringData: "resolve_timeout": "5m" "inhibit_rules": - "equal": + - "namespace" - "alertname" "source_match": "severity": "critical" "target_match_re": "severity": "warning|info" - "equal": + - "namespace" - "alertname" "source_match": "severity": "warning" diff --git a/monitoring/manifests/grafana-dashboardDefinitions.yaml b/monitoring/manifests/grafana-dashboardDefinitions.yaml index e3b2295..d58b9ad 100644 --- a/monitoring/manifests/grafana-dashboardDefinitions.yaml +++ b/monitoring/manifests/grafana-dashboardDefinitions.yaml @@ -25580,670 +25580,6 @@ items: metadata: name: grafana-dashboard-pod-total namespace: monitoring -- apiVersion: v1 - data: - pods.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "$datasource", - "enable": true, - "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", - "hide": false, - "iconColor": "rgba(215, 44, 44, 1)", - "name": "Restarts", - "showIn": 0, - "tags": [ - "restart" - ], - "type": "rows" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - 
"rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container) (container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{ container }}", - "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" - }, - { - "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cache: {{ container }}", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": 
true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container) (irate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[4m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{ container }}", - "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", 
resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RX: {{ pod }}", - "refId": "A" - }, - { - "expr": "sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{job=\"kubelet\", 
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "TX: {{ pod }}", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network I/O", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 5, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Restarts: {{ container }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": 
null, - "title": "Total Restarts Per Container", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [ - - ], 
- "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [ - - ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Pods", - "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-pods - namespace: monitoring - apiVersion: v1 data: prometheus-remote-write.json: |- @@ -31311,916 +30647,6 @@ items: metadata: name: grafana-dashboard-scheduler namespace: monitoring -- apiVersion: v1 - data: - statefulset.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - 
"thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 2, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "cores", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "CPU", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "GB", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - 
"span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Memory", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 4, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "Bps", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Network", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "100px", - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Desired Replicas", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": 
"null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 6, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Replicas of current version", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 7, - "interval": null, - "links": [ - - ], - "mappingType": 1, - 
"mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Observed Generation", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 8, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 
193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Metadata Generation", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas specified", - "refId": "A" - }, - { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "replicas created", - "refId": "B" - }, - { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "ready", - "refId": "C" - }, - { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas of current version", - "refId": "D" - }, - { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "E" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Replicas", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - 
"allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_statefulset_metadata_generation, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Name", - "multi": false, - "name": "statefulset", - "options": [ - - ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / StatefulSets", - "uid": "a31c1f46e6f727cb37c0d731a7245005", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-statefulset - namespace: monitoring - apiVersion: v1 data: workload-total.json: |- diff --git 
a/monitoring/manifests/grafana-deployment.yaml b/monitoring/manifests/grafana-deployment.yaml index 5ccf92f..927fc40 100644 --- a/monitoring/manifests/grafana-deployment.yaml +++ b/monitoring/manifests/grafana-deployment.yaml @@ -16,7 +16,8 @@ spec: app: grafana spec: containers: - - image: grafana/grafana:6.6.0 + - env: [] + image: grafana/grafana:6.6.0 name: grafana ports: - containerPort: 3000 @@ -93,9 +94,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/pod-total name: grafana-dashboard-pod-total readOnly: false - - mountPath: /grafana-dashboard-definitions/0/pods - name: grafana-dashboard-pods - readOnly: false - mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write name: grafana-dashboard-prometheus-remote-write readOnly: false @@ -108,9 +106,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/scheduler name: grafana-dashboard-scheduler readOnly: false - - mountPath: /grafana-dashboard-definitions/0/statefulset - name: grafana-dashboard-statefulset - readOnly: false - mountPath: /grafana-dashboard-definitions/0/workload-total name: grafana-dashboard-workload-total readOnly: false @@ -180,9 +175,6 @@ spec: - configMap: name: grafana-dashboard-pod-total name: grafana-dashboard-pod-total - - configMap: - name: grafana-dashboard-pods - name: grafana-dashboard-pods - configMap: name: grafana-dashboard-prometheus-remote-write name: grafana-dashboard-prometheus-remote-write @@ -195,9 +187,6 @@ spec: - configMap: name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler - - configMap: - name: grafana-dashboard-statefulset - name: grafana-dashboard-statefulset - configMap: name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total diff --git a/monitoring/manifests/prometheus-prometheus.yaml b/monitoring/manifests/prometheus-prometheus.yaml index dea5016..f5cfcd7 100644 --- a/monitoring/manifests/prometheus-prometheus.yaml +++ b/monitoring/manifests/prometheus-prometheus.yaml @@ -11,7 +11,7 @@ spec: - name: 
alertmanager-main namespace: monitoring port: web - baseImage: quay.io/prometheus/prometheus + image: quay.io/prometheus/prometheus:v2.15.2 nodeSelector: kubernetes.io/os: linux podMonitorNamespaceSelector: {} diff --git a/monitoring/manifests/prometheus-rules.yaml b/monitoring/manifests/prometheus-rules.yaml index c37db55..2c64e88 100644 --- a/monitoring/manifests/prometheus-rules.yaml +++ b/monitoring/manifests/prometheus-rules.yaml @@ -629,9 +629,15 @@ spec: matched the expected number of replicas for longer than 15 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch expr: | - kube_deployment_spec_replicas{job="kube-state-metrics"} - != - kube_deployment_status_replicas_available{job="kube-state-metrics"} + ( + kube_deployment_spec_replicas{job="kube-state-metrics"} + != + kube_deployment_status_replicas_available{job="kube-state-metrics"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[5m]) + == + 0 + ) for: 15m labels: severity: critical @@ -641,9 +647,15 @@ spec: not matched the expected number of replicas for longer than 15 minutes. 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch expr: | - kube_statefulset_status_replicas_ready{job="kube-state-metrics"} - != - kube_statefulset_status_replicas{job="kube-state-metrics"} + ( + kube_statefulset_status_replicas_ready{job="kube-state-metrics"} + != + kube_statefulset_status_replicas{job="kube-state-metrics"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m]) + == + 0 + ) for: 15m labels: severity: critical diff --git a/monitoring/vendor/grafana/grafana.libsonnet b/monitoring/vendor/grafana/grafana.libsonnet index e4c3c3a..a5e6ac8 100644 --- a/monitoring/vendor/grafana/grafana.libsonnet +++ b/monitoring/vendor/grafana/grafana.libsonnet @@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; namespace: 'default', versions+:: { - grafana: '6.4.3', + grafana: '6.6.0', }, imageRepos+:: { @@ -32,10 +32,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; config: {}, ldap: null, plugins: [], + env: [], + port: 3000, container: { requests: { cpu: '100m', memory: '100Mi' }, limits: { cpu: '200m', memory: '200Mi' }, }, + containers: [], }, }, grafanaDashboards: {}, @@ -79,7 +82,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; local service = k.core.v1.service; local servicePort = k.core.v1.service.mixin.spec.portsType; - local grafanaServiceNodePort = servicePort.newNamed('http', 3000, 'http'); + local grafanaServiceNodePort = servicePort.newNamed('http', $._config.grafana.port, 'http'); service.new('grafana', $.grafana.deployment.spec.selector.matchLabels, grafanaServiceNodePort) + service.mixin.metadata.withLabels({ app: 'grafana' }) + @@ -97,7 +100,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; local podSelector = deployment.mixin.spec.template.spec.selectorType; local env = container.envType; - local targetPort = 3000; + local targetPort = $._config.grafana.port; local portName = 
'http'; local podLabels = { app: 'grafana' }; @@ -159,15 +162,18 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; ] + if std.length($._config.grafana.config) > 0 then [configVolume] else []; - local c = + local plugins = (if std.length($._config.grafana.plugins) == 0 then [] else [env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))]); + + local c = [ container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) + - (if std.length($._config.grafana.plugins) == 0 then {} else container.withEnv([env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))])) + + container.withEnv($._config.grafana.env + plugins) + container.withVolumeMounts(volumeMounts) + container.withPorts(containerPort.newNamed(targetPort, portName)) + container.mixin.readinessProbe.httpGet.withPath('/api/health') + container.mixin.readinessProbe.httpGet.withPort(portName) + container.mixin.resources.withRequests($._config.grafana.container.requests) + - container.mixin.resources.withLimits($._config.grafana.container.limits); + container.mixin.resources.withLimits($._config.grafana.container.limits), + ] + $._config.grafana.containers; deployment.new('grafana', 1, c, podLabels) + deployment.mixin.metadata.withNamespace($._config.namespace) + diff --git a/monitoring/vendor/grafana/jsonnetfile.json b/monitoring/vendor/grafana/jsonnetfile.json index 52c8ba3..6b6ffb5 100644 --- a/monitoring/vendor/grafana/jsonnetfile.json +++ b/monitoring/vendor/grafana/jsonnetfile.json @@ -1,24 +1,25 @@ { - "dependencies": [ - { - "name": "grafonnet", - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib", - "subdir": "grafonnet" - } - }, - "version": "master" - }, - { - "name": "ksonnet", - "source": { - "git": { - "remote": "https://github.com/ksonnet/ksonnet-lib", - "subdir": "" - } - }, - "version": "master" + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib", + 
"subdir": "grafonnet" } - ] + }, + "version": "master", + "name": "grafonnet" + }, + { + "source": { + "git": { + "remote": "https://github.com/ksonnet/ksonnet-lib", + "subdir": "" + } + }, + "version": "master", + "name": "ksonnet" + } + ], + "legacyImports": true } diff --git a/monitoring/vendor/grafonnet/graph_panel.libsonnet b/monitoring/vendor/grafonnet/graph_panel.libsonnet index 3b92f5a..892ff4b 100644 --- a/monitoring/vendor/grafonnet/graph_panel.libsonnet +++ b/monitoring/vendor/grafonnet/graph_panel.libsonnet @@ -225,6 +225,7 @@ message='', noDataState='no_data', notifications=[], + alertRuleTags={}, ):: self { local it = self, _conditions:: [], @@ -238,6 +239,7 @@ noDataState: noDataState, notifications: notifications, message: message, + alertRuleTags: alertRuleTags, }, addCondition(condition):: self { _conditions+: [condition], diff --git a/monitoring/vendor/grafonnet/heatmap_panel.libsonnet b/monitoring/vendor/grafonnet/heatmap_panel.libsonnet index bf38cda..a18632f 100644 --- a/monitoring/vendor/grafonnet/heatmap_panel.libsonnet +++ b/monitoring/vendor/grafonnet/heatmap_panel.libsonnet @@ -56,6 +56,7 @@ highlightCards=true, legend_show=false, minSpan=null, + span=null, repeat=null, repeatDirection=null, tooltipDecimals=null, @@ -100,6 +101,7 @@ show: legend_show, }, [if minSpan != null then 'minSpan']: minSpan, + [if span != null then 'span']: span, [if repeat != null then 'repeat']: repeat, [if repeatDirection != null then 'repeatDirection']: repeatDirection, tooltip: { diff --git a/monitoring/vendor/kube-prometheus/alertmanager/alertmanager.libsonnet b/monitoring/vendor/kube-prometheus/alertmanager/alertmanager.libsonnet index bdbe57a..0fb1826 100644 --- a/monitoring/vendor/kube-prometheus/alertmanager/alertmanager.libsonnet +++ b/monitoring/vendor/kube-prometheus/alertmanager/alertmanager.libsonnet @@ -25,7 +25,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; target_match_re: { severity: 'warning|info', }, - equal: ['alertname'], + 
equal: ['namespace', 'alertname'], }, { source_match: { severity: 'warning', @@ -33,7 +33,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; target_match_re: { severity: 'info', }, - equal: ['alertname'], + equal: ['namespace', 'alertname'], }], route: { group_by: ['namespace'], @@ -141,7 +141,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; spec: { replicas: $._config.alertmanager.replicas, version: $._config.versions.alertmanager, - baseImage: $._config.imageRepos.alertmanager, + image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager, nodeSelector: { 'kubernetes.io/os': 'linux' }, serviceAccountName: 'alertmanager-' + $._config.alertmanager.name, securityContext: { diff --git a/monitoring/vendor/kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet b/monitoring/vendor/kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet index 07e9948..5d675c7 100644 --- a/monitoring/vendor/kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet +++ b/monitoring/vendor/kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet @@ -30,7 +30,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType; spec+: { thanos+: { version: $._config.versions.thanos, - baseImage: $._config.imageRepos.thanos, + image: $._config.imageRepos.thanos + ':' + $._config.versions.thanos, objectStorageConfig: $._config.thanos.objectStorageConfig, }, }, diff --git a/monitoring/vendor/kube-prometheus/prometheus/prometheus.libsonnet b/monitoring/vendor/kube-prometheus/prometheus/prometheus.libsonnet index 2ea5f80..a93bcb4 100644 --- a/monitoring/vendor/kube-prometheus/prometheus/prometheus.libsonnet +++ b/monitoring/vendor/kube-prometheus/prometheus/prometheus.libsonnet @@ -178,7 +178,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; spec: { replicas: p.replicas, version: $._config.versions.prometheus, - baseImage: $._config.imageRepos.prometheus, + image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus, 
serviceAccountName: 'prometheus-' + p.name, serviceMonitorSelector: {}, podMonitorSelector: {}, diff --git a/monitoring/vendor/kubernetes-mixin/alerts/apps_alerts.libsonnet b/monitoring/vendor/kubernetes-mixin/alerts/apps_alerts.libsonnet index 0a567dd..79c7e3f 100644 --- a/monitoring/vendor/kubernetes-mixin/alerts/apps_alerts.libsonnet +++ b/monitoring/vendor/kubernetes-mixin/alerts/apps_alerts.libsonnet @@ -53,9 +53,15 @@ }, { expr: ||| - kube_deployment_spec_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} - != - kube_deployment_status_replicas_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + ( + kube_deployment_spec_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + != + kube_deployment_status_replicas_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + ) and ( + changes(kube_deployment_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m]) + == + 0 + ) ||| % $._config, labels: { severity: 'critical', @@ -68,9 +74,15 @@ }, { expr: ||| - kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} - != - kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + ( + kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + != + kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} + ) and ( + changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m]) + == + 0 + ) ||| % $._config, labels: { severity: 'critical', diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/dashboards.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/dashboards.libsonnet index c00111c..51bf17e 100644 --- a/monitoring/vendor/kubernetes-mixin/dashboards/dashboards.libsonnet +++ b/monitoring/vendor/kubernetes-mixin/dashboards/dashboards.libsonnet @@ -1,8 +1,6 @@ 
(import 'network.libsonnet') + (import 'persistentvolumesusage.libsonnet') + -(import 'pods.libsonnet') + (import 'resources.libsonnet') + -(import 'statefulset.libsonnet') + (import 'apiserver.libsonnet') + (import 'controller-manager.libsonnet') + (import 'scheduler.libsonnet') + diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/pods.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/pods.libsonnet deleted file mode 100644 index f3ea9fa..0000000 --- a/monitoring/vendor/kubernetes-mixin/dashboards/pods.libsonnet +++ /dev/null @@ -1,195 +0,0 @@ -local grafana = import 'grafonnet/grafana.libsonnet'; -local annotation = grafana.annotation; -local dashboard = grafana.dashboard; -local graphPanel = grafana.graphPanel; -local prometheus = grafana.prometheus; -local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet'; -local row = grafana.row; -local singlestat = grafana.singlestat; -local template = grafana.template; -local numbersinglestat = promgrafonnet.numbersinglestat; - -{ - grafanaDashboards+:: { - 'pods.json': - local memoryRow = row.new() - .addPanel( - graphPanel.new( - 'Memory Usage', - datasource='$datasource', - min=0, - span=12, - format='bytes', - legend_rightSide=true, - legend_alignAsTable=true, - legend_current=true, - legend_avg=true, - ) - .addTarget(prometheus.target( - 'sum by(container) (container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container", container!="POD"})' % $._config, - legendFormat='Current: {{ container }}', - )) - .addTarget(prometheus.target( - 'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config, - legendFormat='Requested: {{ container }}', - )) - .addTarget(prometheus.target( - 'sum by(container) 
(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config, - legendFormat='Limit: {{ container }}', - )) - .addTarget(prometheus.target( - 'sum by(container) (container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod", container=~"$container", container!="POD"})' % $._config, - legendFormat='Cache: {{ container }}', - )) - ); - - local cpuRow = row.new() - .addPanel( - graphPanel.new( - 'CPU Usage', - datasource='$datasource', - min=0, - span=12, - legend_rightSide=true, - legend_alignAsTable=true, - legend_current=true, - legend_avg=true, - ) - .addTarget(prometheus.target( - 'sum by (container) (irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", image!="", pod="$pod", container=~"$container", container!="POD"}[4m]))' % $._config, - legendFormat='Current: {{ container }}', - )) - .addTarget(prometheus.target( - 'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config, - legendFormat='Requested: {{ container }}', - )) - .addTarget(prometheus.target( - 'sum by(container) (kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config, - legendFormat='Limit: {{ container }}', - )) - ); - - local networkRow = row.new() - .addPanel( - graphPanel.new( - 'Network I/O', - datasource='$datasource', - format='bytes', - min=0, - span=12, - legend_rightSide=true, - legend_alignAsTable=true, - legend_current=true, - legend_avg=true, - ) - .addTarget(prometheus.target( - 'sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{%(cadvisorSelector)s, 
%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config, - legendFormat='RX: {{ pod }}', - )) - .addTarget(prometheus.target( - 'sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config, - legendFormat='TX: {{ pod }}', - )) - ); - - local restartsRow = row.new() - .addPanel( - graphPanel.new( - 'Total Restarts Per Container', - datasource='$datasource', - format='short', - min=0, - span=12, - legend_rightSide=true, - legend_alignAsTable=true, - legend_current=true, - legend_avg=true, - ) - .addTarget(prometheus.target( - 'max by (container) (kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container"})' % $._config, - legendFormat='Restarts: {{ container }}', - )) - ); - - local restartAnnotation = annotation.datasource( - 'Restarts', - '$datasource', - expr='time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[2m]) > 0)' % $._config, - enable=true, - hide=false, - iconColor='rgba(215, 44, 44, 1)', - tags=['restart'], - type='rows', - builtIn=1, - ); - - dashboard.new( - '%(dashboardNamePrefix)sPods' % $._config.grafanaK8s, - time_from='now-1h', - uid=($._config.grafanaDashboardIDs['pods.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - ) - .addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(kube_pod_info, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addTemplate( 
- template.new( - 'namespace', - '$datasource', - 'label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config, - label='Namespace', - refresh='time', - sort=1, - ) - ) - .addTemplate( - template.new( - 'pod', - '$datasource', - 'label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace=~"$namespace"}, pod)' % $._config, - label='Pod', - refresh='time', - sort=1, - ) - ) - .addTemplate( - template.new( - 'container', - '$datasource', - 'label_values(kube_pod_container_info{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}, container)' % $._config, - label='Container', - refresh='time', - includeAll=true, - sort=1, - ) - ) - .addAnnotation(restartAnnotation) - .addRow(memoryRow) - .addRow(cpuRow) - .addRow(networkRow) - .addRow(restartsRow), - }, -} diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources.libsonnet index 3501ccd..10f9f7e 100644 --- a/monitoring/vendor/kubernetes-mixin/dashboards/resources.libsonnet +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources.libsonnet @@ -1,1352 +1,7 @@ -local g = import 'grafana-builder/grafana.libsonnet'; -local grafana = import 'grafonnet/grafana.libsonnet'; -local template = grafana.template; - -{ - grafanaDashboards+:: { - - local intervalTemplate = - template.new( - name='interval', - datasource='$datasource', - query='4h', - current='5m', - hide=2, - refresh=2, - includeAll=false, - sort=1 - ) + { - auto: false, - auto_count: 30, - auto_min: '10s', - skipUrlSync: false, - type: 'interval', - options: [ - { - selected: true, - text: '4h', - value: '4h', - }, - ], - }, - - local typeTemplate = - template.new( - name='type', - datasource='$datasource', - query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)', - current='deployment', - hide='', - refresh=1, - includeAll=false, - sort=0 - ) + { - auto: false, - auto_count: 30, - auto_min: '10s', - 
definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)', - skipUrlSync: false, - }, - - 'k8s-resources-cluster.json': - local tableStyles = { - namespace: { - alias: 'Namespace', - link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, - linkTooltip: 'Drill down to pods', - }, - 'Value #A': { - alias: 'Pods', - linkTooltip: 'Drill down to pods', - link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, - decimals: 0, - }, - 'Value #B': { - alias: 'Workloads', - linkTooltip: 'Drill down to workloads', - link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') }, - decimals: 0, - }, - }; - - local podWorkloadColumns = [ - 'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config, - ]; - - local networkColumns = [ - 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, - 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, - 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, - 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % 
$._config, - 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, - 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, - ]; - - local networkTableStyles = { - namespace: { - alias: 'Namespace', - link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, - linkTooltip: 'Drill down to pods', - }, - 'Value #A': { - alias: 'Current Receive Bandwidth', - unit: 'Bps', - }, - 'Value #B': { - alias: 'Current Transmit Bandwidth', - unit: 'Bps', - }, - 'Value #C': { - alias: 'Rate of Received Packets', - unit: 'pps', - }, - 'Value #D': { - alias: 'Rate of Transmitted Packets', - unit: 'pps', - }, - 'Value #E': { - alias: 'Rate of Received Packets Dropped', - unit: 'pps', - }, - 'Value #F': { - alias: 'Rate of Transmitted Packets Dropped', - unit: 'pps', - }, - }; - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']), - ).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addRow( - (g.row('Headlines') + - { - height: '100px', - showTitle: false, - }) - .addPanel( - g.panel('CPU Utilisation') + - g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config) - ) - .addPanel( - g.panel('CPU Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('CPU Limits Commitment') + - 
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Utilisation') + - g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Limits Commitment') + - g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) - ) - ) - .addRow( - g.row('CPU') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + - g.stack - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel(podWorkloadColumns + [ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) 
/ sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - ], tableStyles { - 'Value #C': { alias: 'CPU Usage' }, - 'Value #D': { alias: 'CPU Requests' }, - 'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #F': { alias: 'CPU Limits' }, - 'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Not using container_memory_usage_bytes here because that includes page cache - g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Requests') - .addPanel( - g.panel('Requests by Namespace') + - g.tablePanel(podWorkloadColumns + [ - // Not using container_memory_usage_bytes here because that includes page cache - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, - 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - ], tableStyles { - 'Value #C': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #D': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' 
}, - }) - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Current Network Usage') + - g.tablePanel( - networkColumns, - networkTableStyles - ), - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Namespace: Received') + - g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Namespace: Transmitted') + - g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets') + - g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets') + - g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by 
(namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, - - 'k8s-resources-namespace.json': - local tableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - }, - }; - - local networkColumns = [ - 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - 
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, - ]; - - local networkTableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - linkTooltip: 'Drill down to pods', - }, - 'Value #A': { - alias: 'Current Receive Bandwidth', - unit: 'Bps', - }, - 'Value #B': { - alias: 'Current Transmit Bandwidth', - unit: 'Bps', - }, - 'Value #C': { - alias: 'Rate of Received Packets', - unit: 'pps', - }, - 'Value #D': { - alias: 'Rate of Transmitted Packets', - unit: 'pps', - }, - 'Value #E': { - alias: 'Rate of Received Packets Dropped', - unit: 'pps', - }, - 'Value #F': { - alias: 'Rate of Transmitted Packets Dropped', - unit: 'pps', - }, - }; - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']), - ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') + - g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Like above, without page cache - g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, - }) - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Current Network Usage') + - g.tablePanel( - networkColumns, - networkTableStyles - ), - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets') + - 
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets') + - g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, - - 'k8s-resources-node.json': - local tableStyles = { - pod: { - alias: 'Pod', - }, - }; - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), - ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node') - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, '{{pod}}') + - 
g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Like above, without page cache - g.queryPanel('sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node", container!=""}) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node="$node"}) by (pod)' % $._config, - 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node="$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, - }) - ) - ) + { tags: $._config.grafanaK8s.dashboardTags }, - - 'k8s-resources-workloads-namespace.json': - local tableStyles = { - workload: { - alias: 'Workload', - link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workload.json') }, - }, - workload_type: { - alias: 'Workload Type', - }, - }; - - 
local networkColumns = [ - ||| - (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) - ||| % $._config, - ||| - (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) - ||| % $._config, - ||| - (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", 
workload_type="$type"}) by (workload)) - ||| % $._config, - ]; - - local networkTableStyles = { - workload: { - alias: 'Workload', - link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workload.json') }, - linkTooltip: 'Drill down to pods', - }, - workload_type: { - alias: 'Workload Type', - }, - 'Value #A': { - alias: 'Current Receive Bandwidth', - unit: 'Bps', - }, - 'Value #B': { - alias: 'Current Transmit Bandwidth', - unit: 'Bps', - }, - 'Value #C': { - alias: 'Rate of Received Packets', - unit: 'pps', - }, - 'Value #D': { - alias: 'Rate of Transmitted Packets', - unit: 'pps', - }, - 'Value #E': { - alias: 'Rate of Received Packets Dropped', - unit: 'pps', - }, - 'Value #F': { - alias: 'Rate of Transmitted Packets Dropped', - unit: 'pps', - }, - }; - - local cpuUsageQuery = ||| - sum( - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} - ) by (workload, workload_type) - ||| % $._config; - - local cpuRequestsQuery = ||| - sum( - kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} - ) by (workload, workload_type) - ||| % $._config; - - local podCountQuery = 'count(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)' % $._config; - local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits'); - - local memUsageQuery = ||| - sum( - 
container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} - ) by (workload, workload_type) - ||| % $._config; - local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes'); - local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes'); - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']), - ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') + - g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - podCountQuery, - cpuUsageQuery, - cpuRequestsQuery, - cpuUsageQuery + '/' + cpuRequestsQuery, - cpuLimitsQuery, - cpuUsageQuery + '/' + cpuLimitsQuery, - ], tableStyles { - 'Value #A': { alias: 'Running Pods', decimals: 0 }, - 'Value #B': { alias: 'CPU Usage' }, - 'Value #C': { alias: 'CPU Requests' }, - 'Value #D': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #E': { alias: 'CPU Limits' }, - 'Value #F': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage') + - g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - podCountQuery, - memUsageQuery, - memRequestsQuery, - memUsageQuery + '/' + 
memRequestsQuery, - memLimitsQuery, - memUsageQuery + '/' + memLimitsQuery, - ], tableStyles { - 'Value #A': { alias: 'Running Pods', decimals: 0 }, - 'Value #B': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #D': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #E': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #F': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Current Network Usage') + - g.tablePanel( - networkColumns, - networkTableStyles - ), - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel(||| - (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel(||| - (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Workload: Received') + - g.queryPanel(||| - (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", 
workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Workload: Transmitted') + - g.queryPanel(||| - (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets') + - g.queryPanel(||| - (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets') + - g.queryPanel(||| - (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel(||| - (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) 
mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel(||| - (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) - ||| % $._config, '{{workload}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } }, - - 'k8s-resources-workload.json': - local tableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - }, - }; - - local networkColumns = [ - ||| - (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ||| - (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", 
%(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ||| - (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ||| - (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, - ]; - - local networkTableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - }, - 'Value #A': { - alias: 'Current Receive Bandwidth', - unit: 'Bps', - }, - 'Value #B': { - alias: 'Current Transmit Bandwidth', - unit: 'Bps', - }, - 'Value #C': { - alias: 'Rate of Received Packets', - unit: 'pps', - }, - 'Value #D': { - alias: 'Rate of Transmitted Packets', - unit: 'pps', - }, - 'Value #E': { - alias: 'Rate of Received Packets Dropped', - unit: 
'pps', - }, - 'Value #F': { - alias: 'Rate of Transmitted Packets Dropped', - unit: 'pps', - }, - }; - - - local cpuUsageQuery = ||| - sum( - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"} - ) by (pod) - ||| % $._config; - - local cpuRequestsQuery = ||| - sum( - kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"} - ) by (pod) - ||| % $._config; - - local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits'); - - local memUsageQuery = ||| - sum( - container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""} - * on(namespace,pod) - group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"} - ) by (pod) - ||| % $._config; - local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes'); - local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes'); - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']), - ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') - .addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload') - .addTemplate('type', 
'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type') - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel(cpuUsageQuery, '{{pod}}') + - g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - cpuUsageQuery, - cpuRequestsQuery, - cpuUsageQuery + '/' + cpuRequestsQuery, - cpuLimitsQuery, - cpuUsageQuery + '/' + cpuLimitsQuery, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage') + - g.queryPanel(memUsageQuery, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - memUsageQuery, - memRequestsQuery, - memUsageQuery + '/' + memRequestsQuery, - memLimitsQuery, - memUsageQuery + '/' + memLimitsQuery, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Current Network Usage') + - g.tablePanel( - networkColumns, - networkTableStyles - ), - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel(||| - (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", 
workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel(||| - (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Pod: Received') + - g.queryPanel(||| - (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Average Container Bandwidth by Pod: Transmitted') + - g.queryPanel(||| - (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets') + - g.queryPanel(||| - (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) 
mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets') + - g.queryPanel(||| - (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel(||| - (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel(||| - (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) - * on (namespace,pod) - group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) - ||| % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, - - 'k8s-resources-pod.json': - local tableStyles = { - container: { - alias: 'Container', - }, - }; - - g.dashboard( - 
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']), - ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) - .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') - .addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod') - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') + - g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD"}) by (container)' % $._config, - 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - ], tableStyles { - 'Value 
#A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage') + - g.queryPanel([ - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, - 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, - 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, - ], [ - '{{container}} (RSS)', - '{{container}} (Cache)', - '{{container}} (Swap)', - ]) + - g.stack + - { yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, - 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config, - 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace", 
pod="$pod"}) by (container)' % $._config, - 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, - 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, - 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, - }) - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets') + - g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets') + - 
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)', '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, - }, -} + { - grafanaDashboards+:: if $._config.showMultiCluster then { - 'k8s-resources-multicluster.json': - local tableStyles = { - [$._config.clusterLabel]: { - alias: 'Cluster', - link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') }, - }, - }; - - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']), - ).addRow( - (g.row('Headlines') + - { - height: '100px', - showTitle: false, - }) - .addPanel( - g.panel('CPU Utilisation') + - g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config) - ) - .addPanel( - g.panel('CPU Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config) - ) - .addPanel( - g.panel('CPU Limits Commitment') + - g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config) - 
) - .addPanel( - g.panel('Memory Utilisation') + - g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) - ) - .addPanel( - g.panel('Memory Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) - ) - .addPanel( - g.panel('Memory Limits Commitment') + - g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) - ) - ) - .addRow( - g.row('CPU') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) - + { fill: 0, linewidth: 2 }, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Not using container_memory_usage_bytes here 
because that includes page cache - g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) + - { fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') }, - ) - ) - .addRow( - g.row('Memory Requests') - .addPanel( - g.panel('Requests by Namespace') + - g.tablePanel([ - // Not using container_memory_usage_bytes here because that includes page cache - 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) - ) - ) + { tags: $._config.grafanaK8s.dashboardTags }, - } else {}, -} +(import 'resources/cluster.libsonnet') + +(import 'resources/multi-cluster.libsonnet') + +(import 'resources/namespace.libsonnet') + +(import 'resources/node.libsonnet') + +(import 'resources/pod.libsonnet') + +(import 'resources/workload-namespace.libsonnet') + +(import 'resources/workload.libsonnet') \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet new file mode 100644 index 0000000..18143d2 --- /dev/null +++ 
b/monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet @@ -0,0 +1,274 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: { + local intervalTemplate = + template.new( + name='interval', + datasource='$datasource', + query='4h', + current='5m', + hide=2, + refresh=2, + includeAll=false, + sort=1 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + skipUrlSync: false, + type: 'interval', + options: [ + { + selected: true, + text: '4h', + value: '4h', + }, + ], + }, + + 'k8s-resources-cluster.json': + local tableStyles = { + namespace: { + alias: 'Namespace', + link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, + linkTooltip: 'Drill down to pods', + }, + 'Value #A': { + alias: 'Pods', + linkTooltip: 'Drill down to pods', + link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, + decimals: 0, + }, + 'Value #B': { + alias: 'Workloads', + linkTooltip: 'Drill down to workloads', + link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') }, + decimals: 0, + }, + }; + + local podWorkloadColumns = [ + 'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config, + ]; + + local networkColumns = [ + 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", 
%(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, + ]; + + local networkTableStyles = { + namespace: { + alias: 'Namespace', + link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') }, + linkTooltip: 'Drill down to pods', + }, + 'Value #A': { + alias: 'Current Receive Bandwidth', + unit: 'Bps', + }, + 'Value #B': { + alias: 'Current Transmit Bandwidth', + unit: 'Bps', + }, + 'Value #C': { + alias: 'Rate of Received Packets', + unit: 'pps', + }, + 'Value #D': { + alias: 'Rate of Transmitted Packets', + unit: 'pps', + }, + 'Value #E': { + alias: 'Rate of Received Packets Dropped', + unit: 'pps', + }, + 'Value #F': { + alias: 'Rate of Transmitted Packets Dropped', + unit: 'pps', + }, + }; + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']), + ).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) + .addRow( + (g.row('Headlines') + + { + height: '100px', + 
showTitle: false, + }) + .addPanel( + g.panel('CPU Utilisation') + + g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config) + ) + .addPanel( + g.panel('CPU Requests Commitment') + + g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config) + ) + .addPanel( + g.panel('CPU Limits Commitment') + + g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config) + ) + .addPanel( + g.panel('Memory Utilisation') + + g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) + ) + .addPanel( + g.panel('Memory Requests Commitment') + + g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) + ) + .addPanel( + g.panel('Memory Limits Commitment') + + g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config) + ) + ) + .addRow( + g.row('CPU') + .addPanel( + g.panel('CPU Usage') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + + g.stack + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel(podWorkloadColumns + [ + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % 
$._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + ], tableStyles { + 'Value #C': { alias: 'CPU Usage' }, + 'Value #D': { alias: 'CPU Requests' }, + 'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #F': { alias: 'CPU Limits' }, + 'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory') + .addPanel( + g.panel('Memory Usage (w/o cache)') + + // Not using container_memory_usage_bytes here because that includes page cache + g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Requests') + .addPanel( + g.panel('Requests by Namespace') + + g.tablePanel(podWorkloadColumns + [ + // Not using container_memory_usage_bytes here because that includes page cache + 'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, + 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + ], tableStyles { + 'Value #C': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #D': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #F': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Current Network Usage') + + g.tablePanel( + networkColumns, + networkTableStyles + ), + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Receive Bandwidth') + + g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Transmit Bandwidth') + + g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Average Container Bandwidth by Namespace: Received') + + g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Average Container Bandwidth by Namespace: Transmitted') + + g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + 
g.row('Network') + .addPanel( + g.panel('Rate of Received Packets') + + g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('pps') /* packets/sec, the same unit networkTableStyles uses for packet rates */ }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets') + + g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets Dropped') + + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets Dropped') + + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, + } +} \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet new file mode 100644 index 0000000..ebeb340 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet @@ -0,0 +1,107 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: + if $._config.showMultiCluster then { + 'k8s-resources-multicluster.json': +
local tableStyles = { + [$._config.clusterLabel]: { + alias: 'Cluster', + link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') }, + }, + }; + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']), + ).addRow( + (g.row('Headlines') + + { + height: '100px', + showTitle: false, + }) + .addPanel( + g.panel('CPU Utilisation') + + g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config) + ) + .addPanel( + g.panel('CPU Requests Commitment') + + g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config) + ) + .addPanel( + g.panel('CPU Limits Commitment') + + g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config) + ) + .addPanel( + g.panel('Memory Utilisation') + + g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) + ) + .addPanel( + g.panel('Memory Requests Commitment') + + g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) + ) + .addPanel( + g.panel('Memory Limits Commitment') + + g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config) + ) + ) + .addRow( + g.row('CPU') + .addPanel( + g.panel('CPU Usage') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) + + { fill: 0, linewidth: 2 }, + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel([ + 
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'CPU Usage' }, + 'Value #B': { alias: 'CPU Requests' }, + 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'CPU Limits' }, + 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory') + .addPanel( + g.panel('Memory Usage (w/o cache)') + + // Not using container_memory_usage_bytes here because that includes page cache + g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) + + { fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Requests') + .addPanel( + g.panel('Requests by Namespace') + + g.tablePanel([ + // Not using container_memory_usage_bytes here because that includes page cache + 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / 
sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, + }) + ) + ) + { tags: $._config.grafanaK8s.dashboardTags }, + } else {}, +} \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet new file mode 100644 index 0000000..a0ad253 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet @@ -0,0 +1,213 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: { + local intervalTemplate = + template.new( + name='interval', + datasource='$datasource', + query='4h', + current='5m', + hide=2, + refresh=2, + includeAll=false, + sort=1 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + skipUrlSync: false, + type: 'interval', + options: [ + { + selected: true, + text: '4h', + value: '4h', + }, + ], + }, + + 'k8s-resources-namespace.json': + local tableStyles = { + pod: { + alias: 'Pod', + link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, + }, + }; + + local networkColumns = [ + 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, + 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) 
by (pod)' % $._config, + 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, + 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, + 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, + 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, + ]; + + local networkTableStyles = { + pod: { + alias: 'Pod', + link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, + linkTooltip: 'Drill down to pods', + }, + 'Value #A': { + alias: 'Current Receive Bandwidth', + unit: 'Bps', + }, + 'Value #B': { + alias: 'Current Transmit Bandwidth', + unit: 'Bps', + }, + 'Value #C': { + alias: 'Rate of Received Packets', + unit: 'pps', + }, + 'Value #D': { + alias: 'Rate of Transmitted Packets', + unit: 'pps', + }, + 'Value #E': { + alias: 'Rate of Received Packets Dropped', + unit: 'pps', + }, + 'Value #F': { + alias: 'Rate of Transmitted Packets Dropped', + unit: 'pps', + }, + }; + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']), + ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) + .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') + .addRow( + g.row('CPU Usage') + .addPanel( + g.panel('CPU Usage') + + 
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') + + g.stack, + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel([ + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'CPU Usage' }, + 'Value #B': { alias: 'CPU Requests' }, + 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'CPU Limits' }, + 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory Usage') + .addPanel( + g.panel('Memory Usage (w/o cache)') + + // Like above, without page cache + g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Quota') + .addPanel( + g.panel('Memory Quota') + + g.tablePanel([ + 
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, + 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, + 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, + 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, + 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, + 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, + }) + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Current Network Usage') + + g.tablePanel( + networkColumns, + networkTableStyles + ), + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Receive Bandwidth') + +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Transmit Bandwidth') + + g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets') + + g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets') + + g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets Dropped') + + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets Dropped') + + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, + } +} \ No newline at end of file diff --git
a/monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet new file mode 100644 index 0000000..1fb0dd5 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet @@ -0,0 +1,107 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: { + local intervalTemplate = + template.new( + name='interval', + datasource='$datasource', + query='4h', + current='5m', + hide=2, + refresh=2, + includeAll=false, + sort=1 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + skipUrlSync: false, + type: 'interval', + options: [ + { + selected: true, + text: '4h', + value: '4h', + }, + ], + }, + + 'k8s-resources-node.json': + local tableStyles = { + pod: { + alias: 'Pod', + }, + }; + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), + ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) + .addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node') + .addRow( + g.row('CPU Usage') + .addPanel( + g.panel('CPU Usage') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, '{{pod}}') + + g.stack, + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel([ + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'CPU Usage' }, + 'Value #B': { alias: 'CPU Requests' }, + 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'CPU Limits' }, + 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory Usage') + .addPanel( + g.panel('Memory Usage (w/o cache)') + + // Like above, without page cache + g.queryPanel('sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node", container!=""}) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Quota') + .addPanel( + g.panel('Memory Quota') + + g.tablePanel([ + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, +
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, + 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, + 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, + 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, + }) + ) + ) + { tags: $._config.grafanaK8s.dashboardTags }, + } +} \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet new file mode 100644 index 0000000..fb32b04 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet @@ -0,0 +1,169 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: { + local intervalTemplate = + template.new( + name='interval', + datasource='$datasource', + query='4h', + current='5m', + hide=2, + refresh=2, + includeAll=false, + sort=1 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + skipUrlSync: false, + type:
'interval', + options: [ + { + selected: true, + text: '4h', + value: '4h', + }, + ], + }, + + 'k8s-resources-pod.json': + local tableStyles = { + container: { + alias: 'Container', + }, + }; + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']), + ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) + .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') + .addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod') + .addRow( + g.row('CPU Usage') + .addPanel( + g.panel('CPU Usage') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') + + g.stack, + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel([ + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD"}) by (container)' % $._config, + 'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", 
namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'CPU Usage' }, + 'Value #B': { alias: 'CPU Requests' }, + 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'CPU Limits' }, + 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory Usage') + .addPanel( + g.panel('Memory Usage') + + g.queryPanel([ + 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, + 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, + 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, + ], [ + '{{container}} (RSS)', + '{{container}} (Cache)', + '{{container}} (Swap)', + ]) + + g.stack + + { yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Quota') + .addPanel( + g.panel('Memory Quota') + + g.tablePanel([ + 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config, + 'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' %
$._config, + 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, + 'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, + 'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, + ], tableStyles { + 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, + 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, + 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, + 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, + }) + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Receive Bandwidth') + + g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Transmit Bandwidth') + + g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets') + + g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace",
pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets') + + g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets Dropped') + + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets Dropped') + + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } }, + } +} \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet new file mode 100644 index 0000000..c9cbb38 --- /dev/null +++ b/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet @@ -0,0 +1,338 @@ +local g = import 'grafana-builder/grafana.libsonnet'; +local grafana = import 'grafonnet/grafana.libsonnet'; +local template = grafana.template; + +{ + grafanaDashboards+:: { + local intervalTemplate = + template.new( + name='interval', + datasource='$datasource', + query='4h', + current='5m', + hide=2, + refresh=2, + includeAll=false, + sort=1 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + skipUrlSync: false, + type: 'interval', + options: [ + { + selected: true, + text: '4h', + value: '4h', + }, + ], + }, + + local typeTemplate = +
template.new( + name='type', + datasource='$datasource', + query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)', + current='deployment', + hide='', + refresh=1, + includeAll=false, + sort=0 + ) + { + auto: false, + auto_count: 30, + auto_min: '10s', + definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)', + skipUrlSync: false, + }, + + 'k8s-resources-workloads-namespace.json': + local tableStyles = { + workload: { + alias: 'Workload', + link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workload.json') }, + }, + workload_type: { + alias: 'Workload Type', + }, + }; + + local networkColumns = [ + ||| + (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ||| + (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ||| + (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ||| + (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", 
%(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ||| + (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ||| + (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) + ||| % $._config, + ]; + + local networkTableStyles = { + workload: { + alias: 'Workload', + link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workload.json') }, + linkTooltip: 'Drill down to pods', + }, + workload_type: { + alias: 'Workload Type', + }, + 'Value #A': { + alias: 'Current Receive Bandwidth', + unit: 'Bps', + }, + 'Value #B': { + alias: 'Current Transmit Bandwidth', + unit: 'Bps', + }, + 'Value #C': { + alias: 'Rate of Received Packets', + unit: 'pps', + }, + 'Value #D': { + alias: 'Rate of Transmitted Packets', + unit: 'pps', + }, + 'Value #E': { + alias: 'Rate of Received Packets Dropped', + unit: 'pps', + }, + 'Value #F': { + alias: 'Rate of Transmitted Packets Dropped', + unit: 'pps', + }, + }; + + local cpuUsageQuery = ||| + sum( + node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"} + * 
on(namespace,pod) + group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} + ) by (workload, workload_type) + ||| % $._config; + + local cpuRequestsQuery = ||| + sum( + kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"} + * on(namespace,pod) + group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} + ) by (workload, workload_type) + ||| % $._config; + + local podCountQuery = 'count(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)' % $._config; + local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits'); + + local memUsageQuery = ||| + sum( + container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""} + * on(namespace,pod) + group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} + ) by (workload, workload_type) + ||| % $._config; + local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes'); + local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes'); + + g.dashboard( + '%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s, + uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']), + ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2) + .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace') + .addRow( + g.row('CPU Usage') + .addPanel( + g.panel('CPU Usage') + + g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') + + g.stack, + ) + ) + .addRow( + g.row('CPU Quota') + .addPanel( + g.panel('CPU Quota') + + g.tablePanel([ + podCountQuery, 
+ cpuUsageQuery, + cpuRequestsQuery, + cpuUsageQuery + '/' + cpuRequestsQuery, + cpuLimitsQuery, + cpuUsageQuery + '/' + cpuLimitsQuery, + ], tableStyles { + 'Value #A': { alias: 'Running Pods', decimals: 0 }, + 'Value #B': { alias: 'CPU Usage' }, + 'Value #C': { alias: 'CPU Requests' }, + 'Value #D': { alias: 'CPU Requests %', unit: 'percentunit' }, + 'Value #E': { alias: 'CPU Limits' }, + 'Value #F': { alias: 'CPU Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Memory Usage') + .addPanel( + g.panel('Memory Usage') + + g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') + + g.stack + + { yaxes: g.yaxes('bytes') }, + ) + ) + .addRow( + g.row('Memory Quota') + .addPanel( + g.panel('Memory Quota') + + g.tablePanel([ + podCountQuery, + memUsageQuery, + memRequestsQuery, + memUsageQuery + '/' + memRequestsQuery, + memLimitsQuery, + memUsageQuery + '/' + memLimitsQuery, + ], tableStyles { + 'Value #A': { alias: 'Running Pods', decimals: 0 }, + 'Value #B': { alias: 'Memory Usage', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Requests', unit: 'bytes' }, + 'Value #D': { alias: 'Memory Requests %', unit: 'percentunit' }, + 'Value #E': { alias: 'Memory Limits', unit: 'bytes' }, + 'Value #F': { alias: 'Memory Limits %', unit: 'percentunit' }, + }) + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Current Network Usage') + + g.tablePanel( + networkColumns, + networkTableStyles + ), + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Receive Bandwidth') + + g.queryPanel(||| + (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + 
g.panel('Transmit Bandwidth') + + g.queryPanel(||| + (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Average Container Bandwidth by Workload: Received') + + g.queryPanel(||| + (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Average Container Bandwidth by Workload: Transmitted') + + g.queryPanel(||| + (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('Bps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets') + + g.queryPanel(||| + (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: 
g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets') + + g.queryPanel(||| + (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Received Packets Dropped') + + g.queryPanel(||| + (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + .addRow( + g.row('Network') + .addPanel( + g.panel('Rate of Transmitted Packets Dropped') + + g.queryPanel(||| + (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval]) + * on (namespace,pod) + group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) + ||| % $._config, '{{workload}}') + + g.stack + + { yaxes: g.yaxes('pps') }, + ) + ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } }, + + } +} \ No newline at end of file diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet new file mode 100644 index 0000000..aed215e --- /dev/null +++
b/monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet
@@ -0,0 +1,321 @@
+local g = import 'grafana-builder/grafana.libsonnet';
+local grafana = import 'grafonnet/grafana.libsonnet';
+local template = grafana.template;
+
+{
+  grafanaDashboards+:: {
+    // Dashboard variable `interval`: the network queries below use it as their range, `[$interval]`.
+    // Its only option is '4h'; it is appended to the dashboard's templating list at the very end.
+    local intervalTemplate =
+      template.new(
+        name='interval',
+        datasource='$datasource',
+        query='4h',
+        current='5m',
+        hide=2,
+        refresh=2,
+        includeAll=false,
+        sort=1
+      ) + {
+        auto: false,
+        auto_count: 30,
+        auto_min: '10s',
+        skipUrlSync: false,
+        type: 'interval',
+        options: [
+          {
+            selected: true,
+            text: '4h',
+            value: '4h',
+          },
+        ],
+      },
+
+    // Compute (CPU / memory) and network usage of one workload, broken down by pod.
+    'k8s-resources-workload.json':
+      // Shared table styling: the pod column links to the per-pod resources dashboard.
+      local tableStyles = {
+        pod: {
+          alias: 'Pod',
+          link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') },
+        },
+      };
+
+      // Queries for the "Current Network Usage" table, in the column order A..F styled by networkTableStyles.
+      // Each one joins the per-pod rate with mixin_pod_workload on (namespace,pod) to keep only the selected workload.
+      local networkColumns = [
+        |||
+          (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+        |||
+          (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+        |||
+          (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+        |||
+          (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+        |||
+          (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+        |||
+          (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+          * on (namespace,pod)
+          group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+        ||| % $._config,
+      ];
+
+      // Column aliases and units for the networkColumns table: byte rates in 'Bps', packet rates in 'pps'.
+      local networkTableStyles = {
+        pod: {
+          alias: 'Pod',
+          link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') },
+        },
+        'Value #A': {
+          alias: 'Current Receive Bandwidth',
+          unit: 'Bps',
+        },
+        'Value #B': {
+          alias: 'Current Transmit Bandwidth',
+          unit: 'Bps',
+        },
+        'Value #C': {
+          alias: 'Rate of Received Packets',
+          unit: 'pps',
+        },
+        'Value #D': {
+          alias: 'Rate of Transmitted Packets',
+          unit: 'pps',
+        },
+        'Value #E': {
+          alias: 'Rate of Received Packets Dropped',
+          unit: 'pps',
+        },
+        'Value #F': {
+          alias: 'Rate of Transmitted Packets Dropped',
+          unit: 'pps',
+        },
+      };
+
+
+      // Per-pod CPU and memory queries, restricted to the selected workload via the mixin_pod_workload join.
+      local cpuUsageQuery = |||
+        sum(
+          node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
+          * on(namespace,pod)
+          group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
+        ) by (pod)
+      ||| % $._config;
+
+      local cpuRequestsQuery = |||
+        sum(
+          kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}
+          * on(namespace,pod)
+          group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
+        ) by (pod)
+      ||| % $._config;
+
+      // The limits query is the requests query with the metric name rewritten ('requests' -> 'limits').
+      local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
+
+      local memUsageQuery = |||
+        sum(
+          container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}
+          * on(namespace,pod)
+          group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
+        ) by (pod)
+      ||| % $._config;
+      // Memory requests/limits reuse the CPU queries with 'cpu_cores' rewritten to 'memory_bytes'.
+      local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
+      local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
+
+      g.dashboard(
+        '%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
+        uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
+      ).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
+      .addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
+      .addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
+      .addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
+      .addRow(
+        g.row('CPU Usage')
+        .addPanel(
+          g.panel('CPU Usage') +
+          g.queryPanel(cpuUsageQuery, '{{pod}}') +
+          g.stack,
+        )
+      )
+      .addRow(
+        g.row('CPU Quota')
+        .addPanel(
+          g.panel('CPU Quota') +
+          g.tablePanel([
+            cpuUsageQuery,
+            cpuRequestsQuery,
+            cpuUsageQuery + '/' + cpuRequestsQuery,
+            cpuLimitsQuery,
+            cpuUsageQuery + '/' + cpuLimitsQuery,
+          ], tableStyles {
+            'Value #A': { alias: 'CPU Usage' },
+            'Value #B': { alias: 'CPU Requests' },
+            'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
+            'Value #D': { alias: 'CPU Limits' },
+            'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
+          })
+        )
+      )
+      .addRow(
+        g.row('Memory Usage')
+        .addPanel(
+          g.panel('Memory Usage') +
+          g.queryPanel(memUsageQuery, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('bytes') },
+        )
+      )
+      .addRow(
+        g.row('Memory Quota')
+        .addPanel(
+          g.panel('Memory Quota') +
+          g.tablePanel([
+            memUsageQuery,
+            memRequestsQuery,
+            memUsageQuery + '/' + memRequestsQuery,
+            memLimitsQuery,
+            memUsageQuery + '/' + memLimitsQuery,
+          ], tableStyles {
+            'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
+            'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
+            'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
+            'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
+            'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
+          })
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Current Network Usage') +
+          g.tablePanel(
+            networkColumns,
+            networkTableStyles
+          ),
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Receive Bandwidth') +
+          g.queryPanel(|||
+            (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('Bps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Transmit Bandwidth') +
+          g.queryPanel(|||
+            (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('Bps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Average Container Bandwidth by Pod: Received') +
+          g.queryPanel(|||
+            (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('Bps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Average Container Bandwidth by Pod: Transmitted') +
+          g.queryPanel(|||
+            (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('Bps') },
+        )
+      )
+      // The four packet-rate panels below plot packets per second, so their axis unit is 'pps' (as in networkTableStyles).
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Rate of Received Packets') +
+          g.queryPanel(|||
+            (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('pps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Rate of Transmitted Packets') +
+          g.queryPanel(|||
+            (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('pps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Rate of Received Packets Dropped') +
+          g.queryPanel(|||
+            (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('pps') },
+        )
+      )
+      .addRow(
+        g.row('Network')
+        .addPanel(
+          g.panel('Rate of Transmitted Packets Dropped') +
+          g.queryPanel(|||
+            (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
+            * on (namespace,pod)
+            group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
+          ||| % $._config, '{{pod}}') +
+          g.stack +
+          { yaxes: g.yaxes('pps') },
+        )
+      ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
+  }
+}
\ No newline at end of file
diff --git a/monitoring/vendor/kubernetes-mixin/dashboards/statefulset.libsonnet b/monitoring/vendor/kubernetes-mixin/dashboards/statefulset.libsonnet
deleted file mode 100644
index 7f91cf8..0000000
--- a/monitoring/vendor/kubernetes-mixin/dashboards/statefulset.libsonnet
+++ /dev/null
@@ -1,160 +0,0 @@
-local grafana =
import 'grafonnet/grafana.libsonnet'; -local dashboard = grafana.dashboard; -local graphPanel = grafana.graphPanel; -local prometheus = grafana.prometheus; -local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet'; -local row = grafana.row; -local singlestat = grafana.singlestat; -local template = grafana.template; -local numbersinglestat = promgrafonnet.numbersinglestat; - -{ - grafanaDashboards+:: { - 'statefulset.json': - local cpuStat = - numbersinglestat.new( - 'CPU', - 'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config, - ) - .withSpanSize(4) - .withPostfix('cores') - .withSparkline(); - - local memoryStat = - numbersinglestat.new( - 'Memory', - 'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config, - ) - .withSpanSize(4) - .withPostfix('GB') - .withSparkline(); - - local networkStat = - numbersinglestat.new( - 'Network', - 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config, - ) - .withSpanSize(4) - .withPostfix('Bps') - .withSparkline(); - - local overviewRow = - row.new() - .addPanel(cpuStat) - .addPanel(memoryStat) - .addPanel(networkStat); - - local desiredReplicasStat = numbersinglestat.new( - 'Desired Replicas', - 'max(kube_statefulset_replicas{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config, - ); - - local availableReplicasStat = numbersinglestat.new( - 'Replicas of current version', - 'min(kube_statefulset_status_replicas_current{%(kubeStateMetricsSelector)s, 
%(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config, - ); - - local observedGenerationStat = numbersinglestat.new( - 'Observed Generation', - 'max(kube_statefulset_status_observed_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config, - ); - - local metadataGenerationStat = numbersinglestat.new( - 'Metadata Generation', - 'max(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - ); - - local statsRow = - row.new(height='100px') - .addPanel(desiredReplicasStat) - .addPanel(availableReplicasStat) - .addPanel(observedGenerationStat) - .addPanel(metadataGenerationStat); - - local replicasGraph = - graphPanel.new( - 'Replicas', - datasource='$datasource', - ) - .addTarget(prometheus.target( - 'max(kube_statefulset_replicas{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - legendFormat='replicas specified', - )) - .addTarget(prometheus.target( - 'max(kube_statefulset_status_replicas{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - legendFormat='replicas created', - )) - .addTarget(prometheus.target( - 'min(kube_statefulset_status_replicas_ready{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - legendFormat='ready', - )) - .addTarget(prometheus.target( - 'min(kube_statefulset_status_replicas_current{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - 
legendFormat='replicas of current version', - )) - .addTarget(prometheus.target( - 'min(kube_statefulset_status_replicas_updated{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config, - legendFormat='updated', - )); - - local replicasRow = - row.new() - .addPanel(replicasGraph); - - dashboard.new( - '%(dashboardNamePrefix)sStatefulSets' % $._config.grafanaK8s, - time_from='now-1h', - uid=($._config.grafanaDashboardIDs['statefulset.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - ) - .addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(kube_statefulset_metadata_generation, %s)' % $._config.clusterLabel, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addTemplate( - template.new( - 'namespace', - '$datasource', - 'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}, namespace)' % $._config, - label='Namespace', - refresh='time', - sort=1, - ) - ) - .addTemplate( - template.new( - 'statefulset', - '$datasource', - 'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}, statefulset)' % $._config, - label='Name', - refresh='time', - sort=1, - ) - ) - .addRow(overviewRow) - .addRow(statsRow) - .addRow(replicasRow), - }, -}