update kubernetes-mixin
This commit is contained in:
parent
3d9b8c252e
commit
7c5eaaf4b3
|
@ -4,4 +4,6 @@ build:
|
||||||
|
|
||||||
update:
|
update:
|
||||||
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci jb update
|
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci jb update
|
||||||
|
sudo chown -R tobru. vendor/
|
||||||
|
make build
|
||||||
.PHONY: update
|
.PHONY: update
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "52fba431b686f6a5c30d60a0bbaf9fafc14bae35",
|
"version": "cb633418a2a67a41cd2f30d556f19e995ed8f274",
|
||||||
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -30,7 +30,7 @@
|
||||||
"subdir": "grafana-builder"
|
"subdir": "grafana-builder"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "7ac7da1a0fe165b68cdb718b2521b560d51bd1f4",
|
"version": "66eb3af2bd87c4ee18b97d5b2d366b234eef89cc",
|
||||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -74,7 +74,7 @@
|
||||||
"subdir": "jsonnet/kube-state-metrics"
|
"subdir": "jsonnet/kube-state-metrics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
|
||||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -85,7 +85,7 @@
|
||||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
|
||||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -96,8 +96,8 @@
|
||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
|
||||||
"sum": "CydKHxWA9LG9w1+sjlqREHXPQTdbiTwy40rnyXfHfGE="
|
"sum": "h+ZL4TFVFbSdlsY25mi5x1nRts3PY3JmKz3QXUgnXJk="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "node-mixin",
|
"name": "node-mixin",
|
||||||
|
@ -107,7 +107,7 @@
|
||||||
"subdir": "docs/node-mixin"
|
"subdir": "docs/node-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "ef7c05816adcb0e8923defe34e97f6afcce0a939",
|
"version": "a7c31ff7ed0990545ed4cc62690fc53563ee8860",
|
||||||
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
|
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -118,7 +118,7 @@
|
||||||
"subdir": "documentation/prometheus-mixin"
|
"subdir": "documentation/prometheus-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "65a19421a42c69e16241eec24c66b98e4c8fa5da",
|
"version": "babadf13e852654cfc87c06fc8ff0b843586a00e",
|
||||||
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -151,7 +151,7 @@
|
||||||
"subdir": "lib/promgrafonnet"
|
"subdir": "lib/promgrafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
|
||||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -209,23 +209,33 @@ spec:
|
||||||
- expr: |
|
- expr: |
|
||||||
sum by (cluster, namespace, pod, container) (
|
sum by (cluster, namespace, pod, container) (
|
||||||
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
||||||
) * on (cluster, namespace, pod) group_left(node) max by(cluster, namespace, pod, node) (kube_pod_info)
|
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
||||||
|
1, max by(cluster, namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
record: node_namespace_pod_container:container_memory_rss
|
record: node_namespace_pod_container:container_memory_rss
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
record: node_namespace_pod_container:container_memory_cache
|
record: node_namespace_pod_container:container_memory_cache
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
record: node_namespace_pod_container:container_memory_swap
|
record: node_namespace_pod_container:container_memory_swap
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
|
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
|
||||||
|
@ -253,35 +263,39 @@ spec:
|
||||||
)
|
)
|
||||||
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
|
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(
|
max by (cluster, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
|
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
|
||||||
"replicaset", "$1", "owner_name", "(.*)"
|
"replicaset", "$1", "owner_name", "(.*)"
|
||||||
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
|
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
|
||||||
|
1, max by (replicaset, namespace, owner_name) (
|
||||||
|
kube_replicaset_owner{job="kube-state-metrics"}
|
||||||
|
)
|
||||||
|
),
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (cluster, namespace, workload, pod)
|
)
|
||||||
labels:
|
labels:
|
||||||
workload_type: deployment
|
workload_type: deployment
|
||||||
record: mixin_pod_workload
|
record: mixin_pod_workload
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(
|
max by (cluster, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
|
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (cluster, namespace, workload, pod)
|
)
|
||||||
labels:
|
labels:
|
||||||
workload_type: daemonset
|
workload_type: daemonset
|
||||||
record: mixin_pod_workload
|
record: mixin_pod_workload
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(
|
max by (cluster, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
|
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (cluster, namespace, workload, pod)
|
)
|
||||||
labels:
|
labels:
|
||||||
workload_type: statefulset
|
workload_type: statefulset
|
||||||
record: mixin_pod_workload
|
record: mixin_pod_workload
|
||||||
|
@ -338,7 +352,10 @@ spec:
|
||||||
sum(min(kube_pod_info) by (cluster, node))
|
sum(min(kube_pod_info) by (cluster, node))
|
||||||
record: ':kube_pod_info_node_count:'
|
record: ':kube_pod_info_node_count:'
|
||||||
- expr: |
|
- expr: |
|
||||||
max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
|
topk by(namespace, pod) (1,
|
||||||
|
max by (node, namespace, pod) (
|
||||||
|
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
|
||||||
|
))
|
||||||
record: 'node_namespace_pod:kube_pod_info:'
|
record: 'node_namespace_pod:kube_pod_info:'
|
||||||
- expr: |
|
- expr: |
|
||||||
count by (cluster, node) (sum by (node, cpu) (
|
count by (cluster, node) (sum by (node, cpu) (
|
||||||
|
@ -1164,6 +1181,16 @@ spec:
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
- alert: KubeletPodStartUpLatencyHigh
|
||||||
|
annotations:
|
||||||
|
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||||
|
on node {{ $labels.node }}.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
- alert: KubeletDown
|
- alert: KubeletDown
|
||||||
annotations:
|
annotations:
|
||||||
message: Kubelet has disappeared from Prometheus target discovery.
|
message: Kubelet has disappeared from Prometheus target discovery.
|
||||||
|
|
|
@ -74,6 +74,19 @@
|
||||||
message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
|
message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
alert: 'KubeletPodStartUpLatencyHigh',
|
||||||
|
expr: |||
|
||||||
|
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
|
||||||
|
||| % $._config,
|
||||||
|
'for': '15m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.',
|
||||||
|
},
|
||||||
|
},
|
||||||
(import '../lib/absent_alert.libsonnet') {
|
(import '../lib/absent_alert.libsonnet') {
|
||||||
componentName:: 'Kubelet',
|
componentName:: 'Kubelet',
|
||||||
selector:: $._config.kubeletSelector,
|
selector:: $._config.kubeletSelector,
|
||||||
|
|
|
@ -64,6 +64,9 @@ local slo = import 'slo-libsonnet/slo.libsonnet';
|
||||||
// For links between grafana dashboards, you need to tell us if your grafana
|
// For links between grafana dashboards, you need to tell us if your grafana
|
||||||
// servers under some non-root path.
|
// servers under some non-root path.
|
||||||
linkPrefix: '.',
|
linkPrefix: '.',
|
||||||
|
|
||||||
|
// The default refresh time for all dashboards, default to 10s
|
||||||
|
refresh: '10s',
|
||||||
},
|
},
|
||||||
|
|
||||||
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
|
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
|
||||||
|
|
|
@ -208,6 +208,6 @@ local singlestat = grafana.singlestat;
|
||||||
.addPanel(memory)
|
.addPanel(memory)
|
||||||
.addPanel(cpu)
|
.addPanel(cpu)
|
||||||
.addPanel(goroutines)
|
.addPanel(goroutines)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,6 +180,6 @@ local singlestat = grafana.singlestat;
|
||||||
.addPanel(memory)
|
.addPanel(memory)
|
||||||
.addPanel(cpu)
|
.addPanel(cpu)
|
||||||
.addPanel(goroutines)
|
.addPanel(goroutines)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -413,6 +413,6 @@ local singlestat = grafana.singlestat;
|
||||||
.addPanel(memory)
|
.addPanel(memory)
|
||||||
.addPanel(cpu)
|
.addPanel(cpu)
|
||||||
.addPanel(goroutines)
|
.addPanel(goroutines)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -343,7 +343,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
tags=($._config.grafanaK8s.dashboardTags),
|
tags=($._config.grafanaK8s.dashboardTags),
|
||||||
editable=true,
|
editable=true,
|
||||||
schemaVersion=18,
|
schemaVersion=18,
|
||||||
refresh='30s',
|
refresh=($._config.grafanaK8s.refresh),
|
||||||
time_from='now-1h',
|
time_from='now-1h',
|
||||||
time_to='now',
|
time_to='now',
|
||||||
)
|
)
|
||||||
|
|
|
@ -345,7 +345,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
tags=($._config.grafanaK8s.dashboardTags),
|
tags=($._config.grafanaK8s.dashboardTags),
|
||||||
editable=true,
|
editable=true,
|
||||||
schemaVersion=18,
|
schemaVersion=18,
|
||||||
refresh='30s',
|
refresh=($._config.grafanaK8s.refresh),
|
||||||
time_from='now-1h',
|
time_from='now-1h',
|
||||||
time_to='now',
|
time_to='now',
|
||||||
)
|
)
|
||||||
|
|
|
@ -373,7 +373,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
tags=($._config.grafanaK8s.dashboardTags),
|
tags=($._config.grafanaK8s.dashboardTags),
|
||||||
editable=true,
|
editable=true,
|
||||||
schemaVersion=18,
|
schemaVersion=18,
|
||||||
refresh='30s',
|
refresh=($._config.grafanaK8s.refresh),
|
||||||
time_from='now-1h',
|
time_from='now-1h',
|
||||||
time_to='now',
|
time_to='now',
|
||||||
)
|
)
|
||||||
|
|
|
@ -242,7 +242,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
tags=($._config.grafanaK8s.dashboardTags),
|
tags=($._config.grafanaK8s.dashboardTags),
|
||||||
editable=true,
|
editable=true,
|
||||||
schemaVersion=18,
|
schemaVersion=18,
|
||||||
refresh='30s',
|
refresh=($._config.grafanaK8s.refresh),
|
||||||
time_from='now-1h',
|
time_from='now-1h',
|
||||||
time_to='now',
|
time_to='now',
|
||||||
)
|
)
|
||||||
|
|
|
@ -257,7 +257,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
tags=($._config.grafanaK8s.dashboardTags),
|
tags=($._config.grafanaK8s.dashboardTags),
|
||||||
editable=true,
|
editable=true,
|
||||||
schemaVersion=18,
|
schemaVersion=18,
|
||||||
refresh='30s',
|
refresh=($._config.grafanaK8s.refresh),
|
||||||
time_from='now-1h',
|
time_from='now-1h',
|
||||||
time_to='now',
|
time_to='now',
|
||||||
)
|
)
|
||||||
|
|
|
@ -166,6 +166,6 @@ local gauge = promgrafonnet.gauge;
|
||||||
row.new()
|
row.new()
|
||||||
.addPanel(inodesGraph)
|
.addPanel(inodesGraph)
|
||||||
.addPanel(inodeGauge)
|
.addPanel(inodeGauge)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -186,6 +186,6 @@ local singlestat = grafana.singlestat;
|
||||||
.addPanel(memory)
|
.addPanel(memory)
|
||||||
.addPanel(cpu)
|
.addPanel(cpu)
|
||||||
.addPanel(goroutines)
|
.addPanel(goroutines)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
(import 'resources/node.libsonnet') +
|
(import 'resources/node.libsonnet') +
|
||||||
(import 'resources/pod.libsonnet') +
|
(import 'resources/pod.libsonnet') +
|
||||||
(import 'resources/workload-namespace.libsonnet') +
|
(import 'resources/workload-namespace.libsonnet') +
|
||||||
(import 'resources/workload.libsonnet')
|
(import 'resources/workload.libsonnet')
|
||||||
|
|
|
@ -3,272 +3,289 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
},
|
||||||
|
|
||||||
|
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(node_cpu_seconds_total, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=2,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
'k8s-resources-cluster.json':
|
||||||
|
local tableStyles = {
|
||||||
|
namespace: {
|
||||||
|
alias: 'Namespace',
|
||||||
|
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||||
|
linkTooltip: 'Drill down to pods',
|
||||||
},
|
},
|
||||||
|
'Value #A': {
|
||||||
|
alias: 'Pods',
|
||||||
|
linkTooltip: 'Drill down to pods',
|
||||||
|
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||||
|
decimals: 0,
|
||||||
|
},
|
||||||
|
'Value #B': {
|
||||||
|
alias: 'Workloads',
|
||||||
|
linkTooltip: 'Drill down to workloads',
|
||||||
|
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
|
||||||
|
decimals: 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
'k8s-resources-cluster.json':
|
|
||||||
local tableStyles = {
|
|
||||||
namespace: {
|
|
||||||
alias: 'Namespace',
|
|
||||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
|
||||||
linkTooltip: 'Drill down to pods',
|
|
||||||
},
|
|
||||||
'Value #A': {
|
|
||||||
alias: 'Pods',
|
|
||||||
linkTooltip: 'Drill down to pods',
|
|
||||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
|
||||||
decimals: 0,
|
|
||||||
},
|
|
||||||
'Value #B': {
|
|
||||||
alias: 'Workloads',
|
|
||||||
linkTooltip: 'Drill down to workloads',
|
|
||||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
|
|
||||||
decimals: 0,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
local podWorkloadColumns = [
|
local podWorkloadColumns = [
|
||||||
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
|
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
|
||||||
];
|
];
|
||||||
|
|
||||||
local networkColumns = [
|
local networkColumns = [
|
||||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||||
];
|
];
|
||||||
|
|
||||||
local networkTableStyles = {
|
local networkTableStyles = {
|
||||||
namespace: {
|
namespace: {
|
||||||
alias: 'Namespace',
|
alias: 'Namespace',
|
||||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||||
linkTooltip: 'Drill down to pods',
|
linkTooltip: 'Drill down to pods',
|
||||||
},
|
},
|
||||||
'Value #A': {
|
'Value #A': {
|
||||||
alias: 'Current Receive Bandwidth',
|
alias: 'Current Receive Bandwidth',
|
||||||
unit: 'Bps',
|
unit: 'Bps',
|
||||||
},
|
},
|
||||||
'Value #B': {
|
'Value #B': {
|
||||||
alias: 'Current Transmit Bandwidth',
|
alias: 'Current Transmit Bandwidth',
|
||||||
unit: 'Bps',
|
unit: 'Bps',
|
||||||
},
|
},
|
||||||
'Value #C': {
|
'Value #C': {
|
||||||
alias: 'Rate of Received Packets',
|
alias: 'Rate of Received Packets',
|
||||||
unit: 'pps',
|
unit: 'pps',
|
||||||
},
|
},
|
||||||
'Value #D': {
|
'Value #D': {
|
||||||
alias: 'Rate of Transmitted Packets',
|
alias: 'Rate of Transmitted Packets',
|
||||||
unit: 'pps',
|
unit: 'pps',
|
||||||
},
|
},
|
||||||
'Value #E': {
|
'Value #E': {
|
||||||
alias: 'Rate of Received Packets Dropped',
|
alias: 'Rate of Received Packets Dropped',
|
||||||
unit: 'pps',
|
unit: 'pps',
|
||||||
},
|
},
|
||||||
'Value #F': {
|
'Value #F': {
|
||||||
alias: 'Rate of Transmitted Packets Dropped',
|
alias: 'Rate of Transmitted Packets Dropped',
|
||||||
unit: 'pps',
|
unit: 'pps',
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
|
||||||
).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||||
.addRow(
|
.addRow(
|
||||||
(g.row('Headlines') +
|
(g.row('Headlines') +
|
||||||
{
|
{
|
||||||
height: '100px',
|
height: '100px',
|
||||||
showTitle: false,
|
showTitle: false,
|
||||||
})
|
})
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Utilisation') +
|
g.panel('CPU Utilisation') +
|
||||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config)
|
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$interval]))' % $._config)
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('CPU Requests Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('CPU Limits Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Utilisation') +
|
|
||||||
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Requests Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Limits Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
.addPanel(
|
||||||
g.row('CPU')
|
g.panel('CPU Requests Commitment') +
|
||||||
.addPanel(
|
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
||||||
g.panel('CPU Usage') +
|
|
||||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
|
|
||||||
g.stack
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
.addPanel(
|
||||||
g.row('CPU Quota')
|
g.panel('CPU Limits Commitment') +
|
||||||
.addPanel(
|
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
||||||
g.panel('CPU Quota') +
|
|
||||||
g.tablePanel(podWorkloadColumns + [
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
], tableStyles {
|
|
||||||
'Value #C': { alias: 'CPU Usage' },
|
|
||||||
'Value #D': { alias: 'CPU Requests' },
|
|
||||||
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
|
|
||||||
'Value #F': { alias: 'CPU Limits' },
|
|
||||||
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
|
|
||||||
})
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
.addPanel(
|
||||||
g.row('Memory')
|
g.panel('Memory Utilisation') +
|
||||||
.addPanel(
|
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||||
g.panel('Memory Usage (w/o cache)') +
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Requests Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Limits Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
g.row('CPU')
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Usage') +
|
||||||
|
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
|
g.stack
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
g.row('CPU Quota')
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Quota') +
|
||||||
|
g.tablePanel(podWorkloadColumns + [
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
], tableStyles {
|
||||||
|
'Value #C': { alias: 'CPU Usage' },
|
||||||
|
'Value #D': { alias: 'CPU Requests' },
|
||||||
|
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||||
|
'Value #F': { alias: 'CPU Limits' },
|
||||||
|
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||||
|
})
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
g.row('Memory')
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Usage (w/o cache)') +
|
||||||
|
// Not using container_memory_usage_bytes here because that includes page cache
|
||||||
|
g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
|
g.stack +
|
||||||
|
{ yaxes: g.yaxes('bytes') },
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
g.row('Memory Requests')
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Requests by Namespace') +
|
||||||
|
g.tablePanel(podWorkloadColumns + [
|
||||||
// Not using container_memory_usage_bytes here because that includes page cache
|
// Not using container_memory_usage_bytes here because that includes page cache
|
||||||
g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') +
|
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
|
||||||
g.stack +
|
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
{ yaxes: g.yaxes('bytes') },
|
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
)
|
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||||
|
], tableStyles {
|
||||||
|
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
|
||||||
|
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
|
||||||
|
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||||
|
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
|
||||||
|
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||||
|
})
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Memory Requests')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Requests by Namespace') +
|
.addPanel(
|
||||||
g.tablePanel(podWorkloadColumns + [
|
g.panel('Current Network Usage') +
|
||||||
// Not using container_memory_usage_bytes here because that includes page cache
|
g.tablePanel(
|
||||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
|
networkColumns,
|
||||||
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
networkTableStyles
|
||||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
),
|
||||||
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
|
||||||
], tableStyles {
|
|
||||||
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
|
|
||||||
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
|
|
||||||
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
|
|
||||||
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
|
|
||||||
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
|
|
||||||
})
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Current Network Usage') +
|
.addPanel(
|
||||||
g.tablePanel(
|
g.panel('Receive Bandwidth') +
|
||||||
networkColumns,
|
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
networkTableStyles
|
g.stack +
|
||||||
),
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Receive Bandwidth') +
|
.addPanel(
|
||||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Transmit Bandwidth') +
|
||||||
g.stack +
|
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Transmit Bandwidth') +
|
.addPanel(
|
||||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Average Container Bandwidth by Namespace: Received') +
|
||||||
g.stack +
|
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Average Container Bandwidth by Namespace: Received') +
|
.addPanel(
|
||||||
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
|
||||||
g.stack +
|
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
|
.addPanel(
|
||||||
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Rate of Received Packets') +
|
||||||
g.stack +
|
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Rate of Received Packets') +
|
.addPanel(
|
||||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Rate of Transmitted Packets') +
|
||||||
g.stack +
|
// The transmit panel reads the transmit counter; it previously repeated the receive query.
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Rate of Transmitted Packets') +
|
.addPanel(
|
||||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Rate of Received Packets Dropped') +
|
||||||
g.stack +
|
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
)
|
||||||
g.row('Network')
|
.addRow(
|
||||||
.addPanel(
|
g.row('Network')
|
||||||
g.panel('Rate of Received Packets Dropped') +
|
.addPanel(
|
||||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
g.panel('Rate of Transmitted Packets Dropped') +
|
||||||
g.stack +
|
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
g.stack +
|
||||||
)
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addRow(
|
) + {
|
||||||
g.row('Network')
|
tags: $._config.grafanaK8s.dashboardTags,
|
||||||
.addPanel(
|
templating+: { list+: [intervalTemplate, clusterTemplate] },
|
||||||
g.panel('Rate of Transmitted Packets Dropped') +
|
refresh: $._config.grafanaK8s.refresh,
|
||||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
},
|
||||||
g.stack +
|
},
|
||||||
{ yaxes: g.yaxes('Bps') },
|
}
|
||||||
)
|
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -3,105 +3,105 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+::
|
grafanaDashboards+::
|
||||||
if $._config.showMultiCluster then {
|
if $._config.showMultiCluster then {
|
||||||
'k8s-resources-multicluster.json':
|
'k8s-resources-multicluster.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
[$._config.clusterLabel]: {
|
[$._config.clusterLabel]: {
|
||||||
alias: 'Cluster',
|
alias: 'Cluster',
|
||||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') },
|
link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') },
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']),
|
||||||
).addRow(
|
).addRow(
|
||||||
(g.row('Headlines') +
|
(g.row('Headlines') +
|
||||||
{
|
{
|
||||||
height: '100px',
|
height: '100px',
|
||||||
showTitle: false,
|
showTitle: false,
|
||||||
})
|
})
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Utilisation') +
|
g.panel('CPU Utilisation') +
|
||||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config)
|
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Requests Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Limits Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Utilisation') +
|
||||||
|
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Requests Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Limits Commitment') +
|
||||||
|
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addRow(
|
||||||
g.panel('CPU Requests Commitment') +
|
g.row('CPU')
|
||||||
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
.addPanel(
|
||||||
|
g.panel('CPU Usage') +
|
||||||
|
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
|
||||||
|
+ { fill: 0, linewidth: 2 },
|
||||||
|
)
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addRow(
|
||||||
g.panel('CPU Limits Commitment') +
|
g.row('CPU Quota')
|
||||||
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
.addPanel(
|
||||||
|
g.panel('CPU Quota') +
|
||||||
|
g.tablePanel([
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||||
|
], tableStyles {
|
||||||
|
'Value #A': { alias: 'CPU Usage' },
|
||||||
|
'Value #B': { alias: 'CPU Requests' },
|
||||||
|
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||||
|
'Value #D': { alias: 'CPU Limits' },
|
||||||
|
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||||
|
})
|
||||||
|
)
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addRow(
|
||||||
g.panel('Memory Utilisation') +
|
g.row('Memory')
|
||||||
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
.addPanel(
|
||||||
)
|
g.panel('Memory Usage (w/o cache)') +
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Requests Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Limits Commitment') +
|
|
||||||
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.addRow(
|
|
||||||
g.row('CPU')
|
|
||||||
.addPanel(
|
|
||||||
g.panel('CPU Usage') +
|
|
||||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
|
|
||||||
+ { fill: 0, linewidth: 2 },
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.addRow(
|
|
||||||
g.row('CPU Quota')
|
|
||||||
.addPanel(
|
|
||||||
g.panel('CPU Quota') +
|
|
||||||
g.tablePanel([
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
|
|
||||||
'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
|
||||||
'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
|
||||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
|
||||||
], tableStyles {
|
|
||||||
'Value #A': { alias: 'CPU Usage' },
|
|
||||||
'Value #B': { alias: 'CPU Requests' },
|
|
||||||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
|
||||||
'Value #D': { alias: 'CPU Limits' },
|
|
||||||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
|
||||||
})
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.addRow(
|
|
||||||
g.row('Memory')
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Memory Usage (w/o cache)') +
|
|
||||||
// Not using container_memory_usage_bytes here because that includes page cache
|
|
||||||
g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
|
|
||||||
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.addRow(
|
|
||||||
g.row('Memory Requests')
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Requests by Namespace') +
|
|
||||||
g.tablePanel([
|
|
||||||
// Not using container_memory_usage_bytes here because that includes page cache
|
// Not using container_memory_usage_bytes here because that includes page cache
|
||||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config,
|
g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
|
||||||
'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
|
||||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
)
|
||||||
'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
|
||||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
|
||||||
], tableStyles {
|
|
||||||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
|
||||||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
|
||||||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
|
||||||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
|
||||||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
|
||||||
})
|
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
.addRow(
|
||||||
} else {},
|
g.row('Memory Requests')
|
||||||
}
|
.addPanel(
|
||||||
|
g.panel('Requests by Namespace') +
|
||||||
|
g.tablePanel([
|
||||||
|
// Not using container_memory_usage_bytes here because that includes page cache
|
||||||
|
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||||
|
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||||
|
], tableStyles {
|
||||||
|
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
||||||
|
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
||||||
|
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||||
|
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
||||||
|
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||||
|
})
|
||||||
|
)
|
||||||
|
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh },
|
||||||
|
} else {},
|
||||||
|
}
|
||||||
|
|
|
@ -3,33 +3,56 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
'k8s-resources-namespace.json':
|
// Dashboard variable `cluster`: lists the values of the configured cluster
// label found on kube_pod_info. `hide` is '2' unless $._config.showMultiCluster
// is true (presumably Grafana's "hide variable" mode; confirm against grafonnet).
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
// Dashboard variable `namespace`: namespaces seen on kube_pod_info for the
// currently selected $cluster.
local namespaceTemplate =
|
||||||
|
template.new(
|
||||||
|
name='namespace',
|
||||||
|
datasource='$datasource',
|
||||||
|
// Format with the whole config object so the named %(clusterLabel)s placeholder is resolved by key.
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
'k8s-resources-namespace.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
pod: {
|
pod: {
|
||||||
alias: 'Pod',
|
alias: 'Pod',
|
||||||
|
@ -78,17 +101,75 @@ local template = grafana.template;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Per-pod CPU usage in the selected cluster/namespace.
local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;
|
||||||
|
|
||||||
|
// Per-pod working-set memory; container!="" excludes series with an empty container label.
local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config;
|
||||||
|
|
||||||
|
// Hard resource-quota value for requests.cpu in the selected namespace.
// The three queries below are derived from this one by swapping the resource name.
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
|
||||||
|
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
|
||||||
|
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
|
||||||
|
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
|
||||||
|
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
|
||||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
)
|
||||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
// Headlines row: stat panels showing namespace CPU/memory usage as a fraction
// of the namespace's requests and limits. Numerator and denominator must be
// filtered by the same cluster and namespace, otherwise the ratio mixes clusters.
.addRow(
|
||||||
|
(g.row('Headlines') +
|
||||||
|
{
|
||||||
|
height: '100px',
|
||||||
|
showTitle: false,
|
||||||
|
})
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Utilisation (from requests)') +
|
||||||
|
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Utilisation (from limits)') +
|
||||||
|
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Utilization (from requests)') +
|
||||||
|
g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||||
|
)
|
||||||
|
.addPanel(
|
||||||
|
g.panel('Memory Utilisation (from limits)') +
|
||||||
|
g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||||
|
)
|
||||||
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('CPU Usage')
|
g.row('CPU Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Usage') +
|
g.panel('CPU Usage') +
|
||||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') +
|
g.queryPanel([
|
||||||
g.stack,
|
cpuUsageQuery,
|
||||||
|
cpuQuotaRequestsQuery,
|
||||||
|
cpuQuotaLimitsQuery,
|
||||||
|
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
|
||||||
|
g.stack + {
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'quota - requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'quota - limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -115,9 +196,37 @@ local template = grafana.template;
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Usage (w/o cache)') +
|
g.panel('Memory Usage (w/o cache)') +
|
||||||
// Like above, without page cache
|
// Like above, without page cache
|
||||||
g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') +
|
g.queryPanel([
|
||||||
|
memoryUsageQuery,
|
||||||
|
memoryQuotaRequestsQuery,
|
||||||
|
memoryQuotaLimitsQuery,
|
||||||
|
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('bytes') },
|
{
|
||||||
|
yaxes: g.yaxes('bytes'),
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'quota - requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'quota - limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -208,6 +317,6 @@ local template = grafana.template;
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,33 +3,57 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
'k8s-resources-node.json':
|
// Dashboard variable `cluster`: lists the values of the configured cluster
// label found on kube_pod_info. `hide` is '2' unless $._config.showMultiCluster
// is true (presumably Grafana's "hide variable" mode; confirm against grafonnet).
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
// Dashboard variable `node`: nodes seen on kube_pod_info for the currently
// selected $cluster.
local nodeTemplate =
|
||||||
|
template.new(
|
||||||
|
name='node',
|
||||||
|
datasource='$datasource',
|
||||||
|
// Format with the whole config object so the named %(clusterLabel)s placeholder is resolved by key.
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, node)' % $._config,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
'k8s-resources-node.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
pod: {
|
pod: {
|
||||||
alias: 'Pod',
|
alias: 'Pod',
|
||||||
|
@ -39,8 +63,7 @@ local template = grafana.template;
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
|
||||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
)
|
||||||
.addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node')
|
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('CPU Usage')
|
g.row('CPU Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
|
@ -102,6 +125,6 @@ local template = grafana.template;
|
||||||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh, templating+: { list+: [intervalTemplate, clusterTemplate, nodeTemplate] } },
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,51 +3,150 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
'k8s-resources-pod.json':
|
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local namespaceTemplate =
|
||||||
|
template.new(
|
||||||
|
name='namespace',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local podTemplate =
|
||||||
|
template.new(
|
||||||
|
name='pod',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}, pod)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=2,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
'k8s-resources-pod.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
container: {
|
container: {
|
||||||
alias: 'Container',
|
alias: 'Container',
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
local cpuRequestsQuery = |||
|
||||||
|
sum(
|
||||||
|
kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})
|
||||||
|
||| % $._config;
|
||||||
|
|
||||||
|
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
|
||||||
|
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
|
||||||
|
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||||
|
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']),
|
||||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
)
|
||||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
|
||||||
.addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod')
|
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('CPU Usage')
|
g.row('CPU Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Usage') +
|
g.panel('CPU Usage') +
|
||||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') +
|
g.queryPanel(
|
||||||
g.stack,
|
[
|
||||||
|
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
|
||||||
|
cpuRequestsQuery,
|
||||||
|
cpuLimitsQuery,
|
||||||
|
], [
|
||||||
|
'{{container}}',
|
||||||
|
'requests',
|
||||||
|
'limits',
|
||||||
|
],
|
||||||
|
) +
|
||||||
|
g.stack + {
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: true,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: true,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
g.row('CPU Throttling')
|
||||||
|
.addPanel(
|
||||||
|
g.panel('CPU Throttling') +
|
||||||
|
g.queryPanel('sum(increase(container_cpu_cfs_throttled_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container)' % $._config, '{{container}}') +
|
||||||
|
g.stack
|
||||||
|
+ {
|
||||||
|
yaxes: g.yaxes({ format: 'percentunit', max: 1 }),
|
||||||
|
legend+: {
|
||||||
|
current: true,
|
||||||
|
max: true,
|
||||||
|
},
|
||||||
|
thresholds: [
|
||||||
|
{
|
||||||
|
value: $._config.cpuThrottlingPercent / 100,
|
||||||
|
colorMode: 'critical',
|
||||||
|
op: 'gt',
|
||||||
|
fill: true,
|
||||||
|
line: true,
|
||||||
|
yaxis: 'left',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -74,16 +173,40 @@ local template = grafana.template;
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Usage') +
|
g.panel('Memory Usage') +
|
||||||
g.queryPanel([
|
g.queryPanel([
|
||||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||||
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
memRequestsQuery,
|
||||||
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
memLimitsQuery,
|
||||||
], [
|
], [
|
||||||
'{{container}} (RSS)',
|
'{{container}}',
|
||||||
'{{container}} (Cache)',
|
'requests',
|
||||||
'{{container}} (Swap)',
|
'limits',
|
||||||
]) +
|
]) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('bytes') },
|
{
|
||||||
|
yaxes: g.yaxes('bytes'),
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -164,6 +287,6 @@ local template = grafana.template;
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,51 +3,75 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
local typeTemplate =
|
local typeTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='type',
|
name='type',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
||||||
current='deployment',
|
current='deployment',
|
||||||
hide='',
|
hide='',
|
||||||
refresh=1,
|
refresh=1,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=0
|
sort=0
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
},
|
},
|
||||||
|
|
||||||
'k8s-resources-workloads-namespace.json':
|
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local namespaceTemplate =
|
||||||
|
template.new(
|
||||||
|
name='namespace',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
'k8s-resources-workloads-namespace.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
workload: {
|
workload: {
|
||||||
alias: 'Workload',
|
alias: 'Workload',
|
||||||
|
@ -155,17 +179,44 @@ local template = grafana.template;
|
||||||
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
|
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
|
||||||
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||||
|
|
||||||
|
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
|
||||||
|
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
|
||||||
|
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
|
||||||
|
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
|
||||||
|
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
|
||||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
)
|
||||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('CPU Usage')
|
g.row('CPU Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Usage') +
|
g.panel('CPU Usage') +
|
||||||
g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') +
|
g.queryPanel([cpuUsageQuery, cpuQuotaRequestsQuery, cpuQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
|
||||||
g.stack,
|
g.stack + {
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'quota - requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'quota - limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -193,9 +244,33 @@ local template = grafana.template;
|
||||||
g.row('Memory Usage')
|
g.row('Memory Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Usage') +
|
g.panel('Memory Usage') +
|
||||||
g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') +
|
g.queryPanel([memUsageQuery, memoryQuotaRequestsQuery, memoryQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('bytes') },
|
{
|
||||||
|
yaxes: g.yaxes('bytes'),
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: 'quota - requests',
|
||||||
|
color: '#F2495C',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alias: 'quota - limits',
|
||||||
|
color: '#FF9830',
|
||||||
|
dashes: true,
|
||||||
|
fill: 0,
|
||||||
|
hideTooltip: true,
|
||||||
|
legend: false,
|
||||||
|
linewidth: 2,
|
||||||
|
stack: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -332,7 +407,7 @@ local template = grafana.template;
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } },
|
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||||
|
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,32 +3,79 @@ local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local template = grafana.template;
|
local template = grafana.template;
|
||||||
|
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
local intervalTemplate =
|
local intervalTemplate =
|
||||||
template.new(
|
template.new(
|
||||||
name='interval',
|
name='interval',
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
query='4h',
|
query='$__interval',
|
||||||
current='5m',
|
current='5m',
|
||||||
hide=2,
|
hide=2,
|
||||||
refresh=2,
|
refresh=2,
|
||||||
includeAll=false,
|
includeAll=false,
|
||||||
sort=1
|
sort=1
|
||||||
) + {
|
) + {
|
||||||
auto: false,
|
auto: false,
|
||||||
auto_count: 30,
|
auto_count: 30,
|
||||||
auto_min: '10s',
|
auto_min: '10s',
|
||||||
skipUrlSync: false,
|
skipUrlSync: false,
|
||||||
type: 'interval',
|
type: 'interval',
|
||||||
options: [
|
options: [
|
||||||
{
|
{
|
||||||
selected: true,
|
selected: true,
|
||||||
text: '4h',
|
text: '$__interval',
|
||||||
value: '4h',
|
value: '$__interval',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
local clusterTemplate =
|
||||||
|
template.new(
|
||||||
|
name='cluster',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local namespaceTemplate =
|
||||||
|
template.new(
|
||||||
|
name='namespace',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local workloadTemplate =
|
||||||
|
template.new(
|
||||||
|
name='workload',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}, workload)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide=if $._config.showMultiCluster then '' else '2',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
|
|
||||||
|
local workloadTypeTemplate =
|
||||||
|
template.new(
|
||||||
|
name='type',
|
||||||
|
datasource='$datasource',
|
||||||
|
query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}, workload_type)' % $._config.clusterLabel,
|
||||||
|
current='',
|
||||||
|
hide='',
|
||||||
|
refresh=1,
|
||||||
|
includeAll=false,
|
||||||
|
sort=1
|
||||||
|
),
|
||||||
'k8s-resources-workload.json':
|
'k8s-resources-workload.json':
|
||||||
local tableStyles = {
|
local tableStyles = {
|
||||||
pod: {
|
pod: {
|
||||||
|
@ -133,10 +180,7 @@ local template = grafana.template;
|
||||||
g.dashboard(
|
g.dashboard(
|
||||||
'%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
|
'%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
|
||||||
uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
|
uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
|
||||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
)
|
||||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
|
||||||
.addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
|
|
||||||
.addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
|
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('CPU Usage')
|
g.row('CPU Usage')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
|
@ -305,6 +349,6 @@ local template = grafana.template;
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, workloadTemplate, workloadTypeTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -170,6 +170,6 @@ local singlestat = grafana.singlestat;
|
||||||
.addPanel(memory)
|
.addPanel(memory)
|
||||||
.addPanel(cpu)
|
.addPanel(cpu)
|
||||||
.addPanel(goroutines)
|
.addPanel(goroutines)
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -561,6 +561,6 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
) +
|
) +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
),
|
),
|
||||||
),
|
) + { refresh: $._config.grafanaK8s.refresh },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,35 +23,45 @@
|
||||||
expr: |||
|
expr: |||
|
||||||
sum by (%(clusterLabel)s, namespace, pod, container) (
|
sum by (%(clusterLabel)s, namespace, pod, container) (
|
||||||
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])
|
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])
|
||||||
) * on (%(clusterLabel)s, namespace, pod) group_left(node) max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
|
) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
|
||||||
|
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
record: 'node_namespace_pod_container:container_memory_working_set_bytes',
|
record: 'node_namespace_pod_container:container_memory_working_set_bytes',
|
||||||
expr: |||
|
expr: |||
|
||||||
container_memory_working_set_bytes{%(cadvisorSelector)s, image!=""}
|
container_memory_working_set_bytes{%(cadvisorSelector)s, image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
record: 'node_namespace_pod_container:container_memory_rss',
|
record: 'node_namespace_pod_container:container_memory_rss',
|
||||||
expr: |||
|
expr: |||
|
||||||
container_memory_rss{%(cadvisorSelector)s, image!=""}
|
container_memory_rss{%(cadvisorSelector)s, image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
record: 'node_namespace_pod_container:container_memory_cache',
|
record: 'node_namespace_pod_container:container_memory_cache',
|
||||||
expr: |||
|
expr: |||
|
||||||
container_memory_cache{%(cadvisorSelector)s, image!=""}
|
container_memory_cache{%(cadvisorSelector)s, image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
record: 'node_namespace_pod_container:container_memory_swap',
|
record: 'node_namespace_pod_container:container_memory_swap',
|
||||||
expr: |||
|
expr: |||
|
||||||
container_memory_swap{%(cadvisorSelector)s, image!=""}
|
container_memory_swap{%(cadvisorSelector)s, image!=""}
|
||||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
|
max by(namespace, pod, node) (kube_pod_info)
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -92,15 +102,19 @@
|
||||||
{
|
{
|
||||||
record: 'mixin_pod_workload',
|
record: 'mixin_pod_workload',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum(
|
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
|
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
|
||||||
"replicaset", "$1", "owner_name", "(.*)"
|
"replicaset", "$1", "owner_name", "(.*)"
|
||||||
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{%(kubeStateMetricsSelector)s},
|
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
|
||||||
|
1, max by (replicaset, namespace, owner_name) (
|
||||||
|
kube_replicaset_owner{%(kubeStateMetricsSelector)s}
|
||||||
|
)
|
||||||
|
),
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
labels: {
|
labels: {
|
||||||
workload_type: 'deployment',
|
workload_type: 'deployment',
|
||||||
|
@ -109,12 +123,12 @@
|
||||||
{
|
{
|
||||||
record: 'mixin_pod_workload',
|
record: 'mixin_pod_workload',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum(
|
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="DaemonSet"},
|
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="DaemonSet"},
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
labels: {
|
labels: {
|
||||||
workload_type: 'daemonset',
|
workload_type: 'daemonset',
|
||||||
|
@ -123,12 +137,12 @@
|
||||||
{
|
{
|
||||||
record: 'mixin_pod_workload',
|
record: 'mixin_pod_workload',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum(
|
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||||
label_replace(
|
label_replace(
|
||||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="StatefulSet"},
|
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="StatefulSet"},
|
||||||
"workload", "$1", "owner_name", "(.*)"
|
"workload", "$1", "owner_name", "(.*)"
|
||||||
)
|
)
|
||||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
labels: {
|
labels: {
|
||||||
workload_type: 'statefulset',
|
workload_type: 'statefulset',
|
||||||
|
|
|
@ -15,15 +15,23 @@
|
||||||
// SINCE 2018-02-08
|
// SINCE 2018-02-08
|
||||||
record: ':kube_pod_info_node_count:',
|
record: ':kube_pod_info_node_count:',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum(min(kube_pod_info) by (%(clusterLabel)s, node))
|
sum(min(kube_pod_info) by (%(clusterLabel)s, node))
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// This rule results in the tuples (node, namespace, instance) => 1;
|
// This rule results in the tuples (node, namespace, instance) => 1.
|
||||||
// it is used to calculate per-node metrics, given namespace & instance.
|
// It is used to calculate per-node metrics, given namespace & instance.
|
||||||
|
// We use the topk() aggregator to ensure that each (namespace,
|
||||||
|
// instance) tuple is only associated to one node and thus avoid
|
||||||
|
// "many-to-many matching not allowed" errors when joining with
|
||||||
|
// other timeseries on (namespace, instance). See node:node_num_cpu:sum
|
||||||
|
// below for instance.
|
||||||
record: 'node_namespace_pod:kube_pod_info:',
|
record: 'node_namespace_pod:kube_pod_info:',
|
||||||
expr: |||
|
expr: |||
|
||||||
max(label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")) by (node, namespace, %(podLabel)s)
|
topk by(namespace, %(podLabel)s) (1,
|
||||||
|
max by (node, namespace, %(podLabel)s) (
|
||||||
|
label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")
|
||||||
|
))
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Reference in a new issue