update kubernetes-mixin

This commit is contained in:
Tobias Brunner 2020-03-01 20:12:27 +01:00
parent 3d9b8c252e
commit 7c5eaaf4b3
28 changed files with 2118 additions and 953 deletions

View file

@ -4,4 +4,6 @@ build:
# Refresh the vendored jsonnet dependencies, then regenerate the build output.
#
#   1. Run `jb update` inside the jsonnet-ci container. The current directory
#      is bind-mounted at the same path and used as the working directory, so
#      the container writes straight into this checkout. $(CURDIR) is Make's
#      built-in absolute working directory, so no `pwd` subshell is needed.
#   2. Give vendor/ back to the invoking user. `$$(id -u):$$(id -g)` is
#      expanded by the recipe shell before sudo runs, so it resolves to the
#      caller and not to root. (Presumably the container writes the files as
#      a different user -- confirm against the image.)
#   3. Rebuild through $(MAKE) so -n, -j and the jobserver reach the sub-make.
update:
	docker run --rm -v "$(CURDIR)":"$(CURDIR)" --workdir "$(CURDIR)" quay.io/coreos/jsonnet-ci jb update
	sudo chown -R "$$(id -u):$$(id -g)" vendor/
	$(MAKE) build
.PHONY: update

View file

@ -8,7 +8,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "52fba431b686f6a5c30d60a0bbaf9fafc14bae35",
"version": "cb633418a2a67a41cd2f30d556f19e995ed8f274",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
@ -30,7 +30,7 @@
"subdir": "grafana-builder"
}
},
"version": "7ac7da1a0fe165b68cdb718b2521b560d51bd1f4",
"version": "66eb3af2bd87c4ee18b97d5b2d366b234eef89cc",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
},
{
@ -74,7 +74,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
@ -85,7 +85,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
},
{
@ -96,8 +96,8 @@
"subdir": ""
}
},
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
"sum": "CydKHxWA9LG9w1+sjlqREHXPQTdbiTwy40rnyXfHfGE="
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
"sum": "h+ZL4TFVFbSdlsY25mi5x1nRts3PY3JmKz3QXUgnXJk="
},
{
"name": "node-mixin",
@ -107,7 +107,7 @@
"subdir": "docs/node-mixin"
}
},
"version": "ef7c05816adcb0e8923defe34e97f6afcce0a939",
"version": "a7c31ff7ed0990545ed4cc62690fc53563ee8860",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
},
{
@ -118,7 +118,7 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "65a19421a42c69e16241eec24c66b98e4c8fa5da",
"version": "babadf13e852654cfc87c06fc8ff0b843586a00e",
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
},
{
@ -151,7 +151,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{

File diff suppressed because it is too large Load diff

View file

@ -209,23 +209,33 @@ spec:
- expr: |
sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
) * on (cluster, namespace, pod) group_left(node) max by(cluster, namespace, pod, node) (kube_pod_info)
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- expr: |
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_working_set_bytes
- expr: |
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_rss
- expr: |
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_cache
- expr: |
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_swap
- expr: |
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
@ -253,35 +263,39 @@ spec:
)
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
- expr: |
sum(
max by (cluster, namespace, workload, pod) (
label_replace(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
"replicaset", "$1", "owner_name", "(.*)"
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
1, max by (replicaset, namespace, owner_name) (
kube_replicaset_owner{job="kube-state-metrics"}
)
),
"workload", "$1", "owner_name", "(.*)"
)
) by (cluster, namespace, workload, pod)
)
labels:
workload_type: deployment
record: mixin_pod_workload
- expr: |
sum(
max by (cluster, namespace, workload, pod) (
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (cluster, namespace, workload, pod)
)
labels:
workload_type: daemonset
record: mixin_pod_workload
- expr: |
sum(
max by (cluster, namespace, workload, pod) (
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (cluster, namespace, workload, pod)
)
labels:
workload_type: statefulset
record: mixin_pod_workload
@ -338,7 +352,10 @@ spec:
sum(min(kube_pod_info) by (cluster, node))
record: ':kube_pod_info_node_count:'
- expr: |
max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
topk by(namespace, pod) (1,
max by (node, namespace, pod) (
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
))
record: 'node_namespace_pod:kube_pod_info:'
- expr: |
count by (cluster, node) (sum by (node, cpu) (
@ -1164,6 +1181,16 @@ spec:
for: 5m
labels:
severity: warning
# Warning-severity alert on slow Kubelet pod workers.
# The expression takes the 0.99 histogram quantile of the 5m rate of
# kubelet_pod_worker_duration_seconds_bucket, summed by (instance, le), and
# joins it on `instance` with kubelet_node_name to pull in the `node` label
# used by the message. It fires when the result stays above 5 for 15m.
- alert: KubeletPodStartUpLatencyHigh
annotations:
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
for: 15m
labels:
severity: warning
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.

View file

@ -74,6 +74,19 @@
message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
},
},
// Warning-severity alert on slow Kubelet pod workers.
// Fires when the 0.99 quantile of kubelet_pod_worker_duration_seconds_bucket
// (5m rate, summed by instance and le) stays above 5 for 15 minutes.
// The `* on(instance) group_left(node) kubelet_node_name` join copies the
// `node` label onto the result so the message can use {{ $labels.node }}.
{
alert: 'KubeletPodStartUpLatencyHigh',
// %(kubeletSelector)s is substituted from $._config by the `%` operator
// that follows the text block.
expr: |||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
||| % $._config,
// Quoted because `for` is a reserved word in jsonnet.
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
message: 'Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.',
},
},
(import '../lib/absent_alert.libsonnet') {
componentName:: 'Kubelet',
selector:: $._config.kubeletSelector,

View file

@ -64,6 +64,9 @@ local slo = import 'slo-libsonnet/slo.libsonnet';
// For links between grafana dashboards, you need to tell us if your grafana
// servers under some non-root path.
linkPrefix: '.',
// The default refresh interval applied to all dashboards; defaults to 10s.
refresh: '10s',
},
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.

View file

@ -208,6 +208,6 @@ local singlestat = grafana.singlestat;
.addPanel(memory)
.addPanel(cpu)
.addPanel(goroutines)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -180,6 +180,6 @@ local singlestat = grafana.singlestat;
.addPanel(memory)
.addPanel(cpu)
.addPanel(goroutines)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -413,6 +413,6 @@ local singlestat = grafana.singlestat;
.addPanel(memory)
.addPanel(cpu)
.addPanel(goroutines)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -343,7 +343,7 @@ local gauge = promgrafonnet.gauge;
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh='30s',
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)

View file

@ -345,7 +345,7 @@ local gauge = promgrafonnet.gauge;
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh='30s',
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)

View file

@ -373,7 +373,7 @@ local gauge = promgrafonnet.gauge;
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh='30s',
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)

View file

@ -242,7 +242,7 @@ local gauge = promgrafonnet.gauge;
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh='30s',
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)

View file

@ -257,7 +257,7 @@ local gauge = promgrafonnet.gauge;
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh='30s',
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)

View file

@ -166,6 +166,6 @@ local gauge = promgrafonnet.gauge;
row.new()
.addPanel(inodesGraph)
.addPanel(inodeGauge)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -186,6 +186,6 @@ local singlestat = grafana.singlestat;
.addPanel(memory)
.addPanel(cpu)
.addPanel(goroutines)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -4,4 +4,4 @@
(import 'resources/node.libsonnet') +
(import 'resources/pod.libsonnet') +
(import 'resources/workload-namespace.libsonnet') +
(import 'resources/workload.libsonnet')
(import 'resources/workload.libsonnet')

View file

@ -3,272 +3,289 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
grafanaDashboards+:: {
// Dashboard template variable named 'interval'.
// It is built with grafonnet's template.new and then overlaid with extra
// fields that switch it to type 'interval' with one pre-selected option.
// The query and that option are both '$__interval'; the panel queries in
// this file reference the variable as [$interval].
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
current='5m',
// NOTE(review): hide=2 presumably hides the variable from the dashboard UI -- confirm against Grafana's template schema.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields overlaid on the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// The only option offered, marked as selected.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
// Dashboard template variable named 'cluster'.
// Its values come from a label_values() query over node_cpu_seconds_total,
// using the label name configured in $._config.clusterLabel.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(node_cpu_seconds_total, %s)' % $._config.clusterLabel,
current='',
// hide is '' when showMultiCluster is enabled and '2' otherwise.
// NOTE(review): '2' presumably hides the variable in Grafana -- confirm.
hide=if $._config.showMultiCluster then '' else '2',
refresh=2,
includeAll=false,
sort=1
),
'k8s-resources-cluster.json':
local tableStyles = {
namespace: {
alias: 'Namespace',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
linkTooltip: 'Drill down to pods',
},
'Value #A': {
alias: 'Pods',
linkTooltip: 'Drill down to pods',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
decimals: 0,
},
'Value #B': {
alias: 'Workloads',
linkTooltip: 'Drill down to workloads',
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
decimals: 0,
},
};
'k8s-resources-cluster.json':
local tableStyles = {
namespace: {
alias: 'Namespace',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
linkTooltip: 'Drill down to pods',
},
'Value #A': {
alias: 'Pods',
linkTooltip: 'Drill down to pods',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
decimals: 0,
},
'Value #B': {
alias: 'Workloads',
linkTooltip: 'Drill down to workloads',
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
decimals: 0,
},
};
local podWorkloadColumns = [
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
];
local podWorkloadColumns = [
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
];
local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
];
local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
];
local networkTableStyles = {
namespace: {
alias: 'Namespace',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
linkTooltip: 'Drill down to pods',
},
'Value #A': {
alias: 'Current Receive Bandwidth',
unit: 'Bps',
},
'Value #B': {
alias: 'Current Transmit Bandwidth',
unit: 'Bps',
},
'Value #C': {
alias: 'Rate of Received Packets',
unit: 'pps',
},
'Value #D': {
alias: 'Rate of Transmitted Packets',
unit: 'pps',
},
'Value #E': {
alias: 'Rate of Received Packets Dropped',
unit: 'pps',
},
'Value #F': {
alias: 'Rate of Transmitted Packets Dropped',
unit: 'pps',
},
};
local networkTableStyles = {
namespace: {
alias: 'Namespace',
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
linkTooltip: 'Drill down to pods',
},
'Value #A': {
alias: 'Current Receive Bandwidth',
unit: 'Bps',
},
'Value #B': {
alias: 'Current Transmit Bandwidth',
unit: 'Bps',
},
'Value #C': {
alias: 'Rate of Received Packets',
unit: 'pps',
},
'Value #D': {
alias: 'Rate of Transmitted Packets',
unit: 'pps',
},
'Value #E': {
alias: 'Rate of Received Packets Dropped',
unit: 'pps',
},
'Value #F': {
alias: 'Rate of Transmitted Packets Dropped',
unit: 'pps',
},
};
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addRow(
(g.row('Headlines') +
{
height: '100px',
showTitle: false,
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config)
)
.addPanel(
g.panel('CPU Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('CPU Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation') +
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('Memory Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('Memory Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addRow(
(g.row('Headlines') +
{
height: '100px',
showTitle: false,
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$interval]))' % $._config)
)
.addRow(
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.stack
)
.addPanel(
g.panel('CPU Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
)
.addRow(
g.row('CPU Quota')
.addPanel(
g.panel('CPU Quota') +
g.tablePanel(podWorkloadColumns + [
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'CPU Usage' },
'Value #D': { alias: 'CPU Requests' },
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
'Value #F': { alias: 'CPU Limits' },
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
})
)
.addPanel(
g.panel('CPU Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
)
.addRow(
g.row('Memory')
.addPanel(
g.panel('Memory Usage (w/o cache)') +
.addPanel(
g.panel('Memory Utilisation') +
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('Memory Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
.addPanel(
g.panel('Memory Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
)
)
.addRow(
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.stack
)
)
.addRow(
g.row('CPU Quota')
.addPanel(
g.panel('CPU Quota') +
g.tablePanel(podWorkloadColumns + [
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'CPU Usage' },
'Value #D': { alias: 'CPU Requests' },
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
'Value #F': { alias: 'CPU Limits' },
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
})
)
)
.addRow(
g.row('Memory')
.addPanel(
g.panel('Memory Usage (w/o cache)') +
// Not using container_memory_usage_bytes here because that includes page cache
g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('bytes') },
)
)
.addRow(
g.row('Memory Requests')
.addPanel(
g.panel('Requests by Namespace') +
g.tablePanel(podWorkloadColumns + [
// Not using container_memory_usage_bytes here because that includes page cache
g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('bytes') },
)
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
})
)
.addRow(
g.row('Memory Requests')
.addPanel(
g.panel('Requests by Namespace') +
g.tablePanel(podWorkloadColumns + [
// Not using container_memory_usage_bytes here because that includes page cache
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
})
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Current Network Usage') +
g.tablePanel(
networkColumns,
networkTableStyles
),
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Current Network Usage') +
g.tablePanel(
networkColumns,
networkTableStyles
),
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Received') +
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Received') +
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
.addRow(
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
}
}
) + {
tags: $._config.grafanaK8s.dashboardTags,
templating+: { list+: [intervalTemplate, clusterTemplate] },
refresh: $._config.grafanaK8s.refresh,
},
},
}

View file

@ -3,105 +3,105 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+::
if $._config.showMultiCluster then {
'k8s-resources-multicluster.json':
local tableStyles = {
[$._config.clusterLabel]: {
alias: 'Cluster',
link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') },
},
};
grafanaDashboards+::
if $._config.showMultiCluster then {
'k8s-resources-multicluster.json':
local tableStyles = {
[$._config.clusterLabel]: {
alias: 'Cluster',
link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') },
},
};
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']),
).addRow(
(g.row('Headlines') +
{
height: '100px',
showTitle: false,
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config)
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']),
).addRow(
(g.row('Headlines') +
{
height: '100px',
showTitle: false,
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config)
)
.addPanel(
g.panel('CPU Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
)
.addPanel(
g.panel('CPU Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
)
.addPanel(
g.panel('Memory Utilisation') +
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
.addPanel(
g.panel('Memory Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
.addPanel(
g.panel('Memory Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
)
.addPanel(
g.panel('CPU Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
.addRow(
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
+ { fill: 0, linewidth: 2 },
)
)
.addPanel(
g.panel('CPU Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
.addRow(
g.row('CPU Quota')
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
'Value #D': { alias: 'CPU Limits' },
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
})
)
)
.addPanel(
g.panel('Memory Utilisation') +
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
.addPanel(
g.panel('Memory Requests Commitment') +
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
.addPanel(
g.panel('Memory Limits Commitment') +
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
)
)
.addRow(
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
+ { fill: 0, linewidth: 2 },
)
)
.addRow(
g.row('CPU Quota')
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
'Value #D': { alias: 'CPU Limits' },
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
})
)
)
.addRow(
g.row('Memory')
.addPanel(
g.panel('Memory Usage (w/o cache)') +
// Not using container_memory_usage_bytes here because that includes page cache
g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
)
)
.addRow(
g.row('Memory Requests')
.addPanel(
g.panel('Requests by Namespace') +
g.tablePanel([
.addRow(
g.row('Memory')
.addPanel(
g.panel('Memory Usage (w/o cache)') +
// Not using container_memory_usage_bytes here because that includes page cache
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
})
g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
)
)
) + { tags: $._config.grafanaK8s.dashboardTags },
} else {},
}
.addRow(
g.row('Memory Requests')
.addPanel(
g.panel('Requests by Namespace') +
g.tablePanel([
// Not using container_memory_usage_bytes here because that includes page cache
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
})
)
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh },
} else {},
}

View file

@ -3,33 +3,56 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
},
grafanaDashboards+:: {
// Dashboard variable `interval`: created with template.new, then extended with
// interval-type fields. It is appended to the dashboard's templating list at the
// end of this dashboard definition.
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
// NOTE(review): current is '5m' while the only option declared below is
// '$__interval' — confirm which value is intended.
current='5m',
// NOTE(review): presumably 2 hides the variable from the dashboard header —
// confirm against grafonnet's template.new.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// Single, pre-selected option.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
'k8s-resources-namespace.json':
// Dashboard variable `cluster`: values of the configured cluster label as found
// on the kube_pod_info metric.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by the configured cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
current='',
// NOTE(review): presumably '' shows the variable (multi-cluster mode) and '2'
// hides it otherwise — confirm how template.new interprets these values.
hide=if $._config.showMultiCluster then '' else '2',
refresh=1,
includeAll=false,
sort=1
),
// Dashboard variable `namespace`: namespaces reported by kube_pod_info for the
// selected $cluster.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
'k8s-resources-namespace.json':
local tableStyles = {
pod: {
alias: 'Pod',
@ -78,17 +101,75 @@ local template = grafana.template;
},
};
local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;
local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config;
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
)
.addRow(
  // Headline stat panels: namespace usage as a fraction of its requests / limits.
  // Every selector on both sides of each ratio is scoped to the selected
  // $cluster and $namespace, so numerator and denominator cover the same pods.
  (g.row('Headlines') +
   {
     height: '100px',
     showTitle: false,
   })
  .addPanel(
    g.panel('CPU Utilisation (from requests)') +
    g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
  )
  .addPanel(
    g.panel('CPU Utilisation (from limits)') +
    g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
  )
  .addPanel(
    g.panel('Memory Utilization (from requests)') +
    // Denominator carries the cluster selector too; without it the requests
    // would be summed over same-named namespaces in every cluster.
    g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
  )
  .addPanel(
    g.panel('Memory Utilisation (from limits)') +
    // Same cluster scoping as the requests panel above.
    g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
  )
)
.addRow(
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') +
g.stack,
g.queryPanel([
cpuUsageQuery,
cpuQuotaRequestsQuery,
cpuQuotaLimitsQuery,
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
g.stack + {
seriesOverrides: [
{
alias: 'quota - requests',
color: '#F2495C',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
{
alias: 'quota - limits',
color: '#FF9830',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
],
},
)
)
.addRow(
@ -115,9 +196,37 @@ local template = grafana.template;
.addPanel(
g.panel('Memory Usage (w/o cache)') +
// Like above, without page cache
g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') +
g.queryPanel([
memoryUsageQuery,
memoryQuotaRequestsQuery,
memoryQuotaLimitsQuery,
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
g.stack +
{ yaxes: g.yaxes('bytes') },
{
yaxes: g.yaxes('bytes'),
seriesOverrides: [
{
alias: 'quota - requests',
color: '#F2495C',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
{
alias: 'quota - limits',
color: '#FF9830',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
],
},
)
)
.addRow(
@ -208,6 +317,6 @@ local template = grafana.template;
g.stack +
{ yaxes: g.yaxes('Bps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
}
}
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -3,33 +3,57 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
},
grafanaDashboards+:: {
// Dashboard variable `interval`: created with template.new, then extended with
// interval-type fields. It is appended to the dashboard's templating list at the
// end of this dashboard definition.
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
// NOTE(review): current is '5m' while the only option declared below is
// '$__interval' — confirm which value is intended.
current='5m',
// NOTE(review): presumably 2 hides the variable from the dashboard header —
// confirm against grafonnet's template.new.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// Single, pre-selected option.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
'k8s-resources-node.json':
// Dashboard variable `cluster`: values of the configured cluster label as found
// on the kube_pod_info metric.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by the configured cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
current='',
// NOTE(review): presumably '' shows the variable (multi-cluster mode) and '2'
// hides it otherwise — confirm how template.new interprets these values.
hide=if $._config.showMultiCluster then '' else '2',
refresh=1,
includeAll=false,
sort=1
),
// Dashboard variable `node`: nodes reported by kube_pod_info for the selected
// $cluster.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local nodeTemplate =
  template.new(
    name='node',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, node)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
'k8s-resources-node.json':
local tableStyles = {
pod: {
alias: 'Pod',
@ -39,8 +63,7 @@ local template = grafana.template;
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node')
)
.addRow(
g.row('CPU Usage')
.addPanel(
@ -102,6 +125,6 @@ local template = grafana.template;
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
})
)
) + { tags: $._config.grafanaK8s.dashboardTags },
}
}
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh, templating+: { list+: [intervalTemplate, clusterTemplate, nodeTemplate] } },
},
}

View file

@ -3,51 +3,150 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
},
grafanaDashboards+:: {
// Dashboard variable `interval`: created with template.new, then extended with
// interval-type fields. It is appended to the dashboard's templating list at the
// end of this dashboard definition.
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
// NOTE(review): current is '5m' while the only option declared below is
// '$__interval' — confirm which value is intended.
current='5m',
// NOTE(review): presumably 2 hides the variable from the dashboard header —
// confirm against grafonnet's template.new.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// Single, pre-selected option.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
'k8s-resources-pod.json':
// Dashboard variable `cluster`: values of the configured cluster label as found
// on the kube_pod_info metric.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by the configured cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
current='',
// NOTE(review): presumably '' shows the variable (multi-cluster mode) and '2'
// hides it otherwise — confirm how template.new interprets these values.
hide=if $._config.showMultiCluster then '' else '2',
refresh=1,
includeAll=false,
sort=1
),
// Dashboard variable `namespace`: namespaces reported by kube_pod_info for the
// selected $cluster.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
// Dashboard variable `pod`: pods reported by kube_pod_info for the selected
// $cluster and $namespace.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local podTemplate =
  template.new(
    name='pod',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}, pod)' % $._config,
    current='',
    hide='',
    // Unlike the cluster/namespace variables (refresh=1), this one uses refresh=2.
    refresh=2,
    includeAll=false,
    sort=1
  ),
'k8s-resources-pod.json':
local tableStyles = {
container: {
alias: 'Container',
},
};
local cpuRequestsQuery = |||
sum(
kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})
||| % $._config;
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
.addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod')
)
.addRow(
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') +
g.stack,
g.queryPanel(
[
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
cpuRequestsQuery,
cpuLimitsQuery,
], [
'{{container}}',
'requests',
'limits',
],
) +
g.stack + {
seriesOverrides: [
{
alias: 'requests',
color: '#F2495C',
fill: 0,
hideTooltip: true,
legend: true,
linewidth: 2,
stack: false,
},
{
alias: 'limits',
color: '#FF9830',
fill: 0,
hideTooltip: true,
legend: true,
linewidth: 2,
stack: false,
},
],
},
)
)
.addRow(
g.row('CPU Throttling')
.addPanel(
g.panel('CPU Throttling') +
g.queryPanel('sum(increase(container_cpu_cfs_throttled_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container)' % $._config, '{{container}}') +
g.stack
+ {
yaxes: g.yaxes({ format: 'percentunit', max: 1 }),
legend+: {
current: true,
max: true,
},
thresholds: [
{
value: $._config.cpuThrottlingPercent / 100,
colorMode: 'critical',
op: 'gt',
fill: true,
line: true,
yaxis: 'left',
},
],
},
)
)
.addRow(
@ -74,16 +173,40 @@ local template = grafana.template;
.addPanel(
g.panel('Memory Usage') +
g.queryPanel([
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
memRequestsQuery,
memLimitsQuery,
], [
'{{container}} (RSS)',
'{{container}} (Cache)',
'{{container}} (Swap)',
'{{container}}',
'requests',
'limits',
]) +
g.stack +
{ yaxes: g.yaxes('bytes') },
{
yaxes: g.yaxes('bytes'),
seriesOverrides: [
{
alias: 'requests',
color: '#F2495C',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
{
alias: 'limits',
color: '#FF9830',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
],
}
)
)
.addRow(
@ -164,6 +287,6 @@ local template = grafana.template;
g.stack +
{ yaxes: g.yaxes('Bps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
}
}
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -3,51 +3,75 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
},
grafanaDashboards+:: {
// Dashboard variable `interval`: created with template.new, then extended with
// interval-type fields. It is appended to the dashboard's templating list at the
// end of this dashboard definition.
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
// NOTE(review): current is '5m' while the only option declared below is
// '$__interval' — confirm which value is intended.
current='5m',
// NOTE(review): presumably 2 hides the variable from the dashboard header —
// confirm against grafonnet's template.new.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// Single, pre-selected option.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
local typeTemplate =
template.new(
name='type',
datasource='$datasource',
query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
current='deployment',
hide='',
refresh=1,
includeAll=false,
sort=0
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
skipUrlSync: false,
},
// Dashboard variable `type`: workload_type values taken from the
// mixin_pod_workload metric for the selected namespace; defaults to 'deployment'.
local typeTemplate =
template.new(
name='type',
datasource='$datasource',
// NOTE(review): this selector has no cluster label matcher, unlike the
// cluster-scoped variables declared next to it — confirm that is intended.
query='label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
current='deployment',
hide='',
refresh=1,
includeAll=false,
sort=0
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
// Same text as the query argument above; keep the two in sync.
definition: 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)',
skipUrlSync: false,
},
'k8s-resources-workloads-namespace.json':
// Dashboard variable `cluster`: values of the configured cluster label as found
// on the kube_pod_info metric.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by the configured cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
current='',
// NOTE(review): presumably '' shows the variable (multi-cluster mode) and '2'
// hides it otherwise — confirm how template.new interprets these values.
hide=if $._config.showMultiCluster then '' else '2',
refresh=1,
includeAll=false,
sort=1
),
// Dashboard variable `namespace`: namespaces reported by kube_pod_info for the
// selected $cluster.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
'k8s-resources-workloads-namespace.json':
local tableStyles = {
workload: {
alias: 'Workload',
@ -155,17 +179,44 @@ local template = grafana.template;
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
)
.addRow(
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') +
g.stack,
g.queryPanel([cpuUsageQuery, cpuQuotaRequestsQuery, cpuQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
g.stack + {
seriesOverrides: [
{
alias: 'quota - requests',
color: '#F2495C',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
{
alias: 'quota - limits',
color: '#FF9830',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
],
},
)
)
.addRow(
@ -193,9 +244,33 @@ local template = grafana.template;
g.row('Memory Usage')
.addPanel(
g.panel('Memory Usage') +
g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') +
g.queryPanel([memUsageQuery, memoryQuotaRequestsQuery, memoryQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
g.stack +
{ yaxes: g.yaxes('bytes') },
{
yaxes: g.yaxes('bytes'),
seriesOverrides: [
{
alias: 'quota - requests',
color: '#F2495C',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
{
alias: 'quota - limits',
color: '#FF9830',
dashes: true,
fill: 0,
hideTooltip: true,
legend: false,
linewidth: 2,
stack: false,
},
],
},
)
)
.addRow(
@ -332,7 +407,7 @@ local template = grafana.template;
g.stack +
{ yaxes: g.yaxes('Bps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } },
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
}
}
},
}

View file

@ -3,32 +3,79 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local template = grafana.template;
{
grafanaDashboards+:: {
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='4h',
current='5m',
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
options: [
{
selected: true,
text: '4h',
value: '4h',
},
],
},
grafanaDashboards+:: {
// Dashboard variable `interval`: created with template.new, then extended with
// interval-type fields. It is appended to the dashboard's templating list at the
// end of this dashboard definition.
local intervalTemplate =
template.new(
name='interval',
datasource='$datasource',
query='$__interval',
// NOTE(review): current is '5m' while the only option declared below is
// '$__interval' — confirm which value is intended.
current='5m',
// NOTE(review): presumably 2 hides the variable from the dashboard header —
// confirm against grafonnet's template.new.
hide=2,
refresh=2,
includeAll=false,
sort=1
) + {
// Fields merged over the object returned by template.new.
auto: false,
auto_count: 30,
auto_min: '10s',
skipUrlSync: false,
type: 'interval',
// Single, pre-selected option.
options: [
{
selected: true,
text: '$__interval',
value: '$__interval',
},
],
},
// Dashboard variable `cluster`: values of the configured cluster label as found
// on the kube_pod_info metric.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by the configured cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
current='',
// NOTE(review): presumably '' shows the variable (multi-cluster mode) and '2'
// hides it otherwise — confirm how template.new interprets these values.
hide=if $._config.showMultiCluster then '' else '2',
refresh=1,
includeAll=false,
sort=1
),
// Dashboard variable `namespace`: namespaces reported by kube_pod_info for the
// selected $cluster.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object, like the other queries in this file,
// rather than against a bare string.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
// Dashboard variable `workload`: workloads reported by mixin_pod_workload for
// the selected $cluster and $namespace.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object rather than against a bare string.
local workloadTemplate =
  template.new(
    name='workload',
    datasource='$datasource',
    query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}, workload)' % $._config,
    current='',
    // Same visibility setting as the namespace and type variables. Only the
    // cluster variable's visibility depends on showMultiCluster; tying this
    // one to it would hide the workload selector on single-cluster setups.
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
// Dashboard variable `type`: workload_type values reported by
// mixin_pod_workload for the selected $cluster, $namespace and $workload.
// The query uses the named %(clusterLabel)s placeholder, so it is formatted
// against the whole $._config object rather than against a bare string.
local workloadTypeTemplate =
  template.new(
    name='type',
    datasource='$datasource',
    query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}, workload_type)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
'k8s-resources-workload.json':
local tableStyles = {
pod: {
@ -133,10 +180,7 @@ local template = grafana.template;
g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
.addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
.addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
)
.addRow(
g.row('CPU Usage')
.addPanel(
@ -305,6 +349,6 @@ local template = grafana.template;
g.stack +
{ yaxes: g.yaxes('Bps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
}
}
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, workloadTemplate, workloadTypeTemplate] }, refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -170,6 +170,6 @@ local singlestat = grafana.singlestat;
.addPanel(memory)
.addPanel(cpu)
.addPanel(goroutines)
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -561,6 +561,6 @@ local g = import 'grafana-builder/grafana.libsonnet';
) +
{ yaxes: g.yaxes('percentunit') },
),
),
) + { refresh: $._config.grafanaK8s.refresh },
},
}

View file

@ -23,35 +23,45 @@
expr: |||
sum by (%(clusterLabel)s, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])
) * on (%(clusterLabel)s, namespace, pod) group_left(node) max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
)
||| % $._config,
},
{
// Recording rule: container_memory_working_set_bytes (series matching the
// configured cadvisorSelector with a non-empty image label) multiplied by
// kube_pod_info on (namespace, pod); group_left(node) copies the `node`
// label onto the result.
// The expression below shows both sides of the diff: the plain
// `max by(namespace, pod, node)` join, and its replacement wrapped in
// `topk by(namespace, pod) (1, ...)`, which keeps a single kube_pod_info
// series per (namespace, pod).
// NOTE(review): unlike the CPU rule earlier in this hunk, the join does not
// match on the cluster label; confirm this is intended.
record: 'node_namespace_pod_container:container_memory_working_set_bytes',
expr: |||
container_memory_working_set_bytes{%(cadvisorSelector)s, image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
||| % $._config,
},
{
// Recording rule: container_memory_rss (series matching the configured
// cadvisorSelector with a non-empty image label) joined with kube_pod_info
// on (namespace, pod) so that the `node` label is attached to the result.
// Both sides of the diff are shown: the plain `max by` join and the
// replacement that wraps it in `topk by(namespace, pod) (1, ...)` to keep a
// single kube_pod_info series per (namespace, pod).
record: 'node_namespace_pod_container:container_memory_rss',
expr: |||
container_memory_rss{%(cadvisorSelector)s, image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
||| % $._config,
},
{
// Recording rule: container_memory_cache (series matching the configured
// cadvisorSelector with a non-empty image label) joined with kube_pod_info
// on (namespace, pod) so that the `node` label is attached to the result.
// Both sides of the diff are shown: the plain `max by` join and the
// replacement that wraps it in `topk by(namespace, pod) (1, ...)` to keep a
// single kube_pod_info series per (namespace, pod).
record: 'node_namespace_pod_container:container_memory_cache',
expr: |||
container_memory_cache{%(cadvisorSelector)s, image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
||| % $._config,
},
{
// Recording rule: container_memory_swap (series matching the configured
// cadvisorSelector with a non-empty image label) joined with kube_pod_info
// on (namespace, pod) so that the `node` label is attached to the result.
// Both sides of the diff are shown: the plain `max by` join and the
// replacement that wraps it in `topk by(namespace, pod) (1, ...)` to keep a
// single kube_pod_info series per (namespace, pod).
record: 'node_namespace_pod_container:container_memory_swap',
expr: |||
container_memory_swap{%(cadvisorSelector)s, image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info)
)
||| % $._config,
},
{
@ -92,15 +102,19 @@
{
record: 'mixin_pod_workload',
expr: |||
sum(
max by (%(clusterLabel)s, namespace, workload, pod) (
label_replace(
label_replace(
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
"replicaset", "$1", "owner_name", "(.*)"
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{%(kubeStateMetricsSelector)s},
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
1, max by (replicaset, namespace, owner_name) (
kube_replicaset_owner{%(kubeStateMetricsSelector)s}
)
),
"workload", "$1", "owner_name", "(.*)"
)
) by (%(clusterLabel)s, namespace, workload, pod)
)
||| % $._config,
labels: {
workload_type: 'deployment',
@ -109,12 +123,12 @@
{
record: 'mixin_pod_workload',
expr: |||
sum(
max by (%(clusterLabel)s, namespace, workload, pod) (
label_replace(
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (%(clusterLabel)s, namespace, workload, pod)
)
||| % $._config,
labels: {
workload_type: 'daemonset',
@ -123,12 +137,12 @@
{
record: 'mixin_pod_workload',
expr: |||
sum(
max by (%(clusterLabel)s, namespace, workload, pod) (
label_replace(
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (%(clusterLabel)s, namespace, workload, pod)
)
||| % $._config,
labels: {
workload_type: 'statefulset',

View file

@ -15,15 +15,23 @@
// SINCE 2018-02-08
record: ':kube_pod_info_node_count:',
expr: |||
sum(min(kube_pod_info) by (%(clusterLabel)s, node))
||| % $._config,
sum(min(kube_pod_info) by (%(clusterLabel)s, node))
||| % $._config,
},
{
// This rule results in the tuples (node, namespace, instance) => 1;
// it is used to calculate per-node metrics, given namespace & instance.
// This rule results in the tuples (node, namespace, instance) => 1.
// It is used to calculate per-node metrics, given namespace & instance.
// We use the topk() aggregator to ensure that each (namespace,
// instance) tuple is associated with only one node, and thus avoid
// "many-to-many matching not allowed" errors when joining with
// other timeseries on (namespace, instance). See node:node_num_cpu:sum
// below for an example.
// NOTE(review): "instance" in the comments above presumably refers to the
// label named by the podLabel config value, which label_replace copies from
// the `pod` label; confirm against the config defaults.
record: 'node_namespace_pod:kube_pod_info:',
expr: |||
max(label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")) by (node, namespace, %(podLabel)s)
topk by(namespace, %(podLabel)s) (1,
max by (node, namespace, %(podLabel)s) (
label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")
))
||| % $._config,
},
{