update kubernetes-mixin
This commit is contained in:
parent
3d9b8c252e
commit
7c5eaaf4b3
|
@ -4,4 +4,6 @@ build:
|
|||
|
||||
# Refresh the vendored jsonnet dependencies by running `jb update` inside the
# jsonnet-ci container (the working directory is bind-mounted at the same
# path), then rebuild via the `build` target.
update:
	docker run --rm -v "$(CURDIR):$(CURDIR)" --workdir "$(CURDIR)" quay.io/coreos/jsonnet-ci jb update
# Hand vendor/ back to the invoking user instead of one hard-coded account.
# NOTE(review): presumably needed because the container writes vendor/ as a
# different user - confirm.
	sudo chown -R "$$(id -u):$$(id -g)" vendor/
# $(MAKE) rather than a literal `make` so flags such as -n and -j propagate.
	$(MAKE) build
.PHONY: update
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "52fba431b686f6a5c30d60a0bbaf9fafc14bae35",
|
||||
"version": "cb633418a2a67a41cd2f30d556f19e995ed8f274",
|
||||
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
||||
},
|
||||
{
|
||||
|
@ -30,7 +30,7 @@
|
|||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "7ac7da1a0fe165b68cdb718b2521b560d51bd1f4",
|
||||
"version": "66eb3af2bd87c4ee18b97d5b2d366b234eef89cc",
|
||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
||||
},
|
||||
{
|
||||
|
@ -74,7 +74,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
||||
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
},
|
||||
{
|
||||
|
@ -85,7 +85,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
||||
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
|
||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||
},
|
||||
{
|
||||
|
@ -96,8 +96,8 @@
|
|||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
||||
"sum": "CydKHxWA9LG9w1+sjlqREHXPQTdbiTwy40rnyXfHfGE="
|
||||
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
|
||||
"sum": "h+ZL4TFVFbSdlsY25mi5x1nRts3PY3JmKz3QXUgnXJk="
|
||||
},
|
||||
{
|
||||
"name": "node-mixin",
|
||||
|
@ -107,7 +107,7 @@
|
|||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "ef7c05816adcb0e8923defe34e97f6afcce0a939",
|
||||
"version": "a7c31ff7ed0990545ed4cc62690fc53563ee8860",
|
||||
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
|
||||
},
|
||||
{
|
||||
|
@ -118,7 +118,7 @@
|
|||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "65a19421a42c69e16241eec24c66b98e4c8fa5da",
|
||||
"version": "babadf13e852654cfc87c06fc8ff0b843586a00e",
|
||||
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
||||
},
|
||||
{
|
||||
|
@ -151,7 +151,7 @@
|
|||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
||||
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
},
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -209,23 +209,33 @@ spec:
|
|||
- expr: |
|
||||
sum by (cluster, namespace, pod, container) (
|
||||
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
||||
) * on (cluster, namespace, pod) group_left(node) max by(cluster, namespace, pod, node) (kube_pod_info)
|
||||
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
||||
1, max by(cluster, namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||
- expr: |
|
||||
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||
- expr: |
|
||||
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
record: node_namespace_pod_container:container_memory_rss
|
||||
- expr: |
|
||||
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
record: node_namespace_pod_container:container_memory_cache
|
||||
- expr: |
|
||||
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
record: node_namespace_pod_container:container_memory_swap
|
||||
- expr: |
|
||||
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
|
||||
|
@ -253,35 +263,39 @@ spec:
|
|||
)
|
||||
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
|
||||
- expr: |
|
||||
sum(
|
||||
max by (cluster, namespace, workload, pod) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
|
||||
"replicaset", "$1", "owner_name", "(.*)"
|
||||
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
|
||||
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
|
||||
1, max by (replicaset, namespace, owner_name) (
|
||||
kube_replicaset_owner{job="kube-state-metrics"}
|
||||
)
|
||||
),
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (cluster, namespace, workload, pod)
|
||||
)
|
||||
labels:
|
||||
workload_type: deployment
|
||||
record: mixin_pod_workload
|
||||
- expr: |
|
||||
sum(
|
||||
max by (cluster, namespace, workload, pod) (
|
||||
label_replace(
|
||||
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (cluster, namespace, workload, pod)
|
||||
)
|
||||
labels:
|
||||
workload_type: daemonset
|
||||
record: mixin_pod_workload
|
||||
- expr: |
|
||||
sum(
|
||||
max by (cluster, namespace, workload, pod) (
|
||||
label_replace(
|
||||
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (cluster, namespace, workload, pod)
|
||||
)
|
||||
labels:
|
||||
workload_type: statefulset
|
||||
record: mixin_pod_workload
|
||||
|
@ -338,7 +352,10 @@ spec:
|
|||
sum(min(kube_pod_info) by (cluster, node))
|
||||
record: ':kube_pod_info_node_count:'
|
||||
- expr: |
|
||||
max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
|
||||
topk by(namespace, pod) (1,
|
||||
max by (node, namespace, pod) (
|
||||
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
|
||||
))
|
||||
record: 'node_namespace_pod:kube_pod_info:'
|
||||
- expr: |
|
||||
count by (cluster, node) (sum by (node, cpu) (
|
||||
|
@ -1164,6 +1181,16 @@ spec:
|
|||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletPodStartUpLatencyHigh
|
||||
annotations:
|
||||
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||
on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletDown
|
||||
annotations:
|
||||
message: Kubelet has disappeared from Prometheus target discovery.
|
||||
|
|
|
@ -74,6 +74,19 @@
|
|||
message: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'KubeletPodStartUpLatencyHigh',
|
||||
expr: |||
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
|
||||
||| % $._config,
|
||||
'for': '15m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.',
|
||||
},
|
||||
},
|
||||
(import '../lib/absent_alert.libsonnet') {
|
||||
componentName:: 'Kubelet',
|
||||
selector:: $._config.kubeletSelector,
|
||||
|
|
|
@ -64,6 +64,9 @@ local slo = import 'slo-libsonnet/slo.libsonnet';
|
|||
// For links between grafana dashboards, you need to tell us if your grafana
|
||||
// servers under some non-root path.
|
||||
linkPrefix: '.',
|
||||
|
||||
// The default refresh time for all dashboards, default to 10s
|
||||
refresh: '10s',
|
||||
},
|
||||
|
||||
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
|
||||
|
|
|
@ -208,6 +208,6 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(memory)
|
||||
.addPanel(cpu)
|
||||
.addPanel(goroutines)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -180,6 +180,6 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(memory)
|
||||
.addPanel(cpu)
|
||||
.addPanel(goroutines)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -413,6 +413,6 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(memory)
|
||||
.addPanel(cpu)
|
||||
.addPanel(goroutines)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -343,7 +343,7 @@ local gauge = promgrafonnet.gauge;
|
|||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
editable=true,
|
||||
schemaVersion=18,
|
||||
refresh='30s',
|
||||
refresh=($._config.grafanaK8s.refresh),
|
||||
time_from='now-1h',
|
||||
time_to='now',
|
||||
)
|
||||
|
|
|
@ -345,7 +345,7 @@ local gauge = promgrafonnet.gauge;
|
|||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
editable=true,
|
||||
schemaVersion=18,
|
||||
refresh='30s',
|
||||
refresh=($._config.grafanaK8s.refresh),
|
||||
time_from='now-1h',
|
||||
time_to='now',
|
||||
)
|
||||
|
|
|
@ -373,7 +373,7 @@ local gauge = promgrafonnet.gauge;
|
|||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
editable=true,
|
||||
schemaVersion=18,
|
||||
refresh='30s',
|
||||
refresh=($._config.grafanaK8s.refresh),
|
||||
time_from='now-1h',
|
||||
time_to='now',
|
||||
)
|
||||
|
|
|
@ -242,7 +242,7 @@ local gauge = promgrafonnet.gauge;
|
|||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
editable=true,
|
||||
schemaVersion=18,
|
||||
refresh='30s',
|
||||
refresh=($._config.grafanaK8s.refresh),
|
||||
time_from='now-1h',
|
||||
time_to='now',
|
||||
)
|
||||
|
|
|
@ -257,7 +257,7 @@ local gauge = promgrafonnet.gauge;
|
|||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
editable=true,
|
||||
schemaVersion=18,
|
||||
refresh='30s',
|
||||
refresh=($._config.grafanaK8s.refresh),
|
||||
time_from='now-1h',
|
||||
time_to='now',
|
||||
)
|
||||
|
|
|
@ -166,6 +166,6 @@ local gauge = promgrafonnet.gauge;
|
|||
row.new()
|
||||
.addPanel(inodesGraph)
|
||||
.addPanel(inodeGauge)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -186,6 +186,6 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(memory)
|
||||
.addPanel(cpu)
|
||||
.addPanel(goroutines)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,12 +23,24 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// Dashboard variable `cluster`: the values of the configured cluster label
// found on node_cpu_seconds_total.
local clusterTemplate =
  template.new(
    name='cluster',
    datasource='$datasource',
    query=std.format('label_values(node_cpu_seconds_total, %s)', $._config.clusterLabel),
    current='',
    // '' when showMultiCluster is enabled, '2' otherwise.
    hide=(if !$._config.showMultiCluster then '2' else ''),
    refresh=2,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
'k8s-resources-cluster.json':
|
||||
local tableStyles = {
|
||||
namespace: {
|
||||
|
@ -50,6 +62,7 @@ local template = grafana.template;
|
|||
},
|
||||
};
|
||||
|
||||
|
||||
local podWorkloadColumns = [
|
||||
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
|
||||
|
@ -108,7 +121,7 @@ local template = grafana.template;
|
|||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config)
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$interval]))' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Requests Commitment') +
|
||||
|
@ -269,6 +282,10 @@ local template = grafana.template;
|
|||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
) + {
|
||||
tags: $._config.grafanaK8s.dashboardTags,
|
||||
templating+: { list+: [intervalTemplate, clusterTemplate] },
|
||||
refresh: $._config.grafanaK8s.refresh,
|
||||
},
|
||||
},
|
||||
}
|
|
@ -24,7 +24,7 @@ local template = grafana.template;
|
|||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config)
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Requests Commitment') +
|
||||
|
@ -102,6 +102,6 @@ local template = grafana.template;
|
|||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh },
|
||||
} else {},
|
||||
}
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,12 +23,35 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// Dashboard variable `cluster`: the values of the configured cluster label
// found on kube_pod_info.
local clusterTemplate =
  template.new(
    name='cluster',
    datasource='$datasource',
    query=std.format('label_values(kube_pod_info, %s)', $._config.clusterLabel),
    current='',
    // '' when showMultiCluster is enabled, '2' otherwise.
    hide=(if !$._config.showMultiCluster then '2' else ''),
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
// Dashboard variable `namespace`: the namespaces reported by kube_pod_info
// for the currently selected $cluster.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    // The query uses the named placeholder %(clusterLabel)s, so it is formatted
    // with the config object (looked up by key) rather than with the bare
    // clusterLabel string, which only worked by being consumed positionally.
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
'k8s-resources-namespace.json':
|
||||
local tableStyles = {
|
||||
pod: {
|
||||
|
@ -78,17 +101,75 @@ local template = grafana.template;
|
|||
},
|
||||
};
|
||||
|
||||
local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;
|
||||
|
||||
local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config;
|
||||
|
||||
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
|
||||
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
|
||||
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
|
||||
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
)
|
||||
.addRow(
|
||||
(g.row('Headlines') +
|
||||
{
|
||||
height: '100px',
|
||||
showTitle: false,
|
||||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation (from requests)') +
|
||||
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation (from limits)') +
|
||||
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Utilization (from requests)') +
|
||||
g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace="$namespace"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Utilisation (from limits)') +
|
||||
g.statPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace="$namespace"})' % $._config)
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack,
|
||||
g.queryPanel([
|
||||
cpuUsageQuery,
|
||||
cpuQuotaRequestsQuery,
|
||||
cpuQuotaLimitsQuery,
|
||||
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
|
||||
g.stack + {
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'quota - requests',
|
||||
color: '#F2495C',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'quota - limits',
|
||||
color: '#FF9830',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -115,9 +196,37 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Memory Usage (w/o cache)') +
|
||||
// Like above, without page cache
|
||||
g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel([
|
||||
memoryUsageQuery,
|
||||
memoryQuotaRequestsQuery,
|
||||
memoryQuotaLimitsQuery,
|
||||
], ['{{pod}}', 'quota - requests', 'quota - limits']) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
{
|
||||
yaxes: g.yaxes('bytes'),
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'quota - requests',
|
||||
color: '#F2495C',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'quota - limits',
|
||||
color: '#FF9830',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -208,6 +317,6 @@ local template = grafana.template;
|
|||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,12 +23,36 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// Dashboard variable `cluster`: the values of the configured cluster label
// found on kube_pod_info.
local clusterTemplate =
  template.new(
    name='cluster',
    datasource='$datasource',
    query=std.format('label_values(kube_pod_info, %s)', $._config.clusterLabel),
    current='',
    // '' when showMultiCluster is enabled, '2' otherwise.
    hide=(if !$._config.showMultiCluster then '2' else ''),
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
// Dashboard variable `node`: the nodes reported by kube_pod_info for the
// currently selected $cluster.
local nodeTemplate =
  template.new(
    name='node',
    datasource='$datasource',
    // The query uses the named placeholder %(clusterLabel)s, so it is formatted
    // with the config object (looked up by key) rather than with the bare
    // clusterLabel string, which only worked by being consumed positionally.
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, node)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
'k8s-resources-node.json':
|
||||
local tableStyles = {
|
||||
pod: {
|
||||
|
@ -39,8 +63,7 @@ local template = grafana.template;
|
|||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node')
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
|
@ -102,6 +125,6 @@ local template = grafana.template;
|
|||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
||||
})
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
||||
}
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, refresh: $._config.grafanaK8s.refresh, templating+: { list+: [intervalTemplate, clusterTemplate, nodeTemplate] } },
|
||||
},
|
||||
}
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,12 +23,48 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// Dashboard variable `cluster`: the values of the configured cluster label
// found on kube_pod_info.
local clusterTemplate =
  template.new(
    name='cluster',
    datasource='$datasource',
    query=std.format('label_values(kube_pod_info, %s)', $._config.clusterLabel),
    current='',
    // '' when showMultiCluster is enabled, '2' otherwise.
    hide=(if !$._config.showMultiCluster then '2' else ''),
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
// Dashboard variable `namespace`: the namespaces reported by kube_pod_info
// for the currently selected $cluster.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    // The query uses the named placeholder %(clusterLabel)s, so it is formatted
    // with the config object (looked up by key) rather than with the bare
    // clusterLabel string, which only worked by being consumed positionally.
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
// Dashboard variable `pod`: the pods reported by kube_pod_info for the
// currently selected $cluster and $namespace.
local podTemplate =
  template.new(
    name='pod',
    datasource='$datasource',
    // The query uses the named placeholder %(clusterLabel)s, so it is formatted
    // with the config object (looked up by key) rather than with the bare
    // clusterLabel string, which only worked by being consumed positionally.
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}, pod)' % $._config,
    current='',
    hide='',
    refresh=2,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
'k8s-resources-pod.json':
|
||||
local tableStyles = {
|
||||
container: {
|
||||
|
@ -36,18 +72,81 @@ local template = grafana.template;
|
|||
},
|
||||
};
|
||||
|
||||
local cpuRequestsQuery = |||
|
||||
sum(
|
||||
kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})
|
||||
||| % $._config;
|
||||
|
||||
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
|
||||
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
|
||||
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
.addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod')
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') +
|
||||
g.stack,
|
||||
g.queryPanel(
|
||||
[
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
|
||||
cpuRequestsQuery,
|
||||
cpuLimitsQuery,
|
||||
], [
|
||||
'{{container}}',
|
||||
'requests',
|
||||
'limits',
|
||||
],
|
||||
) +
|
||||
g.stack + {
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'requests',
|
||||
color: '#F2495C',
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: true,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'limits',
|
||||
color: '#FF9830',
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: true,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Throttling')
|
||||
.addPanel(
|
||||
g.panel('CPU Throttling') +
|
||||
g.queryPanel('sum(increase(container_cpu_cfs_throttled_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}[5m])) by (container)' % $._config, '{{container}}') +
|
||||
g.stack
|
||||
+ {
|
||||
yaxes: g.yaxes({ format: 'percentunit', max: 1 }),
|
||||
legend+: {
|
||||
current: true,
|
||||
max: true,
|
||||
},
|
||||
thresholds: [
|
||||
{
|
||||
value: $._config.cpuThrottlingPercent / 100,
|
||||
colorMode: 'critical',
|
||||
op: 'gt',
|
||||
fill: true,
|
||||
line: true,
|
||||
yaxis: 'left',
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -74,16 +173,40 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Memory Usage') +
|
||||
g.queryPanel([
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
memRequestsQuery,
|
||||
memLimitsQuery,
|
||||
], [
|
||||
'{{container}} (RSS)',
|
||||
'{{container}} (Cache)',
|
||||
'{{container}} (Swap)',
|
||||
'{{container}}',
|
||||
'requests',
|
||||
'limits',
|
||||
]) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
{
|
||||
yaxes: g.yaxes('bytes'),
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'requests',
|
||||
color: '#F2495C',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'limits',
|
||||
color: '#FF9830',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
}
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -164,6 +287,6 @@ local template = grafana.template;
|
|||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,8 +23,8 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
@ -47,6 +47,30 @@ local template = grafana.template;
|
|||
skipUrlSync: false,
|
||||
},
|
||||
|
||||
// Dashboard variable `cluster`: the values of the configured cluster label
// found on kube_pod_info.
local clusterTemplate =
  template.new(
    name='cluster',
    datasource='$datasource',
    query=std.format('label_values(kube_pod_info, %s)', $._config.clusterLabel),
    current='',
    // '' when showMultiCluster is enabled, '2' otherwise.
    hide=(if !$._config.showMultiCluster then '2' else ''),
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
// Dashboard variable `namespace`: the namespaces reported by kube_pod_info
// for the currently selected $cluster.
local namespaceTemplate =
  template.new(
    name='namespace',
    datasource='$datasource',
    // The query uses the named placeholder %(clusterLabel)s, so it is formatted
    // with the config object (looked up by key) rather than with the bare
    // clusterLabel string, which only worked by being consumed positionally.
    query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
    current='',
    hide='',
    refresh=1,
    includeAll=false,
    sort=1
  ),
|
||||
|
||||
'k8s-resources-workloads-namespace.json':
|
||||
local tableStyles = {
|
||||
workload: {
|
||||
|
@ -155,17 +179,44 @@ local template = grafana.template;
|
|||
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
|
||||
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||
|
||||
local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config;
|
||||
local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu');
|
||||
local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory');
|
||||
local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory');
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') +
|
||||
g.stack,
|
||||
g.queryPanel([cpuUsageQuery, cpuQuotaRequestsQuery, cpuQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
|
||||
g.stack + {
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'quota - requests',
|
||||
color: '#F2495C',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'quota - limits',
|
||||
color: '#FF9830',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -193,9 +244,33 @@ local template = grafana.template;
|
|||
g.row('Memory Usage')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage') +
|
||||
g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') +
|
||||
g.queryPanel([memUsageQuery, memoryQuotaRequestsQuery, memoryQuotaLimitsQuery], ['{{workload}} - {{workload_type}}', 'quota - requests', 'quota - limits']) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
{
|
||||
yaxes: g.yaxes('bytes'),
|
||||
seriesOverrides: [
|
||||
{
|
||||
alias: 'quota - requests',
|
||||
color: '#F2495C',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
{
|
||||
alias: 'quota - limits',
|
||||
color: '#FF9830',
|
||||
dashes: true,
|
||||
fill: 0,
|
||||
hideTooltip: true,
|
||||
legend: false,
|
||||
linewidth: 2,
|
||||
stack: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
|
@ -332,7 +407,7 @@ local template = grafana.template;
|
|||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } },
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate, clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||
|
||||
}
|
||||
},
|
||||
}
|
|
@ -8,7 +8,7 @@ local template = grafana.template;
|
|||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
query='$__interval',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
|
@ -23,12 +23,59 @@ local template = grafana.template;
|
|||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
text: '$__interval',
|
||||
value: '$__interval',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
current='',
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
),
|
||||
|
||||
local namespaceTemplate =
|
||||
template.new(
|
||||
name='namespace',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config.clusterLabel,
|
||||
current='',
|
||||
hide='',
|
||||
refresh=1,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
),
|
||||
|
||||
local workloadTemplate =
|
||||
template.new(
|
||||
name='workload',
|
||||
datasource='$datasource',
|
||||
query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}, workload)' % $._config.clusterLabel,
|
||||
current='',
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
),
|
||||
|
||||
local workloadTypeTemplate =
|
||||
template.new(
|
||||
name='type',
|
||||
datasource='$datasource',
|
||||
query='label_values(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}, workload_type)' % $._config.clusterLabel,
|
||||
current='',
|
||||
hide='',
|
||||
refresh=1,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
),
|
||||
'k8s-resources-workload.json':
|
||||
local tableStyles = {
|
||||
pod: {
|
||||
|
@ -133,10 +180,7 @@ local template = grafana.template;
|
|||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
.addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
|
||||
.addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
|
@ -305,6 +349,6 @@ local template = grafana.template;
|
|||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, clusterTemplate, namespaceTemplate, workloadTemplate, workloadTypeTemplate] }, refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
|
@ -170,6 +170,6 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(memory)
|
||||
.addPanel(cpu)
|
||||
.addPanel(goroutines)
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -561,6 +561,6 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
) +
|
||||
{ yaxes: g.yaxes('percentunit') },
|
||||
),
|
||||
),
|
||||
) + { refresh: $._config.grafanaK8s.refresh },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -23,35 +23,45 @@
|
|||
expr: |||
|
||||
sum by (%(clusterLabel)s, namespace, pod, container) (
|
||||
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])
|
||||
) * on (%(clusterLabel)s, namespace, pod) group_left(node) max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
|
||||
) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
|
||||
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'node_namespace_pod_container:container_memory_working_set_bytes',
|
||||
expr: |||
|
||||
container_memory_working_set_bytes{%(cadvisorSelector)s, image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'node_namespace_pod_container:container_memory_rss',
|
||||
expr: |||
|
||||
container_memory_rss{%(cadvisorSelector)s, image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'node_namespace_pod_container:container_memory_cache',
|
||||
expr: |||
|
||||
container_memory_cache{%(cadvisorSelector)s, image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'node_namespace_pod_container:container_memory_swap',
|
||||
expr: |||
|
||||
container_memory_swap{%(cadvisorSelector)s, image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||
max by(namespace, pod, node) (kube_pod_info)
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
|
@ -92,15 +102,19 @@
|
|||
{
|
||||
record: 'mixin_pod_workload',
|
||||
expr: |||
|
||||
sum(
|
||||
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||
label_replace(
|
||||
label_replace(
|
||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
|
||||
"replicaset", "$1", "owner_name", "(.*)"
|
||||
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{%(kubeStateMetricsSelector)s},
|
||||
) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
|
||||
1, max by (replicaset, namespace, owner_name) (
|
||||
kube_replicaset_owner{%(kubeStateMetricsSelector)s}
|
||||
)
|
||||
),
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
||||
)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
workload_type: 'deployment',
|
||||
|
@ -109,12 +123,12 @@
|
|||
{
|
||||
record: 'mixin_pod_workload',
|
||||
expr: |||
|
||||
sum(
|
||||
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||
label_replace(
|
||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="DaemonSet"},
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
||||
)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
workload_type: 'daemonset',
|
||||
|
@ -123,12 +137,12 @@
|
|||
{
|
||||
record: 'mixin_pod_workload',
|
||||
expr: |||
|
||||
sum(
|
||||
max by (%(clusterLabel)s, namespace, workload, pod) (
|
||||
label_replace(
|
||||
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="StatefulSet"},
|
||||
"workload", "$1", "owner_name", "(.*)"
|
||||
)
|
||||
) by (%(clusterLabel)s, namespace, workload, pod)
|
||||
)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
workload_type: 'statefulset',
|
||||
|
|
|
@ -19,11 +19,19 @@
|
|||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// This rule results in the tuples (node, namespace, instance) => 1;
|
||||
// it is used to calculate per-node metrics, given namespace & instance.
|
||||
// This rule results in the tuples (node, namespace, instance) => 1.
|
||||
// It is used to calculate per-node metrics, given namespace & instance.
|
||||
// We use the topk() aggregator to ensure that each (namespace,
|
||||
// instance) tuple is only associated to one node and thus avoid
|
||||
// "many-to-many matching not allowed" errors when joining with
|
||||
// other timeseries on (namespace, instance). See node:node_num_cpu:sum
|
||||
// below for instance.
|
||||
record: 'node_namespace_pod:kube_pod_info:',
|
||||
expr: |||
|
||||
max(label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")) by (node, namespace, %(podLabel)s)
|
||||
topk by(namespace, %(podLabel)s) (1,
|
||||
max by (node, namespace, %(podLabel)s) (
|
||||
label_replace(kube_pod_info{%(kubeStateMetricsSelector)s}, "%(podLabel)s", "$1", "pod", "(.*)")
|
||||
))
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
|
|
Reference in a new issue