This repository has been archived on 2023-04-02. You can view files and clone it, but cannot push or open issues or pull requests.
gitops-tbrnt/monitoring/vendor/kubernetes-mixin/dashboards/pods.libsonnet

196 lines
7.2 KiB
Plaintext

local grafana = import 'grafonnet/grafana.libsonnet';
local annotation = grafana.annotation;
local dashboard = grafana.dashboard;
local graphPanel = grafana.graphPanel;
local prometheus = grafana.prometheus;
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
local row = grafana.row;
local singlestat = grafana.singlestat;
local template = grafana.template;
local numbersinglestat = promgrafonnet.numbersinglestat;
{
grafanaDashboards+:: {
'pods.json':
local memoryRow = row.new()
.addPanel(
graphPanel.new(
'Memory Usage',
datasource='$datasource',
min=0,
span=12,
format='bytes',
legend_rightSide=true,
legend_alignAsTable=true,
legend_current=true,
legend_avg=true,
)
.addTarget(prometheus.target(
'sum by(container) (container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container", container!="POD"})' % $._config,
legendFormat='Current: {{ container }}',
))
.addTarget(prometheus.target(
'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config,
legendFormat='Requested: {{ container }}',
))
.addTarget(prometheus.target(
'sum by(container) (kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config,
legendFormat='Limit: {{ container }}',
))
.addTarget(prometheus.target(
'sum by(container) (container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod", container=~"$container", container!="POD"})' % $._config,
legendFormat='Cache: {{ container }}',
))
);
local cpuRow = row.new()
.addPanel(
graphPanel.new(
'CPU Usage',
datasource='$datasource',
min=0,
span=12,
legend_rightSide=true,
legend_alignAsTable=true,
legend_current=true,
legend_avg=true,
)
.addTarget(prometheus.target(
'sum by (container) (irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", image!="", pod="$pod", container=~"$container", container!="POD"}[4m]))' % $._config,
legendFormat='Current: {{ container }}',
))
.addTarget(prometheus.target(
'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config,
legendFormat='Requested: {{ container }}',
))
.addTarget(prometheus.target(
'sum by(container) (kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config,
legendFormat='Limit: {{ container }}',
))
);
local networkRow = row.new()
.addPanel(
graphPanel.new(
'Network I/O',
datasource='$datasource',
format='bytes',
min=0,
span=12,
legend_rightSide=true,
legend_alignAsTable=true,
legend_current=true,
legend_avg=true,
)
.addTarget(prometheus.target(
'sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config,
legendFormat='RX: {{ pod }}',
))
.addTarget(prometheus.target(
'sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config,
legendFormat='TX: {{ pod }}',
))
);
local restartsRow = row.new()
.addPanel(
graphPanel.new(
'Total Restarts Per Container',
datasource='$datasource',
format='short',
min=0,
span=12,
legend_rightSide=true,
legend_alignAsTable=true,
legend_current=true,
legend_avg=true,
)
.addTarget(prometheus.target(
'max by (container) (kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container"})' % $._config,
legendFormat='Restarts: {{ container }}',
))
);
local restartAnnotation = annotation.datasource(
'Restarts',
'$datasource',
expr='time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[2m]) > 0)' % $._config,
enable=true,
hide=false,
iconColor='rgba(215, 44, 44, 1)',
tags=['restart'],
type='rows',
builtIn=1,
);
dashboard.new(
'%(dashboardNamePrefix)sPods' % $._config.grafanaK8s,
time_from='now-1h',
uid=($._config.grafanaDashboardIDs['pods.json']),
tags=($._config.grafanaK8s.dashboardTags),
).addTemplate(
{
current: {
text: 'default',
value: 'default',
},
hide: 0,
label: null,
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
)
.addTemplate(
template.new(
'cluster',
'$datasource',
'label_values(kube_pod_info, %(clusterLabel)s)' % $._config,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
.addTemplate(
template.new(
'namespace',
'$datasource',
'label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
label='Namespace',
refresh='time',
sort=1,
)
)
.addTemplate(
template.new(
'pod',
'$datasource',
'label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace=~"$namespace"}, pod)' % $._config,
label='Pod',
refresh='time',
sort=1,
)
)
.addTemplate(
template.new(
'container',
'$datasource',
'label_values(kube_pod_container_info{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}, container)' % $._config,
label='Container',
refresh='time',
includeAll=true,
sort=1,
)
)
.addAnnotation(restartAnnotation)
.addRow(memoryRow)
.addRow(cpuRow)
.addRow(networkRow)
.addRow(restartsRow),
},
}