update deps
This commit is contained in:
parent
a13fb8d079
commit
ca446b428a
|
@ -1,3 +1,7 @@
|
|||
# build: run ./build.sh on monitoring.jsonnet inside the
# quay.io/coreos/jsonnet-ci container. The directory make runs in is
# bind-mounted at the same absolute path inside the container and used as the
# container's working directory.
# $(CURDIR) is make's built-in absolute working directory, so no `pwd`
# subshell is forked per reference; the quotes keep a path containing spaces
# as a single docker argument.
build:
|
||||
	docker run --rm -v "$(CURDIR):$(CURDIR)" --workdir "$(CURDIR)" quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
|
||||
.PHONY: build
|
||||
|
||||
# update: run `jb update` inside the quay.io/coreos/jsonnet-ci container
# (presumably refreshes the vendored jsonnet dependencies and the lock file --
# confirm against the jsonnet-bundler docs). The directory make runs in is
# bind-mounted at the same absolute path inside the container and used as the
# container's working directory.
# $(CURDIR) is make's built-in absolute working directory, so no `pwd`
# subshell is forked per reference; the quotes keep a path containing spaces
# as a single docker argument.
update:
|
||||
	docker run --rm -v "$(CURDIR):$(CURDIR)" --workdir "$(CURDIR)" quay.io/coreos/jsonnet-ci jb update
|
||||
.PHONY: update
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "f0faa5501d936cd8c9f561bb9d1baca70eb67ab1",
|
||||
"version": "52fba431b686f6a5c30d60a0bbaf9fafc14bae35",
|
||||
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
||||
},
|
||||
{
|
||||
|
@ -19,8 +19,8 @@
|
|||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "539a90dbf63c812ad0194d8078dd776868a11c81",
|
||||
"sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE="
|
||||
"version": "1b07a802b663f77e36fe1e518cef552ef9fbdb82",
|
||||
"sum": "GliiVmOLUPmBNjvsx332UOvZj0o9VVxLFLp9u4QmmNk="
|
||||
},
|
||||
{
|
||||
"name": "grafana-builder",
|
||||
|
@ -41,8 +41,8 @@
|
|||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "c459106d2d2b583dd3a83f6c75eb52abee3af764",
|
||||
"sum": "CeM3LRgUCUJTolTdMnerfMPGYmhClx7gX5ajrQVEY2Y="
|
||||
"version": "db36a706bd1c87056759eacb686102133eb4740c",
|
||||
"sum": "g1aMw5iYEP/Dkw1wo1pcfe7q4LIpXc4wqDZsjaCpsRc="
|
||||
},
|
||||
{
|
||||
"name": "ksonnet",
|
||||
|
@ -63,8 +63,8 @@
|
|||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "8b0b0bc51435a5f7742307c86235273ab568dffe",
|
||||
"sum": "NJN0f7veWXOJyM3PNDM6vJQEzpkDxOchU9EVnoSRe6E="
|
||||
"version": "953c5464f72594b7fde2e534b207b211f7454ec7",
|
||||
"sum": "+9Clkrsv9C637n1P7pPoKXTMJTbJGgt2bhv1/1ySTuc="
|
||||
},
|
||||
{
|
||||
"name": "kube-state-metrics",
|
||||
|
@ -74,7 +74,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "392572e1e789fc5f866fbeb6466173531a659bcc",
|
||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
},
|
||||
{
|
||||
|
@ -85,7 +85,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "392572e1e789fc5f866fbeb6466173531a659bcc",
|
||||
"version": "22d195f20a20b51cf14b5ff01bb4a200c65196da",
|
||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||
},
|
||||
{
|
||||
|
@ -96,8 +96,8 @@
|
|||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881",
|
||||
"sum": "vQ1u8c5WNl7S7jmYyPk8HayvEPdIiZwKx5Sk6jdtOAE="
|
||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
||||
"sum": "CydKHxWA9LG9w1+sjlqREHXPQTdbiTwy40rnyXfHfGE="
|
||||
},
|
||||
{
|
||||
"name": "node-mixin",
|
||||
|
@ -107,7 +107,7 @@
|
|||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "dcfd6104332b22d3de1afa5425b6316b7a2952c6",
|
||||
"version": "ef7c05816adcb0e8923defe34e97f6afcce0a939",
|
||||
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
|
||||
},
|
||||
{
|
||||
|
@ -118,7 +118,7 @@
|
|||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "489a9aa7b9478022c3b9c5952b8f9c70ddae5bdb",
|
||||
"version": "65a19421a42c69e16241eec24c66b98e4c8fa5da",
|
||||
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
||||
},
|
||||
{
|
||||
|
@ -151,7 +151,7 @@
|
|||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "7f3e0130ccd3e39400d1dc36e690cab16f8d4881",
|
||||
"version": "3cf851b2c8ff8bf98c12eac7f37d97f086cd0fc9",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
},
|
||||
{
|
||||
|
|
|
@ -6,7 +6,7 @@ metadata:
|
|||
name: main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
baseImage: quay.io/prometheus/alertmanager
|
||||
image: quay.io/prometheus/alertmanager:v0.20.0
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
replicas: 1
|
||||
|
|
|
@ -10,12 +10,14 @@ stringData:
|
|||
"resolve_timeout": "5m"
|
||||
"inhibit_rules":
|
||||
- "equal":
|
||||
- "namespace"
|
||||
- "alertname"
|
||||
"source_match":
|
||||
"severity": "critical"
|
||||
"target_match_re":
|
||||
"severity": "warning|info"
|
||||
- "equal":
|
||||
- "namespace"
|
||||
- "alertname"
|
||||
"source_match":
|
||||
"severity": "warning"
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -16,7 +16,8 @@ spec:
|
|||
app: grafana
|
||||
spec:
|
||||
containers:
|
||||
- image: grafana/grafana:6.6.0
|
||||
- env: []
|
||||
image: grafana/grafana:6.6.0
|
||||
name: grafana
|
||||
ports:
|
||||
- containerPort: 3000
|
||||
|
@ -93,9 +94,6 @@ spec:
|
|||
- mountPath: /grafana-dashboard-definitions/0/pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/pods
|
||||
name: grafana-dashboard-pods
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
readOnly: false
|
||||
|
@ -108,9 +106,6 @@ spec:
|
|||
- mountPath: /grafana-dashboard-definitions/0/scheduler
|
||||
name: grafana-dashboard-scheduler
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
readOnly: false
|
||||
|
@ -180,9 +175,6 @@ spec:
|
|||
- configMap:
|
||||
name: grafana-dashboard-pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-pods
|
||||
name: grafana-dashboard-pods
|
||||
- configMap:
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
|
@ -195,9 +187,6 @@ spec:
|
|||
- configMap:
|
||||
name: grafana-dashboard-scheduler
|
||||
name: grafana-dashboard-scheduler
|
||||
- configMap:
|
||||
name: grafana-dashboard-statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
- configMap:
|
||||
name: grafana-dashboard-workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
|
|
|
@ -11,7 +11,7 @@ spec:
|
|||
- name: alertmanager-main
|
||||
namespace: monitoring
|
||||
port: web
|
||||
baseImage: quay.io/prometheus/prometheus
|
||||
image: quay.io/prometheus/prometheus:v2.15.2
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
podMonitorNamespaceSelector: {}
|
||||
|
|
|
@ -629,9 +629,15 @@ spec:
|
|||
matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||
expr: |
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_deployment_status_replicas_available{job="kube-state-metrics"}
|
||||
(
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_deployment_status_replicas_available{job="kube-state-metrics"}
|
||||
) and (
|
||||
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
|
@ -641,9 +647,15 @@ spec:
|
|||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_statefulset_status_replicas{job="kube-state-metrics"}
|
||||
(
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_statefulset_status_replicas{job="kube-state-metrics"}
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
18
monitoring/vendor/grafana/grafana.libsonnet
vendored
18
monitoring/vendor/grafana/grafana.libsonnet
vendored
|
@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
grafana: '6.4.3',
|
||||
grafana: '6.6.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
@ -32,10 +32,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
config: {},
|
||||
ldap: null,
|
||||
plugins: [],
|
||||
env: [],
|
||||
port: 3000,
|
||||
container: {
|
||||
requests: { cpu: '100m', memory: '100Mi' },
|
||||
limits: { cpu: '200m', memory: '200Mi' },
|
||||
},
|
||||
containers: [],
|
||||
},
|
||||
},
|
||||
grafanaDashboards: {},
|
||||
|
@ -79,7 +82,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local grafanaServiceNodePort = servicePort.newNamed('http', 3000, 'http');
|
||||
local grafanaServiceNodePort = servicePort.newNamed('http', $._config.grafana.port, 'http');
|
||||
|
||||
service.new('grafana', $.grafana.deployment.spec.selector.matchLabels, grafanaServiceNodePort) +
|
||||
service.mixin.metadata.withLabels({ app: 'grafana' }) +
|
||||
|
@ -97,7 +100,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
local podSelector = deployment.mixin.spec.template.spec.selectorType;
|
||||
local env = container.envType;
|
||||
|
||||
local targetPort = 3000;
|
||||
local targetPort = $._config.grafana.port;
|
||||
local portName = 'http';
|
||||
local podLabels = { app: 'grafana' };
|
||||
|
||||
|
@ -159,15 +162,18 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
] +
|
||||
if std.length($._config.grafana.config) > 0 then [configVolume] else [];
|
||||
|
||||
local c =
|
||||
local plugins = (if std.length($._config.grafana.plugins) == 0 then [] else [env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))]);
|
||||
|
||||
local c = [
|
||||
container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) +
|
||||
(if std.length($._config.grafana.plugins) == 0 then {} else container.withEnv([env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))])) +
|
||||
container.withEnv($._config.grafana.env + plugins) +
|
||||
container.withVolumeMounts(volumeMounts) +
|
||||
container.withPorts(containerPort.newNamed(targetPort, portName)) +
|
||||
container.mixin.readinessProbe.httpGet.withPath('/api/health') +
|
||||
container.mixin.readinessProbe.httpGet.withPort(portName) +
|
||||
container.mixin.resources.withRequests($._config.grafana.container.requests) +
|
||||
container.mixin.resources.withLimits($._config.grafana.container.limits);
|
||||
container.mixin.resources.withLimits($._config.grafana.container.limits),
|
||||
] + $._config.grafana.containers;
|
||||
|
||||
deployment.new('grafana', 1, c, podLabels) +
|
||||
deployment.mixin.metadata.withNamespace($._config.namespace) +
|
||||
|
|
43
monitoring/vendor/grafana/jsonnetfile.json
vendored
43
monitoring/vendor/grafana/jsonnetfile.json
vendored
|
@ -1,24 +1,25 @@
|
|||
{
|
||||
"dependencies": [
|
||||
{
|
||||
"name": "grafonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib",
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"name": "ksonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib",
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
]
|
||||
},
|
||||
"version": "master",
|
||||
"name": "grafonnet"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "ksonnet"
|
||||
}
|
||||
],
|
||||
"legacyImports": true
|
||||
}
|
||||
|
|
|
@ -225,6 +225,7 @@
|
|||
message='',
|
||||
noDataState='no_data',
|
||||
notifications=[],
|
||||
alertRuleTags={},
|
||||
):: self {
|
||||
local it = self,
|
||||
_conditions:: [],
|
||||
|
@ -238,6 +239,7 @@
|
|||
noDataState: noDataState,
|
||||
notifications: notifications,
|
||||
message: message,
|
||||
alertRuleTags: alertRuleTags,
|
||||
},
|
||||
addCondition(condition):: self {
|
||||
_conditions+: [condition],
|
||||
|
|
|
@ -56,6 +56,7 @@
|
|||
highlightCards=true,
|
||||
legend_show=false,
|
||||
minSpan=null,
|
||||
span=null,
|
||||
repeat=null,
|
||||
repeatDirection=null,
|
||||
tooltipDecimals=null,
|
||||
|
@ -100,6 +101,7 @@
|
|||
show: legend_show,
|
||||
},
|
||||
[if minSpan != null then 'minSpan']: minSpan,
|
||||
[if span != null then 'span']: span,
|
||||
[if repeat != null then 'repeat']: repeat,
|
||||
[if repeatDirection != null then 'repeatDirection']: repeatDirection,
|
||||
tooltip: {
|
||||
|
|
|
@ -25,7 +25,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
target_match_re: {
|
||||
severity: 'warning|info',
|
||||
},
|
||||
equal: ['alertname'],
|
||||
equal: ['namespace', 'alertname'],
|
||||
}, {
|
||||
source_match: {
|
||||
severity: 'warning',
|
||||
|
@ -33,7 +33,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
target_match_re: {
|
||||
severity: 'info',
|
||||
},
|
||||
equal: ['alertname'],
|
||||
equal: ['namespace', 'alertname'],
|
||||
}],
|
||||
route: {
|
||||
group_by: ['namespace'],
|
||||
|
@ -141,7 +141,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
spec: {
|
||||
replicas: $._config.alertmanager.replicas,
|
||||
version: $._config.versions.alertmanager,
|
||||
baseImage: $._config.imageRepos.alertmanager,
|
||||
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager,
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
|
||||
securityContext: {
|
||||
|
|
|
@ -30,7 +30,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
spec+: {
|
||||
thanos+: {
|
||||
version: $._config.versions.thanos,
|
||||
baseImage: $._config.imageRepos.thanos,
|
||||
image: $._config.imageRepos.thanos + ':' + $._config.versions.thanos,
|
||||
objectStorageConfig: $._config.thanos.objectStorageConfig,
|
||||
},
|
||||
},
|
||||
|
|
|
@ -178,7 +178,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
spec: {
|
||||
replicas: p.replicas,
|
||||
version: $._config.versions.prometheus,
|
||||
baseImage: $._config.imageRepos.prometheus,
|
||||
image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
|
||||
serviceAccountName: 'prometheus-' + p.name,
|
||||
serviceMonitorSelector: {},
|
||||
podMonitorSelector: {},
|
||||
|
|
|
@ -53,9 +53,15 @@
|
|||
},
|
||||
{
|
||||
expr: |||
|
||||
kube_deployment_spec_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
!=
|
||||
kube_deployment_status_replicas_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
(
|
||||
kube_deployment_spec_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
!=
|
||||
kube_deployment_status_replicas_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
) and (
|
||||
changes(kube_deployment_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
|
@ -68,9 +74,15 @@
|
|||
},
|
||||
{
|
||||
expr: |||
|
||||
kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
!=
|
||||
kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
(
|
||||
kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
!=
|
||||
kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
(import 'network.libsonnet') +
|
||||
(import 'persistentvolumesusage.libsonnet') +
|
||||
(import 'pods.libsonnet') +
|
||||
(import 'resources.libsonnet') +
|
||||
(import 'statefulset.libsonnet') +
|
||||
(import 'apiserver.libsonnet') +
|
||||
(import 'controller-manager.libsonnet') +
|
||||
(import 'scheduler.libsonnet') +
|
||||
|
|
|
@ -1,195 +0,0 @@
|
|||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local annotation = grafana.annotation;
|
||||
local dashboard = grafana.dashboard;
|
||||
local graphPanel = grafana.graphPanel;
|
||||
local prometheus = grafana.prometheus;
|
||||
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
|
||||
local row = grafana.row;
|
||||
local singlestat = grafana.singlestat;
|
||||
local template = grafana.template;
|
||||
local numbersinglestat = promgrafonnet.numbersinglestat;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
'pods.json':
|
||||
local memoryRow = row.new()
|
||||
.addPanel(
|
||||
graphPanel.new(
|
||||
'Memory Usage',
|
||||
datasource='$datasource',
|
||||
min=0,
|
||||
span=12,
|
||||
format='bytes',
|
||||
legend_rightSide=true,
|
||||
legend_alignAsTable=true,
|
||||
legend_current=true,
|
||||
legend_avg=true,
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container", container!="POD"})' % $._config,
|
||||
legendFormat='Current: {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config,
|
||||
legendFormat='Requested: {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory", pod="$pod", container=~"$container"})' % $._config,
|
||||
legendFormat='Limit: {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod", container=~"$container", container!="POD"})' % $._config,
|
||||
legendFormat='Cache: {{ container }}',
|
||||
))
|
||||
);
|
||||
|
||||
local cpuRow = row.new()
|
||||
.addPanel(
|
||||
graphPanel.new(
|
||||
'CPU Usage',
|
||||
datasource='$datasource',
|
||||
min=0,
|
||||
span=12,
|
||||
legend_rightSide=true,
|
||||
legend_alignAsTable=true,
|
||||
legend_current=true,
|
||||
legend_avg=true,
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
'sum by (container) (irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", image!="", pod="$pod", container=~"$container", container!="POD"}[4m]))' % $._config,
|
||||
legendFormat='Current: {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config,
|
||||
legendFormat='Requested: {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sum by(container) (kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu", pod="$pod", container=~"$container"})' % $._config,
|
||||
legendFormat='Limit: {{ container }}',
|
||||
))
|
||||
);
|
||||
|
||||
local networkRow = row.new()
|
||||
.addPanel(
|
||||
graphPanel.new(
|
||||
'Network I/O',
|
||||
datasource='$datasource',
|
||||
format='bytes',
|
||||
min=0,
|
||||
span=12,
|
||||
legend_rightSide=true,
|
||||
legend_alignAsTable=true,
|
||||
legend_current=true,
|
||||
legend_avg=true,
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
'sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config,
|
||||
legendFormat='RX: {{ pod }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[4m])))' % $._config,
|
||||
legendFormat='TX: {{ pod }}',
|
||||
))
|
||||
);
|
||||
|
||||
local restartsRow = row.new()
|
||||
.addPanel(
|
||||
graphPanel.new(
|
||||
'Total Restarts Per Container',
|
||||
datasource='$datasource',
|
||||
format='short',
|
||||
min=0,
|
||||
span=12,
|
||||
legend_rightSide=true,
|
||||
legend_alignAsTable=true,
|
||||
legend_current=true,
|
||||
legend_avg=true,
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
'max by (container) (kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container=~"$container"})' % $._config,
|
||||
legendFormat='Restarts: {{ container }}',
|
||||
))
|
||||
);
|
||||
|
||||
local restartAnnotation = annotation.datasource(
|
||||
'Restarts',
|
||||
'$datasource',
|
||||
expr='time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[2m]) > 0)' % $._config,
|
||||
enable=true,
|
||||
hide=false,
|
||||
iconColor='rgba(215, 44, 44, 1)',
|
||||
tags=['restart'],
|
||||
type='rows',
|
||||
builtIn=1,
|
||||
);
|
||||
|
||||
dashboard.new(
|
||||
'%(dashboardNamePrefix)sPods' % $._config.grafanaK8s,
|
||||
time_from='now-1h',
|
||||
uid=($._config.grafanaDashboardIDs['pods.json']),
|
||||
tags=($._config.grafanaK8s.dashboardTags),
|
||||
).addTemplate(
|
||||
{
|
||||
current: {
|
||||
text: 'default',
|
||||
value: 'default',
|
||||
},
|
||||
hide: 0,
|
||||
label: null,
|
||||
name: 'datasource',
|
||||
options: [],
|
||||
query: 'prometheus',
|
||||
refresh: 1,
|
||||
regex: '',
|
||||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(
|
||||
template.new(
|
||||
'cluster',
|
||||
'$datasource',
|
||||
'label_values(kube_pod_info, %(clusterLabel)s)' % $._config,
|
||||
label='cluster',
|
||||
refresh='time',
|
||||
hide=if $._config.showMultiCluster then '' else 'variable',
|
||||
sort=1,
|
||||
)
|
||||
)
|
||||
.addTemplate(
|
||||
template.new(
|
||||
'namespace',
|
||||
'$datasource',
|
||||
'label_values(kube_pod_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
label='Namespace',
|
||||
refresh='time',
|
||||
sort=1,
|
||||
)
|
||||
)
|
||||
.addTemplate(
|
||||
template.new(
|
||||
'pod',
|
||||
'$datasource',
|
||||
'label_values(kube_pod_info{%(clusterLabel)s="$cluster", namespace=~"$namespace"}, pod)' % $._config,
|
||||
label='Pod',
|
||||
refresh='time',
|
||||
sort=1,
|
||||
)
|
||||
)
|
||||
.addTemplate(
|
||||
template.new(
|
||||
'container',
|
||||
'$datasource',
|
||||
'label_values(kube_pod_container_info{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}, container)' % $._config,
|
||||
label='Container',
|
||||
refresh='time',
|
||||
includeAll=true,
|
||||
sort=1,
|
||||
)
|
||||
)
|
||||
.addAnnotation(restartAnnotation)
|
||||
.addRow(memoryRow)
|
||||
.addRow(cpuRow)
|
||||
.addRow(networkRow)
|
||||
.addRow(restartsRow),
|
||||
},
|
||||
}
|
File diff suppressed because it is too large
Load diff
274
monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet
vendored
Normal file
274
monitoring/vendor/kubernetes-mixin/dashboards/resources/cluster.libsonnet
vendored
Normal file
|
@ -0,0 +1,274 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
local intervalTemplate =
|
||||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
) + {
|
||||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
skipUrlSync: false,
|
||||
type: 'interval',
|
||||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
'k8s-resources-cluster.json':
|
||||
local tableStyles = {
|
||||
namespace: {
|
||||
alias: 'Namespace',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||
linkTooltip: 'Drill down to pods',
|
||||
},
|
||||
'Value #A': {
|
||||
alias: 'Pods',
|
||||
linkTooltip: 'Drill down to pods',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||
decimals: 0,
|
||||
},
|
||||
'Value #B': {
|
||||
alias: 'Workloads',
|
||||
linkTooltip: 'Drill down to workloads',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
|
||||
decimals: 0,
|
||||
},
|
||||
};
|
||||
|
||||
local podWorkloadColumns = [
|
||||
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
|
||||
];
|
||||
|
||||
local networkColumns = [
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config,
|
||||
];
|
||||
|
||||
local networkTableStyles = {
|
||||
namespace: {
|
||||
alias: 'Namespace',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
|
||||
linkTooltip: 'Drill down to pods',
|
||||
},
|
||||
'Value #A': {
|
||||
alias: 'Current Receive Bandwidth',
|
||||
unit: 'Bps',
|
||||
},
|
||||
'Value #B': {
|
||||
alias: 'Current Transmit Bandwidth',
|
||||
unit: 'Bps',
|
||||
},
|
||||
'Value #C': {
|
||||
alias: 'Rate of Received Packets',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #D': {
|
||||
alias: 'Rate of Transmitted Packets',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #E': {
|
||||
alias: 'Rate of Received Packets Dropped',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #F': {
|
||||
alias: 'Rate of Transmitted Packets Dropped',
|
||||
unit: 'pps',
|
||||
},
|
||||
};
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
|
||||
).addTemplate('cluster', 'node_cpu_seconds_total', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addRow(
|
||||
(g.row('Headlines') +
|
||||
{
|
||||
height: '100px',
|
||||
showTitle: false,
|
||||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[1m]))' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Requests Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Limits Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{%(clusterLabel)s="$cluster"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Utilisation') +
|
||||
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Requests Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Limits Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{%(clusterLabel)s="$cluster"})' % $._config)
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Quota')
|
||||
.addPanel(
|
||||
g.panel('CPU Quota') +
|
||||
g.tablePanel(podWorkloadColumns + [
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #C': { alias: 'CPU Usage' },
|
||||
'Value #D': { alias: 'CPU Requests' },
|
||||
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||
'Value #F': { alias: 'CPU Limits' },
|
||||
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage (w/o cache)') +
|
||||
// Not using container_memory_usage_bytes here because that includes page cache
|
||||
g.queryPanel('sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Requests')
|
||||
.addPanel(
|
||||
g.panel('Requests by Namespace') +
|
||||
g.tablePanel(podWorkloadColumns + [
|
||||
// Not using container_memory_usage_bytes here because that includes page cache
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },
|
||||
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
|
||||
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
|
||||
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Current Network Usage') +
|
||||
g.tablePanel(
|
||||
networkColumns,
|
||||
networkTableStyles
|
||||
),
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Namespace: Received') +
|
||||
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
|
||||
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
}
|
107
monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet
vendored
Normal file
107
monitoring/vendor/kubernetes-mixin/dashboards/resources/multi-cluster.libsonnet
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+::
|
||||
if $._config.showMultiCluster then {
|
||||
'k8s-resources-multicluster.json':
|
||||
local tableStyles = {
|
||||
[$._config.clusterLabel]: {
|
||||
alias: 'Cluster',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-cluster?var-datasource=$datasource&var-cluster=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-cluster.json') },
|
||||
},
|
||||
};
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Multi-Cluster' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-multicluster.json']),
|
||||
).addRow(
|
||||
(g.row('Headlines') +
|
||||
{
|
||||
height: '100px',
|
||||
showTitle: false,
|
||||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[1m]))' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Requests Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_requests_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Limits Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_limits_cpu_cores) / sum(kube_node_status_allocatable_cpu_cores)' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Utilisation') +
|
||||
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Requests Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_requests_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Memory Limits Commitment') +
|
||||
g.statPanel('sum(kube_pod_container_resource_limits_memory_bytes) / sum(kube_node_status_allocatable_memory_bytes)' % $._config)
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
|
||||
+ { fill: 0, linewidth: 2 },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Quota')
|
||||
.addPanel(
|
||||
g.panel('CPU Quota') +
|
||||
g.tablePanel([
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_cpu_cores) by (%(clusterLabel)s)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'CPU Usage' },
|
||||
'Value #B': { alias: 'CPU Requests' },
|
||||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'CPU Limits' },
|
||||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage (w/o cache)') +
|
||||
// Not using container_memory_usage_bytes here because that includes page cache
|
||||
g.queryPanel('sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
|
||||
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Requests')
|
||||
.addPanel(
|
||||
// Every query in this table aggregates `by (clusterLabel)`, so rows are clusters, not namespaces.
g.panel('Requests by Cluster') +
|
||||
g.tablePanel([
|
||||
// Not using container_memory_usage_bytes here because that includes page cache
|
||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||
'sum(container_memory_rss{container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits_memory_bytes) by (%(clusterLabel)s)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
||||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
||||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
||||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
||||
} else {},
|
||||
}
|
213
monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet
vendored
Normal file
213
monitoring/vendor/kubernetes-mixin/dashboards/resources/namespace.libsonnet
vendored
Normal file
|
@ -0,0 +1,213 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
// `$interval` template variable (type 'interval', single option '4h').
// The network queries in this file use it as their irate() range, and it is
// appended to the dashboard's templating list at the end of the definition.
local intervalTemplate =
|
||||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
) + {
|
||||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
skipUrlSync: false,
|
||||
type: 'interval',
|
||||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
'k8s-resources-namespace.json':
|
||||
local tableStyles = {
|
||||
pod: {
|
||||
alias: 'Pod',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') },
|
||||
},
|
||||
};
|
||||
|
||||
local networkColumns = [
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config,
|
||||
];
|
||||
|
||||
local networkTableStyles = {
|
||||
pod: {
|
||||
alias: 'Pod',
|
||||
link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') },
|
||||
linkTooltip: 'Drill down to pods',
|
||||
},
|
||||
'Value #A': {
|
||||
alias: 'Current Receive Bandwidth',
|
||||
unit: 'Bps',
|
||||
},
|
||||
'Value #B': {
|
||||
alias: 'Current Transmit Bandwidth',
|
||||
unit: 'Bps',
|
||||
},
|
||||
'Value #C': {
|
||||
alias: 'Rate of Received Packets',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #D': {
|
||||
alias: 'Rate of Transmitted Packets',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #E': {
|
||||
alias: 'Rate of Received Packets Dropped',
|
||||
unit: 'pps',
|
||||
},
|
||||
'Value #F': {
|
||||
alias: 'Rate of Transmitted Packets Dropped',
|
||||
unit: 'pps',
|
||||
},
|
||||
};
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack,
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Quota')
|
||||
.addPanel(
|
||||
g.panel('CPU Quota') +
|
||||
g.tablePanel([
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'CPU Usage' },
|
||||
'Value #B': { alias: 'CPU Requests' },
|
||||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'CPU Limits' },
|
||||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Usage')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage (w/o cache)') +
|
||||
// Like above, without page cache
|
||||
g.queryPanel('sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Quota')
|
||||
.addPanel(
|
||||
g.panel('Memory Quota') +
|
||||
g.tablePanel([
|
||||
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
// Memory Requests %: usage / requests per pod. Both sides are restricted to the
// selected cluster so pods with the same namespace/name in other clusters do not
// inflate the denominator.
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
// Memory Limits %: usage / limits per pod. Both sides are restricted to the
// selected cluster so pods with the same namespace/name in other clusters do not
// inflate the denominator.
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config,
|
||||
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config,
|
||||
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
||||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
||||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
||||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' },
|
||||
'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' },
|
||||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Current Network Usage') +
|
||||
g.tablePanel(
|
||||
networkColumns,
|
||||
networkTableStyles
|
||||
),
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
// Transmitted packets: must read the *transmit* counter (the receive counter belongs to the sibling panel).
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a dropped-packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a dropped-packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
}
|
107
monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet
vendored
Normal file
107
monitoring/vendor/kubernetes-mixin/dashboards/resources/node.libsonnet
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
// `$interval` template variable (type 'interval', single option '4h').
// NOTE(review): nothing in this file references intervalTemplate — the dashboard
// below only adds `tags`, and none of its queries use $interval.
local intervalTemplate =
|
||||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
) + {
|
||||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
skipUrlSync: false,
|
||||
type: 'interval',
|
||||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
'k8s-resources-node.json':
|
||||
local tableStyles = {
|
||||
pod: {
|
||||
alias: 'Pod',
|
||||
},
|
||||
};
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('node', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'node')
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack,
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Quota')
|
||||
.addPanel(
|
||||
g.panel('CPU Quota') +
|
||||
g.tablePanel([
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node="$node"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'CPU Usage' },
|
||||
'Value #B': { alias: 'CPU Requests' },
|
||||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'CPU Limits' },
|
||||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Usage')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage (w/o cache)') +
|
||||
// Like above, without page cache
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node", container!=""}) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Quota')
|
||||
.addPanel(
|
||||
g.panel('Memory Quota') +
|
||||
g.tablePanel([
|
||||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
// Memory Requests %: usage / requests per pod. Both sides are restricted to the
// selected cluster so a same-named node/pod in another cluster cannot skew the ratio.
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
// Memory Limits %: usage / limits per pod. Both sides are restricted to the
// selected cluster so a same-named node/pod in another cluster cannot skew the ratio.
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", node="$node"}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node="$node",container!=""}) by (pod)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
||||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
||||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
||||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' },
|
||||
'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' },
|
||||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
||||
})
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags },
|
||||
}
|
||||
}
|
169
monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet
vendored
Normal file
169
monitoring/vendor/kubernetes-mixin/dashboards/resources/pod.libsonnet
vendored
Normal file
|
@ -0,0 +1,169 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
local intervalTemplate =
|
||||
template.new(
|
||||
name='interval',
|
||||
datasource='$datasource',
|
||||
query='4h',
|
||||
current='5m',
|
||||
hide=2,
|
||||
refresh=2,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
) + {
|
||||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
skipUrlSync: false,
|
||||
type: 'interval',
|
||||
options: [
|
||||
{
|
||||
selected: true,
|
||||
text: '4h',
|
||||
value: '4h',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
'k8s-resources-pod.json':
|
||||
local tableStyles = {
|
||||
container: {
|
||||
alias: 'Container',
|
||||
},
|
||||
};
|
||||
|
||||
g.dashboard(
|
||||
'%(dashboardNamePrefix)sCompute Resources / Pod' % $._config.grafanaK8s,
|
||||
uid=($._config.grafanaDashboardIDs['k8s-resources-pod.json']),
|
||||
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
|
||||
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
|
||||
.addTemplate('pod', 'kube_pod_info{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'pod')
|
||||
.addRow(
|
||||
g.row('CPU Usage')
|
||||
.addPanel(
|
||||
g.panel('CPU Usage') +
|
||||
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", container!="POD", %(clusterLabel)s="$cluster"}) by (container)' % $._config, '{{container}}') +
|
||||
g.stack,
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('CPU Quota')
|
||||
.addPanel(
|
||||
g.panel('CPU Quota') +
|
||||
g.tablePanel([
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD"}) by (container)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'CPU Usage' },
|
||||
'Value #B': { alias: 'CPU Requests' },
|
||||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'CPU Limits' },
|
||||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Usage')
|
||||
.addPanel(
|
||||
g.panel('Memory Usage') +
|
||||
g.queryPanel([
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
], [
|
||||
'{{container}} (RSS)',
|
||||
'{{container}} (Cache)',
|
||||
'{{container}} (Swap)',
|
||||
]) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('bytes') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Memory Quota')
|
||||
.addPanel(
|
||||
g.panel('Memory Quota') +
|
||||
g.tablePanel([
|
||||
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)' % $._config,
|
||||
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
// Memory Requests %: usage / requests per container. Both sides are restricted to
// the selected cluster so a same-named pod in another cluster cannot skew the ratio.
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config,
|
||||
// Memory Limits %: usage / limits per container. Both sides are restricted to
// the selected cluster so a same-named pod in another cluster cannot skew the ratio.
'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
|
||||
'sum(container_memory_rss{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config,
|
||||
'sum(container_memory_cache{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config,
|
||||
'sum(container_memory_swap{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config,
|
||||
], tableStyles {
|
||||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
|
||||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
|
||||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
|
||||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
|
||||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
|
||||
'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' },
|
||||
'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' },
|
||||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
|
||||
})
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a dropped-packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
)
|
||||
.addRow(
|
||||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
// Scope the series to the dashboard's $cluster variable, like the CPU/memory queries in this file.
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", pod=~"$pod"}[$interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
// This panel plots a dropped-packet rate, so the axis is packets/second rather than bytes/second.
{ yaxes: g.yaxes('pps') },
|
||||
)
|
||||
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
}
|
338
monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet
vendored
Normal file
338
monitoring/vendor/kubernetes-mixin/dashboards/resources/workload-namespace.libsonnet
vendored
Normal file
|
@ -0,0 +1,338 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
// `$interval` dashboard variable; it supplies the range used by the
// irate(...[$interval]) network queries of this dashboard.
local intervalTemplate =
  // Variable as produced by grafonnet's template.new.
  local base = template.new(
    name='interval',
    datasource='$datasource',
    query='4h',
    current='5m',
    sort=1,
    refresh=2,
    hide=2,
    includeAll=false
  );
  // Fields layered on top of the grafonnet output: the variable type is set
  // to 'interval' and its only selectable option is '4h'.
  local intervalFields = {
    type: 'interval',
    options: [
      { selected: true, text: '4h', value: '4h' },
    ],
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
  };
  base + intervalFields,
|
||||
|
||||
// `$type` dashboard variable: the workload_type label values present on
// mixin_pod_workload for the selected namespace.
local typeTemplate =
  // One expression serves as both the variable's `query` and its `definition`.
  local workloadTypeQuery = 'label_values(mixin_pod_workload{namespace=~"$namespace", workload=~".+"}, workload_type)';
  template.new(
    name='type',
    datasource='$datasource',
    query=workloadTypeQuery,
    current='deployment',
    sort=0,
    refresh=1,
    hide='',
    includeAll=false
  ) + {
    definition: workloadTypeQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  },
|
||||
|
||||
'k8s-resources-workloads-namespace.json':
|
||||
// Column styles shared by the CPU and Memory quota tables. The workload
// column links to the per-workload dashboard, passing the clicked cell as
// var-workload and $__cell_2 as var-type.
local tableStyles =
  local linkArgs = {
    prefix: $._config.grafanaK8s.linkPrefix,
    uid: std.md5('k8s-resources-workload.json'),
  };
  {
    workload_type: { alias: 'Workload Type' },
    workload: {
      alias: 'Workload',
      link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2' % linkArgs,
    },
  };
|
||||
|
||||
// One query per column of the 'Current Network Usage' table. Every column
// uses the same expression shape and differs only in the container network
// metric, so the list is generated from the metric names. The order matters:
// the table styles address the columns as 'Value #A' .. 'Value #F'.
local networkColumns = [
  |||
    (sum(irate(%(metric)s{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
    * on (namespace,pod)
    group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
  ||| % ($._config { metric: metric })
  for metric in [
    'container_network_receive_bytes_total',
    'container_network_transmit_bytes_total',
    'container_network_receive_packets_total',
    'container_network_transmit_packets_total',
    'container_network_receive_packets_dropped_total',
    'container_network_transmit_packets_dropped_total',
  ]
];
|
||||
|
||||
// Column styles for the 'Current Network Usage' table: bandwidth columns are
// shown in Bps, packet-rate columns in pps. The workload column links to the
// per-workload dashboard, carrying the current $type selection.
local networkTableStyles =
  local valueColumn(alias, unit) = { alias: alias, unit: unit };
  local linkArgs = {
    prefix: $._config.grafanaK8s.linkPrefix,
    uid: std.md5('k8s-resources-workload.json'),
  };
  {
    workload: {
      alias: 'Workload',
      link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type' % linkArgs,
      linkTooltip: 'Drill down to pods',
    },
    workload_type: { alias: 'Workload Type' },
    'Value #A': valueColumn('Current Receive Bandwidth', 'Bps'),
    'Value #B': valueColumn('Current Transmit Bandwidth', 'Bps'),
    'Value #C': valueColumn('Rate of Received Packets', 'pps'),
    'Value #D': valueColumn('Rate of Transmitted Packets', 'pps'),
    'Value #E': valueColumn('Rate of Received Packets Dropped', 'pps'),
    'Value #F': valueColumn('Rate of Transmitted Packets Dropped', 'pps'),
  };
|
||||
|
||||
// Shared query shape: multiply a per-pod series by mixin_pod_workload (joined
// on namespace and pod) to attach the workload and workload_type labels, then
// sum per workload. `series` is the already-formatted left-hand selector.
local sumByWorkload(series) = |||
  sum(
  %(series)s
  * on(namespace,pod)
  group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}
  ) by (workload, workload_type)
||| % ($._config { series: series });

// CPU: usage and requests are built directly; limits is derived by renaming
// the metric inside the requests query.
local cpuUsageQuery = sumByWorkload(
  'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config
);
local cpuRequestsQuery = sumByWorkload(
  'kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config
);
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');

// Number of mixin_pod_workload series (one per pod) for each workload.
local podCountQuery = 'count(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)' % $._config;

// Memory: requests/limits reuse the CPU queries with the metric suffix swapped.
local memUsageQuery = sumByWorkload(
  'container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}' % $._config
);
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||
|
||||
// Builds one 'Network' row holding a single stacked graph of a container
// network metric per workload.
//   title       - panel title
//   aggregation - PromQL aggregation applied across pods ('sum' or 'avg')
//   metric      - container_network_* counter to take irate() of
//   unit        - y-axis format: 'Bps' for byte counters, 'pps' for packet counters
local networkGraphRow(title, aggregation, metric, unit) =
  g.row('Network')
  .addPanel(
    g.panel(title) +
    g.queryPanel(|||
      (%(aggregation)s(irate(%(metric)s{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval])
      * on (namespace,pod)
      group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
    ||| % ($._config { aggregation: aggregation, metric: metric }), '{{workload}}') +
    g.stack +
    { yaxes: g.yaxes(unit) },
  );

// Dashboard: compute resources of one namespace, broken down by workload.
g.dashboard(
  '%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
  uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
.addRow(
  g.row('CPU Usage')
  .addPanel(
    g.panel('CPU Usage') +
    g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') +
    g.stack,
  )
)
.addRow(
  g.row('CPU Quota')
  .addPanel(
    g.panel('CPU Quota') +
    // Query order defines the 'Value #A'..'Value #F' columns styled below.
    g.tablePanel([
      podCountQuery,
      cpuUsageQuery,
      cpuRequestsQuery,
      cpuUsageQuery + '/' + cpuRequestsQuery,
      cpuLimitsQuery,
      cpuUsageQuery + '/' + cpuLimitsQuery,
    ], tableStyles {
      'Value #A': { alias: 'Running Pods', decimals: 0 },
      'Value #B': { alias: 'CPU Usage' },
      'Value #C': { alias: 'CPU Requests' },
      'Value #D': { alias: 'CPU Requests %', unit: 'percentunit' },
      'Value #E': { alias: 'CPU Limits' },
      'Value #F': { alias: 'CPU Limits %', unit: 'percentunit' },
    })
  )
)
.addRow(
  g.row('Memory Usage')
  .addPanel(
    g.panel('Memory Usage') +
    g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') +
    g.stack +
    { yaxes: g.yaxes('bytes') },
  )
)
.addRow(
  g.row('Memory Quota')
  .addPanel(
    g.panel('Memory Quota') +
    // Query order defines the 'Value #A'..'Value #F' columns styled below.
    g.tablePanel([
      podCountQuery,
      memUsageQuery,
      memRequestsQuery,
      memUsageQuery + '/' + memRequestsQuery,
      memLimitsQuery,
      memUsageQuery + '/' + memLimitsQuery,
    ], tableStyles {
      'Value #A': { alias: 'Running Pods', decimals: 0 },
      'Value #B': { alias: 'Memory Usage', unit: 'bytes' },
      'Value #C': { alias: 'Memory Requests', unit: 'bytes' },
      'Value #D': { alias: 'Memory Requests %', unit: 'percentunit' },
      'Value #E': { alias: 'Memory Limits', unit: 'bytes' },
      'Value #F': { alias: 'Memory Limits %', unit: 'percentunit' },
    })
  )
)
.addRow(
  g.row('Network')
  .addPanel(
    g.panel('Current Network Usage') +
    g.tablePanel(
      networkColumns,
      networkTableStyles
    ),
  )
)
// Byte counters: bytes per second.
.addRow(networkGraphRow('Receive Bandwidth', 'sum', 'container_network_receive_bytes_total', 'Bps'))
.addRow(networkGraphRow('Transmit Bandwidth', 'sum', 'container_network_transmit_bytes_total', 'Bps'))
.addRow(networkGraphRow('Average Container Bandwidth by Workload: Received', 'avg', 'container_network_receive_bytes_total', 'Bps'))
.addRow(networkGraphRow('Average Container Bandwidth by Workload: Transmitted', 'avg', 'container_network_transmit_bytes_total', 'Bps'))
// Packet counters: packets per second. These axes were previously labelled
// 'Bps', which mislabels a packet rate as a byte rate.
.addRow(networkGraphRow('Rate of Received Packets', 'sum', 'container_network_receive_packets_total', 'pps'))
.addRow(networkGraphRow('Rate of Transmitted Packets', 'sum', 'container_network_transmit_packets_total', 'pps'))
.addRow(networkGraphRow('Rate of Received Packets Dropped', 'sum', 'container_network_receive_packets_dropped_total', 'pps'))
.addRow(networkGraphRow('Rate of Transmitted Packets Dropped', 'sum', 'container_network_transmit_packets_dropped_total', 'pps'))
// Append the dashboard tags and the $interval / $type variables.
+ { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate, typeTemplate] } },
|
||||
|
||||
}
|
||||
}
|
310
monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet
vendored
Normal file
310
monitoring/vendor/kubernetes-mixin/dashboards/resources/workload.libsonnet
vendored
Normal file
|
@ -0,0 +1,310 @@
|
|||
local g = import 'grafana-builder/grafana.libsonnet';
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local template = grafana.template;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
// `$interval` dashboard variable; it supplies the range used by the
// irate(...[$interval]) network queries of this dashboard.
local intervalTemplate =
  // Variable as produced by grafonnet's template.new.
  local base = template.new(
    name='interval',
    datasource='$datasource',
    query='4h',
    current='5m',
    sort=1,
    refresh=2,
    hide=2,
    includeAll=false
  );
  // Fields layered on top of the grafonnet output: the variable type is set
  // to 'interval' and its only selectable option is '4h'.
  local intervalFields = {
    type: 'interval',
    options: [
      { selected: true, text: '4h', value: '4h' },
    ],
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
  };
  base + intervalFields,
|
||||
|
||||
'k8s-resources-workload.json':
|
||||
// Column styles shared by the CPU and Memory quota tables: the pod column
// links to the per-pod dashboard, passing the clicked cell as var-pod.
local tableStyles =
  local linkArgs = {
    prefix: $._config.grafanaK8s.linkPrefix,
    uid: std.md5('k8s-resources-pod.json'),
  };
  {
    pod: {
      alias: 'Pod',
      link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % linkArgs,
    },
  };
|
||||
|
||||
// One query per column of the 'Current Network Usage' table. Every column
// uses the same expression shape and differs only in the container network
// metric, so the list is generated from the metric names. The order matters:
// the table styles address the columns as 'Value #A' .. 'Value #F'.
local networkColumns = [
  |||
    (sum(irate(%(metric)s{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$interval])
    * on (namespace,pod)
    group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
  ||| % ($._config { metric: metric })
  for metric in [
    'container_network_receive_bytes_total',
    'container_network_transmit_bytes_total',
    'container_network_receive_packets_total',
    'container_network_transmit_packets_total',
    'container_network_receive_packets_dropped_total',
    'container_network_transmit_packets_dropped_total',
  ]
];
|
||||
|
||||
// Column styles for the 'Current Network Usage' table: bandwidth columns are
// shown in Bps, packet-rate columns in pps. The pod column links to the
// per-pod dashboard.
local networkTableStyles =
  local valueColumn(alias, unit) = { alias: alias, unit: unit };
  local linkArgs = {
    prefix: $._config.grafanaK8s.linkPrefix,
    uid: std.md5('k8s-resources-pod.json'),
  };
  {
    pod: {
      alias: 'Pod',
      link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % linkArgs,
    },
    'Value #A': valueColumn('Current Receive Bandwidth', 'Bps'),
    'Value #B': valueColumn('Current Transmit Bandwidth', 'Bps'),
    'Value #C': valueColumn('Rate of Received Packets', 'pps'),
    'Value #D': valueColumn('Rate of Transmitted Packets', 'pps'),
    'Value #E': valueColumn('Rate of Received Packets Dropped', 'pps'),
    'Value #F': valueColumn('Rate of Transmitted Packets Dropped', 'pps'),
  };
|
||||
|
||||
|
||||
// Shared query shape: multiply a per-pod series by mixin_pod_workload (joined
// on namespace and pod) restricted to the selected workload and type, then
// sum per pod. `series` is the already-formatted left-hand selector.
local sumByPod(series) = |||
  sum(
  %(series)s
  * on(namespace,pod)
  group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
  ) by (pod)
||| % ($._config { series: series });

// CPU: usage and requests are built directly; limits is derived by renaming
// the metric inside the requests query.
local cpuUsageQuery = sumByPod(
  'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config
);
local cpuRequestsQuery = sumByPod(
  'kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config
);
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');

// Memory: requests/limits reuse the CPU queries with the metric suffix swapped.
local memUsageQuery = sumByPod(
  'container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!=""}' % $._config
);
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
|
||||
|
||||
// Builds one 'Network' row holding a single stacked graph of a container
// network metric per pod of the selected workload.
//   title       - panel title
//   aggregation - PromQL aggregation ('sum' or 'avg')
//   metric      - container_network_* counter to take irate() of
//   unit        - y-axis format: 'Bps' for byte counters, 'pps' for packet counters
local networkGraphRow(title, aggregation, metric, unit) =
  g.row('Network')
  .addPanel(
    g.panel(title) +
    g.queryPanel(|||
      (%(aggregation)s(irate(%(metric)s{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$interval])
      * on (namespace,pod)
      group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
    ||| % ($._config { aggregation: aggregation, metric: metric }), '{{pod}}') +
    g.stack +
    { yaxes: g.yaxes(unit) },
  );

// Dashboard: compute resources of a single workload, broken down by pod.
g.dashboard(
  '%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
  uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
.addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
.addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
.addRow(
  g.row('CPU Usage')
  .addPanel(
    g.panel('CPU Usage') +
    g.queryPanel(cpuUsageQuery, '{{pod}}') +
    g.stack,
  )
)
.addRow(
  g.row('CPU Quota')
  .addPanel(
    g.panel('CPU Quota') +
    // Query order defines the 'Value #A'..'Value #E' columns styled below.
    g.tablePanel([
      cpuUsageQuery,
      cpuRequestsQuery,
      cpuUsageQuery + '/' + cpuRequestsQuery,
      cpuLimitsQuery,
      cpuUsageQuery + '/' + cpuLimitsQuery,
    ], tableStyles {
      'Value #A': { alias: 'CPU Usage' },
      'Value #B': { alias: 'CPU Requests' },
      'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
      'Value #D': { alias: 'CPU Limits' },
      'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
    })
  )
)
.addRow(
  g.row('Memory Usage')
  .addPanel(
    g.panel('Memory Usage') +
    g.queryPanel(memUsageQuery, '{{pod}}') +
    g.stack +
    { yaxes: g.yaxes('bytes') },
  )
)
.addRow(
  g.row('Memory Quota')
  .addPanel(
    g.panel('Memory Quota') +
    // Query order defines the 'Value #A'..'Value #E' columns styled below.
    g.tablePanel([
      memUsageQuery,
      memRequestsQuery,
      memUsageQuery + '/' + memRequestsQuery,
      memLimitsQuery,
      memUsageQuery + '/' + memLimitsQuery,
    ], tableStyles {
      'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
      'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
      'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
      'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
      'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
    })
  )
)
.addRow(
  g.row('Network')
  .addPanel(
    g.panel('Current Network Usage') +
    g.tablePanel(
      networkColumns,
      networkTableStyles
    ),
  )
)
// Byte counters: bytes per second.
.addRow(networkGraphRow('Receive Bandwidth', 'sum', 'container_network_receive_bytes_total', 'Bps'))
.addRow(networkGraphRow('Transmit Bandwidth', 'sum', 'container_network_transmit_bytes_total', 'Bps'))
.addRow(networkGraphRow('Average Container Bandwidth by Pod: Received', 'avg', 'container_network_receive_bytes_total', 'Bps'))
.addRow(networkGraphRow('Average Container Bandwidth by Pod: Transmitted', 'avg', 'container_network_transmit_bytes_total', 'Bps'))
// Packet counters: packets per second. These axes were previously labelled
// 'Bps', which mislabels a packet rate as a byte rate.
.addRow(networkGraphRow('Rate of Received Packets', 'sum', 'container_network_receive_packets_total', 'pps'))
.addRow(networkGraphRow('Rate of Transmitted Packets', 'sum', 'container_network_transmit_packets_total', 'pps'))
.addRow(networkGraphRow('Rate of Received Packets Dropped', 'sum', 'container_network_receive_packets_dropped_total', 'pps'))
.addRow(networkGraphRow('Rate of Transmitted Packets Dropped', 'sum', 'container_network_transmit_packets_dropped_total', 'pps'))
// Append the dashboard tags and the $interval variable.
+ { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [intervalTemplate] } },
|
||||
}
|
||||
}
|
|
@ -1,160 +0,0 @@
|
|||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local dashboard = grafana.dashboard;
|
||||
local graphPanel = grafana.graphPanel;
|
||||
local prometheus = grafana.prometheus;
|
||||
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
|
||||
local row = grafana.row;
|
||||
local singlestat = grafana.singlestat;
|
||||
local template = grafana.template;
|
||||
local numbersinglestat = promgrafonnet.numbersinglestat;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
'statefulset.json':
|
||||
// Label matchers shared by every overview query: cAdvisor series of the
// selected cluster/namespace whose pod name starts with the StatefulSet name.
local overviewPodSelector = '%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"' % $._config;

// Single-stat used in the overview row: span 4, unit postfix, sparkline.
local overviewStat(title, query, postfix) =
  numbersinglestat.new(title, query)
  .withSpanSize(4)
  .withPostfix(postfix)
  .withSparkline();

// CPU usage rate over 3m, summed across the StatefulSet's pods.
local cpuStat = overviewStat(
  'CPU',
  'sum(rate(container_cpu_usage_seconds_total{%s}[3m]))' % overviewPodSelector,
  'cores',
);

// Memory usage summed across pods. The value is divided by 1024^3 (i.e. GiB)
// although the postfix reads 'GB'.
local memoryStat = overviewStat(
  'Memory',
  'sum(container_memory_usage_bytes{%s}) / 1024^3' % overviewPodSelector,
  'GB',
);

// Transmit + receive byte rate. Both halves now use the same selector; the
// receive half previously omitted the cAdvisor job selector, so the two sums
// could be taken over different sets of series.
local networkStat = overviewStat(
  'Network',
  'sum(rate(container_network_transmit_bytes_total{%(sel)s}[3m])) + sum(rate(container_network_receive_bytes_total{%(sel)s}[3m]))' % { sel: overviewPodSelector },
  'Bps',
);

local overviewRow =
  row.new()
  .addPanel(cpuStat)
  .addPanel(memoryStat)
  .addPanel(networkStat);
|
||||
|
||||
// Builds a plain number single-stat from a query template; the template is
// formatted with $._config here instead of at every call site.
local statefulsetStat(title, queryTemplate) =
  numbersinglestat.new(title, queryTemplate % $._config);

// Replica count requested for the StatefulSet.
local desiredReplicasStat = statefulsetStat(
  'Desired Replicas',
  'max(kube_statefulset_replicas{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)',
);

// Replicas reported as being on the current version.
local availableReplicasStat = statefulsetStat(
  'Replicas of current version',
  'min(kube_statefulset_status_replicas_current{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)',
);

local observedGenerationStat = statefulsetStat(
  'Observed Generation',
  'max(kube_statefulset_status_observed_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)',
);

local metadataGenerationStat = statefulsetStat(
  'Metadata Generation',
  'max(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)',
);

// 100px-high row holding the four stats, added left to right.
local statsRow = std.foldl(
  function(currentRow, stat) currentRow.addPanel(stat),
  [desiredReplicasStat, availableReplicasStat, observedGenerationStat, metadataGenerationStat],
  row.new(height='100px'),
);
|
||||
|
||||
// Graph of the StatefulSet's replica counts over time. Each entry below
// becomes one Prometheus target, added in list order.
local replicasGraph =
  local series = [
    { agg: 'max', metric: 'kube_statefulset_replicas', legend: 'replicas specified' },
    { agg: 'max', metric: 'kube_statefulset_status_replicas', legend: 'replicas created' },
    { agg: 'min', metric: 'kube_statefulset_status_replicas_ready', legend: 'ready' },
    { agg: 'min', metric: 'kube_statefulset_status_replicas_current', legend: 'replicas of current version' },
    { agg: 'min', metric: 'kube_statefulset_status_replicas_updated', legend: 'updated' },
  ];
  // All targets share one expression shape; only aggregation and metric vary.
  local expr(s) =
    '%(agg)s(%(metric)s{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % ($._config + s);
  std.foldl(
    function(panel, s) panel.addTarget(prometheus.target(expr(s), legendFormat=s.legend)),
    series,
    graphPanel.new(
      'Replicas',
      datasource='$datasource',
    ),
  );

// Row containing only the replicas graph.
local replicasRow = row.new().addPanel(replicasGraph);
|
||||
|
||||
// Datasource picker, given as a raw template object rather than built
// through template.new.
local datasourceTemplate = {
  name: 'datasource',
  type: 'datasource',
  query: 'prometheus',
  label: null,
  hide: 0,
  refresh: 1,
  regex: '',
  options: [],
  current: {
    text: 'default',
    value: 'default',
  },
};

// $cluster: shown only when multi-cluster support is enabled in the config.
local clusterTemplate = template.new(
  'cluster',
  '$datasource',
  'label_values(kube_statefulset_metadata_generation, %s)' % $._config.clusterLabel,
  label='cluster',
  refresh='time',
  hide=if $._config.showMultiCluster then '' else 'variable',
  sort=1,
);

// $namespace: namespaces that have StatefulSet metrics in the selected cluster.
local namespaceTemplate = template.new(
  'namespace',
  '$datasource',
  'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}, namespace)' % $._config,
  label='Namespace',
  refresh='time',
  sort=1,
);

// $statefulset: StatefulSets in the selected namespace.
local statefulsetTemplate = template.new(
  'statefulset',
  '$datasource',
  'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}, statefulset)' % $._config,
  label='Name',
  refresh='time',
  sort=1,
);

// Assemble the dashboard: variables first, then the three rows top to bottom.
dashboard.new(
  '%(dashboardNamePrefix)sStatefulSets' % $._config.grafanaK8s,
  time_from='now-1h',
  uid=($._config.grafanaDashboardIDs['statefulset.json']),
  tags=($._config.grafanaK8s.dashboardTags),
)
.addTemplate(datasourceTemplate)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(statefulsetTemplate)
.addRow(overviewRow)
.addRow(statsRow)
.addRow(replicasRow),
|
||||
},
|
||||
}
|
Reference in a new issue