first try at monitoring setup with jsonnet

This commit is contained in:
Tobias Brunner 2022-10-07 21:16:14 +02:00
parent 8e00f4bc65
commit 875fde3072
310 changed files with 72148 additions and 0 deletions

26
_apps/monitoring.yaml Normal file
View file

@ -0,0 +1,26 @@
# Argo CD Application that renders the Jsonnet sources under `monitoring/`
# of the gitops repository and deploys the result into the `monitoring`
# namespace of the local cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: monitoring
  namespace: argocd
  finalizers:
    # Cascade-delete the deployed resources when this Application is removed.
    - resources-finalizer.argocd.argoproj.io
spec:
  project: system
  source:
    directory:
      jsonnet:
        libs:
          # Jsonnet library paths are resolved relative to the repository
          # root, not relative to `path`, so the vendor directory that lives
          # inside `monitoring/` has to be spelled out in full.
          - monitoring/vendor
      # NOTE(review): with recursion enabled Argo CD also walks the
      # sub-directories of `path` (including the vendored libraries) when
      # looking for manifests -- confirm this is intended.
      recurse: true
    path: monitoring
    repoURL: https://git.tbrnt.ch/tobru/gitops-zurrli.git
    targetRevision: HEAD
  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc
  syncPolicy:
    automated:
      selfHeal: false
    syncOptions:
      # Let Argo CD create the destination namespace if it does not exist yet.
      - CreateNamespace=true

View file

@ -0,0 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "main"
}
],
"legacyImports": true
}

View file

@ -0,0 +1,180 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana.git",
"subdir": "grafana"
}
},
"version": "d039275e4916aceae1c137120882e01d857787ac",
"sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
},
{
"source": {
"git": {
"remote": "https://github.com/etcd-io/etcd.git",
"subdir": "contrib/mixin"
}
},
"version": "6a0bbf346256960cbbe0218d6ab13443ee93e8e3",
"sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "1120f9e255760a3c104b57871fcb91801e934382",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
}
},
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet-7.0"
}
},
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "grafana-builder"
}
},
"version": "d73aff453c9784cd6922119f3ce33d8d355a79e1",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
"subdir": ""
}
},
"version": "7b559e800a32a2a80caf4c968f37c4999ec44689",
"sum": "OqX/DHB6fuywNgqHAZTGRnfkYTQqoYmGePsrZ6nQELw="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "12402a564cbf4557763079ab8e6e995d9afb4db9",
"sum": "evJ+PXRzuM1tezCG5WzpAn4Lk3YJfMvDFcs+45fsscU="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "12402a564cbf4557763079ab8e6e995d9afb4db9",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "437c00e3b832607ff2b7d54ef8d9e33b6a416cf3",
"sum": "XAe5QnWHWt+ghbbbqkioLCyOb3k5Zh/UyAobH5BZri8="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
"subdir": "jsonnet/mixin"
}
},
"version": "3335fd098b6bcd4702a411256a54b515726935af",
"sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
"name": "prometheus-operator-mixin"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "3335fd098b6bcd4702a411256a54b515726935af",
"sum": "O/e9OtVscqHAiAvvWGR372ci6xadmQ09WWGjT7vDlbg="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager.git",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "78b5a27d40c099fee039fbf1a613ebfa3d01e345",
"sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=",
"name": "alertmanager"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter.git",
"subdir": "docs/node-mixin"
}
},
"version": "b7dd00ff8b079474b080a068ce0064d66878a348",
"sum": "tappaHscNBSJCA6ypSWt7DDhohIOkxNjcLFRb3WKpu4="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus.git",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "82925eed4a6ba6ebd12d13d8e5f2d59654c7a196",
"sum": "Dq+wurABxuqRAHj4DGp2sCmjJWzNjrhP2XEScsS0kmY=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/pyrra-dev/pyrra.git",
"subdir": "config/crd/bases"
}
},
"version": "e7cbd7532f764d232e0117b0da9d820bcaf90d01",
"sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs="
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos.git",
"subdir": "mixin"
}
},
"version": "a4e334152549c3eeef5bc55566537b1927bca087",
"sum": "095uB0qB1Ek+aNYf+CgydVZk5aFETsfD8GYf6gDwSJs=",
"name": "thanos-mixin"
}
],
"legacyImports": false
}

View file

@ -0,0 +1,14 @@
// kube-prometheus with every component placed in the `monitoring` namespace.
local kp = (import 'kube-prometheus/main.libsonnet') + {
  values+:: {
    common+: {
      namespace: 'monitoring',
    },
  },
};

// Components whose manifests are emitted, in output order.
local components = [
  'kubePrometheus',
  'prometheusOperator',
  'nodeExporter',
  'kubeStateMetrics',
  'prometheus',
  'prometheusAdapter',
];

// Every visible field of one component, in the order std.objectFields yields them.
local manifestsOf(component) = [
  component[field]
  for field in std.objectFields(component)
];

// The file evaluates to one flat array holding the manifests of all components.
std.flattenArrays([manifestsOf(kp[component]) for component in components])

1
monitoring/vendor/alertmanager vendored Symbolic link
View file

@ -0,0 +1 @@
github.com/prometheus/alertmanager/doc/alertmanager-mixin

1
monitoring/vendor/bases vendored Symbolic link
View file

@ -0,0 +1 @@
github.com/pyrra-dev/pyrra/config/crd/bases

View file

@ -0,0 +1,376 @@
// Default settings for the Grafana manifests. The function below merges the
// caller supplied `params` on top of this object.
local defaults = {
  // Handle on the enclosing object, used by the derived fields below.
  local cfg = self,

  namespace: 'default',
  version: '7.5.10',
  image: 'docker.io/grafana/grafana:' + cfg.version,
  replicas: 1,
  port: 3000,
  resources: {
    limits: { cpu: '200m', memory: '200Mi' },
    requests: { cpu: '100m', memory: '100Mi' },
  },

  // Hidden: labels attached to every generated resource.
  commonLabels:: {
    'app.kubernetes.io/name': 'grafana',
    'app.kubernetes.io/component': 'grafana',
    'app.kubernetes.io/version': cfg.version,
  },
  // Hidden: the common labels minus the version label.
  selectorLabels:: {
    [key]: cfg.commonLabels[key]
    for key in std.objectFields(cfg.commonLabels)
    if key != 'app.kubernetes.io/version'
  },

  // Dashboard inputs: jsonnet objects, pre-rendered strings, and per-folder objects.
  dashboards: {},
  rawDashboards: {},
  folderDashboards: {},
  // Returns the folder UID written into the dashboard providers; empty by default.
  folderUidGenerator(folder): '',
  // Forces pod restarts when dashboards are changed
  dashboardsChecksum: false,

  datasources: [
    {
      name: 'prometheus',
      type: 'prometheus',
      access: 'proxy',
      orgId: 1,
      url: 'http://prometheus-k8s.' + cfg.namespace + '.svc:9090',
      version: 1,
      editable: false,
    },
  ],

  // Rendered into grafana.ini by the function below.
  config: {
    sections: {
      date_formats: { default_timezone: 'UTC' },
    },
  },
  ldap: null,

  plugins: [],
  env: [],
  containers: [],
};
// Builds the Kubernetes manifests for a Grafana installation.
// `params` is merged on top of `defaults`; the resulting object exposes one
// visible field per generated resource (serviceAccount, service, config,
// dashboardDefinitions, dashboardSources, dashboardDatasources, deployment).
function(params) {
local g = self,
// Effective configuration: defaults overridden by the caller's params (hidden field).
_config:: defaults + params,
// Metadata shared by all resources; individual resources override `name` where needed.
_metadata:: {
name: 'grafana',
namespace: g._config.namespace,
labels: g._config.commonLabels,
},
// ServiceAccount used by the pod; the API token is not mounted automatically.
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: g._metadata,
automountServiceAccountToken: false,
},
// Service in front of the deployment. The service port is the literal 3000 and
// targets the container port named 'http' (which is g._config.port).
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: g._metadata,
spec: {
selector: g.deployment.spec.selector.matchLabels,
ports: [
{ name: 'http', targetPort: 'http', port: 3000 },
],
},
},
// Secret holding grafana.ini (rendered from _config.config) and, when
// _config.ldap is set, an additional ldap.toml entry.
config: {
apiVersion: 'v1',
kind: 'Secret',
metadata: g._metadata {
name: 'grafana-config',
},
type: 'Opaque',
stringData: {
'grafana.ini': std.manifestIni(g._config.config),
} + if g._config.ldap != null then { 'ldap.toml': g._config.ldap } else {},
},
// ConfigMapList with one ConfigMap per dashboard. ConfigMaps are named
// 'grafana-dashboard-' plus the dashboard key with '.json' removed.
dashboardDefinitions: {
apiVersion: 'v1',
kind: 'ConfigMapList',
items: [
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: g._metadata {
name: dashboardName,
},
// Dashboards given as jsonnet objects are serialized to JSON here.
data: { [name]: std.manifestJsonEx(g._config.dashboards[name], ' ') },
}
for name in std.objectFields(g._config.dashboards)
] + [
{
// The folder is not part of the ConfigMap name, only the dashboard key is.
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: g._metadata {
name: dashboardName,
},
data: { [name]: std.manifestJsonEx(g._config.folderDashboards[folder][name], ' ') },
}
for folder in std.objectFields(g._config.folderDashboards)
for name in std.objectFields(g._config.folderDashboards[folder])
] + (
if std.length(g._config.rawDashboards) > 0 then
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: g._metadata {
name: dashboardName,
},
// Raw dashboards are inserted as given, without any serialization.
data: { [name]: g._config.rawDashboards[name] },
}
for name in std.objectFields(g._config.rawDashboards)
]
else
[]
),
},
// ConfigMap with the dashboard provider configuration ('dashboards.yaml').
dashboardSources:
local dashboardSources = {
apiVersion: 1,
providers:
(
// The 'Default' provider is only emitted when at least one plain or raw dashboard exists.
if std.length(g._config.dashboards) +
std.length(g._config.rawDashboards) > 0 then [
{
name: '0',
orgId: 1,
folder: 'Default',
folderUid: g._config.folderUidGenerator('Default'),
type: 'file',
options: {
path: '/grafana-dashboard-definitions/0',
},
},
] else []
) +
// One additional provider per key of folderDashboards.
[
{
name: folder,
orgId: 1,
folder: folder,
folderUid: g._config.folderUidGenerator(folder),
type: 'file',
options: {
path: '/grafana-dashboard-definitions/' + folder,
},
}
for folder in std.objectFields(g._config.folderDashboards)
],
};
{
kind: 'ConfigMap',
apiVersion: 'v1',
metadata: g._metadata {
name: 'grafana-dashboards',
},
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
},
// Secret with the datasource provisioning file ('datasources.yaml').
dashboardDatasources: {
apiVersion: 'v1',
kind: 'Secret',
metadata: g._metadata {
name: 'grafana-datasources',
},
type: 'Opaque',
stringData: {
'datasources.yaml': std.manifestJsonEx(
{
apiVersion: 1,
datasources: g._config.datasources,
}, ' '
),
},
},
// Deployment running the Grafana container plus any extra containers from _config.containers.
deployment:
// Volume / mount pairs; each mount refers to its volume by name.
local configVolume = {
name: 'grafana-config',
secret: { secretName: g.config.metadata.name },
};
local configVolumeMount = {
name: configVolume.name,
mountPath: '/etc/grafana',
readOnly: false,
};
local storageVolume = {
name: 'grafana-storage',
emptyDir: {},
};
local storageVolumeMount = {
name: storageVolume.name,
mountPath: '/var/lib/grafana',
readOnly: false,
};
local datasourcesVolume = {
name: 'grafana-datasources',
secret: { secretName: g.dashboardDatasources.metadata.name },
};
local datasourcesVolumeMount = {
name: datasourcesVolume.name,
mountPath: '/etc/grafana/provisioning/datasources',
readOnly: false,
};
local dashboardsVolume = {
name: 'grafana-dashboards',
configMap: { name: g.dashboardSources.metadata.name },
};
local dashboardsVolumeMount = {
name: dashboardsVolume.name,
mountPath: '/etc/grafana/provisioning/dashboards',
readOnly: false,
};
// A volume on /tmp is needed to let us use 'readOnlyRootFilesystem: true'
local pluginTmpVolume = {
name: 'tmp-plugins',
emptyDir: {
medium: 'Memory',
},
};
local pluginTmpVolumeMount = {
mountPath: '/tmp',
name: 'tmp-plugins',
readOnly: false,
};
// Fixed mounts, then one mount per dashboard ConfigMap, then (optionally) the config Secret.
local volumeMounts =
[
storageVolumeMount,
datasourcesVolumeMount,
dashboardsVolumeMount,
pluginTmpVolumeMount,
] +
[
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
// Plain and raw dashboards share the '0' directory read by the 'Default' provider.
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields(g._config.dashboards + g._config.rawDashboards)
] +
[
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
readOnly: false,
}
for folder in std.objectFields(g._config.folderDashboards)
for name in std.objectFields(g._config.folderDashboards[folder])
] + (
// The config Secret is only mounted when _config.config is non-empty.
if std.length(g._config.config) > 0 then [configVolumeMount] else []
);
// Volumes matching the mounts above; dashboard volume names equal the ConfigMap names.
local volumes =
[
storageVolume,
datasourcesVolume,
dashboardsVolume,
pluginTmpVolume,
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields(g._config.dashboards)
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for folder in std.objectFields(g._config.folderDashboards)
for name in std.objectFields(g._config.folderDashboards[folder])
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields(g._config.rawDashboards)
] +
if std.length(g._config.config) > 0 then [configVolume] else [];
// Plugins are passed as a comma separated GF_INSTALL_PLUGINS variable; omitted when the list is empty.
local plugins = (
if std.length(g._config.plugins) == 0 then
[]
else
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', g._config.plugins) }]
);
local grafanaContainer = {
name: 'grafana',
image: g._config.image,
env: g._config.env + plugins,
volumeMounts: volumeMounts,
ports: [{
name: 'http',
containerPort: g._config.port,
}],
readinessProbe: {
httpGet: {
path: '/api/health',
// Probe the port by its name ('http') rather than by number.
port: grafanaContainer.ports[0].name,
},
},
resources: g._config.resources,
securityContext: {
capabilities: { drop: ['ALL'] },
allowPrivilegeEscalation: false,
readOnlyRootFilesystem: true,
},
};
{
apiVersion: 'apps/v1',
kind: 'Deployment',
metadata: g._metadata,
spec: {
replicas: g._config.replicas,
selector: {
matchLabels: g._config.selectorLabels,
},
template: {
metadata: {
labels: g._config.commonLabels,
// Checksums of the rendered resources, so the pod template changes when they change.
// A computed field name that evaluates to null omits the field entirely.
annotations: {
[if std.length(g._config.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString(g.config)),
'checksum/grafana-datasources': std.md5(std.toString(g.dashboardDatasources)),
[if g._config.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString(g.dashboardDefinitions)),
'checksum/grafana-dashboardproviders': std.md5(std.toString(g.dashboardSources)),
},
},
spec: {
containers: [grafanaContainer] + g._config.containers,
volumes: volumes,
serviceAccountName: g.serviceAccount.metadata.name,
nodeSelector: {
'kubernetes.io/os': 'linux',
},
// Run as the unprivileged uid/gid 65534.
securityContext: {
fsGroup: 65534,
runAsNonRoot: true,
runAsUser: 65534,
},
},
},
},
},
}

View file

@ -0,0 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
}
},
"version": "master"
}
],
"legacyImports": false
}

View file

@ -0,0 +1,23 @@
# Helper targets for building and testing the etcd Prometheus mixin.
.PHONY: tools manifests test clean

OS := linux
ARCH ?= amd64
PROMETHEUS_VERSION := 2.33.1

# Install jsonnet and gojsontoyaml via `go install`, and unpack promtool from
# the Prometheus release tarball into $(go env GOPATH)/bin.
tools:
	go install github.com/google/go-jsonnet/cmd/jsonnet@latest
	go install github.com/brancz/gojsontoyaml@latest
	wget -qO- "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}.tar.gz" |\
	tar xvz --strip-components=1 -C "$$(go env GOPATH)/bin" prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}/promtool

manifests: manifests/etcd-prometheusRules.yaml

# Depend on the mixin source so the generated rules are rebuilt when it
# changes; without the prerequisite make treats an existing file as up to date
# forever and `make test` silently runs against stale rules.
manifests/etcd-prometheusRules.yaml: mixin.libsonnet
	mkdir -p manifests
	jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > $@

# Run the promtool unit tests in test.yaml against the generated rules.
test: manifests/etcd-prometheusRules.yaml
	promtool test rules test.yaml

clean:
	rm -rf manifests/*.yaml

View file

@ -0,0 +1,29 @@
# Prometheus Monitoring Mixin for etcd
> NOTE: This project is in the *alpha* stage. Flags, configuration, behaviour and design may change significantly in following releases.
A set of customisable Prometheus alerts for etcd.
Instructions for use are the same as the [kubernetes-mixin](https://github.com/kubernetes-monitoring/kubernetes-mixin).
## Background
* For more information about monitoring mixins, see this [design doc](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/edit#).
## Testing alerts
Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed. You can install them via
```
make tools
```
First compile the mixin to a YAML file, which the promtool will read:
```
make manifests
```
Then run the unit test:
```
promtool test rules test.yaml
```

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,164 @@
rule_files:
- manifests/etcd-prometheusRules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.2"}'
values: '1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 3m
alertname: etcdInsufficientMembers
- eval_time: 5m
alertname: etcdInsufficientMembers
- eval_time: 12m
alertname: etcdMembersDown
- eval_time: 14m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
description: 'etcd cluster "etcd": members are down (3).'
summary: 'etcd cluster members are down.'
- eval_time: 7m
alertname: etcdInsufficientMembers
- eval_time: 11m
alertname: etcdInsufficientMembers
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
description: 'etcd cluster "etcd": insufficient members (1).'
summary: 'etcd cluster has insufficient number of members.'
- eval_time: 15m
alertname: etcdInsufficientMembers
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
description: 'etcd cluster "etcd": insufficient members (0).'
summary: 'etcd cluster has insufficient number of members.'
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.2"}'
values: '1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 14m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
description: 'etcd cluster "etcd": members are down (3).'
summary: 'etcd cluster members are down.'
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'etcd_network_peer_sent_failures_total{To="member-1",job="etcd",endpoint="test"}'
values: '0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18'
alert_rule_test:
- eval_time: 13m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
description: 'etcd cluster "etcd": members are down (1).'
summary: 'etcd cluster members are down.'
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
values: '0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
exp_alerts:
- exp_labels:
job: etcd
severity: warning
exp_annotations:
description: 'etcd cluster "etcd": 4 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
summary: 'etcd cluster has high number of leader changes.'
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
values: '0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
exp_alerts:
- interval: 1m
input_series:
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
values: '0+8192x240'
- series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.0"}'
values: '524288+0x240'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
values: '0+1024x240'
- series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.1"}'
values: '524288+0x240'
alert_rule_test:
- eval_time: 11m
alertname: etcdExcessiveDatabaseGrowth
exp_alerts:
- exp_labels:
instance: '10.10.10.0'
job: etcd
severity: warning
exp_annotations:
description: 'etcd cluster "etcd": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
summary: 'etcd cluster database growing very fast.'
- interval: 1m
input_series:
- series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.0"}'
values: '30000+0x10'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
values: '100000+0x10'
- series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.1"}'
values: '70000+0x10'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
values: '100000+0x10'
alert_rule_test:
- eval_time: 11m
alertname: etcdDatabaseHighFragmentationRatio
exp_alerts:
- exp_labels:
instance: '10.10.10.0'
job: etcd
severity: warning
exp_annotations:
description: 'etcd cluster "etcd": database size in use on instance 10.10.10.0 is 30% of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary: 'etcd database size in use is less than 50% of the actual allocated storage.'

View file

@ -0,0 +1,3 @@
/alerts.yaml
/rules.yaml
dashboards_out

View file

@ -0,0 +1,13 @@
# None of these targets produce a file of the same name; declare them phony so
# a stray file or directory called e.g. `build` or `clean` cannot disable them.
.PHONY: all fmt lint build clean

# Format, check formatting, build, then remove the build outputs again.
all: fmt lint build clean

fmt:
	./scripts/format.sh

lint:
	./scripts/lint.sh

build:
	./scripts/build.sh

clean:
	rm -rf dashboards_out alerts.yaml rules.yaml

View file

@ -0,0 +1,28 @@
# Grafana Mixin
_This is a work in progress. We aim for it to become a good role model for alerts
and dashboards eventually, but it is not quite there yet._
The Grafana Mixin is a set of configurable, reusable, and extensible alerts and
dashboards based on the metrics exported by Grafana. The mixin creates
recording and alerting rules for Prometheus and suitable dashboard descriptions
for Grafana.
To use them, you need to have `mixtool` and `jsonnetfmt` installed. If you
have a working Go development environment, it's easiest to run the following:
```bash
$ go install github.com/monitoring-mixins/mixtool/cmd/mixtool@latest
$ go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
```
You can then build the Prometheus rules files `alerts.yaml` and
`rules.yaml` and a directory `dashboards_out` with the JSON dashboard files
for Grafana:
```bash
$ make build
```
For more advanced uses of mixins, see
https://github.com/monitoring-mixins/docs.

View file

@ -0,0 +1,31 @@
// Alerting rules of the Grafana mixin.
{
  // Handle on the outermost object, so the threshold is read from its _config.
  local mixin = self,

  // Fires when the share of 5xx responses per namespace/job/handler exceeds the
  // configured percentage for five minutes. The datasource proxy and query
  // handlers are excluded from both numerator and denominator.
  local requestsFailing = {
    alert: 'GrafanaRequestsFailing',
    'for': '5m',
    expr: |||
      100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
      / ignoring (status_code)
      sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
      > %(grafanaRequestsFailingThresholdPercent)s
    ||| % mixin._config,
    labels: {
      severity: 'warning',
    },
    annotations: {
      message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors',
    },
  },

  _config+:: {
    // Percentage of failing requests above which the alert condition holds.
    grafanaRequestsFailingThresholdPercent: 50,
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'GrafanaAlerts',
        rules: [requestsFailing],
      },
    ],
  },
}

View file

@ -0,0 +1,5 @@
// Adds the bundled overview dashboard to the mixin's (hidden) grafanaDashboards map.
local overview = import 'grafana-overview.json';

{
  grafanaDashboards+:: {
    'grafana-overview.json': overview,
  },
}

View file

@ -0,0 +1,535 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 3085,
"iteration": 1631554945276,
"links": [],
"panels": [
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"mappings": [],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["mean"],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Firing Alerts",
"type": "stat"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 6,
"y": 0
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["mean"],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Dashboards",
"type": "stat"
},
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"displayMode": "auto"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 0
},
"id": 10,
"options": {
"showHeader": true
},
"pluginVersion": "8.1.3",
"targets": [
{
"expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Build Info",
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Value": true,
"branch": true,
"container": true,
"goversion": true,
"namespace": true,
"pod": true,
"revision": true
},
"indexByName": {
"Time": 7,
"Value": 11,
"branch": 4,
"container": 8,
"edition": 2,
"goversion": 6,
"instance": 1,
"job": 0,
"namespace": 9,
"pod": 10,
"revision": 5,
"version": 3
},
"renameByName": {}
}
}
],
"type": "table"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.1.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ",
"interval": "",
"legendFormat": "{{status_code}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "RPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:157",
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:158",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.1.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
"interval": "",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
"interval": "",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Average",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:210",
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:211",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"schemaVersion": 30,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "dev-cortex",
"value": "dev-cortex"
},
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": ["default/grafana"],
"value": ["default/grafana"]
},
"datasource": "$datasource",
"definition": "label_values(grafana_build_info, job)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "job",
"options": [],
"query": {
"query": "label_values(grafana_build_info, job)",
"refId": "Billing Admin-job-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "$datasource",
"definition": "label_values(grafana_build_info, instance)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [],
"query": {
"query": "label_values(grafana_build_info, instance)",
"refId": "Billing Admin-instance-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": ["10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"]
},
"timezone": "",
"title": "Grafana Overview",
"uid": "6be0s85Mk",
"version": 2
}

View file

@ -0,0 +1,3 @@
// Composes the mixin from its alerts, dashboards and rules libraries,
// merged left to right in that order.
local alerts = import 'alerts/alerts.libsonnet';
local dashboards = import 'dashboards/dashboards.libsonnet';
local rules = import 'rules/rules.libsonnet';

alerts + dashboards + rules

View file

@ -0,0 +1,17 @@
// Recording rule: 5m request rate of the Grafana HTTP request duration
// counter, summed by namespace, job, handler and status code.
local requestRateRule = {
  record: 'namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m',
  expr: |||
    sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
  |||,
};

// The single rule group contributed by this file.
local grafanaGroup = {
  name: 'grafana_rules',
  rules: [requestRateRule],
};

{
  // Hidden field; `+` keeps any groups already present on the object this
  // one is merged into and appends ours.
  prometheusRules+:: {
    groups+: [grafanaGroup],
  },
}

View file

@ -0,0 +1,6 @@
#!/bin/bash
# Generates all mixin artefacts from mixin.libsonnet using mixtool.

# Abort on the first failing command, including failures inside pipelines.
set -e
set -o pipefail

# Run from the parent of the directory containing this script so that
# mixin.libsonnet resolves regardless of the caller's working directory.
script_dir="$(dirname "$0")"
cd "${script_dir}"/..

mixtool generate all mixin.libsonnet

View file

@ -0,0 +1 @@
# Formatter command line, meant to be sourced by scripts that then run
# ${JSONNET_FMT}.
# Flags: 2-space indentation, at most 2 consecutive blank lines,
# single-quoted strings and slash-style (//) comments.
# The value holds a command plus its arguments in one string, so it has to
# be expanded unquoted for the shell to split it into separate words.
JSONNET_FMT="jsonnetfmt -n 2 --max-blank-lines 2 --string-style s --comment-style s"

View file

@ -0,0 +1,9 @@
#!/bin/bash
# Reformats every *.libsonnet and *.jsonnet file in place, skipping anything
# below a directory named 'vendor'.
set -eo pipefail

# Run from the parent of the directory containing this script so the
# relative paths below resolve regardless of the caller's working directory.
cd "$(dirname "$0")"/..

# Expected to define JSONNET_FMT (formatter command plus its flags).
. scripts/common.sh

# The file list is NUL-delimited: xargs' default parsing splits on
# whitespace and interprets quotes and backslashes, which breaks on such
# paths. With -print0 / -0 every path reaches the formatter unchanged.
# JSONNET_FMT is intentionally unquoted because it holds several words.
find . -name 'vendor' -prune -o -name '*.libsonnet' -print0 -o -name '*.jsonnet' -print0 | \
  xargs -0 -n 1 -- ${JSONNET_FMT} -i

View file

@ -0,0 +1,13 @@
#!/bin/bash
# Checks that every *.libsonnet and *.jsonnet file (outside 'vendor') is
# already formatted. Prints a unified diff for each file whose formatted
# output differs from what is on disk and exits non-zero if any did.
set -eo pipefail

# Run from the parent of the directory containing this script so the
# relative paths below resolve regardless of the caller's working directory.
cd "$(dirname "$0")"/..

# Expected to define JSONNET_FMT (formatter command plus its flags).
. scripts/common.sh

find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | {
  status=0
  # IFS= and -r stop read from trimming surrounding whitespace and from
  # treating backslashes in a path as escape characters.
  while IFS= read -r f; do
    # Remember the failure rather than letting `set -e` end the loop, so
    # that a single run reports every file that needs formatting.
    # JSONNET_FMT is intentionally unquoted because it holds several words.
    ${JSONNET_FMT} "$f" | diff -u "$f" - || status=1
  done
  # This group is the last stage of the pipeline, so its exit status
  # becomes the pipeline's status and `set -e` fails the script on non-zero.
  exit "$status"
}
#mixtool lint mixin.libsonnet

View file

@ -0,0 +1,885 @@
# Docs
* [dashboard](#dashboard)
* [panel](#panel)
* [gauge.new](#panelGaugenew)
* [graph.new](#panelGraphnew)
* [row.new](#panelRownew)
* [stat.new](#panelStatnew)
* [table.new](#panelTablenew)
* [text.new](#panelTextnew)
* [target](#target)
* [prometheus.new](#targetPrometheusnew)
* [template](#template)
* [custom.new](#templateCustomnew)
* [datasource.new](#templateDatasourcenew)
* [query.new](#templateQuerynew)
## dashboard
### dashboard.new
Instantiate a dashboard.
* **description**: (type: string, default: `null`)
* **editable**: (type: boolean, default: `true`)
* **graphTooltip**: (type: integer, default: `0`)
* **refresh**: (type: string, default: `null`)
* **schemaVersion**: (type: integer, default: `25`)
* **style**: (type: string, default: `"dark"`)
* **tags**: (type: array, default: `[]`)
* **timezone**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **uid**: (type: string, default: `null`)
#### #setTime
* **from**: (type: string, default: `"now-6h"`)
* **to**: (type: string, default: `"now"`)
#### #setTimepicker
* **hidden**: (type: boolean, default: `false`)
* **refreshIntervals**: (type: array, default: `["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"]`)
#### #addAnnotation
* **builtIn**: (type: integer, default: `0`)
* **datasource**: (type: string, default: `"default"`)
* **enable**: (type: boolean, default: `true`)
* **hide**: (type: boolean, default: `false`)
* **iconColor**: (type: string, default: `null`)
* **name**: (type: string, default: `null`)
* **rawQuery**: (type: string, default: `null`)
* **showIn**: (type: integer, default: `0`)
#### #addTemplate
* **template**: (type: object)
## panel
### panel.gauge.new
* **datasource**: (type: string, default: `"default"`)
* **description**: (type: string, default: `null`)
* **repeat**: (type: string, default: `null`)
* **repeatDirection**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **transparent**: (type: boolean, default: `false`)
#### #setFieldConfig
* **max**: (type: integer, default: `null`)
* **min**: (type: integer, default: `null`)
* **thresholdMode**: (type: string, default: `"absolute"`)
* **unit**: (type: string, default: `null`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #setOptions
* **calcs**: (type: array, default: `["mean"]`)
* **fields**: (type: string, default: `null`)
* **orientation**: (type: string, default: `"auto"`)
* **showThresholdLabels**: (type: boolean, default: `false`)
* **showThresholdMarkers**: (type: boolean, default: `true`)
* **values**: (type: boolean, default: `false`)
#### #addDataLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addPanelLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addMapping
* **from**: (type: string, default: `null`)
* **id**: (type: integer, default: `null`)
* **operator**: (type: string, default: `null`)
* **text**: (type: string, default: `null`)
* **to**: (type: string, default: `null`)
* **type**: (type: integer, default: `null`)
* **value**: (type: string, default: `null`)
#### #addOverride
* **matcher**: (type: object, default: `null`)
* **properties**: (type: array, default: `null`)
#### #addThresholdStep
* **color**: (type: string, default: `null`)
* **value**: (type: integer, default: `null`)
#### #addTarget
* **target**: (type: object)
### panel.graph.new
* **bars**: (type: boolean, default: `false`)
Display values as a bar chart.
* **dashLength**: (type: integer, default: `10`)
Dashed line length.
* **dashes**: (type: boolean, default: `false`)
Show line with dashes.
* **datasource**: (type: string, default: `"default"`)
* **decimals**: (type: integer, default: `null`)
Controls how many decimals are displayed for legend values and
graph hover tooltips.
* **description**: (type: string, default: `null`)
* **fill**: (type: integer, default: `1`)
Amount of color fill for a series. Expects a value between 0 and 1.
* **fillGradient**: (type: integer, default: `0`)
Degree of gradient on the area fill. 0 is no gradient, 10 is a
steep gradient.
* **hiddenSeries**: (type: boolean, default: `false`)
Hide the series.
* **lines**: (type: boolean, default: `true`)
Display values as a line graph.
* **linewidth**: (type: integer, default: `1`)
The width of the line for a series.
* **nullPointMode**: (type: string, default: `"null"`)
How null values are displayed.
* 'null' - If there is a gap in the series, meaning a null value,
then the line in the graph will be broken and show the gap.
* 'null as zero' - If there is a gap in the series, meaning a null
value, then it will be displayed as a zero value in the graph
panel.
* 'connected' - If there is a gap in the series, meaning a null
value or values, then the line will skip the gap and connect to the
next non-null value.
* **percentage**: (type: boolean, default: `false`)
Available when `stack` is true. Each series is drawn as a percentage
of the total of all series.
* **pointradius**: (type: integer, default: `null`)
Controls how large the points are.
* **points**: (type: boolean, default: `false`)
Display points for values.
* **repeat**: (type: string, default: `null`)
* **repeatDirection**: (type: string, default: `null`)
* **spaceLength**: (type: integer, default: `10`)
Dashed line spacing when `dashes` is true.
* **stack**: (type: boolean, default: `false`)
Each series is stacked on top of another.
* **steppedLine**: (type: boolean, default: `false`)
Draws adjacent points as staircase.
* **timeFrom**: (type: string, default: `null`)
* **timeShift**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **transparent**: (type: boolean, default: `false`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #setLegend
* **alignAsTable**: (type: boolean, default: `null`)
Whether to display legend in table.
* **avg**: (type: boolean, default: `false`)
Average of all values returned from the metric query.
* **current**: (type: boolean, default: `false`)
Last value returned from the metric query.
* **max**: (type: boolean, default: `false`)
Maximum of all values returned from the metric query.
* **min**: (type: boolean, default: `false`)
Minimum of all values returned from the metric query.
* **rightSide**: (type: boolean, default: `false`)
Display legend to the right.
* **show**: (type: boolean, default: `true`)
Show or hide the legend.
* **sideWidth**: (type: integer, default: `null`)
Available when `rightSide` is true. The minimum width for the legend in
pixels.
* **total**: (type: boolean, default: `false`)
Sum of all values returned from the metric query.
* **values**: (type: boolean, default: `true`)
#### #setThresholds
* **thresholdMode**: (type: string, default: `"absolute"`)
#### #setTooltip
* **shared**: (type: boolean, default: `true`)
* true - The hover tooltip shows all series in the graph.
Grafana highlights the series that you are hovering over in
bold in the series list in the tooltip.
* false - The hover tooltip shows only a single series, the one
that you are hovering over on the graph.
* **sort**: (type: integer, default: `2`)
* 0 (none) - The order of the series in the tooltip is
determined by the sort order in your query. For example, they
could be alphabetically sorted by series name.
* 1 (increasing) - The series in the hover tooltip are sorted
by value and in increasing order, with the lowest value at the
top of the list.
* 2 (decreasing) - The series in the hover tooltip are sorted
by value and in decreasing order, with the highest value at the
top of the list.
#### #setXaxis
* **buckets**: (type: string, default: `null`)
* **mode**: (type: string, default: `"time"`)
The display mode completely changes the visualization of the
graph panel. It's like three panels in one. The main mode is
the time series mode with time on the X-axis. The other two
modes are a basic bar chart mode with series on the X-axis
instead of time and a histogram mode.
* 'time' - The X-axis represents time and that the data is
grouped by time (for example, by hour, or by minute).
* 'series' - The data is grouped by series and not by time. The
Y-axis still represents the value.
* 'histogram' - Converts the graph into a histogram. A histogram
is a kind of bar chart that groups numbers into ranges, often
called buckets or bins. Taller bars show that more data falls
in that range.
* **name**: (type: string, default: `null`)
* **show**: (type: boolean, default: `true`)
Show or hide the axis.
#### #setYaxis
* **align**: (type: boolean, default: `false`)
Align left and right Y-axes by value.
* **alignLevel**: (type: integer, default: `0`)
Available when align is true. Value to use for alignment of
left and right Y-axes, starting from Y=0.
#### #addDataLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addPanelLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addOverride
* **matcher**: (type: object, default: `null`)
* **properties**: (type: array, default: `null`)
#### #addSeriesOverride
* **alias**: (type: string, default: `null`)
Alias or regex matching the series you'd like to target.
* **bars**: (type: boolean, default: `null`)
* **color**: (type: string, default: `null`)
* **dashLength**: (type: integer, default: `null`)
* **dashes**: (type: boolean, default: `null`)
* **fill**: (type: integer, default: `null`)
* **fillBelowTo**: (type: string, default: `null`)
* **fillGradient**: (type: integer, default: `null`)
* **hiddenSeries**: (type: boolean, default: `null`)
* **hideTooltip**: (type: boolean, default: `null`)
* **legend**: (type: boolean, default: `null`)
* **lines**: (type: boolean, default: `null`)
* **linewidth**: (type: integer, default: `null`)
* **nullPointMode**: (type: string, default: `null`)
* **pointradius**: (type: integer, default: `null`)
* **points**: (type: boolean, default: `null`)
* **spaceLength**: (type: integer, default: `null`)
* **stack**: (type: integer, default: `null`)
* **steppedLine**: (type: boolean, default: `null`)
* **transform**: (type: string, default: `null`)
* **yaxis**: (type: integer, default: `null`)
* **zindex**: (type: integer, default: `null`)
#### #addThresholdStep
* **color**: (type: string, default: `null`)
* **value**: (type: integer, default: `null`)
#### #addTarget
* **target**: (type: object)
#### #addYaxis
* **decimals**: (type: integer, default: `null`)
Defines how many decimals are displayed for Y value.
* **format**: (type: string, default: `"short"`)
The display unit for the Y value.
* **label**: (type: string, default: `null`)
The Y axis label.
* **logBase**: (type: integer, default: `1`)
The scale to use for the Y value - linear, or logarithmic.
* 1 - linear
* 2 - log (base 2)
* 10 - log (base 10)
* 32 - log (base 32)
* 1024 - log (base 1024)
* **max**: (type: integer, default: `null`)
The maximum Y value.
* **min**: (type: integer, default: `null`)
The minimum Y value.
* **show**: (type: boolean, default: `true`)
Show or hide the axis.
### panel.row.new
* **collapse**: (type: boolean, default: `true`)
* **collapsed**: (type: boolean, default: `true`)
* **datasource**: (type: string, default: `null`)
* **repeat**: (type: string, default: `null`)
* **repeatIteration**: (type: string, default: `null`)
* **showTitle**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **titleSize**: (type: string, default: `"h6"`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #addPanel
* **panel**: (type: object)
### panel.stat.new
* **datasource**: (type: string, default: `"default"`)
* **description**: (type: string, default: `null`)
* **repeat**: (type: string, default: `null`)
* **repeatDirection**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **transparent**: (type: boolean, default: `false`)
#### #setFieldConfig
* **max**: (type: integer, default: `null`)
* **min**: (type: integer, default: `null`)
* **thresholdMode**: (type: string, default: `"absolute"`)
* **unit**: (type: string, default: `null`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #setOptions
* **calcs**: (type: array, default: `["mean"]`)
* **colorMode**: (type: string, default: `"value"`)
* **fields**: (type: string, default: `null`)
* **graphMode**: (type: string, default: `"none"`)
* **justifyMode**: (type: string, default: `"auto"`)
* **orientation**: (type: string, default: `"auto"`)
* **textMode**: (type: string, default: `"auto"`)
* **values**: (type: boolean, default: `false`)
#### #addDataLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addPanelLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addMapping
* **from**: (type: string, default: `null`)
* **id**: (type: integer, default: `null`)
* **operator**: (type: string, default: `null`)
* **text**: (type: string, default: `null`)
* **to**: (type: string, default: `null`)
* **type**: (type: integer, default: `null`)
* **value**: (type: string, default: `null`)
#### #addOverride
* **matcher**: (type: object, default: `null`)
* **properties**: (type: array, default: `null`)
#### #addThresholdStep
* **color**: (type: string, default: `null`)
* **value**: (type: integer, default: `null`)
#### #addTarget
* **target**: (type: object)
### panel.table.new
* **datasource**: (type: string, default: `"default"`)
* **description**: (type: string, default: `null`)
* **repeat**: (type: string, default: `null`)
* **repeatDirection**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **transparent**: (type: boolean, default: `false`)
#### #setFieldConfig
* **displayName**: (type: string, default: `null`)
* **max**: (type: integer, default: `null`)
* **min**: (type: integer, default: `null`)
* **thresholdMode**: (type: string, default: `"absolute"`)
* **noValue**: (type: string, default: `null`)
* **unit**: (type: string, default: `"short"`)
* **width**: (type: integer, default: `null`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #setOptions
* **showHeader**: (type: boolean, default: `true`)
#### #addDataLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addPanelLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addMapping
* **from**: (type: string, default: `null`)
* **id**: (type: integer, default: `null`)
* **operator**: (type: string, default: `null`)
* **text**: (type: string, default: `null`)
* **to**: (type: string, default: `null`)
* **type**: (type: integer, default: `null`)
* **value**: (type: string, default: `null`)
#### #addOverride
* **matcher**: (type: object, default: `null`)
* **properties**: (type: array, default: `null`)
#### #addThresholdStep
* **color**: (type: string, default: `null`)
* **value**: (type: integer, default: `null`)
#### #addTarget
* **target**: (type: object)
### panel.text.new
* **content**: (type: string, default: `null`)
* **datasource**: (type: string, default: `"default"`)
* **description**: (type: string, default: `null`)
* **mode**: (type: string, default: `"markdown"`)
* **repeat**: (type: string, default: `null`)
* **repeatDirection**: (type: string, default: `null`)
* **title**: (type: string, default: `null`)
* **transparent**: (type: boolean, default: `false`)
#### #setGridPos
* **h**: (type: integer, default: `8`)
Panel height.
* **w**: (type: integer, default: `12`)
Panel width.
* **x**: (type: integer, default: `null`)
Panel x position.
* **y**: (type: integer, default: `null`)
Panel y position.
#### #addPanelLink
* **targetBlank**: (type: boolean, default: `true`)
* **title**: (type: string, default: `null`)
* **url**: (type: string, default: `null`)
#### #addTarget
* **target**: (type: object)
## target
### target.prometheus.new
* **datasource**: (type: string, default: `"default"`)
* **expr**: (type: string, default: `null`)
* **format**: (type: string, default: `"time_series"`)
* **instant**: (type: boolean, default: `null`)
* **interval**: (type: string, default: `null`)
* **intervalFactor**: (type: integer, default: `null`)
* **legendFormat**: (type: string, default: `null`)
## template
### template.custom.new
* **allValue**: (type: string, default: `null`)
* **hide**: (type: integer, default: `0`)
* **includeAll**: (type: boolean, default: `false`)
* **label**: (type: string, default: `null`)
* **multi**: (type: boolean, default: `false`)
* **name**: (type: string, default: `null`)
* **query**: (type: string, default: `null`)
* **queryValue**: (type: string, default: `""`)
* **skipUrlSync**: (type: boolean, default: `false`)
#### #setCurrent
* **selected**: (type: boolean, default: `false`)
* **text**: (type: string, default: `null`)
* **value**: (type: string, default: `null`)
### template.datasource.new
* **hide**: (type: integer, default: `0`)
* **includeAll**: (type: boolean, default: `false`)
* **label**: (type: string, default: `null`)
* **multi**: (type: boolean, default: `false`)
* **name**: (type: string, default: `null`)
* **query**: (type: string, default: `null`)
* **refresh**: (type: integer, default: `1`)
* **regex**: (type: string, default: `null`)
* **skipUrlSync**: (type: boolean, default: `false`)
#### #setCurrent
* **selected**: (type: boolean, default: `false`)
* **text**: (type: string, default: `null`)
* **value**: (type: string, default: `null`)
### template.query.new
* **allValue**: (type: string, default: `null`)
* **datasource**: (type: string, default: `null`)
* **definition**: (type: string, default: `null`)
* **hide**: (type: integer, default: `0`)
* **includeAll**: (type: boolean, default: `false`)
* **label**: (type: string, default: `null`)
* **multi**: (type: boolean, default: `false`)
* **name**: (type: string, default: `null`)
* **query**: (type: string, default: `null`)
* **refresh**: (type: integer, default: `0`)
* **regex**: (type: string, default: `null`)
* **skipUrlSync**: (type: boolean, default: `false`)
* **sort**: (type: integer, default: `0`)
* **tagValuesQuery**: (type: string, default: `null`)
* **tags**: (type: array, default: `null`)
* **tagsQuery**: (type: string, default: `null`)
* **useTags**: (type: boolean, default: `false`)
#### #setCurrent
* **selected**: (type: boolean, default: `null`)
* **text**: (type: string, default: `null`)
* **value**: (type: string, default: `null`)
#### #addOption
* **selected**: (type: boolean, default: `true`)
* **text**: (type: string, default: `null`)
* **value**: (type: string, default: `null`)

View file

@ -0,0 +1,87 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a dashboard object. Every argument that is null is left out of
  // the result instead of being emitted as a null field. The returned object
  // carries hidden builder methods that each return an extended copy.
  new(
    description=null,
    editable=true,
    graphTooltip=0,
    refresh=null,
    schemaVersion=25,
    style='dark',
    tags=[],
    timezone=null,
    title=null,
    uid=null,
  )::
    {
      [if description != null then 'description']: description,
      [if editable != null then 'editable']: editable,
      [if graphTooltip != null then 'graphTooltip']: graphTooltip,
      [if refresh != null then 'refresh']: refresh,
      [if schemaVersion != null then 'schemaVersion']: schemaVersion,
      [if style != null then 'style']: style,
      [if tags != null then 'tags']: tags,
      [if timezone != null then 'timezone']: timezone,
      [if title != null then 'title']: title,
      [if uid != null then 'uid']: uid,

      // Starts empty; filled through addPanel / addPanels.
      panels: [],

      // Hidden counter: the id assigned to the next panel that is added.
      _nextPanelID:: 2,

      // Sets the 'time' range of the dashboard.
      setTime(from='now-6h', to='now')::
        self + {
          time+: {
            [if from != null then 'from']: from,
            [if to != null then 'to']: to,
          },
        },

      // Sets the 'timepicker' options; refreshIntervals is written to the
      // 'refresh_intervals' key.
      setTimepicker(
        hidden=false,
        refreshIntervals=['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
      )::
        self + {
          timepicker+: {
            [if hidden != null then 'hidden']: hidden,
            [if refreshIntervals != null then 'refresh_intervals']: refreshIntervals,
          },
        },

      // Appends a template variable object to templating.list.
      addTemplate(template)::
        self + { templating+: { list+: [template] } },

      // Appends an annotation built from the non-null arguments to
      // annotations.list.
      addAnnotation(
        builtIn=0,
        datasource='default',
        enable=true,
        hide=false,
        iconColor=null,
        name=null,
        rawQuery=null,
        showIn=0,
      )::
        local annotation = {
          [if builtIn != null then 'builtIn']: builtIn,
          [if datasource != null then 'datasource']: datasource,
          [if enable != null then 'enable']: enable,
          [if hide != null then 'hide']: hide,
          [if iconColor != null then 'iconColor']: iconColor,
          [if name != null then 'name']: name,
          [if rawQuery != null then 'rawQuery']: rawQuery,
          [if showIn != null then 'showIn']: showIn,
        };
        self + { annotations+: { list+: [annotation] } },

      // Appends a panel and assigns it the next free id. When the panel has
      // a 'panels' field its children receive the ids that follow, and the
      // counter is advanced past all of them.
      addPanel(panel)::
        self {
          // Read through super so the value is the counter as it was before
          // this extension overrides it.
          local firstID = super._nextPanelID,
          local hasChildren = 'panels' in panel,
          local renumberedChildren =
            if hasChildren then
              { panels: std.mapWithIndex(function(i, child) child { id: firstID + i + 1 }, panel.panels) }
            else
              {},
          panels+: [panel { id: firstID } + renumberedChildren],
          _nextPanelID:: firstID + 1 + (if hasChildren then std.length(panel.panels) else 0),
        },

      // Adds each panel of the array in order via addPanel.
      addPanels(panels)::
        std.foldl(function(dash, p) dash.addPanel(p), panels, self),
    },
}

View file

@ -0,0 +1,21 @@
// This file was generated by https://github.com/grafana/dashboard-spec
// Panel builders, keyed by panel type.
local panelBuilders = {
  gauge:: import 'panel/gauge.libsonnet',
  graph:: import 'panel/graph.libsonnet',
  row:: import 'panel/row.libsonnet',
  stat:: import 'panel/stat.libsonnet',
  table:: import 'panel/table.libsonnet',
  text:: import 'panel/text.libsonnet',
};

// Query target builders, keyed by datasource kind.
local targetBuilders = {
  prometheus:: import 'target/prometheus.libsonnet',
};

// Template variable builders, keyed by variable type.
local templateBuilders = {
  custom:: import 'template/custom.libsonnet',
  datasource:: import 'template/datasource.libsonnet',
  query:: import 'template/query.libsonnet',
};

// Library entry point: exposes every builder under one hidden namespace.
{
  dashboard:: import 'dashboard.libsonnet',
  panel:: panelBuilders,
  target:: targetBuilders,
  template:: templateBuilders,
}

View file

@ -0,0 +1,138 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a panel of type 'gauge'. Every argument that is null is left out
  // of the result. The returned object carries hidden builder methods that
  // each return an extended copy of the panel.
  new(
    datasource='default',
    description=null,
    repeat=null,
    repeatDirection=null,
    title=null,
    transparent=false,
  )::
    {
      type: 'gauge',
      [if datasource != null then 'datasource']: datasource,
      [if description != null then 'description']: description,
      [if repeat != null then 'repeat']: repeat,
      [if repeatDirection != null then 'repeatDirection']: repeatDirection,
      [if title != null then 'title']: title,
      [if transparent != null then 'transparent']: transparent,

      // Sets fieldConfig.defaults; thresholdMode is written to
      // fieldConfig.defaults.thresholds.mode.
      setFieldConfig(max=null, min=null, thresholdMode='absolute', unit=null)::
        self + {
          fieldConfig+: {
            defaults+: {
              [if max != null then 'max']: max,
              [if min != null then 'min']: min,
              [if unit != null then 'unit']: unit,
              thresholds+: {
                [if thresholdMode != null then 'mode']: thresholdMode,
              },
            },
          },
        },

      // Sets the panel size (h, w) and position (x, y) under gridPos.
      setGridPos(h=8, w=12, x=null, y=null)::
        self + {
          gridPos+: {
            [if h != null then 'h']: h,
            [if w != null then 'w']: w,
            [if x != null then 'x']: x,
            [if y != null then 'y']: y,
          },
        },

      // Sets panel options; calcs, fields and values are written under
      // options.reduceOptions, the remaining arguments directly under options.
      setOptions(
        calcs=['mean'],
        fields=null,
        orientation='auto',
        showThresholdLabels=false,
        showThresholdMarkers=true,
        values=false,
      )::
        self + {
          options+: {
            reduceOptions+: {
              [if calcs != null then 'calcs']: calcs,
              [if fields != null then 'fields']: fields,
              [if values != null then 'values']: values,
            },
            [if orientation != null then 'orientation']: orientation,
            [if showThresholdLabels != null then 'showThresholdLabels']: showThresholdLabels,
            [if showThresholdMarkers != null then 'showThresholdMarkers']: showThresholdMarkers,
          },
        },

      // Appends a link to the panel-level 'links' array.
      addPanelLink(targetBlank=true, title=null, url=null)::
        local link = {
          [if targetBlank != null then 'targetBlank']: targetBlank,
          [if title != null then 'title']: title,
          [if url != null then 'url']: url,
        };
        self + { links+: [link] },

      // Appends a link to fieldConfig.defaults.links.
      addDataLink(targetBlank=true, title=null, url=null)::
        local link = {
          [if targetBlank != null then 'targetBlank']: targetBlank,
          [if title != null then 'title']: title,
          [if url != null then 'url']: url,
        };
        self + { fieldConfig+: { defaults+: { links+: [link] } } },

      // Appends a value mapping to fieldConfig.defaults.mappings.
      addMapping(
        from=null,
        id=null,
        operator=null,
        text=null,
        to=null,
        type=null,
        value=null,
      )::
        local mapping = {
          [if from != null then 'from']: from,
          [if id != null then 'id']: id,
          [if operator != null then 'operator']: operator,
          [if text != null then 'text']: text,
          [if to != null then 'to']: to,
          [if type != null then 'type']: type,
          [if value != null then 'value']: value,
        };
        self + { fieldConfig+: { defaults+: { mappings+: [mapping] } } },

      // Appends an override to fieldConfig.overrides.
      addOverride(matcher=null, properties=null)::
        local override = {
          [if matcher != null then 'matcher']: matcher,
          [if properties != null then 'properties']: properties,
        };
        self + { fieldConfig+: { overrides+: [override] } },

      // Appends a step to fieldConfig.defaults.thresholds.steps.
      addThresholdStep(color=null, value=null)::
        local step = {
          [if color != null then 'color']: color,
          [if value != null then 'value']: value,
        };
        self + { fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } } },

      // Appends a query target object to 'targets'.
      addTarget(target)::
        self + { targets+: [target] },
    },
}

View file

@ -0,0 +1,257 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a panel of type 'graph' using the 'flot' renderer. Every argument
  // that is null is left out of the result. The returned object carries
  // hidden builder methods that each return an extended copy of the panel.
  new(
    bars=false,
    dashLength=10,
    dashes=false,
    datasource='default',
    decimals=null,
    description=null,
    fill=1,
    fillGradient=0,
    hiddenSeries=false,
    lines=true,
    linewidth=1,
    nullPointMode='null',
    percentage=false,
    pointradius=null,
    points=false,
    repeat=null,
    repeatDirection=null,
    spaceLength=10,
    stack=false,
    steppedLine=false,
    timeFrom=null,
    timeShift=null,
    title=null,
    transparent=false,
  )::
    {
      type: 'graph',
      renderer: 'flot',
      // Always present; setTooltip extends this object rather than
      // replacing it.
      tooltip+: { value_type: 'individual' },

      [if bars != null then 'bars']: bars,
      [if dashLength != null then 'dashLength']: dashLength,
      [if dashes != null then 'dashes']: dashes,
      [if datasource != null then 'datasource']: datasource,
      [if decimals != null then 'decimals']: decimals,
      [if description != null then 'description']: description,
      [if fill != null then 'fill']: fill,
      [if fillGradient != null then 'fillGradient']: fillGradient,
      [if hiddenSeries != null then 'hiddenSeries']: hiddenSeries,
      [if lines != null then 'lines']: lines,
      [if linewidth != null then 'linewidth']: linewidth,
      [if nullPointMode != null then 'nullPointMode']: nullPointMode,
      [if percentage != null then 'percentage']: percentage,
      [if pointradius != null then 'pointradius']: pointradius,
      [if points != null then 'points']: points,
      [if repeat != null then 'repeat']: repeat,
      [if repeatDirection != null then 'repeatDirection']: repeatDirection,
      [if spaceLength != null then 'spaceLength']: spaceLength,
      [if stack != null then 'stack']: stack,
      [if steppedLine != null then 'steppedLine']: steppedLine,
      [if timeFrom != null then 'timeFrom']: timeFrom,
      [if timeShift != null then 'timeShift']: timeShift,
      [if title != null then 'title']: title,
      [if transparent != null then 'transparent']: transparent,

      // Sets the panel size (h, w) and position (x, y) under gridPos.
      setGridPos(h=8, w=12, x=null, y=null)::
        self + {
          gridPos+: {
            [if h != null then 'h']: h,
            [if w != null then 'w']: w,
            [if x != null then 'x']: x,
            [if y != null then 'y']: y,
          },
        },

      // Sets the 'legend' options.
      setLegend(
        alignAsTable=null,
        avg=false,
        current=false,
        max=false,
        min=false,
        rightSide=false,
        show=true,
        sideWidth=null,
        total=false,
        values=true,
      )::
        self + {
          legend+: {
            [if alignAsTable != null then 'alignAsTable']: alignAsTable,
            [if avg != null then 'avg']: avg,
            [if current != null then 'current']: current,
            [if max != null then 'max']: max,
            [if min != null then 'min']: min,
            [if rightSide != null then 'rightSide']: rightSide,
            [if show != null then 'show']: show,
            [if sideWidth != null then 'sideWidth']: sideWidth,
            [if total != null then 'total']: total,
            [if values != null then 'values']: values,
          },
        },

      // Writes thresholdMode to thresholds.mode.
      setThresholds(thresholdMode='absolute')::
        self + {
          thresholds+: {
            [if thresholdMode != null then 'mode']: thresholdMode,
          },
        },

      // Sets the 'tooltip' options.
      setTooltip(shared=true, sort=2)::
        self + {
          tooltip+: {
            [if shared != null then 'shared']: shared,
            [if sort != null then 'sort']: sort,
          },
        },

      // Sets the 'xaxis' options.
      setXaxis(buckets=null, mode='time', name=null, show=true)::
        self + {
          xaxis+: {
            [if buckets != null then 'buckets']: buckets,
            [if mode != null then 'mode']: mode,
            [if name != null then 'name']: name,
            [if show != null then 'show']: show,
          },
        },

      // Sets the 'yaxis' alignment options (per-axis settings go through
      // addYaxis instead).
      setYaxis(align=false, alignLevel=0)::
        self + {
          yaxis+: {
            [if align != null then 'align']: align,
            [if alignLevel != null then 'alignLevel']: alignLevel,
          },
        },

      // Appends a link to options.dataLinks.
      addDataLink(targetBlank=true, title=null, url=null)::
        local link = {
          [if targetBlank != null then 'targetBlank']: targetBlank,
          [if title != null then 'title']: title,
          [if url != null then 'url']: url,
        };
        self + { options+: { dataLinks+: [link] } },

      // Appends a link to the panel-level 'links' array.
      addPanelLink(targetBlank=true, title=null, url=null)::
        local link = {
          [if targetBlank != null then 'targetBlank']: targetBlank,
          [if title != null then 'title']: title,
          [if url != null then 'url']: url,
        };
        self + { links+: [link] },

      // Appends an override to fieldConfig.overrides.
      addOverride(matcher=null, properties=null)::
        local override = {
          [if matcher != null then 'matcher']: matcher,
          [if properties != null then 'properties']: properties,
        };
        self + { fieldConfig+: { overrides+: [override] } },

      // Appends an entry built from the non-null arguments to
      // 'seriesOverrides'.
      addSeriesOverride(
        alias=null,
        bars=null,
        color=null,
        dashLength=null,
        dashes=null,
        fill=null,
        fillBelowTo=null,
        fillGradient=null,
        hiddenSeries=null,
        hideTooltip=null,
        legend=null,
        lines=null,
        linewidth=null,
        nullPointMode=null,
        pointradius=null,
        points=null,
        spaceLength=null,
        stack=null,
        steppedLine=null,
        transform=null,
        yaxis=null,
        zindex=null,
      )::
        local seriesOverride = {
          [if alias != null then 'alias']: alias,
          [if bars != null then 'bars']: bars,
          [if color != null then 'color']: color,
          [if dashLength != null then 'dashLength']: dashLength,
          [if dashes != null then 'dashes']: dashes,
          [if fill != null then 'fill']: fill,
          [if fillBelowTo != null then 'fillBelowTo']: fillBelowTo,
          [if fillGradient != null then 'fillGradient']: fillGradient,
          [if hiddenSeries != null then 'hiddenSeries']: hiddenSeries,
          [if hideTooltip != null then 'hideTooltip']: hideTooltip,
          [if legend != null then 'legend']: legend,
          [if lines != null then 'lines']: lines,
          [if linewidth != null then 'linewidth']: linewidth,
          [if nullPointMode != null then 'nullPointMode']: nullPointMode,
          [if pointradius != null then 'pointradius']: pointradius,
          [if points != null then 'points']: points,
          [if spaceLength != null then 'spaceLength']: spaceLength,
          [if stack != null then 'stack']: stack,
          [if steppedLine != null then 'steppedLine']: steppedLine,
          [if transform != null then 'transform']: transform,
          [if yaxis != null then 'yaxis']: yaxis,
          [if zindex != null then 'zindex']: zindex,
        };
        self + { seriesOverrides+: [seriesOverride] },

      // Appends a step to thresholds.steps.
      addThresholdStep(color=null, value=null)::
        local step = {
          [if color != null then 'color']: color,
          [if value != null then 'value']: value,
        };
        self + { thresholds+: { steps+: [step] } },

      // Appends a query target object to 'targets'.
      addTarget(target)::
        self + { targets+: [target] },

      // Appends one axis description to the 'yaxes' array.
      addYaxis(
        decimals=null,
        format='short',
        label=null,
        logBase=1,
        max=null,
        min=null,
        show=true,
      )::
        local axis = {
          [if decimals != null then 'decimals']: decimals,
          [if format != null then 'format']: format,
          [if label != null then 'label']: label,
          [if logBase != null then 'logBase']: logBase,
          [if max != null then 'max']: max,
          [if min != null then 'min']: min,
          [if show != null then 'show']: show,
        };
        self + { yaxes+: [axis] },
    },
}

View file

@ -0,0 +1,45 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a panel object of type 'row'.
  // Every argument that is null is omitted from the resulting object.
  new(
    collapse=true,
    collapsed=true,
    datasource=null,
    repeat=null,
    repeatIteration=null,
    showTitle=true,
    title=null,
    titleSize='h6',
  ):: {
    type: 'row',
    [if collapse != null then 'collapse']: collapse,
    [if collapsed != null then 'collapsed']: collapsed,
    [if datasource != null then 'datasource']: datasource,
    [if repeat != null then 'repeat']: repeat,
    [if repeatIteration != null then 'repeatIteration']: repeatIteration,
    [if showTitle != null then 'showTitle']: showTitle,
    [if title != null then 'title']: title,
    [if titleSize != null then 'titleSize']: titleSize,

    // Merges the non-null coordinates into `gridPos`, keeping keys set earlier.
    setGridPos(
      h=8,
      w=12,
      x=null,
      y=null,
    ):: self + {
      gridPos+: {
        [if h != null then 'h']: h,
        [if w != null then 'w']: w,
        [if x != null then 'x']: x,
        [if y != null then 'y']: y,
      },
    },

    // Appends `panel` to this row's `panels` array.
    addPanel(
      panel
    ):: self + { panels+: [panel] },
  },
}

View file

@ -0,0 +1,142 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Shape shared by panel links and data links; null arguments are omitted.
  local linkObject(targetBlank, title, url) = {
    [if targetBlank != null then 'targetBlank']: targetBlank,
    [if title != null then 'title']: title,
    [if url != null then 'url']: url,
  },

  // Builds a panel object of type 'stat'.
  // Every argument that is null is omitted from the resulting object.
  new(
    datasource='default',
    description=null,
    repeat=null,
    repeatDirection=null,
    title=null,
    transparent=false,
  ):: {
    type: 'stat',
    [if datasource != null then 'datasource']: datasource,
    [if description != null then 'description']: description,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    [if title != null then 'title']: title,
    [if transparent != null then 'transparent']: transparent,

    // Merges the non-null settings into `fieldConfig.defaults`;
    // `thresholdMode` is stored as `thresholds.mode`.
    setFieldConfig(
      max=null,
      min=null,
      thresholdMode='absolute',
      unit=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          [if max != null then 'max']: max,
          [if min != null then 'min']: min,
          [if unit != null then 'unit']: unit,
          thresholds+: { [if thresholdMode != null then 'mode']: thresholdMode },
        },
      },
    },

    // Merges the non-null coordinates into `gridPos`.
    setGridPos(
      h=8,
      w=12,
      x=null,
      y=null,
    ):: self + {
      gridPos+: {
        [if h != null then 'h']: h,
        [if w != null then 'w']: w,
        [if x != null then 'x']: x,
        [if y != null then 'y']: y,
      },
    },

    // Merges the non-null settings into `options`;
    // `calcs`, `fields` and `values` are placed under `options.reduceOptions`.
    setOptions(
      calcs=['mean'],
      colorMode='value',
      fields=null,
      graphMode='none',
      justifyMode='auto',
      orientation='auto',
      textMode='auto',
      values=false,
    ):: self + {
      options+: {
        reduceOptions+: {
          [if calcs != null then 'calcs']: calcs,
          [if fields != null then 'fields']: fields,
          [if values != null then 'values']: values,
        },
        [if colorMode != null then 'colorMode']: colorMode,
        [if graphMode != null then 'graphMode']: graphMode,
        [if justifyMode != null then 'justifyMode']: justifyMode,
        [if orientation != null then 'orientation']: orientation,
        [if textMode != null then 'textMode']: textMode,
      },
    },

    // Appends a link to the panel-level `links` array.
    addPanelLink(
      targetBlank=true,
      title=null,
      url=null,
    ):: self + { links+: [linkObject(targetBlank, title, url)] },

    // Appends a link to `fieldConfig.defaults.links`.
    addDataLink(
      targetBlank=true,
      title=null,
      url=null,
    ):: self + {
      fieldConfig+: { defaults+: { links+: [linkObject(targetBlank, title, url)] } },
    },

    // Appends a value mapping to `fieldConfig.defaults.mappings`.
    addMapping(
      from=null,
      id=null,
      operator=null,
      text=null,
      to=null,
      type=null,
      value=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          mappings+: [{
            [if from != null then 'from']: from,
            [if id != null then 'id']: id,
            [if operator != null then 'operator']: operator,
            [if text != null then 'text']: text,
            [if to != null then 'to']: to,
            [if type != null then 'type']: type,
            [if value != null then 'value']: value,
          }],
        },
      },
    },

    // Appends a matcher/properties pair to `fieldConfig.overrides`.
    addOverride(
      matcher=null,
      properties=null,
    ):: self + {
      fieldConfig+: {
        overrides+: [{
          [if matcher != null then 'matcher']: matcher,
          [if properties != null then 'properties']: properties,
        }],
      },
    },

    // Appends a step to `fieldConfig.defaults.thresholds.steps`.
    addThresholdStep(
      color=null,
      value=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          thresholds+: {
            steps+: [{
              [if color != null then 'color']: color,
              [if value != null then 'value']: value,
            }],
          },
        },
      },
    },

    // Appends `target` to the `targets` array, unchanged.
    addTarget(
      target
    ):: self + { targets+: [target] },
  },
}

View file

@ -0,0 +1,134 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Shape shared by panel links and data links; null arguments are omitted.
  local linkObject(targetBlank, title, url) = {
    [if targetBlank != null then 'targetBlank']: targetBlank,
    [if title != null then 'title']: title,
    [if url != null then 'url']: url,
  },

  // Builds a panel object of type 'table'.
  // Every argument that is null is omitted from the resulting object.
  new(
    datasource='default',
    description=null,
    repeat=null,
    repeatDirection=null,
    title=null,
    transparent=false,
  ):: {
    type: 'table',
    [if datasource != null then 'datasource']: datasource,
    [if description != null then 'description']: description,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    [if title != null then 'title']: title,
    [if transparent != null then 'transparent']: transparent,

    // Merges the non-null settings into `fieldConfig.defaults`;
    // `thresholdMode` is stored as `thresholds.mode` and `width` as `custom.width`.
    setFieldConfig(
      displayName=null,
      max=null,
      min=null,
      thresholdMode='absolute',
      noValue=null,
      unit='short',
      width=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          [if displayName != null then 'displayName']: displayName,
          [if max != null then 'max']: max,
          [if min != null then 'min']: min,
          [if noValue != null then 'noValue']: noValue,
          [if unit != null then 'unit']: unit,
          thresholds+: { [if thresholdMode != null then 'mode']: thresholdMode },
          custom+: { [if width != null then 'width']: width },
        },
      },
    },

    // Merges the non-null coordinates into `gridPos`.
    setGridPos(
      h=8,
      w=12,
      x=null,
      y=null,
    ):: self + {
      gridPos+: {
        [if h != null then 'h']: h,
        [if w != null then 'w']: w,
        [if x != null then 'x']: x,
        [if y != null then 'y']: y,
      },
    },

    // Merges `showHeader` into `options` unless it is null.
    setOptions(
      showHeader=true,
    ):: self + {
      options+: { [if showHeader != null then 'showHeader']: showHeader },
    },

    // Appends a link to `fieldConfig.defaults.links`.
    addDataLink(
      targetBlank=true,
      title=null,
      url=null,
    ):: self + {
      fieldConfig+: { defaults+: { links+: [linkObject(targetBlank, title, url)] } },
    },

    // Appends a link to the panel-level `links` array.
    addPanelLink(
      targetBlank=true,
      title=null,
      url=null,
    ):: self + { links+: [linkObject(targetBlank, title, url)] },

    // Appends a value mapping to `fieldConfig.defaults.mappings`.
    addMapping(
      from=null,
      id=null,
      operator=null,
      text=null,
      to=null,
      type=null,
      value=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          mappings+: [{
            [if from != null then 'from']: from,
            [if id != null then 'id']: id,
            [if operator != null then 'operator']: operator,
            [if text != null then 'text']: text,
            [if to != null then 'to']: to,
            [if type != null then 'type']: type,
            [if value != null then 'value']: value,
          }],
        },
      },
    },

    // Appends a matcher/properties pair to `fieldConfig.overrides`.
    addOverride(
      matcher=null,
      properties=null,
    ):: self + {
      fieldConfig+: {
        overrides+: [{
          [if matcher != null then 'matcher']: matcher,
          [if properties != null then 'properties']: properties,
        }],
      },
    },

    // Appends a step to `fieldConfig.defaults.thresholds.steps`.
    addThresholdStep(
      color=null,
      value=null,
    ):: self + {
      fieldConfig+: {
        defaults+: {
          thresholds+: {
            steps+: [{
              [if color != null then 'color']: color,
              [if value != null then 'value']: value,
            }],
          },
        },
      },
    },

    // Appends `target` to the `targets` array, unchanged.
    addTarget(
      target
    ):: self + { targets+: [target] },
  },
}

View file

@ -0,0 +1,58 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a panel object of type 'text'.
  // Every argument that is null is omitted from the resulting object.
  new(
    content=null,
    datasource='default',
    description=null,
    mode='markdown',
    repeat=null,
    repeatDirection=null,
    title=null,
    transparent=false,
  ):: {
    type: 'text',
    [if content != null then 'content']: content,
    [if datasource != null then 'datasource']: datasource,
    [if description != null then 'description']: description,
    [if mode != null then 'mode']: mode,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    [if title != null then 'title']: title,
    [if transparent != null then 'transparent']: transparent,

    // Merges the non-null coordinates into `gridPos`.
    setGridPos(
      h=8,
      w=12,
      x=null,
      y=null,
    ):: self + {
      gridPos+: {
        [if h != null then 'h']: h,
        [if w != null then 'w']: w,
        [if x != null then 'x']: x,
        [if y != null then 'y']: y,
      },
    },

    // Appends a link to the panel-level `links` array; null arguments are omitted.
    addPanelLink(
      targetBlank=true,
      title=null,
      url=null,
    ):: self + {
      links+: [{
        [if targetBlank != null then 'targetBlank']: targetBlank,
        [if title != null then 'title']: title,
        [if url != null then 'url']: url,
      }],
    },

    // Appends `target` to the `targets` array, unchanged.
    addTarget(
      target
    ):: self + { targets+: [target] },
  },
}

View file

@ -0,0 +1,21 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a query target object from the given settings.
  // Every argument that is null is omitted, so only `datasource` and `format`
  // appear when the function is called without arguments.
  new(
    datasource='default',
    expr=null,
    format='time_series',
    instant=null,
    interval=null,
    intervalFactor=null,
    legendFormat=null,
  )::
    {
      [if datasource != null then 'datasource']: datasource,
      [if expr != null then 'expr']: expr,
      [if format != null then 'format']: format,
      [if instant != null then 'instant']: instant,
      [if interval != null then 'interval']: interval,
      [if intervalFactor != null then 'intervalFactor']: intervalFactor,
      [if legendFormat != null then 'legendFormat']: legendFormat,
    },
}

View file

@ -0,0 +1,36 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a template variable object of type 'custom'.
  // Every argument that is null is omitted from the resulting object.
  new(
    allValue=null,
    hide=0,
    includeAll=false,
    label=null,
    multi=false,
    name=null,
    query=null,
    queryValue='',
    skipUrlSync=false,
  ):: {
    type: 'custom',
    [if allValue != null then 'allValue']: allValue,
    [if hide != null then 'hide']: hide,
    [if includeAll != null then 'includeAll']: includeAll,
    [if label != null then 'label']: label,
    [if multi != null then 'multi']: multi,
    [if name != null then 'name']: name,
    [if query != null then 'query']: query,
    [if queryValue != null then 'queryValue']: queryValue,
    [if skipUrlSync != null then 'skipUrlSync']: skipUrlSync,

    // Merges the non-null arguments into the `current` object.
    setCurrent(
      selected=false,
      text=null,
      value=null,
    ):: self + {
      current+: {
        [if selected != null then 'selected']: selected,
        [if text != null then 'text']: text,
        [if value != null then 'value']: value,
      },
    },
  },
}

View file

@ -0,0 +1,36 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a template variable object of type 'datasource'.
  // Every argument that is null is omitted from the resulting object.
  new(
    hide=0,
    includeAll=false,
    label=null,
    multi=false,
    name=null,
    query=null,
    refresh=1,
    regex=null,
    skipUrlSync=false,
  ):: {
    type: 'datasource',
    [if hide != null then 'hide']: hide,
    [if includeAll != null then 'includeAll']: includeAll,
    [if label != null then 'label']: label,
    [if multi != null then 'multi']: multi,
    [if name != null then 'name']: name,
    [if query != null then 'query']: query,
    [if refresh != null then 'refresh']: refresh,
    [if regex != null then 'regex']: regex,
    [if skipUrlSync != null then 'skipUrlSync']: skipUrlSync,

    // Merges the non-null arguments into the `current` object.
    setCurrent(
      selected=false,
      text=null,
      value=null,
    ):: self + {
      current+: {
        [if selected != null then 'selected']: selected,
        [if text != null then 'text']: text,
        [if value != null then 'value']: value,
      },
    },
  },
}

View file

@ -0,0 +1,52 @@
// This file was generated by https://github.com/grafana/dashboard-spec
{
  // Builds a template variable object of type 'query'.
  // Every argument that is null is omitted from the resulting object.
  new(
    allValue=null,
    datasource=null,
    definition=null,
    hide=0,
    includeAll=false,
    label=null,
    multi=false,
    name=null,
    query=null,
    refresh=0,
    regex=null,
    skipUrlSync=false,
    sort=0,
    tagValuesQuery=null,
    tags=null,
    tagsQuery=null,
    useTags=false,
  ):: {
    type: 'query',
    [if allValue != null then 'allValue']: allValue,
    [if datasource != null then 'datasource']: datasource,
    [if definition != null then 'definition']: definition,
    [if hide != null then 'hide']: hide,
    [if includeAll != null then 'includeAll']: includeAll,
    [if label != null then 'label']: label,
    [if multi != null then 'multi']: multi,
    [if name != null then 'name']: name,
    [if query != null then 'query']: query,
    [if refresh != null then 'refresh']: refresh,
    [if regex != null then 'regex']: regex,
    [if skipUrlSync != null then 'skipUrlSync']: skipUrlSync,
    [if sort != null then 'sort']: sort,
    [if tagValuesQuery != null then 'tagValuesQuery']: tagValuesQuery,
    [if tags != null then 'tags']: tags,
    [if tagsQuery != null then 'tagsQuery']: tagsQuery,
    [if useTags != null then 'useTags']: useTags,

    // Merges the non-null arguments into the `current` object.
    // Here `selected` defaults to null, so it is omitted unless given.
    setCurrent(
      selected=null,
      text=null,
      value=null,
    ):: self + {
      current+: {
        [if selected != null then 'selected']: selected,
        [if text != null then 'text']: text,
        [if value != null then 'value']: value,
      },
    },
  },
}

View file

@ -0,0 +1,47 @@
{
  // Wraps a non-array value in a one-element array; arrays pass through as-is.
  local asArray(v) = if std.isArray(v) then v else [v],

  /**
   * Builds one condition of a graph panel alert.
   * The emitted condition always has type 'query': it names a query, a time
   * range, a reducer (aggregation function) and an evaluator (threshold check).
   *
   * @name alertCondition.new
   *
   * @param evaluatorParams (default `[]`) Threshold value(s); a single non-array value is wrapped in an array
   * @param evaluatorType (default `'gt'`) Type of threshold
   * @param operatorType (default `'and'`) Operator between conditions
   * @param queryRefId (default `'A'`) The letter that selects which query from the Metrics tab to execute
   * @param queryTimeStart (default `'5m'`) Beginning of time range
   * @param queryTimeEnd (default `'now'`) End of time range
   * @param reducerParams (default `[]`) Params of the aggregation function; a single non-array value is wrapped in an array
   * @param reducerType (default `'avg'`) Name of the aggregation function
   *
   * @return A json that represents a condition of alert
   */
  new(
    evaluatorParams=[],
    evaluatorType='gt',
    operatorType='and',
    queryRefId='A',
    queryTimeEnd='now',
    queryTimeStart='5m',
    reducerParams=[],
    reducerType='avg',
  ):: {
    type: 'query',
    evaluator: { params: asArray(evaluatorParams), type: evaluatorType },
    operator: { type: operatorType },
    // Order is fixed: ref id, then start, then end of the range.
    query: { params: [queryRefId, queryTimeStart, queryTimeEnd] },
    reducer: { params: asArray(reducerParams), type: reducerType },
  },
}

View file

@ -0,0 +1,43 @@
{
  /**
   * Creates an [Alert list panel](https://grafana.com/docs/grafana/latest/panels/visualizations/alert-list-panel/)
   *
   * @name alertlist.new
   *
   * @param title (default `''`)
   * @param span (optional)
   * @param show (default `'current'`) Whether the panel should display the current alert state or recent alert state changes.
   * @param limit (default `10`) Sets the maximum number of alerts to list.
   * @param sortOrder (default `1`) 1: alerting, 2: no_data, 3: pending, 4: ok, 5: paused
   * @param stateFilter (default `[]`) Only emitted when `show` is not `'changes'`.
   * @param onlyAlertsOnDashboard (default `true`) Shows alerts only from the dashboard the alert list is in
   * @param transparent (optional) Whether to display the panel without a background
   * @param description (optional)
   * @param datasource (optional) Always emitted, as null when not given.
   */
  new(
    title='',
    span=null,
    show='current',
    limit=10,
    sortOrder=1,
    stateFilter=[],
    onlyAlertsOnDashboard=true,
    transparent=null,
    description=null,
    datasource=null,
  )::
    {
      type: 'alertlist',
      title: title,
      datasource: datasource,
      show: show,
      limit: limit,
      sortOrder: sortOrder,
      onlyAlertsOnDashboard: onlyAlertsOnDashboard,
      // The state filter is dropped when the panel lists state changes.
      [if show != 'changes' then 'stateFilter']: stateFilter,
      [if span != null then 'span']: span,
      [if transparent != null then 'transparent']: transparent,
      [if description != null then 'description']: description,
    },
}

View file

@ -0,0 +1,40 @@
{
  // Hidden field holding a ready-made annotation object named
  // 'Annotations & Alerts' that uses the '-- Grafana --' datasource.
  default:: {
    name: 'Annotations & Alerts',
    type: 'dashboard',
    datasource: '-- Grafana --',
    builtIn: 1,
    enable: true,
    hide: true,
    iconColor: 'rgba(0, 211, 255, 1)',
  },

  /**
   * Builds an annotation object backed by the given datasource.
   * `showIn` is always emitted as 0; `expr` and `builtIn` are only emitted
   * when they are not null.
   *
   * @name annotation.datasource
   *
   * @param name
   * @param datasource
   * @param expr (optional)
   * @param enable (default `true`)
   * @param hide (default `false`)
   * @param iconColor (default `'rgba(255, 96, 96, 1)'`)
   * @param tags (default `[]`)
   * @param type (default `'tags'`)
   * @param builtIn (optional)
   */
  datasource(
    name,
    datasource,
    expr=null,
    enable=true,
    hide=false,
    iconColor='rgba(255, 96, 96, 1)',
    tags=[],
    type='tags',
    builtIn=null,
  ):: {
    name: name,
    type: type,
    datasource: datasource,
    enable: enable,
    hide: hide,
    iconColor: iconColor,
    showIn: 0,
    tags: tags,
    [if expr != null then 'expr']: expr,
    [if builtIn != null then 'builtIn']: builtIn,
  },
}

View file

@ -0,0 +1,47 @@
{
  /**
   * Create a [bar gauge panel](https://grafana.com/docs/grafana/latest/panels/visualizations/bar-gauge-panel/),
   *
   * @name barGaugePanel.new
   *
   * @param title Panel title.
   * @param description (optional) Panel description.
   * @param datasource (optional) Panel datasource.
   * @param unit (optional) The unit of the data.
   * @param thresholds (optional) An array of threshold steps, used as-is for `fieldConfig.defaults.thresholds.steps`.
   *
   * @method addTarget(target) Adds a target object.
   * @method addTargets(targets) Adds an array of targets.
   */
  new(
    title,
    description=null,
    datasource=null,
    unit=null,
    thresholds=[],
  ):: {
    type: 'bargauge',
    title: title,
    datasource: datasource,
    [if description != null then 'description']: description,
    targets: [],
    fieldConfig: {
      defaults: {
        unit: unit,
        thresholds: { mode: 'absolute', steps: thresholds },
      },
    },

    // Hidden counter: number of targets added so far.
    _nextTarget:: 0,

    // Appends `target`, giving it a refId derived from its position:
    // 'A' for the first target, then the following code points.
    addTarget(target):: self {
      local position = super._nextTarget,
      targets+: [target { refId: std.char(std.codepoint('A') + position) }],
      _nextTarget: position + 1,
    },

    // Adds every element of `targets` in order through addTarget.
    addTargets(targets):: std.foldl(function(panel, tgt) panel.addTarget(tgt), targets, self),
  },
}

View file

@ -0,0 +1,57 @@
{
  /**
   * Creates a [Google Cloud Monitoring target](https://grafana.com/docs/grafana/latest/datasources/google-cloud-monitoring/)
   *
   * The result holds two sub-objects, `metricQuery` and `sloQuery`, which share
   * the alias, alignment period and project name.
   *
   * @name cloudmonitoring.target
   *
   * @param metric
   * @param project
   * @param filters (default `[]`) Omitted from the output when null.
   * @param groupBys (default `[]`) Omitted from the output when null.
   * @param period (default: `'cloud-monitoring-auto'`)
   * @param crossSeriesReducer (default 'REDUCE_MAX')
   * @param valueType (default 'INT64')
   * @param perSeriesAligner (default 'ALIGN_DELTA')
   * @param metricKind (default 'CUMULATIVE')
   * @param unit (default `1`)
   * @param alias (optional) Emitted as `aliasBy` in both sub-objects.
   * @return Panel target
   */
  target(
    metric,
    project,
    filters=[],
    groupBys=[],
    period='cloud-monitoring-auto',
    crossSeriesReducer='REDUCE_MAX',
    valueType='INT64',
    perSeriesAligner='ALIGN_DELTA',
    metricKind='CUMULATIVE',
    unit=1,
    alias=null,
  )::
    // Fields that appear identically in both query objects.
    local shared = {
      [if alias != null then 'aliasBy']: alias,
      alignmentPeriod: period,
      projectName: project,
    };
    {
      metricQuery: shared {
        crossSeriesReducer: crossSeriesReducer,
        [if filters != null then 'filters']: filters,
        [if groupBys != null then 'groupBys']: groupBys,
        metricKind: metricKind,
        metricType: metric,
        perSeriesAligner: perSeriesAligner,
        unit: unit,
        valueType: valueType,
      },
      sloQuery: shared {
        selectorName: 'select_slo_health',
        serviceId: '',
        sloId: '',
      },
    },
}

View file

@ -0,0 +1,51 @@
{
  /**
   * Creates a [CloudWatch target](https://grafana.com/docs/grafana/latest/datasources/cloudwatch/)
   *
   * @name cloudwatch.target
   *
   * @param region
   * @param namespace
   * @param metric Emitted as `metricName`.
   * @param datasource (optional)
   * @param statistic (default: `'Average'`) Emitted as the single element of `statistics`.
   * @param alias (optional)
   * @param highResolution (default: `false`)
   * @param period (default: `'auto'`)
   * @param dimensions (default: `{}`)
   * @param id (optional)
   * @param expression (optional)
   * @param hide (optional)
   * @return Panel target
   */
  target(
    region,
    namespace,
    metric,
    datasource=null,
    statistic='Average',
    alias=null,
    highResolution=false,
    period='auto',
    dimensions={},
    id=null,
    expression=null,
    hide=null
  )::
    {
      // Always present.
      region: region,
      namespace: namespace,
      metricName: metric,
      statistics: [statistic],
      highResolution: highResolution,
      period: period,
      dimensions: dimensions,
      // Present only when a non-null value was supplied.
      [if datasource != null then 'datasource']: datasource,
      [if alias != null then 'alias']: alias,
      [if id != null then 'id']: id,
      [if expression != null then 'expression']: expression,
      [if hide != null then 'hide']: hide,
    },
}

View file

@ -0,0 +1,181 @@
local timepickerlib = import 'timepicker.libsonnet';
{
  /**
   * Creates a [dashboard](https://grafana.com/docs/grafana/latest/features/dashboard/dashboards/)
   *
   * @name dashboard.new
   *
   * @param title The title of the dashboard
   * @param editable (default: `false`) Whether the dashboard is editable via Grafana UI.
   * @param style (default: `'dark'`) Theme of dashboard, `'dark'` or `'light'`
   * @param tags (optional) Array of tags associated to the dashboard, e.g.`['tag1','tag2']`
   * @param time_from (default: `'now-6h'`)
   * @param time_to (default: `'now'`)
   * @param timezone (default: `'browser'`) Timezone of the dashboard, `'utc'` or `'browser'`
   * @param refresh (default: `''`) Auto-refresh interval, e.g. `'30s'`
   * @param timepicker (optional) See timepicker API
   * @param graphTooltip (default: `'default'`) `'default'` : no shared crosshair or tooltip (0), `'shared_crosshair'`: shared crosshair (1), `'shared_tooltip'`: shared crosshair AND shared tooltip (2). Any other value is emitted unchanged.
   * @param hideControls (default: `false`)
   * @param schemaVersion (default: `14`) Version of the Grafana JSON schema, incremented each time an update brings changes. `26` for Grafana 7.1.5, `22` for Grafana 6.7.4, `16` for Grafana 5.4.5, `14` for Grafana 4.6.3. etc.
   * @param uid (default: `''`) Unique dashboard identifier as a string (8-40), that can be chosen by users. Used to identify a dashboard to update when using Grafana REST API. Omitted from the output when empty.
   * @param description (optional)
   *
   * @method addTemplate(template) Add a template variable
   * @method addTemplates(templates) Adds an array of template variables
   * @method addAnnotation(annotation) Add an [annotation](https://grafana.com/docs/grafana/latest/dashboards/annotations/)
   * @method addAnnotations(annotations) Adds an array of annotations
   * @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`. When `gridPos` is omitted the panel is appended as-is.
   * @method addPanels(panels) Appends an array of panels
   * @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
   * @method addLinks(dashboardLink) Adds an array of [dashboard links](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
   * @method addRequired(type, name, id, version)
   * @method addInput(name, label, type, pluginId, pluginName, description, value)
   * @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`.
   * @method addRows(rows) Adds an array of legacy rows via `addRow`.
   */
  new(
    title,
    editable=false,
    style='dark',
    tags=[],
    time_from='now-6h',
    time_to='now',
    timezone='browser',
    refresh='',
    timepicker=timepickerlib.new(),
    graphTooltip='default',
    hideControls=false,
    schemaVersion=14,
    uid='',
    description=null,
  ):: {
    // `it` lets the visible list fields below read the hidden accumulators
    // (`_annotations`, `templates`, `required`, `inputs`) of the final object.
    local it = self,
    _annotations:: [],
    [if uid != '' then 'uid']: uid,
    editable: editable,
    [if description != null then 'description']: description,
    gnetId: null,
    // Known names map to their numeric codes; anything else passes through.
    graphTooltip:
      if graphTooltip == 'shared_tooltip' then 2
      else if graphTooltip == 'shared_crosshair' then 1
      else if graphTooltip == 'default' then 0
      else graphTooltip,
    hideControls: hideControls,
    id: null,
    links: [],
    // Hidden until addPanels makes it visible with `+:::`.
    panels:: [],
    refresh: refresh,
    rows: [],
    schemaVersion: schemaVersion,
    style: style,
    tags: tags,
    time: {
      from: time_from,
      to: time_to,
    },
    timezone: timezone,
    timepicker: timepicker,
    title: title,
    version: 0,
    addAnnotations(annotations):: self {
      _annotations+:: annotations,
    },
    addAnnotation(a):: self.addAnnotations([a]),
    addTemplates(templates):: self {
      templates+: templates,
    },
    addTemplate(t):: self.addTemplates([t]),
    templates:: [],
    annotations: { list: it._annotations },
    templating: { list: it.templates },
    // Next free panel id; numbering starts at 2.
    _nextPanel:: 2,
    addRow(row)::
      self {
        // automatically number panels in added rows.
        // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
        local n = std.length(row.panels),
        local nextPanel = super._nextPanel,
        local panels = std.makeArray(n, function(i)
          row.panels[i] { id: nextPanel + i }),
        _nextPanel: nextPanel + n,
        rows+: [row { panels: panels }],
      },
    addPanels(newpanels)::
      self {
        // automatically number panels in added rows.
        // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
        local nextPanel = super._nextPanel,
        // Ids consumed in total: one per panel plus one per nested panel.
        local n = std.foldl(
          function(numOfPanels, p)
            numOfPanels + 1 + (if 'panels' in p then std.length(p.panels) else 0),
          newpanels,
          0
        ),
        // Offset of panel i from nextPanel: the previous panel's offset plus
        // one for that panel itself plus one for each panel nested inside it.
        local offsetOf(i) =
          if i == 0 then
            0
          else
            local prev = _panels[i - 1];
            (prev.id - nextPanel) + 1 + (if 'panels' in prev then std.length(prev.panels) else 0),
        local _panels = std.makeArray(
          std.length(newpanels), function(i)
            newpanels[i] {
              id: nextPanel + offsetOf(i),
              // Nested panels are numbered directly after their parent.
              [if 'panels' in newpanels[i] then 'panels']: std.makeArray(
                std.length(newpanels[i].panels), function(j)
                  newpanels[i].panels[j] {
                    id: 1 + j + nextPanel + offsetOf(i),
                  }
              ),
            }
        ),
        _nextPanel: nextPanel + n,
        panels+::: _panels,
      },
    // `gridPos` is optional: when it is null the panel keeps whatever
    // gridPos it already carries (or none) instead of getting `gridPos: null`.
    addPanel(panel, gridPos=null)::
      self.addPanels([if gridPos != null then panel { gridPos: gridPos } else panel]),
    addRows(rows):: std.foldl(function(d, row) d.addRow(row), rows, self),
    addLink(link):: self {
      links+: [link],
    },
    addLinks(dashboardLinks):: std.foldl(function(d, t) d.addLink(t), dashboardLinks, self),
    required:: [],
    __requires: it.required,
    addRequired(type, name, id, version):: self {
      required+: [{ type: type, name: name, id: id, version: version }],
    },
    inputs:: [],
    __inputs: it.inputs,
    addInput(
      name,
      label,
      type,
      pluginId=null,
      pluginName=null,
      description='',
      value=null,
    ):: self {
      inputs+: [{
        name: name,
        label: label,
        type: type,
        [if pluginId != null then 'pluginId']: pluginId,
        [if pluginName != null then 'pluginName']: pluginName,
        [if value != null then 'value']: value,
        description: description,
      }],
    },
  },
}

View file

@ -0,0 +1,41 @@
{
  /**
   * Creates a [dashlist panel](https://grafana.com/docs/grafana/latest/panels/visualizations/dashboard-list-panel/).
   * It requires the dashlist panel plugin in grafana, which is built-in.
   *
   * @name dashlist.new
   *
   * @param title The title of the dashlist panel.
   * @param description (optional) Description of the panel
   * @param query (optional) Query to search by; emitted as `''` when not given
   * @param tags (optional) Array of tag(s) to search by
   * @param recent (default `true`) Displays recently viewed dashboards
   * @param search (default `false`) Value of the panel's `search` flag (presumably shows dashboards matching `query`/`tags` - confirm against the Grafana docs)
   * @param starred (default `false`) Displays starred dashboards
   * @param headings (default `true`) Chosen list selection(starred, recently Viewed, search) is shown as a heading
   * @param limit (default `10`) Set maximum items in a list
   * @return A json that represents a dashlist panel
   */
  new(
    title,
    description=null,
    query=null,
    tags=[],
    recent=true,
    search=false,
    starred=false,
    headings=true,
    limit=10,
  )::
    {
      type: 'dashlist',
      title: title,
      [if description != null then 'description']: description,
      // A missing query becomes the empty string rather than null.
      query: if query == null then '' else query,
      tags: tags,
      headings: headings,
      limit: limit,
      recent: recent,
      search: search,
      starred: starred,
    },
}

View file

@ -0,0 +1,51 @@
{
  // Default metric: the 90th percentile of the 'value' field.
  local defaultMetrics = [{
    field: 'value',
    id: null,
    type: 'percentiles',
    settings: { percents: ['90'] },
  }],

  // Default bucket aggregation: a date histogram over the 'timestamp' field.
  local defaultBucketAggs = [{
    field: 'timestamp',
    id: null,
    type: 'date_histogram',
    settings: {
      interval: '1s',
      min_doc_count: 0,
      trimEdges: 0,
    },
  }],

  /**
   * Creates an [Elasticsearch target](https://grafana.com/docs/grafana/latest/datasources/elasticsearch/)
   *
   * `id` and `alias` are always emitted, as null when not given; `datasource`
   * is only emitted when it is not null.
   *
   * @name elasticsearch.target
   *
   * @param query
   * @param timeField
   * @param id (optional)
   * @param datasource (optional)
   * @param metrics (optional) Defaults to a single 90th-percentile metric on the `value` field.
   * @param bucketAggs (optional) Defaults to a single `date_histogram` on the `timestamp` field.
   * @param alias (optional)
   */
  target(
    query,
    timeField,
    id=null,
    datasource=null,
    metrics=defaultMetrics,
    bucketAggs=defaultBucketAggs,
    alias=null,
  )::
    {
      id: id,
      alias: alias,
      query: query,
      timeField: timeField,
      metrics: metrics,
      bucketAggs: bucketAggs,
      [if datasource != null then 'datasource']: datasource,
      // TODO: generate bucket ids
    },
}

View file

@ -0,0 +1,211 @@
{
  /**
   * Creates a [gauge panel](https://grafana.com/docs/grafana/latest/panels/visualizations/gauge-panel/).
   *
   * @name gaugePanel.new
   *
   * @param title Panel title.
   * @param description (optional) Panel description.
   * @param transparent (default `false`) Whether to display the panel without a background.
   * @param datasource (optional) Panel datasource.
   * @param allValues (default `false`) Show all values instead of reducing to one.
   * @param valueLimit (optional) Limit of values in all values mode. Only emitted when `allValues` is true.
   * @param reducerFunction (default `'mean'`) Function to use to reduce values to when using single value.
   * @param fields (default `''`) Fields that should be included in the panel.
   * @param showThresholdLabels (default `false`) Render the threshold values around the gauge bar.
   * @param showThresholdMarkers (default `true`) Render the thresholds as an outer bar.
   * @param unit (default `'percent'`) Panel unit field option.
   * @param min (default `0`) Pass `null` to leave it out, so it is calculated based on all values.
   * @param max (default `100`) Pass `null` to leave it out, so it is calculated based on all values.
   * @param decimals (optional) Number of decimal places to show.
   * @param displayName (optional) Change the field or series name.
   * @param noValue (optional) What to show when there is no value.
   * @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
   * @param repeat (optional) Name of variable that should be used to repeat this panel.
   * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical. Only emitted when `repeat` is set.
   * @param repeatMaxPerRow (optional) Maximum panels per row in repeat mode. Only emitted when `repeat` is set.
   * @param timeFrom (optional)
   * @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for. This has been tested with the default, '7', and '6.7'. A numeric major version of 7 or higher (including two-digit majors such as '10.1.0') selects the `fieldConfig` layout; lower versions select the legacy `options.fieldOptions` layout.
   *
   * @method addTarget(target) Adds a target object.
   * @method addTargets(targets) Adds an array of targets.
   * @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/). Argument format: `{ title: 'Link Title', url: 'https://...', targetBlank: true }`.
   * @method addLinks(links) Adds an array of links.
   * @method addThreshold(step) Adds a threshold step. Argument format: `{ color: 'green', value: 0 }`.
   * @method addThresholds(steps) Adds an array of threshold steps.
   * @method addMapping(mapping) Adds a value mapping.
   * @method addMappings(mappings) Adds an array of value mappings.
   * @method addDataLink(link) Adds a data link.
   * @method addDataLinks(links) Adds an array of data links.
   * @method addOverride(matcher, properties) Adds a field override (`fieldConfig` layout only).
   * @method addOverrides(overrides) Adds an array of `{ matcher, properties }` overrides (`fieldConfig` layout only).
   */
  new(
    title,
    description=null,
    transparent=false,
    datasource=null,
    allValues=false,
    valueLimit=null,
    reducerFunction='mean',
    fields='',
    showThresholdLabels=false,
    showThresholdMarkers=true,
    unit='percent',
    min=0,
    max=100,
    decimals=null,
    displayName=null,
    noValue=null,
    thresholdsMode='absolute',
    repeat=null,
    repeatDirection='h',
    repeatMaxPerRow=null,
    timeFrom=null,
    pluginVersion='7',
  )::
    // Choose the layout from the numeric major version. A plain string
    // comparison (`pluginVersion >= '7'`) is lexicographic, so '10' would sort
    // below '7' and wrongly select the legacy layout.
    local majorText = std.split(pluginVersion, '.')[0];
    local majorIsNumeric =
      std.length(majorText) > 0 &&
      std.length([c for c in std.stringChars(majorText) if c < '0' || c > '9']) == 0;
    local usesFieldConfig =
      if majorIsNumeric then
        std.parseInt(majorText) >= 7
      else
        // Non-numeric input keeps the previous string-comparison behaviour.
        pluginVersion >= '7';
    {
      type: 'gauge',
      title: title,
      [if description != null then 'description']: description,
      transparent: transparent,
      datasource: datasource,
      targets: [],
      links: [],
      [if repeat != null then 'repeat']: repeat,
      [if repeat != null then 'repeatDirection']: repeatDirection,
      [if repeat != null then 'repeatMaxPerRow']: repeatMaxPerRow,
      [if timeFrom != null then 'timeFrom']: timeFrom,
      // targets
      // Hidden counter used to derive refIds: 'A' for the first target, then
      // the following code points.
      _nextTarget:: 0,
      addTarget(target):: self {
        local nextTarget = super._nextTarget,
        _nextTarget: nextTarget + 1,
        targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
      },
      addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
      // links
      addLink(link):: self {
        links+: [link],
      },
      addLinks(links):: std.foldl(function(p, l) p.addLink(l), links, self),
      pluginVersion: pluginVersion,
    } + (
      if usesFieldConfig then {
        // Layout for plugin major version 7 and above: reducer settings live in
        // `options.reduceOptions`, field defaults in `fieldConfig.defaults`.
        options: {
          reduceOptions: {
            values: allValues,
            [if allValues && valueLimit != null then 'limit']: valueLimit,
            calcs: [
              reducerFunction,
            ],
            fields: fields,
          },
          showThresholdLabels: showThresholdLabels,
          showThresholdMarkers: showThresholdMarkers,
        },
        fieldConfig: {
          defaults: {
            unit: unit,
            [if min != null then 'min']: min,
            [if max != null then 'max']: max,
            [if decimals != null then 'decimals']: decimals,
            [if displayName != null then 'displayName']: displayName,
            [if noValue != null then 'noValue']: noValue,
            thresholds: {
              mode: thresholdsMode,
              steps: [],
            },
            mappings: [],
            links: [],
          },
        },
        // thresholds
        addThreshold(step):: self {
          fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } },
        },
        // mappings
        // Each mapping is given an `id` equal to its insertion index.
        _nextMapping:: 0,
        addMapping(mapping):: self {
          local nextMapping = super._nextMapping,
          _nextMapping: nextMapping + 1,
          fieldConfig+: { defaults+: { mappings+: [mapping { id: nextMapping }] } },
        },
        // data links
        addDataLink(link):: self {
          fieldConfig+: { defaults+: { links+: [link] } },
        },
        // Overrides
        addOverride(
          matcher=null,
          properties=null,
        ):: self {
          fieldConfig+: {
            overrides+: [
              {
                [if matcher != null then 'matcher']: matcher,
                [if properties != null then 'properties']: properties,
              },
            ],
          },
        },
        addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
      } else {
        // Legacy layout: everything lives under `options.fieldOptions`.
        options: {
          fieldOptions: {
            values: allValues,
            [if allValues && valueLimit != null then 'limit']: valueLimit,
            calcs: [
              reducerFunction,
            ],
            fields: fields,
            defaults: {
              unit: unit,
              [if min != null then 'min']: min,
              [if max != null then 'max']: max,
              [if decimals != null then 'decimals']: decimals,
              [if displayName != null then 'displayName']: displayName,
              [if noValue != null then 'noValue']: noValue,
              thresholds: {
                mode: thresholdsMode,
                steps: [],
              },
              mappings: [],
              links: [],
            },
          },
          showThresholdLabels: showThresholdLabels,
          showThresholdMarkers: showThresholdMarkers,
        },
        // thresholds
        addThreshold(step):: self {
          options+: { fieldOptions+: { defaults+: { thresholds+: { steps+: [step] } } } },
        },
        // mappings
        // Each mapping is given an `id` equal to its insertion index.
        _nextMapping:: 0,
        addMapping(mapping):: self {
          local nextMapping = super._nextMapping,
          _nextMapping: nextMapping + 1,
          options+: { fieldOptions+: { defaults+: { mappings+: [mapping { id: nextMapping }] } } },
        },
        // data links
        addDataLink(link):: self {
          options+: { fieldOptions+: { defaults+: { links+: [link] } } },
        },
      }
    ) + {
      // Bulk helpers shared by both layouts; each folds over the single-item method.
      addThresholds(steps):: std.foldl(function(p, s) p.addThreshold(s), steps, self),
      addMappings(mappings):: std.foldl(function(p, m) p.addMapping(m), mappings, self),
      addDataLinks(links):: std.foldl(function(p, l) p.addDataLink(l), links, self),
    },
}

View file

@ -0,0 +1,32 @@
// Index of the library: every entry is a hidden field that imports the
// sibling .libsonnet file implementing it. Entries are kept in alphabetical
// order; field order has no effect on evaluation.
{
  alertCondition:: import 'alert_condition.libsonnet',
  alertlist:: import 'alertlist.libsonnet',
  annotation:: import 'annotation.libsonnet',
  barGaugePanel:: import 'bar_gauge_panel.libsonnet',
  cloudmonitoring:: import 'cloudmonitoring.libsonnet',
  cloudwatch:: import 'cloudwatch.libsonnet',
  dashboard:: import 'dashboard.libsonnet',
  dashlist:: import 'dashlist.libsonnet',
  elasticsearch:: import 'elasticsearch.libsonnet',
  // Accessing `gauge` always fails with a migration hint to `gaugePanel`.
  gauge:: error 'gauge is removed, migrate to gaugePanel',
  gaugePanel:: import 'gauge_panel.libsonnet',
  graphPanel:: import 'graph_panel.libsonnet',
  graphite:: import 'graphite.libsonnet',
  heatmapPanel:: import 'heatmap_panel.libsonnet',
  influxdb:: import 'influxdb.libsonnet',
  link:: import 'link.libsonnet',
  logPanel:: import 'log_panel.libsonnet',
  loki:: import 'loki.libsonnet',
  pieChartPanel:: import 'pie_chart_panel.libsonnet',
  pluginlist:: import 'pluginlist.libsonnet',
  prometheus:: import 'prometheus.libsonnet',
  row:: import 'row.libsonnet',
  singlestat:: import 'singlestat.libsonnet',
  sql:: import 'sql.libsonnet',
  statPanel:: import 'stat_panel.libsonnet',
  tablePanel:: import 'table_panel.libsonnet',
  template:: import 'template.libsonnet',
  text:: import 'text.libsonnet',
  timepicker:: import 'timepicker.libsonnet',
  transformation:: import 'transformation.libsonnet',
}

View file

@ -0,0 +1,313 @@
{
/**
* Creates a [graph panel](https://grafana.com/docs/grafana/latest/panels/visualizations/graph-panel/).
* It requires the graph panel plugin in grafana, which is built-in.
*
* @name graphPanel.new
*
* @param title The title of the graph panel.
* @param description (optional) The description of the panel
* @param span (optional) Width of the panel
* @param height (optional) Height of the panel. If null, not added to the json output.
* @param datasource (optional) Datasource
* @param fill (default `1`) , integer from 0 to 10
* @param fillGradient (default `0`) , integer from 0 to 10
* @param linewidth (default `1`) Line Width, integer from 0 to 10
* @param decimals (optional) Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
* @param decimalsY1 (optional) Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
* @param decimalsY2 (optional) Override automatic decimal precision for the second Y axis. If null, use decimals parameter.
* @param min_span (optional) Min span
* @param format (default `short`) Unit of the Y axes
* @param formatY1 (optional) Unit of the first Y axis. If null, use format parameter.
* @param formatY2 (optional) Unit of the second Y axis. If null, use format parameter.
* @param min (optional) Min of the Y axes
* @param max (optional) Max of the Y axes
* @param maxDataPoints (optional) If the data source supports it, sets the maximum number of data points for each series returned.
* @param labelY1 (optional) Label of the first Y axis
* @param labelY2 (optional) Label of the second Y axis
* @param x_axis_mode (default `'time'`) X axis mode, one of [time, series, histogram]
* @param x_axis_values (default `'total'`) Chosen value of series, one of [avg, min, max, total, count]. Only used when x_axis_mode is 'series'.
* @param x_axis_buckets (optional) Restricts the x axis to this amount of buckets. Only used when x_axis_mode is 'histogram'.
* @param x_axis_min (optional) Restricts the x axis to display from this value if supplied
* @param x_axis_max (optional) Restricts the x axis to display up to this value if supplied
* @param show_xaxis (default `true`) Show the X axis
* @param lines (default `true`) Display lines
* @param points (default `false`) Display points
* @param pointradius (default `5`) Radius of the points, allowed values are 0.5 or [1 ... 10] with step 1
* @param bars (default `false`) Display bars
* @param staircase (default `false`) Display line as staircase
* @param dashes (default `false`) Display line as dashes
* @param stack (default `false`) Whether to stack values
* @param nullPointMode (default `'null'`) Emitted unchanged as the panel's `nullPointMode`
* @param repeat (optional) Name of variable that should be used to repeat this panel.
* @param repeatDirection (optional) 'h' for horizontal or 'v' for vertical. If null, not added to the json output.
* @param sort (default `0`) Tooltip sort order. `'decreasing'` is emitted as 2, `'increasing'` as 1, any other value is emitted unchanged.
* @param legend_show (default `true`) Show legend
* @param legend_values (default `false`) Show values in legend
* @param legend_min (default `false`) Show min in legend
* @param legend_max (default `false`) Show max in legend
* @param legend_current (default `false`) Show current in legend
* @param legend_total (default `false`) Show total in legend
* @param legend_avg (default `false`) Show average in legend
* @param legend_alignAsTable (default `false`) Show legend as table
* @param legend_rightSide (default `false`) Show legend to the right
* @param legend_sideWidth (optional) Legend width
* @param legend_hideEmpty (optional) Emitted as the legend's `hideEmpty`. If null, not added to the json output.
* @param legend_hideZero (optional) Emitted as the legend's `hideZero`. If null, not added to the json output.
* @param legend_sort (optional) Sort order of legend
* @param legend_sortDesc (optional) Sort legend descending
* @param aliasColors (optional) Define color mappings for graphs
* @param thresholds (optional) An array of graph thresholds
* @param links (optional) Initial array of panel links; more can be appended with addLink/addLinks.
* @param logBase1Y (default `1`) Value of logarithm base of the first Y axis
* @param logBase2Y (default `1`) Value of logarithm base of the second Y axis
* @param transparent (default `false`) Whether to display the panel without a background.
* @param value_type (default `'individual'`) Type of tooltip value
* @param shared_tooltip (default `true`) Allow to group or split tooltips on mouseover within a chart
* @param percentage (default `false`) show as percentages
* @param time_from (optional) Emitted as the panel's `timeFrom`
* @param time_shift (optional) Emitted as the panel's `timeShift`
* @param interval (optional) A lower limit for the interval.
*
* @method addTarget(target) Adds a target object.
* @method addTargets(targets) Adds an array of targets.
* @method addSeriesOverride(override)
* @method resetYaxes() Empties the list of Y axes.
* @method addYaxis(format,min,max,label,show,logBase,decimals) Adds a Y axis to the graph
* @method addAlert(alert) Adds an alert. The returned panel also offers addCondition(condition) and addConditions(conditions).
* @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/)
* @method addLinks(links) Adds an array of links.
* @method addOverride(matcher,properties) Appends an entry to `fieldConfig.overrides`.
* @method addOverrides(overrides) Adds an array of overrides, each an object with `matcher` and `properties`.
*/
new(
title,
span=null,
fill=1,
fillGradient=0,
linewidth=1,
decimals=null,
decimalsY1=null,
decimalsY2=null,
description=null,
min_span=null,
format='short',
formatY1=null,
formatY2=null,
min=null,
max=null,
labelY1=null,
labelY2=null,
x_axis_mode='time',
x_axis_values='total',
x_axis_buckets=null,
x_axis_min=null,
x_axis_max=null,
lines=true,
datasource=null,
points=false,
pointradius=5,
bars=false,
staircase=false,
height=null,
nullPointMode='null',
dashes=false,
stack=false,
repeat=null,
repeatDirection=null,
sort=0,
show_xaxis=true,
legend_show=true,
legend_values=false,
legend_min=false,
legend_max=false,
legend_current=false,
legend_total=false,
legend_avg=false,
legend_alignAsTable=false,
legend_rightSide=false,
legend_sideWidth=null,
legend_hideEmpty=null,
legend_hideZero=null,
legend_sort=null,
legend_sortDesc=null,
aliasColors={},
thresholds=[],
links=[],
logBase1Y=1,
logBase2Y=1,
transparent=false,
value_type='individual',
shared_tooltip=true,
percentage=false,
maxDataPoints=null,
time_from=null,
time_shift=null,
interval=null
):: {
title: title,
[if span != null then 'span']: span,
[if min_span != null then 'minSpan']: min_span,
[if decimals != null then 'decimals']: decimals,
type: 'graph',
datasource: datasource,
targets: [
],
[if description != null then 'description']: description,
[if height != null then 'height']: height,
renderer: 'flot',
// Two default Y axes. Both share min/max; format and decimals fall back to
// the shared `format` / `decimals` when no per-axis value is given.
yaxes: [
self.yaxe(
if formatY1 != null then formatY1 else format,
min,
max,
decimals=(if decimalsY1 != null then decimalsY1 else decimals),
logBase=logBase1Y,
label=labelY1
),
self.yaxe(
if formatY2 != null then formatY2 else format,
min,
max,
decimals=(if decimalsY2 != null then decimalsY2 else decimals),
logBase=logBase2Y,
label=labelY2
),
],
xaxis: {
show: show_xaxis,
mode: x_axis_mode,
name: null,
// x_axis_values only applies in 'series' mode, x_axis_buckets only in 'histogram' mode.
values: if x_axis_mode == 'series' then [x_axis_values] else [],
buckets: if x_axis_mode == 'histogram' then x_axis_buckets else null,
[if x_axis_min != null then 'min']: x_axis_min,
[if x_axis_max != null then 'max']: x_axis_max,
},
lines: lines,
fill: fill,
fillGradient: fillGradient,
linewidth: linewidth,
dashes: dashes,
dashLength: 10,
spaceLength: 10,
points: points,
pointradius: pointradius,
bars: bars,
stack: stack,
percentage: percentage,
[if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
legend: {
show: legend_show,
values: legend_values,
min: legend_min,
max: legend_max,
current: legend_current,
total: legend_total,
alignAsTable: legend_alignAsTable,
rightSide: legend_rightSide,
sideWidth: legend_sideWidth,
avg: legend_avg,
[if legend_hideEmpty != null then 'hideEmpty']: legend_hideEmpty,
[if legend_hideZero != null then 'hideZero']: legend_hideZero,
[if legend_sort != null then 'sort']: legend_sort,
[if legend_sortDesc != null then 'sortDesc']: legend_sortDesc,
},
nullPointMode: nullPointMode,
steppedLine: staircase,
tooltip: {
value_type: value_type,
shared: shared_tooltip,
// The names 'decreasing' / 'increasing' are translated to 2 / 1; any other value passes through.
sort: if sort == 'decreasing' then 2 else if sort == 'increasing' then 1 else sort,
},
timeFrom: time_from,
timeShift: time_shift,
[if interval != null then 'interval']: interval,
// `transparent` is only written to the output when it is true.
[if transparent == true then 'transparent']: transparent,
aliasColors: aliasColors,
repeat: repeat,
[if repeatDirection != null then 'repeatDirection']: repeatDirection,
seriesOverrides: [],
thresholds: thresholds,
links: links,
// Builds a single Y axis object; used for the default axes above and by addYaxis.
yaxe(
format='short',
min=null,
max=null,
label=null,
show=true,
logBase=1,
decimals=null,
):: {
label: label,
show: show,
logBase: logBase,
min: min,
max: max,
format: format,
[if decimals != null then 'decimals']: decimals,
},
// Hidden counter of targets added so far; drives the generated refId.
_nextTarget:: 0,
addTarget(target):: self {
// automatically ref id in added targets.
// https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
local nextTarget = super._nextTarget,
_nextTarget: nextTarget + 1,
targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
},
addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
addSeriesOverride(override):: self {
seriesOverrides+: [override],
},
// Replaces the Y axis list with an empty one; addYaxis then appends to it.
resetYaxes():: self {
yaxes: [],
},
addYaxis(
format='short',
min=null,
max=null,
label=null,
show=true,
logBase=1,
decimals=null,
):: self {
yaxes+: [self.yaxe(format, min, max, label, show, logBase, decimals)],
},
addAlert(
name,
executionErrorState='alerting',
forDuration='5m',
frequency='60s',
handler=1,
message='',
noDataState='no_data',
notifications=[],
alertRuleTags={},
):: self {
// `it` is the panel object carrying the alert, so `alert.conditions` reads
// whatever addCondition has appended to the hidden `_conditions` list.
local it = self,
_conditions:: [],
alert: {
name: name,
conditions: it._conditions,
executionErrorState: executionErrorState,
'for': forDuration,
frequency: frequency,
handler: handler,
noDataState: noDataState,
notifications: notifications,
message: message,
alertRuleTags: alertRuleTags,
},
addCondition(condition):: self {
_conditions+: [condition],
},
addConditions(conditions):: std.foldl(function(p, c) p.addCondition(c), conditions, it),
},
addLink(link):: self {
links+: [link],
},
addLinks(links):: std.foldl(function(p, t) p.addLink(t), links, self),
// Appends one entry to fieldConfig.overrides; null arguments are left out of the entry.
addOverride(
matcher=null,
properties=null,
):: self {
fieldConfig+: {
overrides+: [
{
[if matcher != null then 'matcher']: matcher,
[if properties != null then 'properties']: properties,
},
],
},
},
addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
},
}

View file

@ -0,0 +1,29 @@
{
  /**
   * Builds a [Graphite target](https://grafana.com/docs/grafana/latest/datasources/graphite/)
   * that can be added to a panel.
   *
   * @name graphite.target
   *
   * @param target Graphite query. Nested queries are possible by adding the query reference (refId).
   * @param targetFull (optional) Expanded form of `target`, used in nested queries. Omitted from the output when null.
   * @param hide (default `false`) Disable query on graph.
   * @param textEditor (default `false`) Enable raw query mode.
   * @param datasource (optional) Datasource. Omitted from the output when null.
   * @return Panel target
   */
  target(target, targetFull=null, hide=false, textEditor=false, datasource=null)::
    // Yields a one-field object, or nothing at all when the value is null.
    local unlessNull(field, value) = if value == null then {} else { [field]: value };
    local always = {
      target: target,
      hide: hide,
      textEditor: textEditor,
    };
    always + unlessNull('targetFull', targetFull) + unlessNull('datasource', datasource),
}

View file

@ -0,0 +1,150 @@
{
  /**
   * Creates a [heatmap panel](https://grafana.com/docs/grafana/latest/panels/visualizations/heatmap/).
   * Requires the heatmap panel plugin in Grafana, which is built-in.
   *
   * @name heatmapPanel.new
   *
   * @param title The title of the heatmap panel
   * @param description (optional) Description of panel
   * @param datasource (optional) Datasource
   * @param span (optional) Width of the panel
   * @param cards_cardPadding (optional) How much padding to put between bucket cards
   * @param cards_cardRound (optional) How much rounding should be applied to the bucket card shape
   * @param color_cardColor (default `'#b4ff00'`) Hex value of color used when color_colorScheme is 'opacity'
   * @param color_colorScale (default `'sqrt'`) How to scale the color range, 'linear' or 'sqrt'
   * @param color_colorScheme (default `'interpolateOranges'`) TODO: document. Only emitted when color_mode is 'spectrum'.
   * @param color_exponent (default `0.5`) TODO: document
   * @param color_max (optional) The value for the end of the color range
   * @param color_min (optional) The value for the beginning of the color range
   * @param color_mode (default `'spectrum'`) How to display difference in frequency with color
   * @param dataFormat (default `'timeseries'`) How to format the data
   * @param highlightCards (default `true`) TODO: document
   * @param hideZeroBuckets (default `false`) Whether or not to hide empty buckets, default is false
   * @param legend_show (default `false`) Show legend
   * @param minSpan (optional) Minimum span of the panel when repeated on a template variable
   * @param repeat (optional) Variable used to repeat the heatmap panel
   * @param repeatDirection (optional) Which direction to repeat the panel, 'h' for horizontal and 'v' for vertically
   * @param tooltipDecimals (optional) The number of decimal places to display in the tooltip
   * @param tooltip_show (default `true`) Whether or not to display a tooltip when hovering over the heatmap
   * @param tooltip_showHistogram (default `false`) Whether or not to display a histogram in the tooltip
   * @param xAxis_show (default `true`) Whether or not to show the X axis, default true
   * @param xBucketNumber (optional) Number of buckets for the X axis. Only if dataFormat is 'timeseries'.
   * @param xBucketSize (optional) Size of X axis buckets. Number or interval(10s, 15h, etc.) Has priority over xBucketNumber. Only if dataFormat is 'timeseries'.
   * @param yAxis_decimals (optional) Override automatic decimal precision for the Y axis
   * @param yAxis_format (default `'short'`) Unit of the Y axis
   * @param yAxis_logBase (default `1`) Only if dataFormat is 'timeseries'
   * @param yAxis_min (optional) Only if dataFormat is 'timeseries', min of the Y axis
   * @param yAxis_max (optional) Only if dataFormat is 'timeseries', max of the Y axis
   * @param yAxis_show (default `true`) Whether or not to show the Y axis
   * @param yAxis_splitFactor (optional) TODO: document
   * @param yBucketBound (default `'auto'`) Which bound ('lower' or 'upper') of the bucket to use
   * @param yBucketNumber (optional) Number of buckets for the Y axis. Only if dataFormat is 'timeseries'.
   * @param yBucketSize (optional) Size of Y axis buckets. Has priority over yBucketNumber. Only if dataFormat is 'timeseries'.
   * @param maxDataPoints (optional) The maximum data points per series. Used directly by some data sources and used in calculation of auto interval. With streaming data this value is used for the rolling buffer.
   *
   * @method addTarget(target) Adds a target object.
   * @method addTargets(targets) Adds an array of targets.
   */
  new(
    title,
    datasource=null,
    description=null,
    cards_cardPadding=null,
    cards_cardRound=null,
    color_cardColor='#b4ff00',
    color_colorScale='sqrt',
    color_colorScheme='interpolateOranges',
    color_exponent=0.5,
    color_max=null,
    color_min=null,
    color_mode='spectrum',
    dataFormat='timeseries',
    highlightCards=true,
    hideZeroBuckets=false,
    legend_show=false,
    minSpan=null,
    span=null,
    repeat=null,
    repeatDirection=null,
    tooltipDecimals=null,
    tooltip_show=true,
    tooltip_showHistogram=false,
    xAxis_show=true,
    xBucketNumber=null,
    xBucketSize=null,
    yAxis_decimals=null,
    yAxis_format='short',
    yAxis_logBase=1,
    yAxis_min=null,
    yAxis_max=null,
    yAxis_show=true,
    yAxis_splitFactor=null,
    yBucketBound='auto',
    yBucketNumber=null,
    yBucketSize=null,
    maxDataPoints=null,
  ):: {
    // Bucket, min/max and log-base settings are only meaningful for this format.
    local isTimeseries = dataFormat == 'timeseries',

    title: title,
    type: 'heatmap',
    [if description != null then 'description']: description,
    datasource: datasource,
    cards: {
      cardPadding: cards_cardPadding,
      cardRound: cards_cardRound,
    },
    color: {
      mode: color_mode,
      cardColor: color_cardColor,
      colorScale: color_colorScale,
      exponent: color_exponent,
      [if color_mode == 'spectrum' then 'colorScheme']: color_colorScheme,
      [if color_max != null then 'max']: color_max,
      [if color_min != null then 'min']: color_min,
    },
    [if dataFormat != null then 'dataFormat']: dataFormat,
    heatmap: {},
    hideZeroBuckets: hideZeroBuckets,
    highlightCards: highlightCards,
    legend: {
      show: legend_show,
    },
    [if minSpan != null then 'minSpan']: minSpan,
    [if span != null then 'span']: span,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    tooltip: {
      show: tooltip_show,
      showHistogram: tooltip_showHistogram,
    },
    [if tooltipDecimals != null then 'tooltipDecimals']: tooltipDecimals,
    xAxis: {
      show: xAxis_show,
    },
    // Each X bucket setting is passed through on its own. The bucket count used
    // to be gated on xBucketSize being set, which silently discarded a count
    // given without a size. Both keys stay present (null when unused) so the
    // shape of the output is unchanged.
    xBucketNumber: if isTimeseries then xBucketNumber else null,
    xBucketSize: if isTimeseries then xBucketSize else null,
    yAxis: {
      decimals: yAxis_decimals,
      [if isTimeseries then 'logBase']: yAxis_logBase,
      format: yAxis_format,
      [if isTimeseries then 'max']: yAxis_max,
      [if isTimeseries then 'min']: yAxis_min,
      show: yAxis_show,
      splitFactor: yAxis_splitFactor,
    },
    yBucketBound: yBucketBound,
    [if isTimeseries then 'yBucketNumber']: yBucketNumber,
    [if isTimeseries then 'yBucketSize']: yBucketSize,
    [if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
    // Hidden counter of targets added so far; drives the generated refId.
    _nextTarget:: 0,
    addTarget(target):: self {
      // Read the counter outside the nested target object, where `super`
      // would refer to the target instead of the panel.
      local nextTarget = super._nextTarget,
      _nextTarget: nextTarget + 1,
      targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
    },
    addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
  },
}

View file

@ -0,0 +1,104 @@
{
  /**
   * Builds an [InfluxDB target](https://grafana.com/docs/grafana/latest/datasources/influxdb/),
   * either from a raw InfluxQL statement or from the tagged query builder.
   *
   * @name influxdb.target
   *
   * @param query (optional) Raw InfluxQL statement. When given without `rawQuery`, `rawQuery` is emitted as `true`.
   *
   * @param alias (optional) 'Alias By' pattern
   * @param datasource (optional) Datasource
   * @param hide (optional) Disable query on graph
   *
   * @param rawQuery (optional) Enable/disable raw query mode
   *
   * @param policy (default: `'default'`) Tagged query 'From' policy
   * @param measurement (optional) Tagged query 'From' measurement
   * @param group_time (default: `'$__interval'`) 'Group by' time condition (if set to null, the query is not grouped by time and no fill clause is emitted)
   * @param group_tags (optional) 'Group by' tags list
   * @param fill (default: `'none'`) 'Group by' missing values fill mode (works only with 'Group by time()')
   *
   * @param resultFormat (default: `'time_series'`) Format results as 'Time series' or 'Table'
   *
   * @method where(key, operator, value, condition) Adds a tag condition; `condition` defaults to 'AND' and is not emitted on the first condition.
   * @method selectField(value) Starts a new selection on the given field.
   * @method addConverter(type, params) Appends a converter to the last selection.
   *
   * @return Panel target
   */
  target(
    query=null,
    alias=null,
    datasource=null,
    hide=null,
    rawQuery=null,
    policy='default',
    measurement=null,
    group_time='$__interval',
    group_tags=[],
    fill='none',
    resultFormat='time_series',
  ):: {
    // The target a method is invoked on; the methods read its current
    // `tags` / `select` lists through this handle.
    local it = self,
    // One 'tag' group-by clause per requested tag.
    local tagClauses = [{ type: 'tag', params: [name] } for name in group_tags],

    policy: policy,
    resultFormat: resultFormat,
    tags: [],
    select: [],
    groupBy:
      if group_time == null then
        tagClauses
      else
        [{ type: 'time', params: [group_time] }] + tagClauses + [{ type: 'fill', params: [fill] }],
    [if alias != null then 'alias']: alias,
    [if datasource != null then 'datasource']: datasource,
    [if hide != null then 'hide']: hide,
    [if measurement != null then 'measurement']: measurement,
    [if query != null then 'query']: query,
    // An explicit rawQuery wins; otherwise supplying a query implies raw mode.
    [if rawQuery != null || query != null then 'rawQuery']:
      if rawQuery != null then rawQuery else true,

    where(key, operator, value, condition=null):: self {
      /*
       * Adds query tag condition ('Where' section)
       */
      local clause = { key: key, operator: operator, value: value },
      local joiner = if condition == null then 'AND' else condition,
      tags:
        if std.length(it.tags) > 0 then
          it.tags + [clause { condition: joiner }]
        else
          // The first condition has nothing to be joined to.
          [clause],
    },

    selectField(value):: self {
      /*
       * Adds InfluxDB selection ('field(value)' part of 'Select' statement)
       */
      select+: [[{ params: [value], type: 'field' }]],
    },

    addConverter(type, params=[]):: self {
      /*
       * Appends converter (aggregation, selector, etc.) to last added selection
       */
      local count = std.length(it.select),
      local converter = { params: params, type: type },
      select:
        if count == 0 then
          // No selection to attach the converter to.
          []
        else if count == 1 then
          [it.select[0] + [converter]]
        else
          it.select[0:(count - 1)] + [it.select[count - 1] + [converter]],
    },
  },
}

View file

@ -0,0 +1,39 @@
{
  /**
   * Builds a [link](https://grafana.com/docs/grafana/latest/linking/linking-overview/) used to navigate to other dashboards.
   * Every parameter is copied into the returned object under the same name.
   *
   * @name link.dashboards
   *
   * @param title Human-readable label for the link.
   * @param tags Limits the linked dashboards to only the ones with the corresponding tags. Otherwise, Grafana includes links to all other dashboards.
   * @param asDropdown (default: `true`) Whether to use a dropdown (with an optional title). If `false`, displays the dashboard links side by side across the top of dashboard.
   * @param includeVars (default: `false`) Whether to include template variables currently used as query parameters in the link. Any matching templates in the linked dashboard are set to the values from the link
   * @param keepTime (default: `false`) Whether to include the current dashboard time range in the link (e.g. from=now-3h&to=now)
   * @param icon (default: `'external link'`) Icon displayed with the link.
   * @param url (default: `''`) URL of the link
   * @param targetBlank (default: `false`) Whether the link will open in a new window.
   * @param type (default: `'dashboards'`)
   */
  dashboards(
    title,
    tags,
    asDropdown=true,
    includeVars=false,
    keepTime=false,
    icon='external link',
    url='',
    targetBlank=false,
    type='dashboards',
  )::
    // What the link is and where it points.
    local identity = { type: type, title: title, tags: tags, url: url, icon: icon };
    // How the link is presented and what it carries along.
    local behaviour = {
      asDropdown: asDropdown,
      includeVars: includeVars,
      keepTime: keepTime,
      targetBlank: targetBlank,
    };
    identity + behaviour,
}

View file

@ -0,0 +1,56 @@
{
  /**
   * Builds a [log panel](https://grafana.com/docs/grafana/latest/panels/visualizations/logs-panel/).
   * It requires the log panel plugin in grafana, which is built-in.
   *
   * @name logPanel.new
   *
   * @param title (default `''`) The title of the log panel.
   * @param span (default `12`) Width of the panel
   * @param height (optional) Height of the panel. Omitted from the output when null.
   * @param datasource (optional) Datasource
   * @param time_from (optional) Emitted as the panel's `timeFrom`
   * @param time_shift (optional) Emitted as the panel's `timeShift`
   * @param showLabels (default `false`) Whether to show or hide labels
   * @param showTime (default `true`) Whether to show or hide time for each line
   * @param wrapLogMessage (default `true`) Whether to wrap log line to the next line
   * @param sortOrder (default `'Descending'`) sort log by time (can be 'Descending' or 'Ascending' )
   *
   * @method addTarget(target) Adds a target object
   * @method addTargets(targets) Adds an array of targets
   */
  new(
    title='',
    datasource=null,
    time_from=null,
    time_shift=null,
    showLabels=false,
    showTime=true,
    sortOrder='Descending',
    wrapLogMessage=true,
    span=12,
    height=null,
  ):: {
    type: 'logs',
    title: title,
    datasource: datasource,
    span: span,
    [if height != null then 'height']: height,
    timeFrom: time_from,
    timeShift: time_shift,
    options: {
      showLabels: showLabels,
      showTime: showTime,
      sortOrder: sortOrder,
      wrapLogMessage: wrapLogMessage,
    },
    targets: [],

    // Hidden counter of targets added so far; drives the generated refId.
    _nextTarget:: 0,
    addTarget(target):: self {
      // The refId is derived here rather than inside the nested target object,
      // where `super` would refer to the target instead of the panel.
      local position = super._nextTarget,
      local refId = std.char(std.codepoint('A') + position),
      _nextTarget: position + 1,
      targets+: [target { refId: refId }],
    },
    addTargets(targets):: std.foldl(function(panel, item) panel.addTarget(item), targets, self),
  },
}

View file

@ -0,0 +1,22 @@
{
  /**
   * Builds a [Loki target](https://grafana.com/docs/grafana/latest/datasources/loki/)
   *
   * @name loki.target
   *
   * @param expr Emitted unchanged as the target's `expr`.
   * @param hide (optional) Disable query on graph. Omitted from the output when null.
   * @param legendFormat (optional) Defines the legend. Defaults to ''.
   * @param instant (optional) Emitted as the target's `instant`. Omitted from the output when null.
   */
  target(expr, hide=null, legendFormat='', instant=null)::
    local query = { expr: expr, legendFormat: legendFormat };
    local hideFlag = if hide != null then { hide: hide } else {};
    local instantFlag = if instant != null then { instant: instant } else {};
    query + hideFlag + instantFlag,
}

View file

@ -0,0 +1,72 @@
{
  /**
   * Creates a pie chart panel.
   * It requires the [pie chart panel plugin in grafana](https://grafana.com/grafana/plugins/grafana-piechart-panel),
   * which needs to be explicitly installed.
   *
   * @name pieChartPanel.new
   *
   * @param title The title of the pie chart panel.
   * @param description (default `''`) Description of the panel
   * @param span (optional) Width of the panel
   * @param min_span (optional) Min span
   * @param height (optional) Height of the panel. Omitted from the output when null.
   * @param datasource (optional) Datasource
   * @param aliasColors (optional) Define color mappings
   * @param pieType (default `'pie'`) Type of pie chart (one of pie or donut)
   * @param showLegend (default `true`) Show legend
   * @param showLegendPercentage (default `true`) Show percentage values in the legend
   * @param legendType (default `'Right side'`) Type of legend (one of 'Right side', 'Under graph' or 'On graph')
   * @param valueName (default `'current'`) Type of tooltip value
   * @param repeat (optional) Variable used to repeat the pie chart
   * @param repeatDirection (optional) Which direction to repeat the panel, 'h' for horizontal and 'v' for vertical
   * @param maxPerRow (optional) Number of panels to display when repeated. Used in combination with repeat.
   * @return A json that represents a pie chart panel
   *
   * @method addTarget(target) Adds a target object.
   * @method addTargets(targets) Adds an array of targets.
   */
  new(
    title,
    description='',
    span=null,
    min_span=null,
    datasource=null,
    height=null,
    aliasColors={},
    pieType='pie',
    valueName='current',
    showLegend=true,
    showLegendPercentage=true,
    legendType='Right side',
    repeat=null,
    repeatDirection=null,
    maxPerRow=null,
  ):: {
    type: 'grafana-piechart-panel',
    [if description != null then 'description']: description,
    pieType: pieType,
    title: title,
    aliasColors: aliasColors,
    [if span != null then 'span']: span,
    [if min_span != null then 'minSpan']: min_span,
    [if height != null then 'height']: height,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    [if maxPerRow != null then 'maxPerRow']: maxPerRow,
    valueName: valueName,
    datasource: datasource,
    legend: {
      show: showLegend,
      values: true,
      percentage: showLegendPercentage,
    },
    legendType: legendType,
    targets: [
    ],
    // Hidden counter of targets added so far; drives the generated refId.
    _nextTarget:: 0,
    addTarget(target):: self {
      // Read the counter outside the nested target object, where `super`
      // would refer to the target instead of the panel.
      local nextTarget = super._nextTarget,
      _nextTarget: nextTarget + 1,
      targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
    },
    // Bulk variant of addTarget, matching the other panel builders.
    addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
  },
}

View file

@ -0,0 +1,23 @@
{
  /**
   * Builds a pluginlist panel that can be added in a row.
   * It requires the pluginlist panel plugin in grafana, which is built-in.
   *
   * @name pluginlist.new
   *
   * @param title The title of the pluginlist panel.
   * @param description (optional) Description of the panel. Omitted from the output when null.
   * @param limit (optional) Set maximum items in a list. Omitted from the output when null.
   * @return A json that represents a pluginlist panel
   */
  new(title, description=null, limit=null)::
    local panel = { type: 'pluginlist', title: title };
    local limitPart = if limit == null then {} else { limit: limit };
    local descriptionPart = if description == null then {} else { description: description };
    panel + limitPart + descriptionPart,
}

View file

@ -0,0 +1,38 @@
{
  /**
   * Builds a [Prometheus target](https://grafana.com/docs/grafana/latest/datasources/prometheus/)
   * to be added to panels.
   *
   * @name prometheus.target
   *
   * @param expr PromQL query to be exercised against Prometheus. Checkout [Prometheus documentation](https://prometheus.io/docs/prometheus/latest/querying/basics/).
   * @param format (default `'time_series'`) Switch between `'table'`, `'time_series'` or `'heatmap'`. Table will only work in the Table panel. Heatmap is suitable for displaying metrics of the Histogram type on a Heatmap panel. Under the hood, it converts cumulative histograms to regular ones and sorts series by the bucket bound.
   * @param intervalFactor (default `2`)
   * @param legendFormat (default `''`) Controls the name of the time series, using name or pattern. For example `{{hostname}}` is replaced with the label value for the label `hostname`.
   * @param datasource (optional) Name of the Prometheus datasource. Leave by default otherwise.
   * @param interval (optional) Time span used to aggregate or group data points by time. By default Grafana uses an automatic interval calculated based on the width of the graph.
   * @param instant (optional) Perform an "instant" query, to return only the latest value that Prometheus has scraped for the requested time series. Instant queries return results much faster than normal range queries. Use them to look up label sets.
   * @param hide (optional) Set to `true` to hide the target from the panel.
   *
   * @return A Prometheus target to be added to panels.
   */
  target(
    expr,
    format='time_series',
    intervalFactor=2,
    legendFormat='',
    datasource=null,
    interval=null,
    instant=null,
    hide=null,
  )::
    // Fields present on every target.
    {
      expr: expr,
      format: format,
      intervalFactor: intervalFactor,
      legendFormat: legendFormat,
    }
    // Optional fields are merged in only when the caller supplied a value.
    + (if hide != null then { hide: hide } else {})
    + (if datasource != null then { datasource: datasource } else {})
    + (if interval != null then { interval: interval } else {})
    + (if instant != null then { instant: instant } else {}),
}

View file

@ -0,0 +1,47 @@
{
  /**
   * Builds a [row](https://grafana.com/docs/grafana/latest/features/dashboard/dashboards/#rows).
   * Rows are logical dividers within a dashboard and used to group panels together.
   *
   * @name row.new
   *
   * @param title (default `'Dashboard Row'`) The title of the row.
   * @param height (optional) Height of the row. Omitted from the output when null.
   * @param showTitle (default `true` if title is set) Whether to show the row title
   * @param titleSize (default `'h6'`) The size of the title
   * @param collapse (default `false`) The initial state of the row when opening the dashboard. Panels in a collapsed row are not load until the row is expanded.
   * @param repeat (optional) Name of variable that should be used to repeat this row. It is recommended to use the variable in the row title as well.
   *
   * @method addPanels(panels) Appends an array of nested panels
   * @method addPanel(panel,gridPos) Appends a nested panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
   */
  new(
    title='Dashboard Row',
    height=null,
    collapse=false,
    repeat=null,
    showTitle=null,
    titleSize='h6'
  )::
    // A title counts as "set" when it differs from the default placeholder.
    local titleWasSet = title != 'Dashboard Row';
    {
      type: 'row',
      title: title,
      titleSize: titleSize,
      // An explicit showTitle wins; otherwise the title is shown only if it was set.
      showTitle: if showTitle == null then titleWasSet else showTitle,
      // The same flag is emitted under both key names.
      collapse: collapse,
      collapsed: collapse,
      [if height != null then 'height']: height,
      repeat: repeat,
      repeatIteration: null,
      repeatRowId: null,
      panels: [],

      addPanels(panels):: self { panels+: panels },
      addPanel(panel, gridPos={}):: self { panels+: [panel { gridPos: gridPos }] },
    },
}

View file

@ -0,0 +1,181 @@
{
  /**
   * Creates a singlestat panel.
   *
   * @name singlestat.new
   *
   * @param title The title of the singlestat panel.
   * @param format (default `'none'`) Unit
   * @param description (default `''`) Omitted from the output when empty.
   * @param interval (optional)
   * @param height (optional)
   * @param datasource (optional)
   * @param span (optional)
   * @param min_span (optional) Emitted as `minSpan`.
   * @param decimals (optional)
   * @param valueName (default `'avg'`)
   * @param valueFontSize (default `'80%'`)
   * @param prefixFontSize (default `'50%'`)
   * @param postfixFontSize (default `'50%'`)
   * @param mappingType (default `1`) `'value'` is translated to `1` and `'range'` to `2`; any other value is passed through unchanged.
   * @param repeat (optional)
   * @param repeatDirection (optional)
   * @param prefix (default `''`)
   * @param postfix (default `''`)
   * @param colors (default `['#299c46','rgba(237, 129, 40, 0.89)','#d44a3a']`)
   * @param colorBackground (default `false`)
   * @param colorValue (default `false`)
   * @param thresholds (default `''`)
   * @param valueMaps (default `[{value: 'null',op: '=',text: 'N/A'}]`)
   * @param rangeMaps (default `[{from: 'null',to: 'null',text: 'N/A'}]`)
   * @param transparent (optional)
   * @param sparklineFillColor (default `'rgba(31, 118, 189, 0.18)'`)
   * @param sparklineFull (default `false`)
   * @param sparklineLineColor (default `'rgb(31, 120, 193)'`)
   * @param sparklineShow (default `false`)
   * @param gaugeShow (default `false`)
   * @param gaugeMinValue (default `0`)
   * @param gaugeMaxValue (default `100`)
   * @param gaugeThresholdMarkers (default `true`)
   * @param gaugeThresholdLabels (default `false`)
   * @param timeFrom (optional)
   * @param links (default `[]`)
   * @param tableColumn (default `''`)
   * @param maxPerRow (optional)
   * @param maxDataPoints (default `100`)
   *
   * @method addTarget(target) Adds a target object.
   */
  new(
    title,
    format='none',
    description='',
    interval=null,
    height=null,
    datasource=null,
    span=null,
    min_span=null,
    decimals=null,
    valueName='avg',
    valueFontSize='80%',
    prefixFontSize='50%',
    postfixFontSize='50%',
    mappingType=1,
    repeat=null,
    repeatDirection=null,
    prefix='',
    postfix='',
    colors=[
      '#299c46',
      'rgba(237, 129, 40, 0.89)',
      '#d44a3a',
    ],
    colorBackground=false,
    colorValue=false,
    thresholds='',
    valueMaps=[
      {
        value: 'null',
        op: '=',
        text: 'N/A',
      },
    ],
    rangeMaps=[
      {
        from: 'null',
        to: 'null',
        text: 'N/A',
      },
    ],
    transparent=null,
    sparklineFillColor='rgba(31, 118, 189, 0.18)',
    sparklineFull=false,
    sparklineLineColor='rgb(31, 120, 193)',
    sparklineShow=false,
    gaugeShow=false,
    gaugeMinValue=0,
    gaugeMaxValue=100,
    gaugeThresholdMarkers=true,
    gaugeThresholdLabels=false,
    timeFrom=null,
    links=[],
    tableColumn='',
    maxPerRow=null,
    maxDataPoints=100,
  )::
    // The symbolic names 'value' / 'range' are accepted alongside the numeric codes.
    local mappingCode =
      if mappingType == 'value' then 1
      else if mappingType == 'range' then 2
      else mappingType;
    {
      // Identity and data source.
      type: 'singlestat',
      title: title,
      datasource: datasource,
      targets: [],
      links: links,
      interval: interval,
      maxDataPoints: maxDataPoints,
      cacheTimeout: null,
      tableColumn: tableColumn,

      // Fields that only appear in the output when a value was supplied.
      [if description != '' then 'description']: description,
      [if height != null then 'height']: height,
      [if span != null then 'span']: span,
      [if min_span != null then 'minSpan']: min_span,
      [if transparent != null then 'transparent']: transparent,
      [if repeat != null then 'repeat']: repeat,
      [if repeatDirection != null then 'repeatDirection']: repeatDirection,
      [if maxPerRow != null then 'maxPerRow']: maxPerRow,
      [if timeFrom != null then 'timeFrom']: timeFrom,
      [if decimals != null then 'decimals']: decimals,

      // Value rendering.
      format: format,
      valueName: valueName,
      prefix: prefix,
      postfix: postfix,
      prefixFontSize: prefixFontSize,
      valueFontSize: valueFontSize,
      postfixFontSize: postfixFontSize,
      nullText: null,
      nullPointMode: 'connected',

      // Value / range mappings.
      mappingType: mappingCode,
      mappingTypes: [
        { name: 'value to text', value: 1 },
        { name: 'range to text', value: 2 },
      ],
      valueMaps: valueMaps,
      rangeMaps: rangeMaps,

      // Colouring.
      thresholds: thresholds,
      colors: colors,
      colorBackground: colorBackground,
      colorValue: colorValue,

      gauge: {
        show: gaugeShow,
        minValue: gaugeMinValue,
        maxValue: gaugeMaxValue,
        thresholdMarkers: gaugeThresholdMarkers,
        thresholdLabels: gaugeThresholdLabels,
      },
      sparkline: {
        show: sparklineShow,
        full: sparklineFull,
        fillColor: sparklineFillColor,
        lineColor: sparklineLineColor,
      },

      // Hidden counter used to hand out refIds 'A', 'B', ... in insertion order.
      _nextTarget:: 0,
      addTarget(target):: self {
        local index = super._nextTarget,
        _nextTarget: index + 1,
        targets+: [target { refId: std.char(std.codepoint('A') + index) }],
      },
    },
}

View file

@ -0,0 +1,23 @@
{
  /**
   * Builds a target (query) object for an SQL datasource.
   *
   * @name sql.target
   *
   * @param rawSql The SQL query
   * @param datasource (optional) Omitted from the output when null.
   * @param format (default `'time_series'`)
   * @param alias (optional) Omitted from the output when null.
   */
  target(
    rawSql,
    datasource=null,
    format='time_series',
    alias=null,
  )::
    // The query and its format are always present.
    {
      rawSql: rawSql,
      format: format,
    }
    // Optional keys are only added when a value was supplied.
    + (if datasource == null then {} else { datasource: datasource })
    + (if alias == null then {} else { alias: alias }),
}

View file

@ -0,0 +1,222 @@
{
  /**
   * Creates a [stat panel](https://grafana.com/docs/grafana/latest/panels/visualizations/stat-panel/).
   *
   * @name statPanel.new
   *
   * @param title Panel title.
   * @param description (optional) Panel description.
   * @param transparent (default `false`) Whether to display the panel without a background.
   * @param datasource (optional) Panel datasource.
   * @param allValues (default `false`) Show all values instead of reducing to one.
   * @param valueLimit (optional) Limit of values in all values mode.
   * @param reducerFunction (default `'mean'`) Function to use to reduce values to when using single value.
   * @param fields (default `''`) Fields that should be included in the panel.
   * @param orientation (default `'auto'`) Stacking direction in case of multiple series or fields.
   * @param colorMode (default `'value'`) 'value' or 'background'.
   * @param graphMode (default `'area'`) 'none' or 'area' to enable sparkline mode.
   * @param textMode (default `'auto'`) Control if name and value is displayed or just name. Only emitted for plugin versions 7 and later.
   * @param justifyMode (default `'auto'`) 'auto' or 'center'.
   * @param unit (default `'none'`) Panel unit field option.
   * @param min (optional) Leave empty to calculate based on all values.
   * @param max (optional) Leave empty to calculate based on all values.
   * @param decimals (optional) Number of decimal places to show.
   * @param displayName (optional) Change the field or series name.
   * @param noValue (optional) What to show when there is no value.
   * @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
   * @param timeFrom (optional) Override the relative time range.
   * @param repeat (optional) Name of variable that should be used to repeat this panel.
   * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
   * @param maxPerRow (optional) Maximum panels per row in repeat mode.
   * @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for, as a string. A major version of 7 or higher (including `'10'` and later) selects the `fieldConfig`/`reduceOptions` layout; lower versions select the older `fieldOptions` layout. This has been tested with the default, '7', and '6.7'.
   *
   * @method addTarget(target) Adds a target object.
   * @method addTargets(targets) Adds an array of targets.
   * @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/). Argument format: `{ title: 'Link Title', url: 'https://...', targetBlank: true }`.
   * @method addLinks(links) Adds an array of links.
   * @method addThreshold(step) Adds a [threshold](https://grafana.com/docs/grafana/latest/panels/thresholds/) step. Argument format: `{ color: 'green', value: 0 }`.
   * @method addThresholds(steps) Adds an array of threshold steps.
   * @method addMapping(mapping) Adds a value mapping.
   * @method addMappings(mappings) Adds an array of value mappings.
   * @method addDataLink(link) Adds a data link.
   * @method addDataLinks(links) Adds an array of data links.
   * @method addOverride(matcher, properties) Adds a field override (plugin version 7 and later only).
   * @method addOverrides(overrides) Adds an array of `{ matcher, properties }` overrides (plugin version 7 and later only).
   */
  new(
    title,
    description=null,
    transparent=false,
    datasource=null,
    allValues=false,
    valueLimit=null,
    reducerFunction='mean',
    fields='',
    orientation='auto',
    colorMode='value',
    graphMode='area',
    textMode='auto',
    justifyMode='auto',
    unit='none',
    min=null,
    max=null,
    decimals=null,
    displayName=null,
    noValue=null,
    thresholdsMode='absolute',
    timeFrom=null,
    repeat=null,
    repeatDirection='h',
    maxPerRow=null,
    pluginVersion='7',
  )::
    // Pick the schema from the *numeric* major version. A plain string
    // comparison against '7' sorts '10' before '7' and would wrongly select the
    // legacy layout for Grafana 10 and later.
    local majorPart = std.split(pluginVersion, '.')[0];
    local nonDigits = [c for c in std.stringChars(majorPart) if c < '0' || c > '9'];
    local useFieldConfig =
      if std.length(majorPart) > 0 && std.length(nonDigits) == 0 then
        std.parseInt(majorPart) >= 7
      else
        // Non-numeric version strings keep the historical string comparison.
        pluginVersion >= '7';
    {
      type: 'stat',
      title: title,
      [if description != null then 'description']: description,
      transparent: transparent,
      datasource: datasource,
      targets: [],
      links: [],
      [if repeat != null then 'repeat']: repeat,
      // Repeat settings are only meaningful, and only emitted, when repeating.
      [if repeat != null then 'repeatDirection']: repeatDirection,
      [if timeFrom != null then 'timeFrom']: timeFrom,
      [if repeat != null then 'maxPerRow']: maxPerRow,
      // targets: hidden counter hands out refIds 'A', 'B', ... in insertion order.
      _nextTarget:: 0,
      addTarget(target):: self {
        local nextTarget = super._nextTarget,
        _nextTarget: nextTarget + 1,
        targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
      },
      addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
      // links
      addLink(link):: self {
        links+: [link],
      },
      addLinks(links):: std.foldl(function(p, l) p.addLink(l), links, self),
      pluginVersion: pluginVersion,
    } + (
      if useFieldConfig then {
        // Layout for plugin version 7+: reduce options under `options`,
        // field defaults under `fieldConfig`.
        options: {
          reduceOptions: {
            values: allValues,
            [if allValues && valueLimit != null then 'limit']: valueLimit,
            calcs: [
              reducerFunction,
            ],
            fields: fields,
          },
          orientation: orientation,
          colorMode: colorMode,
          graphMode: graphMode,
          justifyMode: justifyMode,
          textMode: textMode,
        },
        fieldConfig: {
          defaults: {
            unit: unit,
            [if min != null then 'min']: min,
            [if max != null then 'max']: max,
            [if decimals != null then 'decimals']: decimals,
            [if displayName != null then 'displayName']: displayName,
            [if noValue != null then 'noValue']: noValue,
            thresholds: {
              mode: thresholdsMode,
              steps: [],
            },
            mappings: [],
            links: [],
          },
        },
        // thresholds
        addThreshold(step):: self {
          fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } },
        },
        // mappings: each mapping gets a sequential id starting at 0.
        _nextMapping:: 0,
        addMapping(mapping):: self {
          local nextMapping = super._nextMapping,
          _nextMapping: nextMapping + 1,
          fieldConfig+: { defaults+: { mappings+: [mapping { id: nextMapping }] } },
        },
        // data links
        addDataLink(link):: self {
          fieldConfig+: { defaults+: { links+: [link] } },
        },
        // Overrides
        addOverride(
          matcher=null,
          properties=null,
        ):: self {
          fieldConfig+: {
            overrides+: [
              {
                [if matcher != null then 'matcher']: matcher,
                [if properties != null then 'properties']: properties,
              },
            ],
          },
        },
        addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
      } else {
        // Legacy layout (before version 7): everything lives under
        // `options.fieldOptions`; there is no textMode and no overrides.
        options: {
          fieldOptions: {
            values: allValues,
            [if allValues && valueLimit != null then 'limit']: valueLimit,
            calcs: [
              reducerFunction,
            ],
            fields: fields,
            defaults: {
              unit: unit,
              [if min != null then 'min']: min,
              [if max != null then 'max']: max,
              [if decimals != null then 'decimals']: decimals,
              [if displayName != null then 'displayName']: displayName,
              [if noValue != null then 'noValue']: noValue,
              thresholds: {
                mode: thresholdsMode,
                steps: [],
              },
              mappings: [],
              links: [],
            },
          },
          orientation: orientation,
          colorMode: colorMode,
          graphMode: graphMode,
          justifyMode: justifyMode,
        },
        // thresholds
        addThreshold(step):: self {
          options+: { fieldOptions+: { defaults+: { thresholds+: { steps+: [step] } } } },
        },
        // mappings: each mapping gets a sequential id starting at 0.
        _nextMapping:: 0,
        addMapping(mapping):: self {
          local nextMapping = super._nextMapping,
          _nextMapping: nextMapping + 1,
          options+: { fieldOptions+: { defaults+: { mappings+: [mapping { id: nextMapping }] } } },
        },
        // data links
        addDataLink(link):: self {
          options+: { fieldOptions+: { defaults+: { links+: [link] } } },
        },
      }
    ) + {
      // Bulk helpers shared by both layouts; each folds over the single-item method.
      addThresholds(steps):: std.foldl(function(p, s) p.addThreshold(s), steps, self),
      addMappings(mappings):: std.foldl(function(p, m) p.addMapping(m), mappings, self),
      addDataLinks(links):: std.foldl(function(p, l) p.addDataLink(l), links, self),
    },
}

View file

@ -0,0 +1,91 @@
{
  /**
   * Creates a [table panel](https://grafana.com/docs/grafana/latest/panels/visualizations/table-panel/) that can be added in a row.
   * It requires the table panel plugin in grafana, which is built-in.
   *
   * @name table.new
   *
   * @param title The title of the table panel.
   * @param description (optional) Description of the panel
   * @param span (optional) Width of the panel
   * @param min_span (optional) Min span, emitted as `minSpan`
   * @param height (optional) Height of the panel
   * @param datasource (optional) Datasource
   * @param styles (optional) Array of styles for the panel
   * @param transform (optional) Allow table manipulation to present data as desired
   * @param transparent (default: 'false') Whether to display the panel without a background
   * @param columns (optional) Array of columns for the panel
   * @param sort (optional) Sorting instruction for the panel
   * @param time_from (optional) Emitted as `timeFrom`
   * @param time_shift (optional) Emitted as `timeShift`
   * @param links (optional) Array of links for the panel.
   * @return A json that represents a table panel
   *
   * @method addTarget(target) Adds a target object
   * @method addTargets(targets) Adds an array of targets
   * @method addColumn(field, style) Adds a column
   * @method hideColumn(field) Hides a column
   * @method addLink(link) Adds a link
   * @method addTransformation(transformation) Adds a transformation object
   * @method addTransformations(transformations) Adds an array of transformations
   */
  new(
    title,
    description=null,
    span=null,
    min_span=null,
    height=null,
    datasource=null,
    styles=[],
    transform=null,
    transparent=false,
    columns=[],
    sort=null,
    time_from=null,
    time_shift=null,
    links=[],
  ):: {
    // Fields that are always emitted (possibly as null).
    type: 'table',
    title: title,
    datasource: datasource,
    targets: [],
    styles: styles,
    columns: columns,
    links: links,
    timeFrom: time_from,
    timeShift: time_shift,

    // Fields that are emitted only when set.
    [if description != null then 'description']: description,
    [if span != null then 'span']: span,
    [if min_span != null then 'minSpan']: min_span,
    [if height != null then 'height']: height,
    [if sort != null then 'sort']: sort,
    [if transform != null then 'transform']: transform,
    [if transparent == true then 'transparent']: transparent,

    // Hidden counter used to hand out refIds 'A', 'B', ... in insertion order.
    _nextTarget:: 0,
    addTarget(target):: self {
      local index = super._nextTarget,
      _nextTarget: index + 1,
      targets+: [target { refId: std.char(std.codepoint('A') + index) }],
    },
    addTargets(targets):: std.foldl(function(panel, tgt) panel.addTarget(tgt), targets, self),

    // A column is a style matched on the field name plus a column entry.
    addColumn(field, style):: self {
      styles+: [style { pattern: field }],
      columns+: [{ text: field, value: field }],
    },
    // Hiding is done with a style of type 'hidden' matched on the field name.
    hideColumn(field):: self {
      styles+: [{
        alias: field,
        pattern: field,
        type: 'hidden',
      }],
    },
    addLink(link):: self {
      links+: [link],
    },
    // `transformations` does not exist until the first transformation is added.
    addTransformation(transformation):: self {
      transformations+: [transformation],
    },
    addTransformations(transformations)::
      std.foldl(function(panel, tr) panel.addTransformation(tr), transformations, self),
  },
}

View file

@ -0,0 +1,289 @@
{
/**
* Creates a [template](https://grafana.com/docs/grafana/latest/variables/#templates) that can be added to a dashboard.
*
* @name template.new
*
* @param name Name of variable.
* @param datasource Template [datasource](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/)
* @param query [Query expression](https://grafana.com/docs/grafana/latest/variables/variable-types/add-query-variable/) for the datasource.
* @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
* @param allValues (optional) Formatting for [multi-value variables](https://grafana.com/docs/grafana/latest/variables/formatting-multi-value-variables/#formatting-multi-value-variables)
* @param tagValuesQuery (default `''`) Group values into [selectable tags](https://grafana.com/docs/grafana/latest/variables/variable-value-tags/)
* @param current (default `null`) Can be `null`, `'all'` for all, or any other custom text value.
* @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
* @param regex (default `''`) Regex expression to filter or capture specific parts of the names returned by your data source query. To see examples, refer to [Filter variables with regex](https://grafana.com/docs/grafana/latest/variables/filter-variables-with-regex/).
* @param refresh (default `'never'`) `'never'`: variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
* @param includeAll (default `false`) Whether all value option is available or not.
* @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
* @param sort (default `0`) `0`: Without Sort, `1`: Alphabetical (asc), `2`: Alphabetical (desc), `3`: Numerical (asc), `4`: Numerical (desc).
*
* @return A [template](https://grafana.com/docs/grafana/latest/variables/#templates)
*/
new(
  name,
  datasource,
  query,
  label=null,
  allValues=null,
  tagValuesQuery='',
  current=null,
  hide='',
  regex='',
  refresh='never',
  includeAll=false,
  multi=false,
  sort=0,
):: {
  // Identity of the variable.
  type: 'query',
  name: name,
  label: label,
  datasource: datasource,

  // What is queried and how the result list is post-processed.
  query: query,
  regex: regex,
  sort: sort,
  options: [],

  // The friendly argument values are translated by the sibling helpers.
  current: $.current(current),
  hide: $.hide(hide),
  refresh: $.refresh(refresh),

  // Multi-value / "All" handling.
  multi: multi,
  includeAll: includeAll,
  allValue: allValues,

  // Tag support is always emitted switched off.
  useTags: false,
  tags: [],
  tagsQuery: '',
  tagValuesQuery: tagValuesQuery,
},
/**
* Use an [interval variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-interval-variable/) to represent time spans such as '1m', '1h', '1d'. You can think of them as a dashboard-wide "group by time" command. Interval variables change how the data is grouped in the visualization. You can also use the Auto Option to return a set number of data points per time span.
* You can use an interval variable as a parameter to group by time (for InfluxDB), date histogram interval (for Elasticsearch), or as a summarize function parameter (for Graphite).
*
* @name template.interval
*
* @param name Variable name
* @param query Comma separated values without spacing of intervals available for selection. Add `'auto'` in the query to turn on the Auto Option. Ex: `'auto,5m,10m,20m'`.
* @param current Currently selected interval. Must be one of the values in the query. `'auto'` is allowed if defined in the query.
* @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
* @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
* @param auto_count (default `300`) Valid only if `'auto'` is defined in query. Number of times the current time range will be divided to calculate the value, similar to the Max data points query option. For example, if the current visible time range is 30 minutes and the step count is 30, then the auto interval groups the data into 30 one-minute increments. Grafana's own default is 30 steps; this function defaults to 300.
* @param auto_min (default `'10s'`) Valid only if `'auto'` is defined in query. The minimum threshold below which the step count intervals will not divide the time. To continue the 30 minute example, if the minimum interval is set to `'2m'`, then Grafana would group the data into 15 two-minute increments.
*
* @return A new interval variable for templating.
*/
interval(
  name,
  query,
  current,
  hide='',
  label=null,
  auto_count=300,
  auto_min='10s',
)::
  // The query is a comma separated list in which 'auto' acts as a switch
  // rather than as a selectable value.
  local choices = std.split(query, ',');
  {
    type: 'interval',
    name: name,
    label: label,
    hide: $.hide(hide),
    current: $.current(current),
    // 'auto' is removed from the emitted query and reported via the flag below.
    query: std.join(',', std.filter($.filterAuto, choices)),
    auto: std.count(choices, 'auto') > 0,
    auto_count: auto_count,
    auto_min: auto_min,
    refresh: 2,
  },
// Translates the friendly `hide` argument into a numeric code:
// '' -> 0, 'label' -> 1, anything else -> 2.
hide(hide)::
  if hide == '' then
    0
  else if hide == 'label' then
    1
  else
    2,
// Builds the `current` selection object. A null selection yields an empty
// object; 'auto' and 'all' are translated to the `$__auto_interval` and
// `$__all` values while the text keeps the original word.
current(current)::
  if current == null then
    {}
  else
    {
      text: current,
      value:
        if current == 'auto' then '$__auto_interval'
        else if current == 'all' then '$__all'
        else current,
    },
/**
* Data [source variables](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/)
* allow you to quickly change the data source for an entire dashboard.
* They are useful if you have multiple instances of a data source, perhaps in different environments.
*
* @name template.datasource
*
* @param name Data source variable name. Ex: `'PROMETHEUS_DS'`.
* @param query Type of data source. Ex: `'prometheus'`.
* @param current Ex: `'Prometheus'`.
* @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
* @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
* @param regex (default `''`) Regex filter for which data source instances to choose from in the variable value drop-down list. Leave this field empty to display all instances.
* @param refresh (default `'load'`) `'never'`: Variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
*
* @return A [data source variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/).
*/
datasource(
  name,
  query,
  current,
  hide='',
  label=null,
  regex='',
  refresh='load',
):: {
  type: 'datasource',
  name: name,
  label: label,
  // `query` holds the data source type and `regex` narrows the choices.
  query: query,
  regex: regex,
  options: [],
  // The friendly argument values are translated by the sibling helpers.
  current: $.current(current),
  hide: $.hide(hide),
  refresh: $.refresh(refresh),
},
// Translates the friendly `refresh` argument into a numeric code:
// 'never' -> 0, 'load' -> 1, 'time' -> 2. Any other value (for example an
// already numeric code) is returned unchanged.
refresh(refresh)::
  if refresh == 'never' then 0
  else if refresh == 'load' then 1
  else if refresh == 'time' then 2
  else refresh,
// Predicate used to drop the 'auto' entry from an interval query list.
filterAuto(str):: !(str == 'auto'),
/**
* Use a [custom variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-custom-variable/)
* for values that do not change.
* This might be numbers, strings, or even other variables.
*
* @name template.custom
*
* @param name Variable name
* @param query Comma separated without spacing list of selectable values.
* @param current Selected value
* @param refresh (default `'never'`) `'never'`: Variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
* @param label (default `''`) Display name of the variable dropdown. If you don't enter a display name, then the dropdown label will be the variable name.
* @param valuelabels (default `{}`) Display names for values defined in query. For example, if `query='new,old'`, then you may display them as follows `valuelabels={new: 'nouveau', old: 'ancien'}`.
* @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
* @param allValues (optional) Formatting for [multi-value variables](https://grafana.com/docs/grafana/latest/variables/formatting-multi-value-variables/#formatting-multi-value-variables)
* @param includeAll (default `false`) Whether all value option is available or not.
* @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
*
* @return A custom variable.
*/
custom(
  name,
  query,
  current,
  refresh='never',
  label='',
  valuelabels={},
  multi=false,
  allValues=null,
  includeAll=false,
  hide='',
):: {
  // `self` is rebound inside the nested objects below, and `$` is the
  // outermost object of the file rather than this variable, so keep a handle
  // on this object for the nested lookups.
  local thisVar = self,

  type: 'custom',
  name: name,
  label: label,
  query: query,
  multi: multi,
  includeAll: includeAll,
  allValue: allValues,
  hide: $.hide(hide),
  refresh: $.refresh(refresh),

  current: {
    // Both 'all' and 'All' are accepted for consistency.
    value:
      if includeAll && (current == 'All' || current == 'all') then
        (if multi then ['$__all'] else '$__all')
      else
        current,
    // An array selection is shown as its labels joined with ' + '.
    text:
      if std.isArray(current) then
        std.join(' + ', std.map(thisVar.valuelabel, current))
      else
        thisVar.valuelabel(current),
    [if multi then 'selected']: true,
  },
  options: std.map(self.option, self.query_array(query)),

  // Display label for a value; falls back to the value itself.
  valuelabel(value)::
    if value in valuelabels then valuelabels[value] else value,

  // One entry of the options list.
  option(option):: {
    text: thisVar.valuelabel(option),
    value: if includeAll && option == 'All' then '$__all' else option,
    // This field exists only when `multi` is true, so the value does not
    // need to test `multi` again.
    [if multi then 'selected']:
      if std.isArray(current) then
        std.member(current, option)
      else
        current == option,
  },

  // Splits the comma separated query, with 'All' prepended when requested.
  query_array(query)::
    local withAll = if includeAll then 'All,' + query else query;
    std.split(withAll, ','),
},
/**
* [Text box variables](https://grafana.com/docs/grafana/latest/variables/variable-types/add-text-box-variable/)
* display a free text input field with an optional default value.
* This is the most flexible variable, because you can enter any value.
* Use this type of variable if you have metrics with high cardinality or if you want to
* update multiple panels in a dashboard at the same time.
*
* @name template.text
*
* @param name Variable name.
* @param label (default `''`) Display name of the variable dropdown. If you don't enter a display name, then the dropdown label will be the variable name.
*
* @return A text box variable.
*/
text(
  name,
  label=''
):: {
  type: 'textbox',
  name: name,
  label: label,
  // The text box starts out empty: no query and an empty, unselected value.
  query: '',
  current: {
    text: '',
    value: '',
    selected: false,
  },
},
/**
* [Ad hoc filters](https://grafana.com/docs/grafana/latest/variables/variable-types/add-ad-hoc-filters/)
* allow you to add key/value filters that are automatically added to all metric queries
* that use the specified data source. Unlike other variables, you do not use ad hoc filters in queries.
* Instead, you use ad hoc filters to write filters for existing queries.
* Note: Ad hoc filter variables only work with InfluxDB, Prometheus, and Elasticsearch data sources.
*
* @name template.adhoc
*
* @param name Variable name.
* @param datasource Target data source
* @param label (optional) Display name of the variable dropdown. If you don't enter a display name, then the dropdown label will be the variable name.
* @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
*
* @return An ad hoc filter
*/
adhoc(
  name,
  datasource,
  label=null,
  hide='',
):: {
  type: 'adhoc',
  name: name,
  label: label,
  datasource: datasource,
  // The friendly `hide` value is translated by the sibling `hide` helper.
  hide: $.hide(hide),
},
}

View file

@ -0,0 +1,43 @@
{
  /**
   * Creates a [text panel](https://grafana.com/docs/grafana/latest/panels/visualizations/text-panel/).
   *
   * @name text.new
   *
   * @param title (default `''`) Panel title.
   * @param span (optional)
   * @param mode (default `'markdown'`) Rendering of the content: 'markdown','html', ...
   * @param content (default `''`)
   * @param transparent (optional) Whether to display the panel without a background.
   * @param description (optional) Panel description.
   * @param datasource (optional) Panel datasource.
   * @param repeat (optional) Name of variable that should be used to repeat this panel.
   * @param repeatDirection (optional) 'h' for horizontal or 'v' for vertical. Only emitted when `repeat` is set.
   * @param repeatMaxPerRow (optional) Maximum panels per row in repeat mode, emitted as `maxPerRow`. Only emitted when `repeat` is set.
   */
  new(
    title='',
    span=null,
    mode='markdown',
    content='',
    transparent=null,
    description=null,
    datasource=null,
    repeat=null,
    repeatDirection=null,
    repeatMaxPerRow=null,
  )::
    // Fields that are always present.
    {
      type: 'text',
      title: title,
      mode: mode,
      content: content,
      datasource: datasource,
      [if span != null then 'span']: span,
      [if transparent != null then 'transparent']: transparent,
      [if description != null then 'description']: description,
    }
    // All repeat settings depend on `repeat` alone, so they are added together.
    + (
      if repeat == null then {} else {
        repeat: repeat,
        repeatDirection: repeatDirection,
        maxPerRow: repeatMaxPerRow,
      }
    ),
}

View file

@ -0,0 +1,40 @@
{
  /**
   * Builds the timepicker settings object of a dashboard.
   *
   * @name timepicker.new
   *
   * @param refresh_intervals (default: `['5s','10s','30s','1m','5m','15m','30m','1h','2h','1d']`) Array of time durations
   * @param time_options (default: `['5m','15m','1h','6h','12h','24h','2d','7d','30d']`) Array of time durations
   * @param nowDelay (optional) Emitted as `nowDelay` only when not null.
   */
  new(
    refresh_intervals=['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options=['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
    nowDelay=null,
  )::
    {
      refresh_intervals: refresh_intervals,
      time_options: time_options,
    }
    + (if nowDelay == null then {} else { nowDelay: nowDelay }),
}

View file

@ -0,0 +1,12 @@
{
  /**
   * Builds a transformation object from an id and its options.
   *
   * @name transformation.new
   *
   * @param id (default `''`) Value of the `id` field.
   * @param options (default `{}`) Value of the `options` field.
   */
  new(id='', options={}):: {
    options: options,
    id: id,
  },
}

View file

@ -0,0 +1,500 @@
{
dashboard(title, uid='', datasource='default', datasource_regex=''):: {
  // ---- Fields that are materialised in the output ----
  title: title,
  uid: uid,
  version: 0,
  schemaVersion: 14,
  gnetId: null,
  style: 'dark',
  timezone: 'utc',
  editable: true,
  hideControls: false,
  graphTooltip: 0,
  refresh: '10s',
  tags: [],
  links: [],
  rows: [],
  annotations: {
    list: [],
  },
  time: {
    from: 'now-1h',
    to: 'now',
  },
  timepicker: {
    refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  },
  // Every dashboard starts with a `datasource` variable; the templates added
  // by the helpers below reference it as '$datasource'.
  templating: {
    list: [
      {
        type: 'datasource',
        name: 'datasource',
        label: 'Data Source',
        query: 'prometheus',
        regex: datasource_regex,
        current: {
          text: datasource,
          value: datasource,
        },
        hide: 0,
        options: [],
        refresh: 1,
      },
    ],
  },

  // ---- Helpers and state that are not materialised ----
  _nextPanel:: 1,

  // Appends a row and numbers its panels consecutively, continuing from the
  // ids already handed out to earlier rows.
  addRow(row):: self {
    local count = std.length(row.panels),
    local firstId = super._nextPanel,
    local numbered = std.makeArray(count, function(idx)
      row.panels[idx] { id: firstId + idx }),
    _nextPanel: firstId + count,
    rows+: [row { panels: numbered }],
  },

  // Adds a single-value query variable over label_values(metric, label).
  addTemplate(name, metric_name, label_name, hide=0, allValue=null, includeAll=false):: self {
    templating+: {
      list+: [{
        type: 'query',
        name: name,
        label: name,
        datasource: '$datasource',
        query: 'label_values(%s, %s)' % [metric_name, label_name],
        regex: '',
        current: {
          text: 'prod',
          value: 'prod',
        },
        hide: hide,
        multi: false,
        includeAll: includeAll,
        allValue: allValue,
        options: [],
        refresh: 1,
        sort: 2,
        useTags: false,
        tags: [],
        tagsQuery: '',
        tagValuesQuery: '',
      }],
    },
  },

  // Adds a multi-value query variable with "All" selected.
  addMultiTemplate(name, metric_name, label_name, hide=0, allValue='.+'):: self {
    templating+: {
      list+: [{
        type: 'query',
        name: name,
        label: name,
        datasource: '$datasource',
        query: 'label_values(%s, %s)' % [metric_name, label_name],
        regex: '',
        current: {
          selected: true,
          text: 'All',
          value: '$__all',
        },
        hide: hide,
        multi: true,
        includeAll: true,
        allValue: allValue,
        options: [],
        refresh: 1,
        sort: 2,
        useTags: false,
        tags: [],
        tagsQuery: '',
        tagValuesQuery: '',
      }],
    },
  },

  // Adds a dashboard link of type 'link' that opens in a new tab.
  dashboardLinkUrl(title, url):: self {
    links+: [
      {
        type: 'link',
        title: title,
        url: url,
        tooltip: '',
        icon: 'external link',
        tags: [],
        asDropdown: false,
        includeVars: true,
        keepTime: true,
        targetBlank: true,
      },
    ],
  },
},
row(title):: {
  title: title,
  titleSize: 'h6',
  showTitle: true,
  height: '250px',
  collapse: false,
  repeat: null,
  repeatIteration: null,
  repeatRowId: null,

  // Panels are collected in a hidden field and laid out when `panels` is read.
  _panels:: [],
  addPanel(panel):: self {
    _panels+: [panel],
  },
  panels:
    // The 12 grid columns are shared evenly, rounding down. The width is only
    // evaluated per panel, so an empty row never divides by zero.
    local count = std.length(self._panels);
    local width = std.floor(12 / count);
    [
      panel { span: width }
      for panel in self._panels
    ],
},
// "graph" type, now deprecated.
// Returns a line-graph panel titled `title` with no targets, reading from the
// `$datasource` variable, with a 'short' left y-axis taken from $.yaxes.
panel(title)::
  // Every legend column is off; only the legend itself is shown.
  local legend = {
    show: true,
    values: false,
    min: false,
    max: false,
    avg: false,
    current: false,
    total: false,
  };
  local tooltip = {
    shared: true,
    sort: 2,
    value_type: 'individual',
  };
  local xaxis = {
    show: true,
    mode: 'time',
    name: null,
    buckets: null,
    values: [],
  };
  {
    // Identity and data source.
    type: 'graph',
    title: title,
    datasource: '$datasource',
    targets: [],
    links: [],
    span: 6,

    // Drawing style.
    renderer: 'flot',
    lines: true,
    linewidth: 1,
    fill: 1,
    bars: false,
    points: false,
    pointradius: 5,
    dashes: false,
    dashLength: 10,
    spaceLength: 10,
    stack: false,
    steppedLine: false,
    percentage: false,
    nullPointMode: 'null as zero',
    aliasColors: {},
    seriesOverrides: [],
    thresholds: [],

    // Time overrides (none).
    timeFrom: null,
    timeShift: null,

    // Legend, tooltip and axes.
    legend: legend,
    tooltip: tooltip,
    xaxis: xaxis,
    yaxes: $.yaxes('short'),
  },
// "timeseries" panel, introduced with Grafana 7.4 and made standard in 8.0.
// Returns a time-series panel titled `title` with no targets, reading from the
// `$datasource` variable and using 's' as the default unit.
timeseriesPanel(title)::
  local lineStyle = {
    drawStyle: 'line',
    lineWidth: 1,
    fillOpacity: 1,
    showPoints: 'never',
    pointSize: 5,
    spanNulls: false,
    stacking: {
      group: 'A',
      mode: 'none',
    },
  };
  local fieldDefaults = {
    custom: lineStyle,
    thresholds: {
      mode: 'absolute',
      steps: [],
    },
    unit: 's',
  };
  {
    type: 'timeseries',
    title: title,
    datasource: '$datasource',
    fieldConfig: {
      defaults: fieldDefaults,
      overrides: [],
    },
    options: {
      legend: {
        showLegend: true,
      },
      tooltip: {
        mode: 'single',
        sort: 'none',
      },
    },
    links: [],
    targets: [],
  },
// Mixin that appends one target per query to a panel's `targets`.
//
//   queries    - a single query string or an array of them.
//   legends    - a single legend format string or an array of them; must have
//                as many entries as `queries`.
//   legendLink - emitted unchanged as `legendLink` on every target.
//
// Raises an error when the two lists differ in length.
queryPanel(queries, legends, legendLink=null)::
  // Accept a bare string as shorthand for a one-element list.
  local listOf(value) = if std.type(value) == 'string' then [value] else value;
  local exprs = listOf(queries);
  local names = listOf(legends);
  local built =
    if std.length(names) != std.length(exprs)
    then error 'length of queries is not equal to length of legends'
    else [
      {
        legendLink: legendLink,
        expr: exprs[i],
        format: 'time_series',
        intervalFactor: 2,
        legendFormat: names[i],
        step: 10,
      }
      for i in std.range(0, std.length(exprs) - 1)
    ];
  {
    targets+: built,
  },
// Mixin describing a 'singlestat' panel driven by one instant query.
//
//   query  - expression evaluated for the single value (refId 'A').
//   format - emitted as the panel's `format` field (default 'percentunit').
//
// Thresholds are fixed at '70,80'.
statPanel(query, format='percentunit')::
  local instantQuery = {
    refId: 'A',
    expr: query,
    instant: true,
    format: 'time_series',
    intervalFactor: 2,
  };
  {
    type: 'singlestat',
    format: format,
    thresholds: '70,80',
    targets: [instantQuery],
  },
// Mixin describing a 'table' panel fed by instant queries.
//
//   queries     - a single query string or an array of them; each becomes a
//                 target with refId 'A', 'B', ... in order.
//   labelStyles - object mapping a column name to its style. A string value is
//                 used as the column alias (type 'string'); an object value
//                 must carry `alias` and may carry `decimals`, `type`, `unit`,
//                 `link`, `linkTargetBlank` and `linkTooltip`.
//
// The hidden `_styles` field holds the per-column styles (the 'Time' column is
// hidden by default) and may be extended by callers; `styles` is derived from
// it, followed by a catch-all '/.*/' entry.
tablePanel(queries, labelStyles):: {
  local exprs = if std.type(queries) == 'string' then [queries] else queries,

  // Fields every generated style shares.
  local commonStyle = {
    colorMode: null,
    colors: [],
    dateFormat: 'YYYY-MM-DD HH:mm:ss',
    thresholds: [],
  },

  // obj[key] when the (visible) field exists, otherwise `fallback`.
  local pick(obj, key, fallback) = if std.objectHas(obj, key) then obj[key] else fallback,

  local toStyle(spec) = commonStyle + (
    if std.type(spec) == 'string'
    then {
      alias: spec,
      decimals: 2,
      type: 'string',
      unit: 'short',
    }
    else {
      alias: spec.alias,
      decimals: pick(spec, 'decimals', 2),
      type: pick(spec, 'type', 'number'),
      unit: pick(spec, 'unit', 'short'),
      link: std.objectHas(spec, 'link'),
      linkTargetBlank: pick(spec, 'linkTargetBlank', false),
      linkTooltip: pick(spec, 'linkTooltip', 'Drill down'),
      linkUrl: pick(spec, 'link', ''),
    }
  ),

  type: 'table',
  transform: 'table',

  _styles:: {
    // By default hide time.
    Time: {
      alias: 'Time',
      dateFormat: 'YYYY-MM-DD HH:mm:ss',
      type: 'hidden',
    },
  } + {
    [column]: toStyle(labelStyles[column])
    for column in std.objectFields(labelStyles)
  },

  styles:
    local known = self._styles;
    [
      known[column] + { pattern: column }
      for column in std.objectFields(known)
    ] + [toStyle('') + { pattern: '/.*/' }],

  targets: std.makeArray(
    std.length(exprs),
    function(idx) {
      expr: exprs[idx],
      format: 'table',
      instant: true,
      intervalFactor: 2,
      legendFormat: '',
      step: 10,
      // 65 is 'A'; queries are labelled A, B, C, ...
      refId: std.char(65 + idx),
    }
  ),
},
// Returns a transparent 'text' panel titled `title` whose content is
// `markdown`, rendered in markdown mode. The panel has no data source.
textPanel(title, markdown)::
  local body = {
    mode: 'markdown',
    content: markdown,
  };
  local emptyFieldConfig = {
    defaults: {
      custom: {},
    },
    overrides: [],
  };
  {
    type: 'text',
    title: title,
    transparent: true,
    options: body,
    datasource: null,
    timeFrom: null,
    timeShift: null,
    fieldConfig: emptyFieldConfig,
  },
// Mixin that switches a graph panel to stacked rendering: stacking on, fill
// set to 10 and line width set to 0.
stack:: {
  linewidth: 0,
  fill: 10,
  stack: true,
},
// Returns the two-element `yaxes` array for a graph panel.
//
//   args - either a format string for the left axis, or an object whose
//          fields are merged over the left axis defaults. Any other type
//          leaves the left axis with a null format and no overrides.
//
// The right axis is always a hidden 'short' axis.
yaxes(args)::
  local kind = std.type(args);
  local leftAxis = {
    format: if kind == 'string' then args else null,
    label: null,
    logBase: 1,
    max: null,
    min: 0,
    show: true,
  } + (if kind == 'object' then args else {});
  local rightAxis = {
    format: 'short',
    label: null,
    logBase: 1,
    max: null,
    min: null,
    show: false,
  };
  [leftAxis, rightAxis],
// Mixin that turns a graph panel into a stacked request-rate panel broken
// down by status.
//
//   selector        - metric selector substituted into rate(...[$__rate_interval]).
//   statusLabelName - label holding the status (default 'status_code').
//
// The query derives a `status` label from `statusLabelName`: values matching
// "([0-9]).." become "<first digit>xx", and values matching "([a-z]+)" are
// copied as they are. Series are then summed by `status`, and `aliasColors`
// assigns a colour to each expected status value. The result is combined
// with $.stack.
qpsPanel(selector, statusLabelName='status_code'):: {
aliasColors: {
'1xx': '#EAB839',
'2xx': '#7EB26D',
'3xx': '#6ED0E0',
'4xx': '#EF843C',
'5xx': '#E24D42',
success: '#7EB26D',
'error': '#E24D42',
},
targets: [
{
// statusLabelName is substituted twice, once per label_replace call.
expr:
|||
sum by (status) (
label_replace(label_replace(rate(%s[$__rate_interval]),
"status", "${1}xx", "%s", "([0-9]).."),
"status", "${1}", "%s", "([a-z]+)"))
||| % [selector, statusLabelName, statusLabelName],
format: 'time_series',
intervalFactor: 2,
legendFormat: '{{status}}',
refId: 'A',
step: 10,
},
],
} + $.stack,
// Mixin that fills a graph panel with three latency series computed from a
// histogram: the 99th percentile, the 50th percentile and the average.
//
//   metricName - histogram base name; `_bucket`, `_sum` and `_count` are
//                appended to it.
//   selector   - text inserted directly after each metric name.
//   multiplier - factor applied to every series (default '1e3'); the left
//                y-axis is formatted as 'ms'.
latencyPanel(metricName, selector, multiplier='1e3')::
  // Settings shared by the three targets.
  local series(refId, legend, expr) = {
    refId: refId,
    legendFormat: legend,
    expr: expr,
    format: 'time_series',
    intervalFactor: 2,
    step: 10,
  };
  // `q` is passed as text so that it appears verbatim in the query.
  local quantile(q) =
    'histogram_quantile(%s, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [q, metricName, selector, multiplier];
  local average =
    'sum(rate(%s_sum%s[$__rate_interval])) * %s / sum(rate(%s_count%s[$__rate_interval]))' % [metricName, selector, multiplier, metricName, selector];
  {
    nullPointMode: 'null as zero',
    targets: [
      series('A', '99th Percentile', quantile('0.99')),
      series('B', '50th Percentile', quantile('0.50')),
      series('C', 'Average', average),
    ],
    yaxes: $.yaxes('ms'),
  },
// Constructors for label matchers. Each returns an object with `label`, `op`
// and `value` fields, the shape toPrometheusSelector formats.
selector:: {
  local matcher(op) = function(label, value) { label: label, op: op, value: value },
  eq:: matcher('='),  // label equals value
  neq:: matcher('!='),  // label differs from value
  re:: matcher('=~'),  // label matches the regex
  nre:: matcher('!~'),  // label does not match the regex
},
// Renders an array of matcher objects (fields `label`, `op`, `value`) as a
// selector string: each matcher becomes label<op>"value", the matchers are
// joined with ', ' and the result is wrapped in braces. Values are inserted
// as they are, without escaping.
toPrometheusSelector(selector)::
  local render(m) = '%(label)s%(op)s"%(value)s"' % m;
  local rendered = [render(m) for m in selector];
  '{' + std.join(', ', rendered) + '}',
}

View file

@ -0,0 +1,73 @@
# CI workflow, run on every push and pull request.
# Each job checks out the repository, installs the Go version named in
# `env.golang-version`, and runs one make target.
name: ci
on:
- push
- pull_request
env:
golang-version: '1.17'
jobs:
# Regenerates the output files; `git diff --exit-code` fails the job when the
# regenerated files differ from what is committed.
generate:
runs-on: ubuntu-latest
name: Generate yaml
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make generate && git diff --exit-code
# Runs the `jsonnet-lint` make target.
jsonnet-lint:
runs-on: ubuntu-latest
name: Jsonnet linter
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make jsonnet-lint
# Runs the `dashboards-lint` make target.
dashboards-lint:
runs-on: ubuntu-latest
name: Grafana dashboard linter
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make dashboards-lint
# Runs the `alerts-lint` make target.
alerts-lint:
runs-on: ubuntu-latest
name: Alerts linter
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make alerts-lint
# Runs the formatter; `git diff --exit-code` fails the job when formatting
# changed any file.
fmt:
runs-on: ubuntu-latest
name: Jsonnet formatter
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make fmt && git diff --exit-code
# Runs the `test` make target.
unit-tests:
runs-on: ubuntu-latest
name: Unit tests
steps:
- uses: actions/checkout@v2
with:
persist-credentials: false
- uses: actions/setup-go@v2
with:
go-version: ${{ env.golang-version }}
- run: make --always-make test

View file

@ -0,0 +1,6 @@
prometheus_alerts.yaml
prometheus_rules.yaml
dashboards_out
vendor
jsonnetfile.lock.json
tmp

View file

@ -0,0 +1,3 @@
exclusions:
template-job-rule:
panel-job-instance-rule:

View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,76 @@
# Build, lint and test targets for the mixin.
# Tool binaries are built into BIN_DIR (overridable from the environment or
# the command line).
BIN_DIR ?= $(shell pwd)/tmp/bin
JSONNET_VENDOR=vendor
GRAFANA_DASHBOARD_LINTER_BIN=$(BIN_DIR)/dashboard-linter
JB_BIN=$(BIN_DIR)/jb
JSONNET_BIN=$(BIN_DIR)/jsonnet
JSONNETLINT_BIN=$(BIN_DIR)/jsonnet-lint
JSONNETFMT_BIN=$(BIN_DIR)/jsonnetfmt
PROMTOOL_BIN=$(BIN_DIR)/promtool
TOOLING=$(JB_BIN) $(JSONNETLINT_BIN) $(JSONNET_BIN) $(JSONNETFMT_BIN) $(PROMTOOL_BIN) $(GRAFANA_DASHBOARD_LINTER_BIN)
JSONNETFMT_ARGS=-n 2 --max-blank-lines 2 --string-style s --comment-style s
.PHONY: all
all: fmt generate lint test
# Renders the alert file, the rule file and the dashboards directory.
.PHONY: generate
generate: prometheus_alerts.yaml prometheus_rules.yaml dashboards_out
# Installs the jsonnet dependencies with jb.
$(JSONNET_VENDOR): $(JB_BIN) jsonnetfile.json
$(JB_BIN) install
# Formats every *.libsonnet and *.jsonnet file outside vendor/, in place.
.PHONY: fmt
fmt: $(JSONNETFMT_BIN)
find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
xargs -n 1 -- $(JSONNETFMT_BIN) $(JSONNETFMT_ARGS) -i
# Renders lib/alerts.jsonnet as a string (-S) into the target file.
prometheus_alerts.yaml: $(JSONNET_BIN) mixin.libsonnet lib/alerts.jsonnet alerts/*.libsonnet
@$(JSONNET_BIN) -J vendor -S lib/alerts.jsonnet > $@
# Renders lib/rules.jsonnet as a string (-S) into the target file.
prometheus_rules.yaml: $(JSONNET_BIN) mixin.libsonnet lib/rules.jsonnet rules/*.libsonnet
@$(JSONNET_BIN) -J vendor -S lib/rules.jsonnet > $@
# Renders lib/dashboards.jsonnet in multi-file mode (-m) into dashboards_out/.
dashboards_out: $(JSONNET_BIN) $(JSONNET_VENDOR) mixin.libsonnet lib/dashboards.jsonnet dashboards/*.libsonnet
@mkdir -p dashboards_out
@$(JSONNET_BIN) -J vendor -m dashboards_out lib/dashboards.jsonnet
.PHONY: lint
lint: jsonnet-lint alerts-lint dashboards-lint
# Lints every *.libsonnet and *.jsonnet file outside vendor/, one file per
# linter invocation.
.PHONY: jsonnet-lint
jsonnet-lint: $(JSONNETLINT_BIN) $(JSONNET_VENDOR)
@find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
xargs -n 1 -- $(JSONNETLINT_BIN) -J vendor
# Checks both generated rule files with promtool.
.PHONY: alerts-lint
alerts-lint: $(PROMTOOL_BIN) prometheus_alerts.yaml prometheus_rules.yaml
@$(PROMTOOL_BIN) check rules prometheus_rules.yaml
@$(PROMTOOL_BIN) check rules prometheus_alerts.yaml
# Copies the repository's .lint file next to the generated dashboards.
dashboards_out/.lint: dashboards_out
@cp .lint $@
# Note: the sed step below rewrites the generated dashboard JSON files in place.
.PHONY: dashboards-lint
dashboards-lint: $(GRAFANA_DASHBOARD_LINTER_BIN) dashboards_out/.lint
# Replace $$interval:$$resolution var with $$__rate_interval to make dashboard-linter happy.
@sed -i -e 's/$$interval:$$resolution/$$__rate_interval/g' dashboards_out/*.json
@find dashboards_out -name '*.json' -print0 | xargs -n 1 -0 $(GRAFANA_DASHBOARD_LINTER_BIN) lint --strict
.PHONY: clean
clean:
# Remove all files and directories ignored by git.
git clean -Xfd .
# Runs the promtool rule unit tests defined in tests.yaml.
.PHONY: test
test: $(PROMTOOL_BIN) prometheus_alerts.yaml prometheus_rules.yaml
@$(PROMTOOL_BIN) test rules tests.yaml
$(BIN_DIR):
mkdir -p $(BIN_DIR)
# Builds every package imported (under the `tools` build tag) by the Go
# package in scripts/ into BIN_DIR.
# NOTE(review): the echo below mentions hack/tools.go while the recipe enters
# scripts/ - confirm which path is correct.
$(TOOLING): $(BIN_DIR)
@echo Installing tools from hack/tools.go
@cd scripts && go list -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) %

View file

@ -0,0 +1,19 @@
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
approvers:
- brancz
- csmarchbanks
- metalmatze
- tomwilkie
- s-urbaniak
- povilasv
- paulfantom
reviewers:
- brancz
- csmarchbanks
- metalmatze
- tomwilkie
- s-urbaniak
- povilasv
- paulfantom

View file

@ -0,0 +1,255 @@
# Prometheus Monitoring Mixin for Kubernetes
[![ci](https://github.com/kubernetes-monitoring/kubernetes-mixin/actions/workflows/ci.yaml/badge.svg)](https://github.com/kubernetes-monitoring/kubernetes-mixin/actions/workflows/ci.yaml)
> NOTE: This project is in the *pre-release* stage. Flags, configuration, behaviour and design may change significantly in following releases.
A set of Grafana dashboards and Prometheus alerts for Kubernetes.
## Releases
| Release branch | Kubernetes Compatibility | Prometheus Compatibility | Kube-state-metrics Compatibility |
| -------------- | -------------------------- | ------------------------ | -------------------------------- |
| release-0.1 | v1.13 and before | | |
| release-0.2 | v1.14.1 and before | v2.11.0+ | |
| release-0.3 | v1.17 and before | v2.11.0+ | |
| release-0.4 | v1.18 | v2.11.0+ | |
| release-0.5 | v1.19 | v2.11.0+ | |
| release-0.6 | v1.19+ | v2.11.0+ | |
| release-0.7 | v1.19+ | v2.11.0+ | v1.x |
| release-0.8 | v1.20+ | v2.11.0+ | v2.0+ |
| release-0.9 | v1.20+ | v2.11.0+ | v2.0+ |
| release-0.10 | v1.20+ | v2.11.0+ | v2.0+ |
| release-0.11 | v1.23+ | v2.11.0+ | v2.0+ |
| master | v1.23+ | v2.11.0+ | v2.0+ |
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis.
Some alerts now use Prometheus filters made available in Prometheus 2.11.0, which makes this version of Prometheus a dependency.
Warning: This compatibility matrix was initially created based on experience, we do not guarantee the compatibility, it may be updated based on new learnings.
Warning: By default the expressions will generate *grafana 7.2+* compatible rules using the *$__rate_interval* variable for rate functions. If you need backward compatible rules please set *grafana72: false* in your *_config*
## How to use
This mixin is designed to be vendored into the repo with your infrastructure config.
To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):
You then have three options for deploying your dashboards
1. Generate the config files and deploy them yourself
1. Use ksonnet to deploy this mixin along with Prometheus and Grafana
1. Use prometheus-operator to deploy this mixin (TODO)
## Generate config files
You can manually generate the alerts, dashboards and rules files, but first you
must install some tools:
```
$ go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest
$ brew install jsonnet
```
Then, grab the mixin and its dependencies:
```
$ git clone https://github.com/kubernetes-monitoring/kubernetes-mixin
$ cd kubernetes-mixin
$ jb install
```
Finally, build the mixin:
```
$ make prometheus_alerts.yaml
$ make prometheus_rules.yaml
$ make dashboards_out
```
The `prometheus_alerts.yaml` and `prometheus_rules.yaml` files then need to be passed
to your Prometheus server, and the files in `dashboards_out` need to be imported
into your Grafana server. The exact details will depend on how you deploy your
monitoring stack to Kubernetes.
### Dashboards for Windows Nodes
There are separate dashboards for windows resources.
1) Compute Resources / Cluster(Windows)
2) Compute Resources / Namespace(Windows)
3) Compute Resources / Pod(Windows)
4) USE Method / Cluster(Windows)
5) USE Method / Node(Windows)
These dashboards are based on metrics populated by [windows-exporter](https://github.com/prometheus-community/windows_exporter) from each Windows node.
## Running the tests
```sh
make test
```
## Using with prometheus-ksonnet
Alternatively you can also use the mixin with
[prometheus-ksonnet](https://github.com/kausalco/public/tree/master/prometheus-ksonnet),
a [ksonnet](https://github.com/ksonnet/ksonnet) module to deploy a fully-fledged
Prometheus-based monitoring system for Kubernetes:
Make sure you have ksonnet v0.8.0 installed:
```
$ brew install https://raw.githubusercontent.com/ksonnet/homebrew-tap/82ef24cb7b454d1857db40e38671426c18cd8820/ks.rb
$ brew pin ks
$ ks version
ksonnet version: v0.8.0
jsonnet version: v0.9.5
client-go version: v1.6.8-beta.0+$Format:%h$
```
In your config repo, if you don't have a ksonnet application, make a new one (will copy credentials from current context):
```
$ ks init <application name>
$ cd <application name>
$ ks env add default
```
Grab the kubernetes-jsonnet module and its dependencies, which include
the kubernetes-mixin:
```
$ go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
$ jb init
$ jb install github.com/kausalco/public/prometheus-ksonnet
```
Assuming you want to run in the default namespace ('environment' in ksonnet parlance), add the following to the file `environments/default/main.jsonnet`:
```jsonnet
local prometheus = import "prometheus-ksonnet/prometheus-ksonnet.libsonnet";
prometheus {
_config+:: {
namespace: "default",
},
}
```
Apply your config:
```
$ ks apply default
```
## Using prometheus-operator
TODO
## Multi-cluster support
Kubernetes-mixin can support dashboards across multiple clusters. You need either a multi-cluster [Thanos](https://github.com/improbable-eng/thanos) installation with `external_labels` configured or a [Cortex](https://github.com/cortexproject/cortex) system where a cluster label exists. To enable this feature you need to configure the following:
```jsonnet
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
showMultiCluster: true,
clusterLabel: '<your cluster label>',
```
## Customising the mixin
Kubernetes-mixin allows you to override the selectors used for various jobs,
to match those used in your Prometheus set. You can also customize the dashboard
names and add grafana tags.
In a new directory, add a file `mixin.libsonnet`:
```jsonnet
local kubernetes = import "kubernetes-mixin/mixin.libsonnet";
kubernetes {
_config+:: {
kubeStateMetricsSelector: 'job="kube-state-metrics"',
cadvisorSelector: 'job="kubernetes-cadvisor"',
nodeExporterSelector: 'job="kubernetes-node-exporter"',
kubeletSelector: 'job="kubernetes-kubelet"',
grafanaK8s+:: {
dashboardNamePrefix: 'Mixin / ',
dashboardTags: ['kubernetes', 'infrastucture'],
},
},
}
```
Then, install the kubernetes-mixin:
```
$ jb init
$ jb install github.com/kubernetes-monitoring/kubernetes-mixin
```
Generate the alerts, rules and dashboards:
```
$ jsonnet -J vendor -S -e 'std.manifestYamlDoc((import "mixin.libsonnet").prometheusAlerts)' > alerts.yml
$ jsonnet -J vendor -S -e 'std.manifestYamlDoc((import "mixin.libsonnet").prometheusRules)' >files/rules.yml
$ jsonnet -J vendor -m files/dashboards -e '(import "mixin.libsonnet").grafanaDashboards'
```
### Customising alert annotations
The steps described below extend on the existing mixin library without modifying the original git repository. This is to make consuming updates to your extended alert definitions easier. These definitions can reside outside of this repository and added to your own custom location, where you can define your alert dependencies in your `jsonnetfile.json` and add customisations to the existing definitions.
In your working directory, create a new file `kubernetes_mixin_override.libsonnet` with the following:
```jsonnet
local utils = import 'lib/utils.libsonnet';
(import 'mixin.libsonnet') +
(
{
prometheusAlerts+::
// The specialAlerts can be in any other config file
local slack = 'observability';
local specialAlerts = {
KubePodCrashLooping: { slack_channel: slack },
KubePodNotReady: { slack_channel: slack },
};
local addExtraAnnotations(rule) = rule {
[if 'alert' in rule then 'annotations']+: {
dashboard: 'https://foo.bar.co',
[if rule.alert in specialAlerts then 'slack_channel']: specialAlerts[rule.alert].slack_channel,
},
};
utils.mapRuleGroups(addExtraAnnotations),
}
)
```
Create new file: `lib/kubernetes_customised_alerts.jsonnet` with the following:
```jsonnet
std.manifestYamlDoc((import '../kubernetes_mixin_override.libsonnet').prometheusAlerts)
```
Running `jsonnet -S lib/kubernetes_customised_alerts.jsonnet` will build the alerts with your customisations.
The same result can be achieved by modifying the existing `config.libsonnet` with the content of `kubernetes_mixin_override.libsonnet`.
## Background
### Alert Severities
While the community has not yet fully agreed on alert severities and how they are to be used, this repository assumes the following paradigms when setting the severities:
* Critical: An issue, that needs to page a person to take instant action
* Warning: An issue, that needs to be worked on but in the regular work queue or during office hours rather than paging the oncall
* Info: Is meant to support a trouble shooting process by informing about a non-normal situation for one or more systems but not worth a page or ticket on its own.
### Architecture and Technical Decisions
* For more motivation, see
"[The RED Method: How to instrument your services](https://kccncna17.sched.com/event/CU8K/the-red-method-how-to-instrument-your-services-b-tom-wilkie-kausal?iframe=no&w=100%&sidebar=yes&bg=no)" talk from CloudNativeCon Austin.
* For more information about monitoring mixins, see this [design doc](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/edit#).
## Note
You can use the external tool called [prom-metrics-check](https://github.com/ContainerSolutions/prom-metrics-check) to validate the created dashboards. This tool allows you to check if the metrics installed and used in Grafana dashboards exist in the Prometheus instance.
Please have a look at https://github.com/ContainerSolutions/prom-metrics-check.

View file

@ -0,0 +1,16 @@
# Defined below are the security contacts for this repo.
#
# They are the contact point for the Product Security Committee to reach out
# to for triaging and handling of incoming issues.
#
# The below names agree to abide by the
# [Embargo Policy](https://git.k8s.io/security/private-distributors-list.md#embargo-policy)
# and will be removed and replaced if they violate that agreement.
#
# DO NOT REPORT SECURITY VULNERABILITIES DIRECTLY TO THESE NAMES, FOLLOW THE
# INSTRUCTIONS AT https://kubernetes.io/security/
brancz
csmarchbanks
metalmatze
tomwilkie

View file

@ -0,0 +1,10 @@
// Aggregates the alert definitions: the objects exported by the individual
// alert files are merged with `+`, and ../lib/add-runbook-links.libsonnet is
// applied last so that it can see everything merged before it.
// NOTE(review): presumably the last import attaches runbook links to the
// alerts (inferred from its file name) - confirm.
(import 'apps_alerts.libsonnet') +
(import 'resource_alerts.libsonnet') +
(import 'storage_alerts.libsonnet') +
(import 'system_alerts.libsonnet') +
(import 'kube_apiserver.libsonnet') +
(import 'kubelet.libsonnet') +
(import 'kube_scheduler.libsonnet') +
(import 'kube_controller_manager.libsonnet') +
(import 'kube_proxy.libsonnet') +
(import '../lib/add-runbook-links.libsonnet')

View file

@ -0,0 +1,313 @@
{
// Hidden configuration merged into `_config`; the alert expressions below are formatted with it.
_config+:: {
// Mandatory settings: evaluating either default raises the given error, so the consumer has to override them.
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
kubeJobTimeoutDuration: error 'must provide value for kubeJobTimeoutDuration',
// Optional extra label matcher(s); null leaves the queries unrestricted.
namespaceSelector: null,
// namespaceSelector followed by ',' (or '' when unset) so it can be placed directly in front of another matcher.
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
},
prometheusAlerts+:: {
groups+: [
{
name: 'kubernetes-apps',
rules: [
// KubePodCrashLooping (warning): a container reported the CrashLoopBackOff
// waiting reason at some point within the last 5m; must hold for 15m.
{
expr: |||
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", %(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m]) >= 1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").',
summary: 'Pod is crash looping.',
},
'for': '15m',
alert: 'KubePodCrashLooping',
},
// KubePodNotReady (warning): a pod is in phase Pending, Unknown or Failed for
// 15m. The join with kube_pod_owner{owner_kind!="Job"} drops pods owned by Jobs.
{
// We wrap kube_pod_owner with the topk() aggregator to ensure that
// every (namespace, pod, %(clusterLabel)s) tuple is unique even if the "owner_kind"
// label exists for 2 values. This avoids "many-to-many matching
// not allowed" errors when joining with kube_pod_status_phase.
expr: |||
sum by (namespace, pod, %(clusterLabel)s) (
max by(namespace, pod, %(clusterLabel)s) (
kube_pod_status_phase{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, phase=~"Pending|Unknown|Failed"}
) * on(namespace, pod, %(clusterLabel)s) group_left(owner_kind) topk by(namespace, pod, %(clusterLabel)s) (
1, max by(namespace, pod, owner_kind, %(clusterLabel)s) (kube_pod_owner{owner_kind!="Job"})
)
) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.',
summary: 'Pod has been in a non-ready state for more than 15 minutes.',
},
'for': '15m',
alert: 'KubePodNotReady',
},
// KubeDeploymentGenerationMismatch (warning): the observed generation of a
// Deployment differs from its metadata generation for 15m.
{
expr: |||
kube_deployment_status_observed_generation{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_deployment_metadata_generation{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.',
summary: 'Deployment generation mismatch due to possible roll-back',
},
'for': '15m',
alert: 'KubeDeploymentGenerationMismatch',
},
// KubeDeploymentReplicasMismatch (warning): more replicas are specified than
// are available while the updated-replica count did not change during the last
// 10m (i.e. no rollout is progressing); must hold for 15m.
{
expr: |||
(
kube_deployment_spec_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
>
kube_deployment_status_replicas_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
) and (
changes(kube_deployment_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[10m])
==
0
)
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.',
summary: 'Deployment has not matched the expected number of replicas.',
},
'for': '15m',
alert: 'KubeDeploymentReplicasMismatch',
},
// KubeStatefulSetReplicasMismatch (warning): the ready-replica count differs
// from the replica count while the updated-replica count did not change during
// the last 10m; must hold for 15m.
{
expr: |||
(
kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
) and (
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[10m])
==
0
)
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.',
// Names the StatefulSet so the summary agrees with the alert name and description.
summary: 'StatefulSet has not matched the expected number of replicas.',
},
'for': '15m',
alert: 'KubeStatefulSetReplicasMismatch',
},
// KubeStatefulSetGenerationMismatch (warning): the observed generation of a
// StatefulSet differs from its metadata generation for 15m.
{
expr: |||
kube_statefulset_status_observed_generation{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_metadata_generation{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.',
summary: 'StatefulSet generation mismatch due to possible roll-back',
},
'for': '15m',
alert: 'KubeStatefulSetGenerationMismatch',
},
// KubeStatefulSetUpdateNotRolledOut (warning): `unless` keeps only
// current-revision series that have no matching update-revision series; these
// are multiplied with the "replicas != updated replicas" result and the whole
// is required to coincide with no change of the updated-replica count in the
// last 5m. Must hold for 15m.
{
expr: |||
(
max without (revision) (
kube_statefulset_status_current_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
unless
kube_statefulset_status_update_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
*
(
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
) and (
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
==
0
)
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.',
summary: 'StatefulSet update has not been rolled out.',
},
'for': '15m',
alert: 'KubeStatefulSetUpdateNotRolledOut',
},
// KubeDaemonSetRolloutStuck (warning): at least one of the following holds —
// current != desired scheduled, misscheduled != 0, updated != desired
// scheduled, available != desired scheduled — while the updated-scheduled
// count did not change during the last 5m. Must hold for 15m.
{
alert: 'KubeDaemonSetRolloutStuck',
expr: |||
(
(
kube_daemonset_status_current_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
) or (
kube_daemonset_status_number_misscheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
0
) or (
kube_daemonset_status_updated_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
) or (
kube_daemonset_status_number_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
) and (
changes(kube_daemonset_status_updated_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
==
0
)
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.',
summary: 'DaemonSet rollout is stuck.',
},
'for': '15m',
},
// KubeContainerWaiting (warning): the waiting-reason series of a container,
// summed over all reasons, stay above 0 for 1h.
{
expr: |||
sum by (namespace, pod, container, %(clusterLabel)s) (kube_pod_container_status_waiting_reason{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour.',
summary: 'Pod container waiting longer than 1 hour',
},
'for': '1h',
alert: 'KubeContainerWaiting',
},
// KubeDaemonSetNotScheduled (warning): desired minus currently scheduled pods
// is positive for 10m; the difference is exposed as {{ $value }}.
{
alert: 'KubeDaemonSetNotScheduled',
expr: |||
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
-
kube_daemonset_status_current_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.',
summary: 'DaemonSet pods are not scheduled.',
},
'for': '10m',
},
// KubeDaemonSetMisScheduled (warning): the misscheduled pod count of a
// DaemonSet stays above 0 for 15m.
{
alert: 'KubeDaemonSetMisScheduled',
expr: |||
kube_daemonset_status_number_misscheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.',
summary: 'DaemonSet pods are misscheduled.',
},
'for': '15m',
},
// KubeJobNotCompleted (warning): for jobs that still report active pods, the
// time elapsed since the job start time exceeds kubeJobTimeoutDuration.
// There is no 'for' clause, so the alert fires as soon as the expression matches.
{
alert: 'KubeJobNotCompleted',
expr: |||
time() - max by(namespace, job_name, %(clusterLabel)s) (kube_job_status_start_time{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
and
kube_job_status_active{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0) > %(kubeJobTimeoutDuration)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
// The timeout is substituted by Jsonnet at build time and then rendered by the alert template's humanizeDuration.
description: 'Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "%(kubeJobTimeoutDuration)s" | humanizeDuration }} to complete.' % $._config,
summary: 'Job did not complete in time',
},
},
// KubeJobFailed (warning): kube_job_failed stays above 0 for 15m.
{
alert: 'KubeJobFailed',
expr: |||
kube_job_failed{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
description: 'Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.',
summary: 'Job failed to complete.',
},
},
// KubeHpaReplicasMismatch (warning): desired != current replicas, the current
// count lies strictly between the HPA's min and max, and the current count did
// not change during the last 15m. Must hold for 15m.
{
expr: |||
(kube_horizontalpodautoscaler_status_desired_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_horizontalpodautoscaler_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
(kube_horizontalpodautoscaler_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
>
kube_horizontalpodautoscaler_spec_min_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
(kube_horizontalpodautoscaler_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
<
kube_horizontalpodautoscaler_spec_max_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
changes(kube_horizontalpodautoscaler_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[15m]) == 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes.',
summary: 'HPA has not matched desired number of replicas.',
},
'for': '15m',
alert: 'KubeHpaReplicasMismatch',
},
// KubeHpaMaxedOut (warning): the current replica count equals the HPA's
// configured maximum for 15m.
{
expr: |||
kube_horizontalpodautoscaler_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
==
kube_horizontalpodautoscaler_spec_max_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.',
summary: 'HPA is running at max replicas',
},
'for': '15m',
alert: 'KubeHpaMaxedOut',
},
],
},
],
},
}

View file

@ -0,0 +1,126 @@
local utils = import '../lib/utils.libsonnet';
{
// Hidden configuration merged into `_config` for the API server alerts.
_config+:: {
// Mandatory: evaluating the default raises the given error unless overridden.
kubeApiserverSelector: error 'must provide selector for kube-apiserver',
// NOTE(review): not referenced by any rule visible in this file — confirm where it is consumed.
kubeAPILatencyWarningSeconds: 1,
// Client certificate expiry thresholds in seconds: 7 days (warning) and 1 day (critical).
certExpirationWarningSeconds: 7 * 24 * 3600,
certExpirationCriticalSeconds: 1 * 24 * 3600,
},
prometheusAlerts+:: {
groups+: [
{
name: 'kube-apiserver-slos',
rules: [
// Array comprehension: emits one KubeAPIErrorBudgetBurn rule per entry `w` of
// $._config.SLOs.apiserver.windows. A rule fires when the burn rate of both
// the long and the short window exceeds factor * (1 - target).
{
alert: 'KubeAPIErrorBudgetBurn',
// Positional format arguments: (window, factor, error budget) for the long
// window first, then the same triple for the short window.
expr: |||
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
and
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
||| % [
w.long,
w.factor,
(1 - $._config.SLOs.apiserver.target),
w.short,
w.factor,
(1 - $._config.SLOs.apiserver.target),
],
labels: {
severity: w.severity,
short: '%(short)s' % w,
long: '%(long)s' % w,
},
annotations: {
description: 'The API server is burning too much error budget.',
summary: 'The API server is burning too much error budget.',
},
'for': '%(for)s' % w,
}
for w in $._config.SLOs.apiserver.windows
],
},
{
name: 'kubernetes-system-apiserver',
rules: [
// KubeClientCertificateExpiration (warning): the expiration histogram has
// observations and its 1st percentile (5m rate, per job) is below
// certExpirationWarningSeconds; must hold for 5m.
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than %s.' % (utils.humanizeSeconds($._config.certExpirationWarningSeconds)),
summary: 'Client certificate is about to expire.',
},
},
// KubeClientCertificateExpiration (critical): same expression as the warning
// variant of this alert, compared against certExpirationCriticalSeconds.
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than %s.' % (utils.humanizeSeconds($._config.certExpirationCriticalSeconds)),
summary: 'Client certificate is about to expire.',
},
},
// KubeAggregatedAPIErrors (warning): the unavailable counter of an aggregated
// API service increased by more than 4 within 10m. No 'for' clause is set.
{
alert: 'KubeAggregatedAPIErrors',
expr: |||
sum by(name, namespace, %(clusterLabel)s)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.',
summary: 'Kubernetes aggregated API has reported errors.',
},
},
// KubeAggregatedAPIDown (warning): availability, computed as
// (1 - 10m average of the unavailable gauge) * 100, is below 85 for 5m.
{
alert: 'KubeAggregatedAPIDown',
expr: |||
(1 - max by(name, namespace, %(clusterLabel)s)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
description: 'Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.',
summary: 'Kubernetes aggregated API is down.',
},
},
// Extends the shared absent-alert template, parameterised through hidden (::) fields.
// NOTE(review): presumably yields an alert when no target matches `selector` — see ../lib/absent_alert.libsonnet.
(import '../lib/absent_alert.libsonnet') {
componentName:: 'KubeAPI',
selector:: $._config.kubeApiserverSelector,
},
// KubeAPITerminatedRequests (warning): terminated requests make up more than
// 20% of (requests + terminations), using 10m rates; must hold for 5m.
{
alert: 'KubeAPITerminatedRequests',
expr: |||
sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.',
summary: 'The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.',
},
'for': '5m',
},
],
},
],
},
}

View file

@ -0,0 +1,19 @@
// Alert rules for the kube-controller-manager component.
local absentAlert = import '../lib/absent_alert.libsonnet';

{
  _config+:: {
    // Mandatory: evaluating the default raises the given error unless overridden.
    kubeControllerManagerSelector: error 'must provide selector for kube-controller-manager',
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kubernetes-system-controller-manager',
        rules: [
          // Shared absent-alert template, parameterised through hidden (::) fields.
          absentAlert {
            componentName:: 'KubeControllerManager',
            selector:: $._config.kubeControllerManagerSelector,
          },
        ],
      },
    ],
  },
}

View file

@ -0,0 +1,19 @@
// Alert rules for the kube-proxy component.
local absentAlert = import '../lib/absent_alert.libsonnet';

{
  _config+:: {
    // Mandatory: evaluating the default raises the given error unless overridden.
    kubeProxySelector: error 'must provide selector for kube-proxy',
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kubernetes-system-kube-proxy',
        rules: [
          // Shared absent-alert template, parameterised through hidden (::) fields.
          absentAlert {
            componentName:: 'KubeProxy',
            selector:: $._config.kubeProxySelector,
          },
        ],
      },
    ],
  },
}

View file

@ -0,0 +1,19 @@
// Alert rules for the kube-scheduler component.
local absentAlert = import '../lib/absent_alert.libsonnet';

{
  _config+:: {
    // Default label matcher for scheduler targets; consumers may override it.
    kubeSchedulerSelector: 'job="kube-scheduler"',
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kubernetes-system-scheduler',
        rules: [
          // Shared absent-alert template, parameterised through hidden (::) fields.
          absentAlert {
            componentName:: 'KubeScheduler',
            selector:: $._config.kubeSchedulerSelector,
          },
        ],
      },
    ],
  },
}

View file

@ -0,0 +1,202 @@
{
// Hidden configuration merged into `_config` for the kubelet/node alerts.
_config+:: {
// Mandatory selectors: evaluating either default raises the given error unless overridden.
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
kubeletSelector: error 'must provide selector for kubelet',
// Taint keys that suppress KubeNodeUnreachable; the rule joins them with '|' into one regex.
kubeNodeUnreachableIgnoreKeys: [
'ToBeDeletedByClusterAutoscaler',
'cloud.google.com/impending-node-termination',
'aws-node-termination-handler/spot-itn',
],
// Kubelet certificate expiry thresholds in seconds: 7 days (warning) and 1 day (critical).
kubeletCertExpirationWarningSeconds: 7 * 24 * 3600,
kubeletCertExpirationCriticalSeconds: 1 * 24 * 3600,
},
prometheusAlerts+:: {
groups+: [
{
name: 'kubernetes-system-kubelet',
rules: [
// KubeNodeNotReady (warning): the Ready/"true" condition series of a node is 0 for 15m.
{
expr: |||
kube_node_status_condition{%(kubeStateMetricsSelector)s,condition="Ready",status="true"} == 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: '{{ $labels.node }} has been unready for more than 15 minutes.',
summary: 'Node is not ready.',
},
'for': '15m',
alert: 'KubeNodeNotReady',
},
// KubeNodeUnreachable (warning): a node carries the
// node.kubernetes.io/unreachable NoSchedule taint and none of the taints
// listed in kubeNodeUnreachableIgnoreKeys; must hold for 15m.
{
expr: |||
(kube_node_spec_taint{%(kubeStateMetricsSelector)s,key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{%(kubeStateMetricsSelector)s,key=~"%(kubeNodeUnreachableIgnoreKeys)s"}) == 1
||| % $._config {
// Override the list with a '|'-joined string so it can be used as a regex alternation.
kubeNodeUnreachableIgnoreKeys: std.join('|', super.kubeNodeUnreachableIgnoreKeys),
},
labels: {
severity: 'warning',
},
annotations: {
description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.',
summary: 'Node is unreachable.',
},
'for': '15m',
alert: 'KubeNodeUnreachable',
},
// KubeletTooManyPods (info): running pods per node divided by the node's pod
// capacity exceeds 0.95 for 15m. Nodes whose pod capacity equals 1 are
// filtered out by the `!= 1` comparison.
{
alert: 'KubeletTooManyPods',
// Some node has a capacity of 1 like AWS's Fargate and only exists while a pod is running on it.
// We have to ignore this special node in the KubeletTooManyPods alert.
expr: |||
count by(%(clusterLabel)s, node) (
(kube_pod_status_phase{%(kubeStateMetricsSelector)s,phase="Running"} == 1) * on(instance,pod,namespace,%(clusterLabel)s) group_left(node) topk by(instance,pod,namespace,%(clusterLabel)s) (1, kube_pod_info{%(kubeStateMetricsSelector)s})
)
/
max by(%(clusterLabel)s, node) (
kube_node_status_capacity{%(kubeStateMetricsSelector)s,resource="pods"} != 1
) > 0.95
||| % $._config,
'for': '15m',
labels: {
severity: 'info',
},
annotations: {
description: "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
summary: 'Kubelet is running at capacity.',
},
},
// KubeNodeReadinessFlapping (warning): the Ready/"true" condition of a node
// changed more than twice within 15m; must hold for 15m.
{
alert: 'KubeNodeReadinessFlapping',
// Restricted to the kube-state-metrics job like the other kube_node_* queries
// in this group, so series exposed by other jobs are not added to the sum.
expr: |||
sum(changes(kube_node_status_condition{%(kubeStateMetricsSelector)s,status="true",condition="Ready"}[15m])) by (%(clusterLabel)s, node) > 2
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
description: 'The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.',
summary: 'Node readiness status is flapping.',
},
},
// KubeletPlegDurationHigh (warning): the 0.99 quantile series of the PLEG
// relist duration recording rule is >= 10 for 5m.
// The expression contains no format placeholders, so `% $._config` leaves it unchanged.
{
alert: 'KubeletPlegDurationHigh',
expr: |||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
description: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.',
summary: 'Kubelet Pod Lifecycle Event Generator is taking too long to relist.',
},
},
// KubeletPodStartUpLatencyHigh (warning): the 99th percentile of the pod
// worker duration histogram (5m rate, per instance) exceeds 60 for 15m.
// The multiplication with kubelet_node_name attaches the `node` label via group_left.
{
alert: 'KubeletPodStartUpLatencyHigh',
expr: |||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (%(clusterLabel)s, instance, le)) * on(%(clusterLabel)s, instance) group_left(node) kubelet_node_name{%(kubeletSelector)s} > 60
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
description: 'Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.',
summary: 'Kubelet Pod startup latency is too high.',
},
},
// KubeletClientCertificateExpiration (warning): the client certificate TTL is
// below kubeletCertExpirationWarningSeconds. No 'for' clause is set.
{
alert: 'KubeletClientCertificateExpiration',
expr: |||
kubelet_certificate_manager_client_ttl_seconds < %(kubeletCertExpirationWarningSeconds)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.',
summary: 'Kubelet client certificate is about to expire.',
},
},
// KubeletClientCertificateExpiration (critical): the client certificate TTL is
// below kubeletCertExpirationCriticalSeconds. No 'for' clause is set.
{
alert: 'KubeletClientCertificateExpiration',
expr: |||
kubelet_certificate_manager_client_ttl_seconds < %(kubeletCertExpirationCriticalSeconds)s
||| % $._config,
labels: {
severity: 'critical',
},
annotations: {
description: 'Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.',
summary: 'Kubelet client certificate is about to expire.',
},
},
// KubeletServerCertificateExpiration (warning): the server certificate TTL is
// below kubeletCertExpirationWarningSeconds. No 'for' clause is set.
{
alert: 'KubeletServerCertificateExpiration',
expr: |||
kubelet_certificate_manager_server_ttl_seconds < %(kubeletCertExpirationWarningSeconds)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.',
summary: 'Kubelet server certificate is about to expire.',
},
},
// KubeletServerCertificateExpiration (critical): the server certificate TTL is
// below kubeletCertExpirationCriticalSeconds. No 'for' clause is set.
{
alert: 'KubeletServerCertificateExpiration',
expr: |||
kubelet_certificate_manager_server_ttl_seconds < %(kubeletCertExpirationCriticalSeconds)s
||| % $._config,
labels: {
severity: 'critical',
},
annotations: {
description: 'Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.',
summary: 'Kubelet server certificate is about to expire.',
},
},
// KubeletClientCertificateRenewalErrors (warning): the client certificate
// renewal error counter increased within the last 5m; must hold for 15m.
// The expression contains no format placeholders, so `% $._config` leaves it unchanged.
{
alert: 'KubeletClientCertificateRenewalErrors',
expr: |||
increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
||| % $._config,
labels: {
severity: 'warning',
},
'for': '15m',
annotations: {
description: 'Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).',
summary: 'Kubelet has failed to renew its client certificate.',
},
},
// KubeletServerCertificateRenewalErrors (warning): the server certificate
// renewal error counter increased within the last 5m; must hold for 15m.
// The expression contains no format placeholders, so `% $._config` leaves it unchanged.
{
alert: 'KubeletServerCertificateRenewalErrors',
expr: |||
increase(kubelet_server_expiration_renew_errors[5m]) > 0
||| % $._config,
labels: {
severity: 'warning',
},
'for': '15m',
annotations: {
description: 'Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).',
summary: 'Kubelet has failed to renew its server certificate.',
},
},
// Extends the shared absent-alert template, parameterised through hidden (::) fields.
// NOTE(review): presumably yields an alert when no target matches `selector` — see ../lib/absent_alert.libsonnet.
(import '../lib/absent_alert.libsonnet') {
componentName:: 'Kubelet',
selector:: $._config.kubeletSelector,
},
],
},
],
},
}

View file

@ -0,0 +1,163 @@
{
// Hidden configuration merged into `_config` for the resource alerts.
_config+:: {
// Mandatory selectors: evaluating either default raises the given error unless overridden.
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
// NOTE(review): nodeExporterSelector is not referenced by any rule visible in this file.
nodeExporterSelector: error 'must provide selector for node-exporter',
// Optional extra label matcher(s); null leaves the queries unrestricted.
namespaceSelector: null,
// namespaceSelector followed by ',' (or '' when unset) so it can be placed directly in front of another matcher.
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
// We alert when the aggregate (CPU, Memory) quota for all namespaces is
// greater than the amount of the resources in the cluster. We do however
// allow you to overcommit if you wish.
namespaceOvercommitFactor: 1.5,
// CPUThrottlingHigh fires above this percentage of throttled CFS periods.
cpuThrottlingPercent: 25,
// Optional label matcher(s) inserted into both CPUThrottlingHigh selectors.
cpuThrottlingSelector: '',
// Set this selector to select the namespaces that contain resources used for overprovisioning.
// See https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler
// for more details.
ignoringOverprovisionedWorkloadSelector: '',
},
prometheusAlerts+:: {
groups+: [
{
name: 'kubernetes-resources',
rules: [
// KubeCPUOvercommit (warning): summed CPU requests exceed the total
// allocatable CPU minus the largest single allocatable value, and that
// remainder is itself positive; must hold for 10m.
{
alert: 'KubeCPUOvercommit',
expr: |||
sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
and
(sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.',
summary: 'Cluster has overcommitted CPU resource requests.',
},
'for': '10m',
},
// KubeMemoryOvercommit (warning): summed memory requests exceed the total
// allocatable memory minus the largest single allocatable value, and that
// remainder is itself positive; must hold for 10m.
{
alert: 'KubeMemoryOvercommit',
expr: |||
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
and
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.',
summary: 'Cluster has overcommitted memory resource requests.',
},
'for': '10m',
},
// KubeCPUQuotaOvercommit (warning): the sum of hard CPU quotas (taking the
// minimum of "cpu" and "requests.cpu" per quota) divided by the total
// allocatable CPU exceeds namespaceOvercommitFactor for 5m.
{
alert: 'KubeCPUQuotaOvercommit',
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"}))
/
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted CPU resource requests for Namespaces.',
summary: 'Cluster has overcommitted CPU resource requests.',
},
'for': '5m',
},
// KubeMemoryQuotaOvercommit (warning): the sum of hard memory quotas (taking
// the minimum of "memory" and "requests.memory" per quota) divided by the
// total allocatable memory exceeds namespaceOvercommitFactor for 5m.
{
alert: 'KubeMemoryQuotaOvercommit',
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"}))
/
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted memory resource requests for Namespaces.',
summary: 'Cluster has overcommitted memory resource requests.',
},
'for': '5m',
},
// KubeQuotaAlmostFull (info): used / hard quota lies strictly between 0.9 and
// 1 for 15m. Hard quotas equal to 0 are filtered out before the division.
{
alert: 'KubeQuotaAlmostFull',
expr: |||
kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard"} > 0)
> 0.9 < 1
||| % $._config,
'for': '15m',
labels: {
severity: 'info',
},
annotations: {
description: 'Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.',
summary: 'Namespace quota is going to be full.',
},
},
// KubeQuotaFullyUsed (info): used / hard quota equals exactly 1 for 15m.
// Hard quotas equal to 0 are filtered out before the division.
{
alert: 'KubeQuotaFullyUsed',
expr: |||
kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard"} > 0)
== 1
||| % $._config,
'for': '15m',
labels: {
severity: 'info',
},
annotations: {
description: 'Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.',
summary: 'Namespace quota is fully used.',
},
},
// KubeQuotaExceeded (warning): used / hard quota is greater than 1 for 15m.
// Hard quotas equal to 0 are filtered out before the division.
{
alert: 'KubeQuotaExceeded',
expr: |||
kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard"} > 0)
> 1
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
description: 'Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.',
summary: 'Namespace quota has exceeded the limits.',
},
},
// CPUThrottlingHigh (info): per container, the ratio of throttled CFS periods
// to all CFS periods over 5m exceeds cpuThrottlingPercent / 100 for 15m.
// Only the numerator filters on container!=""; the division matches on the
// (container, pod, namespace) labels both sides are grouped by.
{
alert: 'CPUThrottlingHigh',
expr: |||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", %(cpuThrottlingSelector)s}[5m])) by (container, pod, namespace)
/
sum(increase(container_cpu_cfs_periods_total{%(cpuThrottlingSelector)s}[5m])) by (container, pod, namespace)
> ( %(cpuThrottlingPercent)s / 100 )
||| % $._config,
'for': '15m',
labels: {
severity: 'info',
},
annotations: {
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.',
summary: 'Processes experience elevated CPU throttling.',
},
},
],
},
],
},
}

View file

@ -0,0 +1,137 @@
{
// Hidden configuration merged into `_config` for the storage alerts.
// NOTE(review): the rules below also format `pvExcludedSelector` and
// `clusterLabel`-style keys that are not defined here — presumably supplied by
// a shared config file; confirm.
_config+:: {
// Mandatory selectors: evaluating either default raises the given error unless overridden.
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
kubeletSelector: error 'must provide selector for kubelet',
// Optional extra label matcher(s); null leaves the queries unrestricted.
namespaceSelector: null,
// namespaceSelector followed by ',' (or '' when unset) so it can be placed directly in front of another matcher.
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
// We alert when a disk is expected to fill up in four days. Depending on
// the data-set it might be useful to change the sampling-time for the
// prediction
volumeFullPredictionSampleTime: '6h',
},
prometheusAlerts+:: {
groups+: [
{
name: 'kubernetes-storage',
rules: [
// KubePersistentVolumeFillingUp (critical): less than 3% of the volume
// capacity is available and some bytes are used; must hold for 1m.
// The two `unless` clauses exclude claims with access mode ReadOnlyMany and
// claims whose labels match pvExcludedSelector.
{
alert: 'KubePersistentVolumeFillingUp',
expr: |||
(
kubelet_volume_stats_available_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
/
kubelet_volume_stats_capacity_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
) < 0.03
and
kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1
||| % $._config,
'for': '1m',
labels: {
severity: 'critical',
},
annotations: {
description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.',
summary: 'PersistentVolume is filling up.',
},
},
// KubePersistentVolumeFillingUp (warning): less than 15% of the volume
// capacity is available, some bytes are used, and a linear prediction over
// volumeFullPredictionSampleTime reaches zero available bytes within four days
// (4 * 24 * 3600 seconds); must hold for 1h.
// The two `unless` clauses exclude claims with access mode ReadOnlyMany and
// claims whose labels match pvExcludedSelector.
{
alert: 'KubePersistentVolumeFillingUp',
expr: |||
(
kubelet_volume_stats_available_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
/
kubelet_volume_stats_capacity_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
) < 0.15
and
kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0
and
predict_linear(kubelet_volume_stats_available_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1
||| % $._config,
'for': '1h',
labels: {
severity: 'warning',
},
annotations: {
description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.',
summary: 'PersistentVolume is filling up.',
},
},
// KubePersistentVolumeInodesFillingUp (critical): less than 3% of the volume's
// inodes are free and some inodes are used; must hold for 1m.
// The two `unless` clauses exclude claims with access mode ReadOnlyMany and
// claims whose labels match pvExcludedSelector.
{
alert: 'KubePersistentVolumeInodesFillingUp',
expr: |||
(
kubelet_volume_stats_inodes_free{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
/
kubelet_volume_stats_inodes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
) < 0.03
and
kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1
||| % $._config,
'for': '1m',
labels: {
severity: 'critical',
},
annotations: {
description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.',
summary: 'PersistentVolumeInodes are filling up.',
},
},
// KubePersistentVolumeInodesFillingUp (warning): less than 15% of the volume's
// inodes are free, some inodes are used, and a linear prediction over
// volumeFullPredictionSampleTime reaches zero free inodes within four days
// (4 * 24 * 3600 seconds); must hold for 1h.
// The two `unless` clauses exclude claims with access mode ReadOnlyMany and
// claims whose labels match pvExcludedSelector.
{
alert: 'KubePersistentVolumeInodesFillingUp',
expr: |||
(
kubelet_volume_stats_inodes_free{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
/
kubelet_volume_stats_inodes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}
) < 0.15
and
kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0
and
predict_linear(kubelet_volume_stats_inodes_free{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1
unless on(namespace, persistentvolumeclaim)
kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1
||| % $._config,
'for': '1h',
labels: {
severity: 'warning',
},
annotations: {
description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.',
summary: 'PersistentVolumeInodes are filling up.',
},
},
// KubePersistentVolumeErrors (critical): a persistent volume reports phase
// Failed or Pending for 5m.
{
alert: 'KubePersistentVolumeErrors',
expr: |||
kube_persistentvolume_status_phase{phase=~"Failed|Pending",%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
description: 'The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.',
summary: 'PersistentVolume is having issues with provisioning.',
},
},
],
},
],
},
}

View file

@ -0,0 +1,49 @@
{
  _config+:: {
    // Label matcher that excludes the cluster DNS jobs (kube-dns / CoreDNS)
    // from the version comparison below.
    notKubeDnsCoreDnsSelector: 'job!~"kube-dns|coredns"',
  },

  // Alerting rules of the 'kubernetes-system' group. Both expressions are
  // templated with values from $._config (clusterLabel and the selector above).
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kubernetes-system',
        rules: [
          {
            // Fires when, within one cluster, kubernetes_build_info reports more
            // than one distinct git_version once label_replace has cut it down
            // to the prefix captured by the regular expression.
            alert: 'KubeVersionMismatch',
            expr: |||
              count by (%(clusterLabel)s) (count by (git_version, %(clusterLabel)s) (label_replace(kubernetes_build_info{%(notKubeDnsCoreDnsSelector)s},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
            ||| % $._config,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              description: 'There are {{ $value }} different semantic versions of Kubernetes components running.',
              summary: 'Different semantic versions of Kubernetes components running.',
            },
          },
          {
            // Fires when more than 1% of a client's API requests returned a 5xx
            // code over the last 5 minutes.
            alert: 'KubeClientErrors',
            // Many clients use get requests to check the existence of objects,
            // this is normal and an expected error, therefore it should be
            // ignored in this alert. Only 5xx codes are counted for that reason.
            expr: |||
              (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (%(clusterLabel)s, instance, job, namespace)
                /
              sum(rate(rest_client_requests_total[5m])) by (%(clusterLabel)s, instance, job, namespace))
              > 0.01
            ||| % $._config,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              // The trailing stray single quote after "errors." was removed; it
              // was rendered verbatim in every notification.
              description: "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.",
              summary: 'Kubernetes API server client is experiencing errors.',
            },
          },
        ],
      },
    ],
  },
}

View file

@ -0,0 +1,113 @@
// Shared default configuration of the mixin. Every field lives under the
// hidden `_config` object and is merged (`+::`) with whatever other files or
// the consumer of the mixin define, so each value can be overridden.
{
_config+:: {
SLOs: {
apiserver: {
days: 30,  // The number of days we alert on burning too much error budget for.
target: 0.99,  // The target percentage of availability between 0-1. (0.99 = 99%, 0.999 = 99.9%)
// Only change these windows when you really understand multi burn rate errors.
// Even though you can change the days above (which will change availability calculations)
// these windows will alert on a 30 days sliding window. We're looking into basing these windows on the given days too.
windows: [
{ severity: 'critical', 'for': '2m', long: '1h', short: '5m', factor: 14.4 },
{ severity: 'critical', 'for': '15m', long: '6h', short: '30m', factor: 6 },
{ severity: 'warning', 'for': '1h', long: '1d', short: '2h', factor: 3 },
{ severity: 'warning', 'for': '3h', long: '3d', short: '6h', factor: 1 },
],
},
},
// Selectors are inserted between {} in Prometheus queries.
cadvisorSelector: 'job="cadvisor"',
kubeletSelector: 'job="kubelet"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
kubeSchedulerSelector: 'job="kube-scheduler"',
kubeControllerManagerSelector: 'job="kube-controller-manager"',
kubeApiserverSelector: 'job="kube-apiserver"',
kubeProxySelector: 'job="kube-proxy"',
podLabel: 'pod',
hostNetworkInterfaceSelector: 'device!~"veth.+"',
hostMountpointSelector: 'mountpoint="/"',
windowsExporterSelector: 'job="kubernetes-windows-exporter"',
containerfsSelector: 'container!=""',
// Grafana dashboard IDs are necessary for stable links for dashboards.
// Keys are the dashboard file names used in `grafanaDashboards`.
grafanaDashboardIDs: {
'k8s-resources-multicluster.json': '1gBgaexoVZ4TpBNAt2eGRsc4LNjNhdjcZd6cqU6S',
'k8s-resources-cluster.json': 'ZnbvYbcXkob7GLqcDPLTj1ZL4MRX87tOh8xdr831',
'k8s-resources-namespace.json': 'XaY4UCP3J51an4ikqtkUGBSjLpDW4pg39xe2FuxP',
'k8s-resources-pod.json': 'wU56sdGSNYZTL3eO0db3pONtVmTvsyV7w8aadbYF',
'k8s-multicluster-rsrc-use.json': 'NJ9AlnsObVgj9uKiJMeAqfzMi1wihOMupcsDhlhR',
'k8s-cluster-rsrc-use.json': 'uXQldxzqUNgIOUX6FyZNvqgP2vgYb78daNu4GiDc',
'k8s-node-rsrc-use.json': 'E577CMUOwmPsxVVqM9lj40czM1ZPjclw7hGa7OT7',
'nodes.json': 'kcb9C2QDe4IYcjiTOmYyfhsImuzxRcvwWC3YLJPS',
'persistentvolumesusage.json': 'AhCeikee0xoa6faec0Weep2nee6shaiquigahw8b',
'pods.json': 'AMK9hS0rSbSz7cKjPHcOtk6CGHFjhSHwhbQ3sedK',
'statefulset.json': 'dPiBt0FRG5BNYo0XJ4L0Meoc7DWs9eL40c1CRc1g',
'k8s-resources-windows-cluster.json': '4d08557fd9391b100730f2494bccac68',
'k8s-resources-windows-namespace.json': '490b402361724ab1d4c45666c1fa9b6f',
'k8s-resources-windows-pod.json': '40597a704a610e936dc6ed374a7ce023',
'k8s-windows-cluster-rsrc-use.json': '53a43377ec9aaf2ff64dfc7a1f539334',
'k8s-windows-node-rsrc-use.json': '96e7484b0bb53b74fbc2bcb7723cd40b',
'k8s-resources-workloads-namespace.json': 'L29WgMrccBDauPs3Xsti3fwaKjMB6fReufWj6Gl1',
'k8s-resources-workload.json': 'hZCNbUPfUqjc95N3iumVsaEVHXzaBr3IFKRFvUJf',
'apiserver.json': 'eswbt59QCroA3XLdKFvdOHlKB8Iks3h7d2ohstxr',
'controller-manager.json': '5g73oHG0pCRz4X1t6gNYouVUv9urrQd4wCdHR2mI',
'scheduler.json': '4uMPZ9jmwvYJcM5fcNcNrrt9Sf6ufQL4IKFri2Gp',
'proxy.json': 'hhT4orXD1Ott4U1bNNps0R26EHTwMypdcaCjDRPM',
'kubelet.json': 'B1azll2ETo7DTiM8CysrH6g4s5NCgkOz6ZdU8Q0j',
},
// Support for Grafana 7.2+ `$__rate_interval` instead of `$__interval`.
// `grafanaIntervalVar` is derived from `grafana72`, so override the flag
// rather than the variable.
grafana72: true,
grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
// Config for the Grafana dashboards in the Kubernetes Mixin
grafanaK8s: {
dashboardNamePrefix: 'Kubernetes / ',
dashboardTags: ['kubernetes-mixin'],
// For links between Grafana dashboards, you need to tell us if your Grafana
// is served under some non-root path.
linkPrefix: '',
// The default refresh time for all dashboards, default to 10s
refresh: '10s',
minimumTimeInterval: '1m',
// Timezone for Grafana dashboards: UTC, browser, ...
grafanaTimezone: 'UTC',
},
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
showMultiCluster: false,
clusterLabel: 'cluster',
namespaceLabel: 'namespace',
// Default datasource name
datasourceName: 'default',
// Datasource instance filter regex
datasourceFilterRegex: '',
// This list of filesystem types is referenced in various expressions.
// The entries are joined with `|` into a single regex matcher below.
fstypes: ['ext[234]', 'btrfs', 'xfs', 'zfs'],
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
// This list of disk device names is referenced in various expressions.
// The entries are joined with `|` into a single regex matcher below.
diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
// Certain workloads (e.g. KubeVirt/CDI) will fully utilise the persistent volume they claim;
// the size of the PV will never grow since they consume the entirety of the volume by design.
// This selector allows an admin to 'pre-mark' the PVC of such a workload (or for any other use case)
// so that specific storage alerts will not fire. With the default selector, adding a label
// `excluded_from_alerts: 'true'` to the PVC will have the desired effect.
// NOTE(review): the `label_` prefix is presumably added by kube-state-metrics when it
// exports PVC labels — confirm that this label is actually exposed in your setup.
pvExcludedSelector: 'label_excluded_from_alerts="true"',
// Default timeout value for k8s Jobs. The jobs which are active beyond this duration would trigger KubeJobNotCompleted alert.
// NOTE(review): presumably expressed in seconds (12 hours) — confirm against the alert expression.
kubeJobTimeoutDuration: 12 * 60 * 60,
},
}

View file

@ -0,0 +1,286 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local singlestat = grafana.singlestat;
{
  // Default selector for API server series; merged into the shared mixin config.
  _config+:: {
    kubeApiserverSelector: 'job="kube-apiserver"',
  },

  grafanaDashboards+:: {
    // "API server" dashboard. Layout, top to bottom:
    //   notice text, SLO overview (availability + error budget),
    //   read SLIs, write SLIs, work queue panels, process resource panels.
    // All queries are templated from $._config and filtered by the `$cluster`
    // (and, for non-SLO panels, `$instance`) dashboard variables.
    'apiserver.json':
      // Overall availability over the configured SLO period (SLOs.apiserver.days).
      local availability1d =
        singlestat.new(
          'Availability (%dd) > %.3f%%' % [$._config.SLOs.apiserver.days, 100 * $._config.SLOs.apiserver.target],
          datasource='$datasource',
          span=4,
          format='percentunit',
          decimals=3,
          description='How many percent of requests (both read and write) in %d days have been answered successfully and fast enough?' % $._config.SLOs.apiserver.days,
        )
        .addTarget(prometheus.target('apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"}' % [$._config.SLOs.apiserver.days, $._config.clusterLabel]));

      // Difference between measured availability and the SLO target.
      local errorBudget =
        graphPanel.new(
          'ErrorBudget (%dd) > %.3f%%' % [$._config.SLOs.apiserver.days, 100 * $._config.SLOs.apiserver.target],
          datasource='$datasource',
          span=8,
          format='percentunit',
          decimals=3,
          fill=10,
          // The target is stored as a 0-1 ratio; scale it to a percentage so the
          // text agrees with the panel title (99.000% instead of 0.990%).
          description='How much error budget is left looking at our %.3f%% availability guarantees?' % (100 * $._config.SLOs.apiserver.target),
        )
        // NOTE(review): the constant factor 100 only turns the difference into a
        // fraction of the budget when the target is 0.99 — confirm the intent
        // before changing SLOs.apiserver.target.
        .addTarget(prometheus.target('100 * (apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"} - %f)' % [$._config.SLOs.apiserver.days, $._config.clusterLabel, $._config.SLOs.apiserver.target], legendFormat='errorbudget'));

      // ---- Read SLIs (verb="read") ----
      local readAvailability =
        singlestat.new(
          'Read Availability (%dd)' % $._config.SLOs.apiserver.days,
          datasource='$datasource',
          span=3,
          format='percentunit',
          decimals=3,
          description='How many percent of read requests (LIST,GET) in %d days have been answered successfully and fast enough?' % $._config.SLOs.apiserver.days,
        )
        .addTarget(prometheus.target('apiserver_request:availability%dd{verb="read", %(clusterLabel)s="$cluster"}' % [$._config.SLOs.apiserver.days, $._config.clusterLabel]));

      // Request rate stacked by response code; series are colored per code class.
      local readRequests =
        graphPanel.new(
          'Read SLI - Requests',
          datasource='$datasource',
          span=3,
          format='reqps',
          stack=true,
          fill=10,
          description='How many read requests (LIST,GET) per second do the apiservers get by code?',
        )
        .addSeriesOverride({ alias: '/2../i', color: '#56A64B' })
        .addSeriesOverride({ alias: '/3../i', color: '#F2CC0C' })
        .addSeriesOverride({ alias: '/4../i', color: '#3274D9' })
        .addSeriesOverride({ alias: '/5../i', color: '#E02F44' })
        .addTarget(prometheus.target('sum by (code) (code_resource:apiserver_request_total:rate5m{verb="read", %(clusterLabel)s="$cluster"})' % $._config, legendFormat='{{ code }}'));

      // Share of 5xx responses per resource.
      local readErrors =
        graphPanel.new(
          'Read SLI - Errors',
          datasource='$datasource',
          min=0,
          span=3,
          format='percentunit',
          description='How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?',
        )
        .addTarget(prometheus.target('sum by (resource) (code_resource:apiserver_request_total:rate5m{verb="read",code=~"5..", %(clusterLabel)s="$cluster"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb="read", %(clusterLabel)s="$cluster"})' % $._config, legendFormat='{{ resource }}'));

      local readDuration =
        graphPanel.new(
          'Read SLI - Duration',
          datasource='$datasource',
          span=3,
          format='s',
          description='How many seconds is the 99th percentile for reading (LIST|GET) a given resource?',
        )
        .addTarget(prometheus.target('cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb="read", %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{ resource }}'));

      // ---- Write SLIs (verb="write") ----
      local writeAvailability =
        singlestat.new(
          'Write Availability (%dd)' % $._config.SLOs.apiserver.days,
          datasource='$datasource',
          span=3,
          format='percentunit',
          decimals=3,
          description='How many percent of write requests (POST|PUT|PATCH|DELETE) in %d days have been answered successfully and fast enough?' % $._config.SLOs.apiserver.days,
        )
        .addTarget(prometheus.target('apiserver_request:availability%dd{verb="write", %(clusterLabel)s="$cluster"}' % [$._config.SLOs.apiserver.days, $._config.clusterLabel]));

      local writeRequests =
        graphPanel.new(
          'Write SLI - Requests',
          datasource='$datasource',
          span=3,
          format='reqps',
          stack=true,
          fill=10,
          description='How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?',
        )
        .addSeriesOverride({ alias: '/2../i', color: '#56A64B' })
        .addSeriesOverride({ alias: '/3../i', color: '#F2CC0C' })
        .addSeriesOverride({ alias: '/4../i', color: '#3274D9' })
        .addSeriesOverride({ alias: '/5../i', color: '#E02F44' })
        .addTarget(prometheus.target('sum by (code) (code_resource:apiserver_request_total:rate5m{verb="write", %(clusterLabel)s="$cluster"})' % $._config, legendFormat='{{ code }}'));

      local writeErrors =
        graphPanel.new(
          'Write SLI - Errors',
          datasource='$datasource',
          min=0,
          span=3,
          format='percentunit',
          description='How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?',
        )
        .addTarget(prometheus.target('sum by (resource) (code_resource:apiserver_request_total:rate5m{verb="write",code=~"5..", %(clusterLabel)s="$cluster"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb="write", %(clusterLabel)s="$cluster"})' % $._config, legendFormat='{{ resource }}'));

      local writeDuration =
        graphPanel.new(
          'Write SLI - Duration',
          datasource='$datasource',
          span=3,
          format='s',
          description='How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?',
        )
        .addTarget(prometheus.target('cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb="write", %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{ resource }}'));

      // ---- Work queue panels ----
      local workQueueAddRate =
        graphPanel.new(
          'Work Queue Add Rate',
          datasource='$datasource',
          span=6,
          format='ops',
          legend_show=false,
          min=0,
        )
        .addTarget(prometheus.target('sum(rate(workqueue_adds_total{%(kubeApiserverSelector)s, instance=~"$instance", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (instance, name)' % $._config, legendFormat='{{instance}} {{name}}'));

      local workQueueDepth =
        graphPanel.new(
          'Work Queue Depth',
          datasource='$datasource',
          span=6,
          format='short',
          legend_show=false,
          min=0,
        )
        // workqueue_depth is a gauge (current queue length), so it is summed
        // directly; wrapping it in rate() would plot its per-second change
        // instead of the depth the panel title promises.
        .addTarget(prometheus.target('sum(workqueue_depth{%(kubeApiserverSelector)s, instance=~"$instance", %(clusterLabel)s="$cluster"}) by (instance, name)' % $._config, legendFormat='{{instance}} {{name}}'));

      local workQueueLatency =
        graphPanel.new(
          'Work Queue Latency',
          datasource='$datasource',
          span=12,
          format='s',
          legend_show=true,
          legend_values=true,
          legend_current=true,
          legend_alignAsTable=true,
          legend_rightSide=true,
        )
        .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{%(kubeApiserverSelector)s, instance=~"$instance", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (instance, name, le))' % $._config, legendFormat='{{instance}} {{name}}'));

      // ---- Process resource panels ----
      local memory =
        graphPanel.new(
          'Memory',
          datasource='$datasource',
          span=4,
          format='bytes',
        )
        .addTarget(prometheus.target('process_resident_memory_bytes{%(kubeApiserverSelector)s,instance=~"$instance", %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'));

      local cpu =
        graphPanel.new(
          'CPU usage',
          datasource='$datasource',
          span=4,
          format='short',
          min=0,
        )
        .addTarget(prometheus.target('rate(process_cpu_seconds_total{%(kubeApiserverSelector)s,instance=~"$instance", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])' % $._config, legendFormat='{{instance}}'));

      local goroutines =
        graphPanel.new(
          'Goroutines',
          datasource='$datasource',
          span=4,
          format='short',
        )
        .addTarget(prometheus.target('go_goroutines{%(kubeApiserverSelector)s,instance=~"$instance", %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'));

      // Assemble the dashboard: datasource, cluster and instance variables first,
      // then the notice panel and the rows in display order.
      dashboard.new(
        '%(dashboardNamePrefix)sAPI server' % $._config.grafanaK8s,
        time_from='now-1h',
        uid=($._config.grafanaDashboardIDs['apiserver.json']),
        tags=($._config.grafanaK8s.dashboardTags),
      ).addTemplate(
        {
          current: {
            text: 'default',
            value: $._config.datasourceName,
          },
          hide: 0,
          label: 'Data Source',
          name: 'datasource',
          options: [],
          query: 'prometheus',
          refresh: 1,
          regex: $._config.datasourceFilterRegex,
          type: 'datasource',
        },
      )
      .addTemplate(
        template.new(
          'cluster',
          '$datasource',
          'label_values(up{%(kubeApiserverSelector)s}, %(clusterLabel)s)' % $._config,
          label='cluster',
          refresh='time',
          // The cluster variable is hidden unless multi-cluster support is enabled.
          hide=if $._config.showMultiCluster then '' else 'variable',
          sort=1,
        )
      )
      .addTemplate(
        template.new(
          'instance',
          '$datasource',
          'label_values(up{%(kubeApiserverSelector)s, %(clusterLabel)s="$cluster"}, instance)' % $._config,
          refresh='time',
          includeAll=true,
          sort=1,
        )
      )
      .addPanel(
        grafana.text.new(
          title='Notice',
          content='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
          description='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
          span=12,
        ),
        gridPos={
          h: 2,
          w: 24,
          x: 0,
          y: 0,
        },
      )
      .addRow(
        row.new()
        .addPanel(availability1d)
        .addPanel(errorBudget)
      )
      .addRow(
        row.new()
        .addPanel(readAvailability)
        .addPanel(readRequests)
        .addPanel(readErrors)
        .addPanel(readDuration)
      )
      .addRow(
        row.new()
        .addPanel(writeAvailability)
        .addPanel(writeRequests)
        .addPanel(writeErrors)
        .addPanel(writeDuration)
      ).addRow(
        row.new()
        .addPanel(workQueueAddRate)
        .addPanel(workQueueDepth)
        .addPanel(workQueueLatency)
      ).addRow(
        row.new()
        .addPanel(memory)
        .addPanel(cpu)
        .addPanel(goroutines)
      ),
  },
}

View file

@ -0,0 +1,196 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local singlestat = grafana.singlestat;
{
  grafanaDashboards+:: {
    // "Controller Manager" dashboard. Layout, top to bottom:
    //   up count + work queue add rate, work queue depth, work queue latency,
    //   Kube API request rate + POST latency, GET latency, process resources.
    // Every query is templated from $._config and restricted to the `$cluster`
    // and (except for the up count) `$instance` dashboard variables.
    'controller-manager.json':
      // Number of controller-manager targets currently reporting up == 1.
      local upCount =
        singlestat.new(
          'Up',
          datasource='$datasource',
          span=2,
          valueName='min',
        )
        .addTarget(prometheus.target('sum(up{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s})' % $._config));

      local workQueueAddRate =
        graphPanel.new(
          'Work Queue Add Rate',
          datasource='$datasource',
          span=10,
          format='ops',
          legend_show=true,
          legend_values=true,
          legend_current=true,
          legend_alignAsTable=true,
          legend_rightSide=true,
        )
        .addTarget(prometheus.target('sum(rate(workqueue_adds_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance, name)' % $._config, legendFormat='{{%(clusterLabel)s}} {{instance}} {{name}}' % $._config));

      local workQueueDepth =
        graphPanel.new(
          'Work Queue Depth',
          datasource='$datasource',
          span=12,
          min=0,
          format='short',
          legend_show=true,
          legend_values=true,
          legend_current=true,
          legend_alignAsTable=true,
          legend_rightSide=true,
        )
        // workqueue_depth is a gauge (current queue length), so it is summed
        // directly; wrapping it in rate() would plot its per-second change
        // instead of the depth the panel title promises.
        .addTarget(prometheus.target('sum(workqueue_depth{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance"}) by (%(clusterLabel)s, instance, name)' % $._config, legendFormat='{{%(clusterLabel)s}} {{instance}} {{name}}' % $._config));

      local workQueueLatency =
        graphPanel.new(
          'Work Queue Latency',
          datasource='$datasource',
          span=12,
          format='s',
          legend_show=true,
          legend_values=true,
          legend_current=true,
          legend_alignAsTable=true,
          legend_rightSide=true,
        )
        .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (%(clusterLabel)s, instance, name, le))' % $._config, legendFormat='{{%(clusterLabel)s}} {{instance}} {{name}}' % $._config));

      // Outgoing API request rate, one series per response code class.
      // The cluster matcher is included so that, like every other panel on this
      // dashboard, the sums only cover the cluster selected in `$cluster`.
      local rpcRate =
        graphPanel.new(
          'Kube API Request Rate',
          datasource='$datasource',
          span=4,
          format='ops',
        )
        .addTarget(prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance",code=~"2.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='2xx'))
        .addTarget(prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance",code=~"3.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='3xx'))
        .addTarget(prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance",code=~"4.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='4xx'))
        .addTarget(prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance",code=~"5.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='5xx'));

      local postRequestLatency =
        graphPanel.new(
          'Post Request Latency 99th Quantile',
          datasource='$datasource',
          span=8,
          format='s',
          min=0,
        )
        .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance", verb="POST"}[%(grafanaIntervalVar)s])) by (verb, url, le))' % $._config, legendFormat='{{verb}} {{url}}'));

      local getRequestLatency =
        graphPanel.new(
          'Get Request Latency 99th Quantile',
          datasource='$datasource',
          span=12,
          format='s',
          min=0,
          legend_show=true,
          legend_values=true,
          legend_current=true,
          legend_alignAsTable=true,
          legend_rightSide=true,
        )
        .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s, instance=~"$instance", verb="GET"}[%(grafanaIntervalVar)s])) by (verb, url, le))' % $._config, legendFormat='{{verb}} {{url}}'));

      // ---- Process resource panels ----
      local memory =
        graphPanel.new(
          'Memory',
          datasource='$datasource',
          span=4,
          format='bytes',
        )
        .addTarget(prometheus.target('process_resident_memory_bytes{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s,instance=~"$instance"}' % $._config, legendFormat='{{instance}}'));

      local cpu =
        graphPanel.new(
          'CPU usage',
          datasource='$datasource',
          span=4,
          format='short',
          min=0,
        )
        .addTarget(prometheus.target('rate(process_cpu_seconds_total{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])' % $._config, legendFormat='{{instance}}'));

      local goroutines =
        graphPanel.new(
          'Goroutines',
          datasource='$datasource',
          span=4,
          format='short',
        )
        .addTarget(prometheus.target('go_goroutines{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s,instance=~"$instance"}' % $._config, legendFormat='{{instance}}'));

      // Assemble the dashboard: datasource, cluster and instance variables first,
      // then the rows in display order.
      dashboard.new(
        '%(dashboardNamePrefix)sController Manager' % $._config.grafanaK8s,
        time_from='now-1h',
        uid=($._config.grafanaDashboardIDs['controller-manager.json']),
        tags=($._config.grafanaK8s.dashboardTags),
      ).addTemplate(
        {
          current: {
            text: 'default',
            value: $._config.datasourceName,
          },
          hide: 0,
          label: 'Data Source',
          name: 'datasource',
          options: [],
          query: 'prometheus',
          refresh: 1,
          regex: $._config.datasourceFilterRegex,
          type: 'datasource',
        },
      )
      .addTemplate(
        template.new(
          'cluster',
          '$datasource',
          'label_values(up{%(kubeControllerManagerSelector)s}, %(clusterLabel)s)' % $._config,
          label='cluster',
          refresh='time',
          // The cluster variable is hidden unless multi-cluster support is enabled.
          hide=if $._config.showMultiCluster then '' else 'variable',
          sort=1,
        )
      )
      .addTemplate(
        template.new(
          'instance',
          '$datasource',
          'label_values(up{%(clusterLabel)s="$cluster", %(kubeControllerManagerSelector)s}, instance)' % $._config,
          refresh='time',
          includeAll=true,
          sort=1,
        )
      )
      .addRow(
        row.new()
        .addPanel(upCount)
        .addPanel(workQueueAddRate)
      ).addRow(
        row.new()
        .addPanel(workQueueDepth)
      ).addRow(
        row.new()
        .addPanel(workQueueLatency)
      ).addRow(
        row.new()
        .addPanel(rpcRate)
        .addPanel(postRequestLatency)
      ).addRow(
        row.new()
        .addPanel(getRequestLatency)
      ).addRow(
        row.new()
        .addPanel(memory)
        .addPanel(cpu)
        .addPanel(goroutines)
      ),
  },
}

View file

@ -0,0 +1,9 @@
// Combines the individual dashboard files into one object using `+`.
// With object addition, fields of a right-hand operand can reference the
// combined left-hand side through `super`, so the order of this chain matters.
// NOTE(review): 'defaults.libsonnet' is presumably kept last on purpose so that
// it can post-process the dashboards contributed by the other files — confirm
// before reordering.
(import 'network.libsonnet') +
(import 'persistentvolumesusage.libsonnet') +
(import 'resources.libsonnet') +
(import 'apiserver.libsonnet') +
(import 'controller-manager.libsonnet') +
(import 'scheduler.libsonnet') +
(import 'proxy.libsonnet') +
(import 'kubelet.libsonnet') +
(import 'defaults.libsonnet')

View file

@ -0,0 +1,39 @@
// Post-processes every dashboard inherited from the objects this one is added
// to, applying mixin-wide defaults from `_config.grafanaK8s`.
{
// `self` is captured here because `self`/`super` inside the nested objects
// below refer to those nested objects instead.
local kubernetesMixin = self,
local grafanaDashboards = super.grafanaDashboards,
// Replace (note `::` without `+`) the inherited dashboards with patched copies:
// each dashboard gets a uid equal to the MD5 hash of its file name, and the
// timezone, refresh interval and tags configured in `_config.grafanaK8s`.
// The uid set here overrides any uid the dashboard defined itself.
grafanaDashboards:: {
[filename]: grafanaDashboards[filename] {
uid: std.md5(filename),
timezone: kubernetesMixin._config.grafanaK8s.grafanaTimezone,
refresh: kubernetesMixin._config.grafanaK8s.refresh,
tags: kubernetesMixin._config.grafanaK8s.dashboardTags,
// Rebuild every row and every panel inside it. `super.rows` and
// `super.panels` are read unconditionally, so each dashboard is expected to
// define `rows`, and each row `panels`.
rows: [
row {
panels: [
panel {
// Modify tooltip to only show a single value
tooltip+: {
shared: false,
},
// Modify legend to always show as table on right side
legend+: {
alignAsTable: true,
rightSide: true,
},
// Set minimum time interval for all panels
interval: kubernetesMixin._config.grafanaK8s.minimumTimeInterval,
}
for panel in super.panels
],
}
for row in super.rows
],
}
for filename in std.objectFields(grafanaDashboards)
},
}

View file

@ -0,0 +1,356 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local statPanel = grafana.statPanel;
{
grafanaDashboards+:: {
'kubelet.json':
local upCount =
statPanel.new(
'Running Kubelets',
datasource='$datasource',
reducerFunction='lastNotNull',
)
.addTarget(prometheus.target('sum(kubelet_node_name{%(clusterLabel)s="$cluster", %(kubeletSelector)s})' % $._config));
local runningPodCount =
statPanel.new(
'Running Pods',
datasource='$datasource',
reducerFunction='lastNotNull',
)
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_pod_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local runningContainerCount =
statPanel.new(
'Running Containers',
datasource='$datasource',
reducerFunction='lastNotNull',
)
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_container_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local actualVolumeCount =
statPanel.new(
'Actual Volume Count',
datasource='$datasource',
reducerFunction='lastNotNull',
)
.addTarget(prometheus.target('sum(volume_manager_total_volumes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance", state="actual_state_of_world"})' % $._config, legendFormat='{{instance}}'));
local desiredVolumeCount =
statPanel.new(
'Desired Volume Count',
datasource='$datasource',
reducerFunction='lastNotNull',
)
.addTarget(prometheus.target('sum(volume_manager_total_volumes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance",state="desired_state_of_world"})' % $._config, legendFormat='{{instance}}'));
local configErrorCount =
statPanel.new(
'Config Error Count',
datasource='$datasource',
reducerFunction='lastNotNull',
)
.addTarget(prometheus.target('sum(rate(kubelet_node_config_error{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='{{instance}}'));
local operationRate =
graphPanel.new(
'Operation Rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('sum(rate(kubelet_runtime_operations_total{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (operation_type, instance)' % $._config, legendFormat='{{instance}} {{operation_type}}'));
local operationErrorRate =
graphPanel.new(
'Operation Error Rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('sum(rate(kubelet_runtime_operations_errors_total{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_type)' % $._config, legendFormat='{{instance}} {{operation_type}}'));
local operationLatency =
graphPanel.new(
'Operation duration 99th quantile',
datasource='$datasource',
format='s',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_type, le))' % $._config, legendFormat='{{instance}} {{operation_type}}'));
local podStartRate =
graphPanel.new(
'Pod Start Rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('sum(rate(kubelet_pod_start_duration_seconds_count{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance)' % $._config, legendFormat='{{instance}} pod'))
.addTarget(prometheus.target('sum(rate(kubelet_pod_worker_duration_seconds_count{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance)' % $._config, legendFormat='{{instance}} worker'));
local podStartLatency =
graphPanel.new(
'Pod Start Duration',
datasource='$datasource',
format='s',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, le))' % $._config, legendFormat='{{instance}} pod'))
.addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, le))' % $._config, legendFormat='{{instance}} worker'));
local storageOperationRate =
graphPanel.new(
'Storage Operation Rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
legend_hideEmpty=true,
legend_hideZero=true,
)
.addTarget(prometheus.target('sum(rate(storage_operation_duration_seconds_count{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_name, volume_plugin)' % $._config, legendFormat='{{instance}} {{operation_name}} {{volume_plugin}}'));
local storageOperationErrorRate =
graphPanel.new(
'Storage Operation Error Rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
legend_hideEmpty=true,
legend_hideZero=true,
)
.addTarget(prometheus.target('sum(rate(storage_operation_errors_total{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_name, volume_plugin)' % $._config, legendFormat='{{instance}} {{operation_name}} {{volume_plugin}}'));
local storageOperationLatency =
graphPanel.new(
'Storage Operation Duration 99th quantile',
datasource='$datasource',
format='s',
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
legend_hideEmpty=true,
legend_hideZero=true,
)
.addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_name, volume_plugin, le))' % $._config, legendFormat='{{instance}} {{operation_name}} {{volume_plugin}}'));
local cgroupManagerRate =
graphPanel.new(
'Cgroup manager operation rate',
datasource='$datasource',
format='ops',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('sum(rate(kubelet_cgroup_manager_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_type)' % $._config, legendFormat='{{operation_type}}'));
local cgroupManagerDuration =
graphPanel.new(
'Cgroup manager 99th quantile',
datasource='$datasource',
format='s',
legend_show=true,
legend_values=true,
legend_current=true,
legend_alignAsTable=true,
legend_rightSide=true,
)
.addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, operation_type, le))' % $._config, legendFormat='{{instance}} {{operation_type}}'));
local plegRelistRate =
  // Rate of PLEG relist operations per instance (PLEG = pod lifecycle event
  // generator, as the panel description states).
  local rateExpr = 'sum(rate(kubelet_pleg_relist_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance)' % $._config;
  local rateSeries = prometheus.target(rateExpr, legendFormat='{{instance}}');
  local emptyPanel = graphPanel.new(
    'PLEG relist rate',
    datasource='$datasource',
    description='Pod lifecycle event generator',
    format='ops',
    legend_show=true,
    legend_values=true,
    legend_current=true,
    legend_alignAsTable=true,
    legend_rightSide=true,
  );
  emptyPanel.addTarget(rateSeries);
local plegRelistDuration =
  // 0.99 quantile of PLEG relist duration per instance.
  local p99Expr = 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, le))' % $._config;
  local p99Series = prometheus.target(p99Expr, legendFormat='{{instance}}');
  local emptyPanel = graphPanel.new(
    'PLEG relist duration',
    datasource='$datasource',
    format='s',
    legend_show=true,
    legend_values=true,
    legend_current=true,
    legend_alignAsTable=true,
    legend_rightSide=true,
  );
  emptyPanel.addTarget(p99Series);
local plegRelistInterval =
  // 0.99 quantile of the interval between PLEG relists per instance.
  local p99Expr = 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, le))' % $._config;
  local p99Series = prometheus.target(p99Expr, legendFormat='{{instance}}');
  local emptyPanel = graphPanel.new(
    'PLEG relist interval',
    datasource='$datasource',
    format='s',
    legend_show=true,
    legend_values=true,
    legend_current=true,
    legend_alignAsTable=true,
    legend_rightSide=true,
  );
  emptyPanel.addTarget(p99Series);
local rpcRate =
  // REST client request rate, one series per HTTP status class (2xx..5xx).
  // The four queries differ only in the leading digit of the `code` matcher,
  // so the shared parts are formatted once and the digit is spliced in.
  local queryHead = 'sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster",%(kubeletSelector)s, instance=~"$instance",code=~"' % $._config;
  local queryTail = '.."}[%(grafanaIntervalVar)s]))' % $._config;
  local statusClassTarget(digit) =
    prometheus.target(queryHead + digit + queryTail, legendFormat=digit + 'xx');
  graphPanel.new(
    'RPC Rate',
    datasource='$datasource',
    format='ops',
  )
  // Targets are added in ascending status-class order.
  .addTarget(statusClassTarget('2'))
  .addTarget(statusClassTarget('3'))
  .addTarget(statusClassTarget('4'))
  .addTarget(statusClassTarget('5'));
local requestDuration =
  // 0.99 quantile of REST client request duration per instance, verb and URL.
  local p99Expr = 'histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{%(clusterLabel)s="$cluster",%(kubeletSelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, verb, url, le))' % $._config;
  local p99Series = prometheus.target(p99Expr, legendFormat='{{instance}} {{verb}} {{url}}');
  local emptyPanel = graphPanel.new(
    'Request duration 99th quantile',
    datasource='$datasource',
    format='s',
    legend_show=true,
    legend_values=true,
    legend_current=true,
    legend_alignAsTable=true,
    legend_rightSide=true,
  );
  emptyPanel.addTarget(p99Series);
local memory =
  // Resident memory of the selected kubelet processes, one series per instance.
  local rssExpr = 'process_resident_memory_bytes{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}' % $._config;
  local emptyPanel = graphPanel.new('Memory', datasource='$datasource', format='bytes');
  emptyPanel.addTarget(prometheus.target(rssExpr, legendFormat='{{instance}}'));
local cpu =
  // Rate of process_cpu_seconds_total for the selected kubelets, per instance.
  local cpuExpr = 'rate(process_cpu_seconds_total{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])' % $._config;
  local emptyPanel = graphPanel.new('CPU usage', datasource='$datasource', format='short');
  emptyPanel.addTarget(prometheus.target(cpuExpr, legendFormat='{{instance}}'));
local goroutines =
  // Current go_goroutines value of the selected kubelets, per instance.
  local goroutineExpr = 'go_goroutines{%(clusterLabel)s="$cluster",%(kubeletSelector)s,instance=~"$instance"}' % $._config;
  local emptyPanel = graphPanel.new('Goroutines', datasource='$datasource', format='short');
  emptyPanel.addTarget(prometheus.target(goroutineExpr, legendFormat='{{instance}}'));
// Assemble the Kubelet dashboard: three template variables (datasource,
// cluster, instance) followed by the panels defined above. Every horizontal
// band of panels below adds up to a total width of 24.
dashboard.new(
'%(dashboardNamePrefix)sKubelet' % $._config.grafanaK8s,
time_from='now-1h',
uid=($._config.grafanaDashboardIDs['kubelet.json']),
tags=($._config.grafanaK8s.dashboardTags),
).addTemplate(
// Datasource picker, given as a literal template object instead of going
// through template.new. The other templates and the panels refer to it as
// '$datasource'.
{
current: {
text: 'default',
value: $._config.datasourceName,
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: $._config.datasourceFilterRegex,
type: 'datasource',
},
)
.addTemplate(
// 'cluster' variable: values of the cluster label on the kubelet `up` series.
// hide is 'variable' unless $._config.showMultiCluster is set.
template.new(
'cluster',
'$datasource',
'label_values(up{%(kubeletSelector)s}, %(clusterLabel)s)' % $._config,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
.addTemplate(
// 'instance' variable: kubelet instances of the selected cluster, with an
// "All" choice (includeAll). The panel queries match it with instance=~.
template.new(
'instance',
'$datasource',
'label_values(up{%(kubeletSelector)s,%(clusterLabel)s="$cluster"}, instance)' % $._config,
label='instance',
refresh='time',
includeAll=true,
sort=1,
)
)
// y=0: six panels, each 4 wide.
.addPanel(upCount, gridPos={ h: 7, w: 4, x: 0, y: 0 })
.addPanel(runningPodCount, gridPos={ h: 7, w: 4, x: 4, y: 0 })
.addPanel(runningContainerCount, gridPos={ h: 7, w: 4, x: 8, y: 0 })
.addPanel(actualVolumeCount, gridPos={ h: 7, w: 4, x: 12, y: 0 })
.addPanel(desiredVolumeCount, gridPos={ h: 7, w: 4, x: 16, y: 0 })
.addPanel(configErrorCount, gridPos={ h: 7, w: 4, x: 20, y: 0 })
// y=7..14: runtime operation rate / error rate side by side, latency full width.
.addPanel(operationRate, gridPos={ h: 7, w: 12, x: 0, y: 7 })
.addPanel(operationErrorRate, gridPos={ h: 7, w: 12, x: 12, y: 7 })
.addPanel(operationLatency, gridPos={ h: 7, w: 24, x: 0, y: 14 })
// y=21: pod start rate and latency.
.addPanel(podStartRate, gridPos={ h: 7, w: 12, x: 0, y: 21 })
.addPanel(podStartLatency, gridPos={ h: 7, w: 12, x: 12, y: 21 })
// y=28..35: storage operations.
.addPanel(storageOperationRate, gridPos={ h: 7, w: 12, x: 0, y: 28 })
.addPanel(storageOperationErrorRate, gridPos={ h: 7, w: 12, x: 12, y: 28 })
.addPanel(storageOperationLatency, gridPos={ h: 7, w: 24, x: 0, y: 35 })
// y=42: cgroup manager.
.addPanel(cgroupManagerRate, gridPos={ h: 7, w: 12, x: 0, y: 42 })
.addPanel(cgroupManagerDuration, gridPos={ h: 7, w: 12, x: 12, y: 42 })
// y=49..56: PLEG relist rate and interval side by side, duration full width below.
.addPanel(plegRelistRate, gridPos={ h: 7, w: 12, x: 0, y: 49 })
.addPanel(plegRelistInterval, gridPos={ h: 7, w: 12, x: 12, y: 49 })
.addPanel(plegRelistDuration, gridPos={ h: 7, w: 24, x: 0, y: 56 })
// y=63..70: REST client request rate and duration, each full width.
.addPanel(rpcRate, gridPos={ h: 7, w: 24, x: 0, y: 63 })
.addPanel(requestDuration, gridPos={ h: 7, w: 24, x: 0, y: 70 })
// y=77: process-level memory, CPU and goroutines, each 8 wide.
.addPanel(memory, gridPos={ h: 7, w: 8, x: 0, y: 77 })
.addPanel(cpu, gridPos={ h: 7, w: 8, x: 8, y: 77 })
.addPanel(goroutines, gridPos={ h: 7, w: 8, x: 16, y: 77 }),
},
}

View file

@ -0,0 +1,512 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local tablePanel = grafana.tablePanel;
local annotation = grafana.annotation;
{
grafanaDashboards+:: {
'cluster-total.json':
local newStyle(
  alias,
  colorMode=null,
  colors=[],
  dateFormat='YYYY-MM-DD HH:mm:ss',
  decimals=2,
  link=false,
  linkTooltip='Drill down',
  linkUrl='',
  thresholds=[],
  type='number',
  unit='short'
) =
  // Builds one table column style object; every argument is copied to the
  // field of the same name. Assembled from three themed groups.
  local labelling = { alias: alias, type: type, unit: unit, decimals: decimals, dateFormat: dateFormat };
  local colouring = { colorMode: colorMode, colors: colors, thresholds: thresholds };
  local linking = { link: link, linkTooltip: linkTooltip, linkUrl: linkUrl };
  labelling + colouring + linking;
local newBarplotPanel(graphTitle, graphQuery, graphFormat='Bps', legendFormat='{{namespace}}') =
  // Bar panel for graphQuery: bars instead of lines, x axis in 'series' mode
  // showing 'current' values, and a right-hand table legend sorted by the
  // current value in descending order.
  local series = prometheus.target(graphQuery) + {
    intervalFactor: 1,
    legendFormat: legendFormat,
    step: 10,
  };
  local barPanel = graphPanel.new(
    title=graphTitle,
    span=24,
    datasource='$datasource',
    fill=2,
    min_span=24,
    format=graphFormat,
    min=0,
    max=null,
    show_xaxis=false,
    x_axis_mode='series',
    x_axis_values='current',
    lines=false,
    bars=true,
    stack=false,
    legend_show=true,
    legend_values=true,
    legend_min=false,
    legend_max=false,
    legend_current=true,
    legend_avg=false,
    legend_alignAsTable=true,
    legend_rightSide=true,
    legend_sort='current',
    legend_sortDesc=true,
    nullPointMode='null'
  );
  // Fields layered on top of what graphPanel.new produced.
  local overrides = {
    legend+: { hideEmpty: true, hideZero: true },
    paceLength: 10,
    tooltip+: { sort: 2 },
  };
  barPanel.addTarget(series) + overrides;
local newGraphPanel(graphTitle, graphQuery, graphFormat='Bps', legendFormat='{{namespace}}') =
  // Stacked line graph over time for graphQuery, with a right-hand table
  // legend showing min, max, current and avg.
  local series = prometheus.target(graphQuery) + {
    intervalFactor: 1,
    legendFormat: legendFormat,
    step: 10,
  };
  local linePanel = graphPanel.new(
    title=graphTitle,
    span=24,
    datasource='$datasource',
    fill=2,
    linewidth=2,
    min_span=24,
    format=graphFormat,
    min=0,
    max=null,
    x_axis_mode='time',
    x_axis_values='total',
    lines=true,
    stack=true,
    legend_show=true,
    legend_values=true,
    legend_min=true,
    legend_max=true,
    legend_current=true,
    legend_avg=true,
    legend_alignAsTable=true,
    legend_rightSide=true,
    nullPointMode='connected'
  );
  // Fields layered on top of what graphPanel.new produced.
  local overrides = {
    legend+: { hideEmpty: true, hideZero: true },
    paceLength: 10,
    tooltip+: { sort: 2 },
  };
  linePanel.addTarget(series) + overrides;
local newTablePanel(tableTitle, colQueries) =
  // Table with one instant query per value column. Query i gets refId
  // 'A' + i, and the column styles below are keyed on the matching
  // 'Value #<refId>' field, so colQueries must follow the column order.
  local buildTarget(index, colQuery) =
    prometheus.target(colQuery, format='table', instant=true) + {
      legendFormat: '',
      step: 10,
      refId: std.char(65 + index),
    };
  local valueColumn(refId, heading, unit) = {
    field: 'Value #' + refId,
    style: newStyle(alias=heading, unit=unit),
  };
  // Columns in the order they are added to the panel.
  local columns = [
    { field: 'Time', style: newStyle(alias='Time', type='hidden') },
    valueColumn('A', 'Current Bandwidth Received', 'Bps'),
    valueColumn('B', 'Current Bandwidth Transmitted', 'Bps'),
    valueColumn('C', 'Average Bandwidth Received', 'Bps'),
    valueColumn('D', 'Average Bandwidth Transmitted', 'Bps'),
    valueColumn('E', 'Rate of Received Packets', 'pps'),
    valueColumn('F', 'Rate of Transmitted Packets', 'pps'),
    valueColumn('G', 'Rate of Received Packets Dropped', 'pps'),
    valueColumn('H', 'Rate of Transmitted Packets Dropped', 'pps'),
    {
      // The namespace cell links to the per-namespace networking dashboard.
      field: 'namespace',
      style: newStyle(
        alias='Namespace',
        link=true,
        linkUrl='d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell',
      ),
    },
  ];
  local emptyTable = tablePanel.new(
    title=tableTitle,
    span=24,
    min_span=24,
    datasource='$datasource',
  );
  local withColumns = std.foldl(
    function(panel, column) panel.addColumn(field=column.field, style=column.style),
    columns,
    emptyTable
  );
  withColumns + {
    fill: 1,
    fontSize: '90%',
    lines: true,
    linewidth: 1,
    nullPointMode: 'null as zero',
    renderer: 'flot',
    scroll: true,
    showHeader: true,
    spaceLength: 10,
    sort: {
      col: 0,
      desc: false,
    },
    // Set directly rather than through addTarget so the refIds above are kept.
    targets: std.mapWithIndex(buildTarget, colQueries),
  };
local resolutionTemplate =
  // 'resolution' interval variable offering 30s / 5m / 1h, with 5m marked
  // as selected. The panel queries use it as the step of their
  // [$interval:$resolution] subquery.
  local choices = ['30s', '5m', '1h'];
  local preselected = '5m';
  template.new(
    name='resolution',
    datasource='$datasource',
    query=std.join(',', choices),
    current=preselected,
    hide='',
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      { selected: choice == preselected, text: choice, value: choice }
      for choice in choices
    ],
  };
local intervalTemplate =
  // 'interval' variable with a single fixed choice. The panel queries use it
  // as the range of their [$interval:$resolution] subquery. hide=2 is passed
  // through to template.new (presumably hiding the variable - confirm against
  // grafonnet's template.new).
  local fixedRange = '4h';
  template.new(
    name='interval',
    datasource='$datasource',
    query=fixedRange,
    // Was '5m', a value that is neither in `query` nor in `options` below, so
    // the stored selection contradicted the only selectable option.
    current=fixedRange,
    hide=2,
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      {
        selected: true,
        text: fixedRange,
        value: fixedRange,
      },
    ],
  };
// Section rows of the dashboard. Rows created with collapse=true are
// collapsed, so such a row must include its panels itself.
local openRow(heading) = row.new(title=heading);
local collapsedRow(heading) = row.new(title=heading, collapse=true);
// Current Bandwidth
local currentBandwidthRow = openRow('Current Bandwidth');
// Average Bandwidth (collapsed)
local averageBandwidthRow = collapsedRow('Average Bandwidth');
// Bandwidth History
local bandwidthHistoryRow = openRow('Bandwidth History');
// Packets (collapsed)
local packetRow = collapsedRow('Packets');
// Errors (collapsed)
local errorRow = collapsedRow('Errors');
local clusterTemplate =
  // 'cluster' variable: values of the cluster label on the cAdvisor `up`
  // series. hide is '' when $._config.showMultiCluster is set and '2'
  // otherwise.
  local visibility = if $._config.showMultiCluster then '' else '2';
  local clusterQuery = 'label_values(up{%(cadvisorSelector)s}, %(clusterLabel)s)' % $._config;
  template.new(
    name='cluster',
    datasource='$datasource',
    query=clusterQuery,
    hide=visibility,
    refresh=2
  );
// Assemble the "Networking / Cluster" dashboard from the helpers and
// templates defined above. The order of the addTemplate / addPanel calls is
// kept exactly as before; only the gridPos of the last two panels in the
// Errors row changed (see the notes there).
dashboard.new(
  title='%(dashboardNamePrefix)sNetworking / Cluster' % $._config.grafanaK8s,
  tags=($._config.grafanaK8s.dashboardTags),
  editable=true,
  schemaVersion=18,
  refresh=($._config.grafanaK8s.refresh),
  time_from='now-1h',
  time_to='now',
)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
.addAnnotation(annotation.default)
.addPanel(
  currentBandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 0 }
)
.addTemplate(
  // Datasource picker, given as a literal template object; everything else
  // refers to it as '$datasource'.
  {
    current: {
      text: 'default',
      value: $._config.datasourceName,
    },
    hide: 0,
    label: 'Data Source',
    name: 'datasource',
    options: [],
    query: 'prometheus',
    refresh: 1,
    regex: $._config.datasourceFilterRegex,
    type: 'datasource',
  },
)
.addTemplate(clusterTemplate)
// --- Current bandwidth: two bar panels side by side, then the status table.
.addPanel(
  newBarplotPanel(
    graphTitle='Current Rate of Bytes Received',
    graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
  ),
  gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
  newBarplotPanel(
    graphTitle='Current Rate of Bytes Transmitted',
    graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
  ),
  gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
.addPanel(
  newTablePanel(
    tableTitle='Current Status',
    // Order matters: query i feeds the 'Value #<A + i>' column of the table.
    colQueries=[
      'sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
    ]
  ),
  gridPos={ h: 9, w: 24, x: 0, y: 10 }
)
// --- Average bandwidth (collapsed row carrying its own panels).
.addPanel(
  averageBandwidthRow
  .addPanel(
    newBarplotPanel(
      graphTitle='Average Rate of Bytes Received',
      graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
    ),
    gridPos={ h: 9, w: 12, x: 0, y: 11 }
  )
  .addPanel(
    newBarplotPanel(
      graphTitle='Average Rate of Bytes Transmitted',
      graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
    ),
    gridPos={ h: 9, w: 12, x: 12, y: 11 }
  ),
  gridPos={ h: 1, w: 24, x: 0, y: 10 },
)
// --- Bandwidth history: two full-width stacked graphs.
.addPanel(
  bandwidthHistoryRow, gridPos={ h: 1, w: 24, x: 0, y: 11 }
)
.addPanel(
  newGraphPanel(
    graphTitle='Receive Bandwidth',
    graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
  ),
  gridPos={ h: 9, w: 24, x: 0, y: 12 }
)
.addPanel(
  newGraphPanel(
    graphTitle='Transmit Bandwidth',
    graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
  ),
  gridPos={ h: 9, w: 24, x: 0, y: 21 }
)
// --- Packets (collapsed row carrying its own panels).
.addPanel(
  packetRow
  .addPanel(
    newGraphPanel(
      graphTitle='Rate of Received Packets',
      graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      graphFormat='pps'
    ),
    gridPos={ h: 9, w: 24, x: 0, y: 31 }
  )
  .addPanel(
    newGraphPanel(
      graphTitle='Rate of Transmitted Packets',
      graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      graphFormat='pps'
    ),
    gridPos={ h: 9, w: 24, x: 0, y: 40 }
  ),
  gridPos={ h: 1, w: 24, x: 0, y: 30 }
)
// --- Errors (collapsed row carrying its own panels). The four panels are
// all full width and 9 high, so they are stacked at y = 50, 59, 68, 77.
.addPanel(
  errorRow
  .addPanel(
    newGraphPanel(
      graphTitle='Rate of Received Packets Dropped',
      graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      graphFormat='pps'
    ),
    gridPos={ h: 9, w: 24, x: 0, y: 50 }
  )
  .addPanel(
    newGraphPanel(
      graphTitle='Rate of Transmitted Packets Dropped',
      graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
      graphFormat='pps'
    ),
    gridPos={ h: 9, w: 24, x: 0, y: 59 }
  )
  .addPanel(
    newGraphPanel(
      graphTitle='Rate of TCP Retransmits out of all sent segments',
      graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
      graphFormat='percentunit',
      legendFormat='{{instance}}'
    ) + { links: [
      {
        url: 'https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/',
        title: 'What is TCP Retransmit?',
        targetBlank: true,
      },
    ] },
    // Was y: 59, the same cell as the panel above; moved below it.
    gridPos={ h: 9, w: 24, x: 0, y: 68 }
  ).addPanel(
    newGraphPanel(
      graphTitle='Rate of TCP SYN Retransmits out of all retransmits',
      graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
      graphFormat='percentunit',
      legendFormat='{{instance}}'
    ) + { links: [
      {
        url: 'https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365',
        title: 'Why monitor SYN retransmits?',
        targetBlank: true,
      },
    ] },
    // Was y: 59 as well; placed after the TCP retransmit panel.
    gridPos={ h: 9, w: 24, x: 0, y: 77 }
  ),
  gridPos={ h: 1, w: 24, x: 0, y: 31 }
),
},
}

View file

@ -0,0 +1,463 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local tablePanel = grafana.tablePanel;
local annotation = grafana.annotation;
local singlestat = grafana.singlestat;
{
grafanaDashboards+:: {
'namespace-by-pod.json':
local newStyle(
  alias,
  colorMode=null,
  colors=[],
  dateFormat='YYYY-MM-DD HH:mm:ss',
  decimals=2,
  link=false,
  linkTooltip='Drill down',
  linkUrl='',
  thresholds=[],
  type='number',
  unit='short'
) =
  // Column style object for a table panel: each argument is stored under
  // the field of the same name. Built as a display part plus a link part.
  local display = {
    alias: alias,
    type: type,
    unit: unit,
    decimals: decimals,
    dateFormat: dateFormat,
    colorMode: colorMode,
    colors: colors,
    thresholds: thresholds,
  };
  local drillDown = {
    link: link,
    linkTooltip: linkTooltip,
    linkUrl: linkUrl,
  };
  display + drillDown;
local newGaugePanel(gaugeTitle, gaugeQuery) =
  // Gauge for gaugeQuery. The panel is created through singlestat.new and
  // then re-typed to 'gauge' with gauge field options. Unit is 'Bps' and the
  // scale runs from 0 to 10000000000.
  local series = prometheus.target(gaugeQuery) + {
    instant: null,
    intervalFactor: 1,
  };
  // Color steps of the gauge, lowest first.
  local colorSteps = [
    { color: 'dark-green', index: 0, value: null },  // from the bottom of the scale
    { color: 'dark-yellow', index: 1, value: 5000000000 },  // 5GBs
    { color: 'dark-red', index: 2, value: 7000000000 },  // 7GBs
  ];
  local gaugeOptions = {
    fieldOptions: {
      calcs: ['last'],
      defaults: {
        max: 10000000000,  // 10GBs
        min: 0,
        title: '$namespace',
        unit: 'Bps',
      },
      mappings: [],
      override: {},
      thresholds: colorSteps,
      values: false,
    },
  };
  local basePanel = singlestat.new(
    title=gaugeTitle,
    datasource='$datasource',
    format='time_series',
    height=9,
    span=12,
    min_span=12,
    decimals=0,
    valueName='current'
  );
  basePanel.addTarget(series) + {
    timeFrom: null,
    timeShift: null,
    type: 'gauge',
    options: gaugeOptions,
  };
local newGraphPanel(graphTitle, graphQuery, graphFormat='Bps') =
  // Half-width stacked line graph over time for graphQuery; series are
  // labelled with the `pod` label.
  local series = prometheus.target(graphQuery) + {
    intervalFactor: 1,
    legendFormat: '{{pod}}',
    step: 10,
  };
  local linePanel = graphPanel.new(
    title=graphTitle,
    span=12,
    datasource='$datasource',
    fill=2,
    linewidth=2,
    min_span=12,
    format=graphFormat,
    min=0,
    max=null,
    x_axis_mode='time',
    x_axis_values='total',
    lines=true,
    stack=true,
    legend_show=true,
    nullPointMode='connected'
  );
  // Fields layered on top of what graphPanel.new produced.
  local overrides = {
    legend+: { hideEmpty: true, hideZero: true },
    paceLength: 10,
    tooltip+: { sort: 2 },
  };
  linePanel.addTarget(series) + overrides;
local newTablePanel(tableTitle, colQueries) =
  // Table with one instant query per value column. Query i gets refId
  // 'A' + i, and the column styles below are keyed on the matching
  // 'Value #<refId>' field, so colQueries must follow the column order.
  local buildTarget(index, colQuery) =
    prometheus.target(colQuery, format='table', instant=true) + {
      legendFormat: '',
      step: 10,
      refId: std.char(65 + index),
    };
  local valueColumn(refId, heading, unit) = {
    field: 'Value #' + refId,
    style: newStyle(alias=heading, unit=unit),
  };
  // Columns in the order they are added to the panel.
  local columns = [
    { field: 'Time', style: newStyle(alias='Time', type='hidden') },
    valueColumn('A', 'Bandwidth Received', 'Bps'),
    valueColumn('B', 'Bandwidth Transmitted', 'Bps'),
    valueColumn('C', 'Rate of Received Packets', 'pps'),
    valueColumn('D', 'Rate of Transmitted Packets', 'pps'),
    valueColumn('E', 'Rate of Received Packets Dropped', 'pps'),
    valueColumn('F', 'Rate of Transmitted Packets Dropped', 'pps'),
    {
      // The pod cell links to the per-pod networking dashboard.
      field: 'pod',
      style: newStyle(
        alias='Pod',
        link=true,
        linkUrl='d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell'
      ),
    },
  ];
  local emptyTable = tablePanel.new(
    title=tableTitle,
    span=24,
    min_span=24,
    datasource='$datasource',
  );
  local withColumns = std.foldl(
    function(panel, column) panel.addColumn(field=column.field, style=column.style),
    columns,
    emptyTable
  );
  withColumns + {
    fill: 1,
    fontSize: '100%',
    lines: true,
    linewidth: 1,
    nullPointMode: 'null as zero',
    renderer: 'flot',
    scroll: true,
    showHeader: true,
    spaceLength: 10,
    sort: {
      col: 0,
      desc: false,
    },
    // Set directly rather than through addTarget so the refIds above are kept.
    targets: std.mapWithIndex(buildTarget, colQueries),
  };
local clusterTemplate =
  // 'cluster' variable: values of the cluster label on the cAdvisor `up`
  // series. hide is '' when $._config.showMultiCluster is set and '2'
  // otherwise.
  local visibility = if $._config.showMultiCluster then '' else '2';
  local clusterQuery = 'label_values(up{%(cadvisorSelector)s}, %(clusterLabel)s)' % $._config;
  template.new(
    name='cluster',
    datasource='$datasource',
    query=clusterQuery,
    hide=visibility,
    refresh=2
  );
local namespaceTemplate =
  // 'namespace' variable: namespaces seen on
  // container_network_receive_packets_total in the selected cluster. An
  // "All" choice is included and stands for the regex '.+'.
  // The same query text is used for both `query` and `definition`.
  local namespaceQuery = 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config;
  local base = template.new(
    name='namespace',
    datasource='$datasource',
    query=namespaceQuery,
    allValues='.+',
    current='kube-system',
    hide='',
    refresh=2,
    includeAll=true,
    sort=1
  );
  base + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    definition: namespaceQuery,
    skipUrlSync: false,
  };
local resolutionTemplate =
  // 'resolution' interval variable offering 30s / 5m / 1h, with 5m marked
  // as selected. The panel queries use it as the step of their
  // [$interval:$resolution] subquery.
  local steps = ['30s', '5m', '1h'];
  local defaultStep = '5m';
  local asOption(step) = { selected: step == defaultStep, text: step, value: step };
  template.new(
    name='resolution',
    datasource='$datasource',
    query=std.join(',', steps),
    current=defaultStep,
    hide='',
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: std.map(asOption, steps),
  };
local intervalTemplate =
  // 'interval' variable with a single fixed choice. The panel queries use it
  // as the range of their [$interval:$resolution] subquery. hide=2 is passed
  // through to template.new (presumably hiding the variable - confirm against
  // grafonnet's template.new).
  local fixedRange = '4h';
  template.new(
    name='interval',
    datasource='$datasource',
    query=fixedRange,
    // Was '5m', a value that is neither in `query` nor in `options` below, so
    // the stored selection contradicted the only selectable option.
    current=fixedRange,
    hide=2,
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      {
        selected: true,
        text: fixedRange,
        value: fixedRange,
      },
    ],
  };
// Section rows of the dashboard. Rows created with collapse=true are
// collapsed, so such a row must include its panels itself.
// Current Bandwidth
local currentBandwidthRow = row.new(title='Current Bandwidth');
// Bandwidth
local bandwidthRow = row.new(title='Bandwidth');
// Packets (collapsed)
local packetRow = row.new(title='Packets', collapse=true);
// Errors (collapsed)
local errorRow = row.new(title='Errors', collapse=true);
// Assemble the "Networking / Namespace (Pods)" dashboard: five template
// variables, the default annotation, then the panels section by section.
// All queries filter on the selected cluster and on namespace=~"$namespace".
dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Namespace (Pods)' % $._config.grafanaK8s,
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)
.addTemplate(
// Datasource picker, given as a literal template object; everything else
// refers to it as '$datasource'.
{
current: {
text: 'default',
value: $._config.datasourceName,
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: $._config.datasourceFilterRegex,
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
.addAnnotation(annotation.default)
// --- Current bandwidth: two gauges side by side (totals over the selected
// namespaces), then the per-pod status table.
.addPanel(currentBandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 0 })
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
.addPanel(
newTablePanel(
tableTitle='Current Status',
// Order matters: query i feeds the 'Value #<A + i>' column of the table.
colQueries=[
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
]
),
gridPos={ h: 9, w: 24, x: 0, y: 10 }
)
// --- Bandwidth: receive / transmit graphs side by side, one series per pod.
.addPanel(bandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 19 })
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 20 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 20 }
)
// --- Packets: collapsed row, so its panels are attached to the row itself.
.addPanel(
packetRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 30 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 30 }
),
gridPos={ h: 1, w: 24, x: 0, y: 29 }
)
// --- Errors: collapsed row with the dropped-packet graphs attached to it.
.addPanel(
errorRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 40 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 40 }
),
gridPos={ h: 1, w: 24, x: 0, y: 30 }
),
},
}

View file

@ -0,0 +1,591 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local tablePanel = grafana.tablePanel;
local annotation = grafana.annotation;
{
grafanaDashboards+:: {
'namespace-by-workload.json':
local newStyle(
  alias,
  colorMode=null,
  colors=[],
  dateFormat='YYYY-MM-DD HH:mm:ss',
  decimals=2,
  link=false,
  linkTooltip='Drill down',
  linkUrl='',
  thresholds=[],
  type='number',
  unit='short'
) =
  // Column style object for a table panel: each argument is stored under
  // the field of the same name.
  local what = { alias: alias, type: type };
  local formatting = { unit: unit, decimals: decimals, dateFormat: dateFormat };
  local highlighting = { colorMode: colorMode, colors: colors, thresholds: thresholds };
  local navigation = { link: link, linkTooltip: linkTooltip, linkUrl: linkUrl };
  what + formatting + highlighting + navigation;
local newBarplotPanel(graphTitle, graphQuery, graphFormat='Bps', legendFormat='{{namespace}}') =
  // Bar panel for graphQuery: bars instead of lines, x axis in 'series' mode
  // showing 'current' values, and a right-hand table legend sorted by the
  // current value in descending order.
  local series = prometheus.target(graphQuery) + {
    intervalFactor: 1,
    legendFormat: legendFormat,
    step: 10,
  };
  local barPanel = graphPanel.new(
    title=graphTitle,
    span=24,
    datasource='$datasource',
    fill=2,
    min_span=24,
    format=graphFormat,
    min=0,
    max=null,
    show_xaxis=false,
    x_axis_mode='series',
    x_axis_values='current',
    lines=false,
    bars=true,
    stack=false,
    legend_show=true,
    legend_values=true,
    legend_min=false,
    legend_max=false,
    legend_current=true,
    legend_avg=false,
    legend_alignAsTable=true,
    legend_rightSide=true,
    legend_sort='current',
    legend_sortDesc=true,
    nullPointMode='null'
  );
  // Fields layered on top of what graphPanel.new produced.
  local overrides = {
    legend+: { hideEmpty: true, hideZero: true },
    paceLength: 10,
    tooltip+: { sort: 2 },
  };
  barPanel.addTarget(series) + overrides;
local newGraphPanel(graphTitle, graphQuery, graphFormat='Bps') =
  // Half-width stacked line graph over time for graphQuery; series are
  // labelled with the `workload` label.
  local series = prometheus.target(graphQuery) + {
    intervalFactor: 1,
    legendFormat: '{{workload}}',
    step: 10,
  };
  local linePanel = graphPanel.new(
    title=graphTitle,
    span=12,
    datasource='$datasource',
    fill=2,
    linewidth=2,
    min_span=12,
    format=graphFormat,
    min=0,
    max=null,
    x_axis_mode='time',
    x_axis_values='total',
    lines=true,
    stack=true,
    legend_show=true,
    nullPointMode='connected'
  );
  // Fields layered on top of what graphPanel.new produced.
  local overrides = {
    legend+: { hideEmpty: true, hideZero: true },
    paceLength: 10,
    tooltip+: { sort: 2 },
  };
  linePanel.addTarget(series) + overrides;
local newTablePanel(tableTitle, colQueries) =
  // Table with one instant query per value column. Query i gets refId
  // 'A' + i, and the column styles below are keyed on the matching
  // 'Value #<refId>' field, so colQueries must follow the column order.
  local buildTarget(index, colQuery) =
    prometheus.target(colQuery, format='table', instant=true) + {
      legendFormat: '',
      step: 10,
      refId: std.char(65 + index),
    };
  local valueColumn(refId, heading, unit) = {
    field: 'Value #' + refId,
    style: newStyle(alias=heading, unit=unit),
  };
  // Columns in the order they are added to the panel.
  local columns = [
    { field: 'Time', style: newStyle(alias='Time', type='hidden') },
    valueColumn('A', 'Current Bandwidth Received', 'Bps'),
    valueColumn('B', 'Current Bandwidth Transmitted', 'Bps'),
    valueColumn('C', 'Average Bandwidth Received', 'Bps'),
    valueColumn('D', 'Average Bandwidth Transmitted', 'Bps'),
    valueColumn('E', 'Rate of Received Packets', 'pps'),
    valueColumn('F', 'Rate of Transmitted Packets', 'pps'),
    valueColumn('G', 'Rate of Received Packets Dropped', 'pps'),
    valueColumn('H', 'Rate of Transmitted Packets Dropped', 'pps'),
    {
      // The workload cell links to the per-workload networking dashboard.
      field: 'workload',
      style: newStyle(
        alias='Workload',
        link=true,
        linkUrl='d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell'
      ),
    },
  ];
  local emptyTable = tablePanel.new(
    title=tableTitle,
    span=24,
    min_span=24,
    datasource='$datasource',
  );
  local withColumns = std.foldl(
    function(panel, column) panel.addColumn(field=column.field, style=column.style),
    columns,
    emptyTable
  );
  withColumns + {
    fill: 1,
    fontSize: '90%',
    lines: true,
    linewidth: 1,
    nullPointMode: 'null as zero',
    renderer: 'flot',
    scroll: true,
    showHeader: true,
    spaceLength: 10,
    sort: {
      col: 0,
      desc: false,
    },
    // Set directly rather than through addTarget so the refIds above are kept.
    targets: std.mapWithIndex(buildTarget, colQueries),
  };
local clusterTemplate =
  // 'cluster' variable: values of the cluster label on the cAdvisor `up`
  // series. hide is '' when $._config.showMultiCluster is set and '2'
  // otherwise.
  local visibility = if $._config.showMultiCluster then '' else '2';
  local clusterQuery = 'label_values(up{%(cadvisorSelector)s}, %(clusterLabel)s)' % $._config;
  template.new(
    name='cluster',
    datasource='$datasource',
    query=clusterQuery,
    hide=visibility,
    refresh=2
  );
local namespaceTemplate =
  // 'namespace' variable: namespaces seen on
  // container_network_receive_packets_total in the selected cluster.
  // Created with includeAll=false and multi=false.
  // The same query text is used for both `query` and `definition`.
  local namespaceQuery = 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config;
  local base = template.new(
    name='namespace',
    datasource='$datasource',
    query=namespaceQuery,
    current='kube-system',
    hide='',
    refresh=2,
    includeAll=false,
    multi=false,
    sort=1
  );
  base + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    definition: namespaceQuery,
    skipUrlSync: false,
  };
// 'type' variable: workload_type values of the owner-relabel series in the
// selected cluster and namespace, with 'deployment' as the saved selection.
// The same label_values expression is used for `query` and `definition`.
local typeTemplate =
  local typeQuery = 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+"}, workload_type)' % $._config;
  template.new(
    name='type',
    datasource='$datasource',
    query=typeQuery,
    current='deployment',
    sort=0,
    refresh=2,
    hide='',
    includeAll=false,
  ) + {
    definition: typeQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'resolution' interval variable offering 30s / 5m / 1h, with 5m selected.
// The option objects are generated from the list of choices so that `query`,
// `current` and `options` cannot drift apart.
local resolutionTemplate =
  local choices = ['30s', '5m', '1h'];
  local chosen = '5m';
  template.new(
    name='resolution',
    datasource='$datasource',
    query=std.join(',', choices),
    current=chosen,
    sort=1,
    refresh=2,
    hide='',
    includeAll=false,
  ) + {
    type: 'interval',
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    options: [
      { selected: choice == chosen, text: choice, value: choice }
      for choice in choices
    ],
  };
// Hidden 'interval' variable (hide=2) whose only option is '4h'. The panel
// queries of this dashboard use it as the range of the
// [$interval:$resolution] subquery.
local intervalTemplate =
  template.new(
    name='interval',
    datasource='$datasource',
    query='4h',
    // Previously '5m', which is not one of the declared options; the saved
    // selection now agrees with the single (selected) option below.
    current='4h',
    hide=2,
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      {
        selected: true,
        text: '4h',
        value: '4h',
      },
    ],
  };
// Row titled 'Current Bandwidth' (created without the collapse flag).
local currentBandwidthRow = row.new(title='Current Bandwidth');
// Row titled 'Average Bandwidth', created collapsed; the dashboard body
// attaches the average-rate panels to this row object itself.
local averageBandwidthRow = row.new(collapse=true, title='Average Bandwidth');
// Row heading for the bandwidth-over-time graphs (created without the
// collapse flag). The title previously read 'Bandwidth HIstory'; the stray
// capital is corrected because the string is shown to dashboard users.
local bandwidthHistoryRow =
  row.new(
    title='Bandwidth History',
  );
// Row titled 'Packets', created collapsed; because it is collapsed, the
// packet-rate graphs are attached to the row object rather than the dashboard.
local packetRow = row.new(collapse=true, title='Packets');
// Row titled 'Errors', created collapsed; because it is collapsed, the
// dropped-packet graphs are attached to the row object rather than the dashboard.
local errorRow = row.new(collapse=true, title='Errors');
// Dashboard body: a datasource picker plus the cluster, namespace, type,
// resolution and interval variables, followed by the panels.
// Every panel query is formatted with $._config, uses the subquery range
// [$interval:$resolution], and joins on (namespace,pod) with
// namespace_workload_pod:kube_pod_owner:relabel to attach the workload and
// workload_type labels before aggregating by (workload).
dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Namespace (Workload)' % $._config.grafanaK8s,
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)
// Datasource variable, written as a raw object rather than via template.new.
.addTemplate(
{
current: {
text: 'default',
value: $._config.datasourceName,
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: $._config.datasourceFilterRegex,
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(typeTemplate)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
.addAnnotation(annotation.default)
// --- Current bandwidth: two bar plots (received / transmitted), summed per workload.
.addPanel(currentBandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 0 })
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Received',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
// --- Status table: the eight queries below are positional; their order must
// match the 'Value #A' .. 'Value #H' column styles set up in newTablePanel.
.addPanel(
newTablePanel(
tableTitle='Current Status',
colQueries=[
|||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
]
),
gridPos={ h: 9, w: 24, x: 0, y: 10 }
)
// --- Average bandwidth: collapsed row, so its panels are added to the row
// object; same queries as the "current" bar plots but aggregated with avg.
.addPanel(
averageBandwidthRow
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Received',
graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 20 }
)
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 20 }
),
gridPos={ h: 1, w: 24, x: 0, y: 19 },
)
// --- Bandwidth history: time-series graphs added directly to the dashboard.
// NOTE(review): the row sits at y: 29 while its graphs are placed at y: 38 and
// the following 'Packets' row at y: 39 — confirm the intended layout.
.addPanel(
bandwidthHistoryRow, gridPos={ h: 1, w: 24, x: 0, y: 29 }
)
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 38 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 38 }
)
// --- Packets: collapsed row holding the packet-rate graphs (format 'pps').
.addPanel(
packetRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 40 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 40 }
),
gridPos={ h: 1, w: 24, x: 0, y: 39 }
)
// --- Errors: collapsed row holding the dropped-packet graphs (format 'pps').
.addPanel(
errorRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 41 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace="$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace="$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 41 }
),
gridPos={ h: 1, w: 24, x: 0, y: 40 }
),
},
}

View file

@ -0,0 +1,349 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local annotation = grafana.annotation;
local singlestat = grafana.singlestat;
{
grafanaDashboards+:: {
'pod-total.json':
// Builds a gauge for a single PromQL expression. The panel is created through
// singlestat.new and then overridden to type 'gauge' with explicit
// fieldOptions: range 0 .. 10e9 in unit 'Bps', colour steps at 5e9 and 7e9,
// reduced with the 'last' calculation.
local newGaugePanel(gaugeTitle, gaugeQuery) =
  local gaugeTarget = prometheus.target(gaugeQuery) + {
    intervalFactor: 1,
    instant: null,
  };
  // Colour steps; a null value marks the base step.
  local colourSteps = [
    { index: 0, color: 'dark-green', value: null },  // from 0
    { index: 1, color: 'dark-yellow', value: 5000000000 },  // 5e9
    { index: 2, color: 'dark-red', value: 7000000000 },  // 7e9
  ];
  local fieldDefaults = {
    unit: 'Bps',
    min: 0,
    max: 10000000000,  // 10e9
    title: '$namespace: $pod',
  };
  local basePanel = singlestat.new(
    title=gaugeTitle,
    datasource='$datasource',
    format='time_series',
    valueName='current',
    decimals=0,
    height=9,
    span=12,
    min_span=12,
  );
  basePanel.addTarget(gaugeTarget) + {
    type: 'gauge',
    timeFrom: null,
    timeShift: null,
    options: {
      fieldOptions: {
        calcs: ['last'],
        defaults: fieldDefaults,
        thresholds: colourSteps,
        mappings: [],
        override: {},
        values: false,
      },
    },
  };
// Builds a stacked line graph for one PromQL expression, with legend entries
// formatted as '{{pod}}'. graphFormat is handed to graphPanel.new as `format`
// and defaults to 'Bps'.
local newGraphPanel(graphTitle, graphQuery, graphFormat='Bps') =
  local podSeries = prometheus.target(graphQuery) + {
    legendFormat: '{{pod}}',
    intervalFactor: 1,
    step: 10,
  };
  local basePanel = graphPanel.new(
    title=graphTitle,
    datasource='$datasource',
    format=graphFormat,
    span=12,
    min_span=12,
    min=0,
    max=null,
    fill=2,
    linewidth=2,
    lines=true,
    stack=true,
    nullPointMode='connected',
    legend_show=true,
    x_axis_mode='time',
    x_axis_values='total',
  );
  basePanel.addTarget(podSeries) + {
    paceLength: 10,
    legend+: { hideEmpty: true, hideZero: true },
    tooltip+: { sort: 2 },
  };
// 'cluster' variable: values of the configured cluster label on `up` series
// matching the cAdvisor selector. The hide argument is '' when
// $._config.showMultiCluster is true and '2' otherwise.
local clusterTemplate =
  local visibility = if $._config.showMultiCluster then '' else '2';
  template.new(
    name='cluster',
    datasource='$datasource',
    refresh=2,
    hide=visibility,
    query='label_values(up{%(cadvisorSelector)s}, %(clusterLabel)s)' % $._config,
  );
// 'namespace' variable with an "All" entry whose value is the regex '.+',
// and 'kube-system' as the saved selection. The same label_values expression
// is used for both `query` and `definition`.
local namespaceTemplate =
  local namespaceQuery = 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config;
  template.new(
    name='namespace',
    datasource='$datasource',
    query=namespaceQuery,
    current='kube-system',
    includeAll=true,
    allValues='.+',
    sort=1,
    refresh=2,
    hide='',
  ) + {
    definition: namespaceQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'pod' variable: pod label values of the receive-packets metric in the
// selected cluster, filtered by the namespace regex. No "All" entry is offered
// (includeAll=false) and the saved selection is empty. The same label_values
// expression is used for both `query` and `definition`.
local podTemplate =
  local podQuery = 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config;
  template.new(
    name='pod',
    datasource='$datasource',
    query=podQuery,
    current='',
    includeAll=false,
    allValues='.+',
    sort=1,
    refresh=2,
    hide='',
  ) + {
    definition: podQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'resolution' interval variable offering 30s / 5m / 1h, with 5m selected.
// The option objects are generated from the list of choices so that `query`,
// `current` and `options` cannot drift apart.
local resolutionTemplate =
  local choices = ['30s', '5m', '1h'];
  local chosen = '5m';
  template.new(
    name='resolution',
    datasource='$datasource',
    query=std.join(',', choices),
    current=chosen,
    sort=1,
    refresh=2,
    hide='',
    includeAll=false,
  ) + {
    type: 'interval',
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    options: [
      { selected: choice == chosen, text: choice, value: choice }
      for choice in choices
    ],
  };
// Hidden 'interval' variable (hide=2) whose only option is '4h'. The panel
// queries of this dashboard use it as the range of the
// [$interval:$resolution] subquery.
local intervalTemplate =
  template.new(
    name='interval',
    datasource='$datasource',
    query='4h',
    // Previously '5m', which is not one of the declared options; the saved
    // selection now agrees with the single (selected) option below.
    current='4h',
    hide=2,
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      {
        selected: true,
        text: '4h',
        value: '4h',
      },
    ],
  };
// Row titled 'Current Bandwidth' (created without the collapse flag).
local currentBandwidthRow = row.new(title='Current Bandwidth');
// Row titled 'Bandwidth' (created without the collapse flag).
local bandwidthRow = row.new(title='Bandwidth');
// Row titled 'Packets', created collapsed; because it is collapsed, the
// packet-rate graphs are attached to the row object rather than the dashboard.
local packetRow = row.new(collapse=true, title='Packets');
// Row titled 'Errors', created collapsed; because it is collapsed, the
// dropped-packet graphs are attached to the row object rather than the dashboard.
local errorRow = row.new(collapse=true, title='Errors');
// Dashboard body: a datasource picker plus the cluster, namespace, pod,
// resolution and interval variables, followed by the panels.
// Every panel query is formatted with $._config, filters by the namespace and
// pod regexes, and uses the subquery range [$interval:$resolution].
dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Pod' % $._config.grafanaK8s,
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)
// Datasource variable, written as a raw object rather than via template.new.
.addTemplate(
{
current: {
text: 'default',
value: $._config.datasourceName,
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: $._config.datasourceFilterRegex,
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(podTemplate)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
.addAnnotation(annotation.default)
// --- Current bandwidth: two gauges; the sum has no `by` clause, so each gauge
// shows one total over all matching pods.
.addPanel(currentBandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 0 })
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
// --- Bandwidth: time-series graphs, one series per pod (`by (pod)`).
.addPanel(bandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 10 })
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 11 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 11 }
)
// --- Packets: collapsed row, so its graphs are added to the row object.
.addPanel(
packetRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 21 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 21 }
),
gridPos={ h: 1, w: 24, x: 0, y: 20 }
)
// --- Errors: collapsed row holding the dropped-packet graphs.
// NOTE(review): this row is placed at y: 21, the same y as the packet graphs
// above, while its own graphs use y: 32 — confirm the intended layout.
.addPanel(
errorRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 32 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 32 }
),
gridPos={ h: 1, w: 24, x: 0, y: 21 }
),
},
}

View file

@ -0,0 +1,427 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local annotation = grafana.annotation;
{
grafanaDashboards+:: {
'workload-total.json':
// Builds a bar plot of current values (x axis in 'series' mode, bars on,
// lines off) for one PromQL expression, with a right-hand table legend sorted
// by current value, descending. graphFormat is handed to graphPanel.new as
// `format` (default 'Bps'); legendFormat is the series legend template
// (default '{{namespace}}').
local newBarplotPanel(graphTitle, graphQuery, graphFormat='Bps', legendFormat='{{namespace}}') =
  local barSeries = prometheus.target(graphQuery) + {
    legendFormat: legendFormat,
    intervalFactor: 1,
    step: 10,
  };
  local basePanel = graphPanel.new(
    title=graphTitle,
    datasource='$datasource',
    format=graphFormat,
    span=24,
    min_span=24,
    min=0,
    max=null,
    fill=2,
    bars=true,
    lines=false,
    stack=false,
    nullPointMode='null',
    show_xaxis=false,
    x_axis_mode='series',
    x_axis_values='current',
    legend_show=true,
    legend_values=true,
    legend_current=true,
    legend_min=false,
    legend_max=false,
    legend_avg=false,
    legend_alignAsTable=true,
    legend_rightSide=true,
    legend_sort='current',
    legend_sortDesc=true,
  );
  basePanel.addTarget(barSeries) + {
    paceLength: 10,
    legend+: { hideEmpty: true, hideZero: true },
    tooltip+: { sort: 2 },
  };
// Builds a stacked line graph for one PromQL expression, with legend entries
// formatted as '{{pod}}'. graphFormat is handed to graphPanel.new as `format`
// and defaults to 'Bps'.
local newGraphPanel(graphTitle, graphQuery, graphFormat='Bps') =
  local podSeries = prometheus.target(graphQuery) + {
    legendFormat: '{{pod}}',
    intervalFactor: 1,
    step: 10,
  };
  local basePanel = graphPanel.new(
    title=graphTitle,
    datasource='$datasource',
    format=graphFormat,
    span=12,
    min_span=12,
    min=0,
    max=null,
    fill=2,
    linewidth=2,
    lines=true,
    stack=true,
    nullPointMode='connected',
    legend_show=true,
    x_axis_mode='time',
    x_axis_values='total',
  );
  basePanel.addTarget(podSeries) + {
    paceLength: 10,
    legend+: { hideEmpty: true, hideZero: true },
    tooltip+: { sort: 2 },
  };
// 'cluster' variable: values of the configured cluster label on kube_pod_info
// series matching the kube-state-metrics selector. The hide argument is ''
// when $._config.showMultiCluster is true and '2' otherwise.
local clusterTemplate =
  local visibility = if $._config.showMultiCluster then '' else '2';
  template.new(
    name='cluster',
    datasource='$datasource',
    refresh=2,
    hide=visibility,
    query='label_values(kube_pod_info{%(kubeStateMetricsSelector)s}, %(clusterLabel)s)' % $._config,
  );
// 'namespace' variable with an "All" entry whose value is the regex '.+',
// and 'kube-system' as the saved selection. The same label_values expression
// (restricted by the cAdvisor selector) is used for `query` and `definition`.
local namespaceTemplate =
  local namespaceQuery = 'label_values(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster"}, namespace)' % $._config;
  template.new(
    name='namespace',
    datasource='$datasource',
    query=namespaceQuery,
    current='kube-system',
    includeAll=true,
    allValues='.+',
    sort=1,
    refresh=2,
    hide='',
  ) + {
    definition: namespaceQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'workload' variable: workload label values of the owner-relabel series in
// the selected cluster, filtered by the namespace regex. No "All" entry is
// offered and the saved selection is empty. The same label_values expression
// is used for both `query` and `definition`.
local workloadTemplate =
  local workloadQuery = 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config;
  template.new(
    name='workload',
    datasource='$datasource',
    query=workloadQuery,
    current='',
    includeAll=false,
    sort=1,
    refresh=2,
    hide='',
  ) + {
    definition: workloadQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'type' variable: workload_type values of the owner-relabel series matching
// the selected cluster, namespace regex and workload regex, with 'deployment'
// as the saved selection. The same label_values expression is used for both
// `query` and `definition`.
local typeTemplate =
  local typeQuery = 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config;
  template.new(
    name='type',
    datasource='$datasource',
    query=typeQuery,
    current='deployment',
    includeAll=false,
    sort=0,
    refresh=2,
    hide='',
  ) + {
    definition: typeQuery,
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
  };
// 'resolution' interval variable offering 30s / 5m / 1h, with 5m selected.
// The option objects are generated from the list of choices so that `query`,
// `current` and `options` cannot drift apart.
local resolutionTemplate =
  local choices = ['30s', '5m', '1h'];
  local chosen = '5m';
  template.new(
    name='resolution',
    datasource='$datasource',
    query=std.join(',', choices),
    current=chosen,
    sort=1,
    refresh=2,
    hide='',
    includeAll=false,
  ) + {
    type: 'interval',
    skipUrlSync: false,
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    options: [
      { selected: choice == chosen, text: choice, value: choice }
      for choice in choices
    ],
  };
// Hidden 'interval' variable (hide=2) whose only option is '4h'. The panel
// queries of this dashboard use it as the range of the
// [$interval:$resolution] subquery.
local intervalTemplate =
  template.new(
    name='interval',
    datasource='$datasource',
    query='4h',
    // Previously '5m', which is not one of the declared options; the saved
    // selection now agrees with the single (selected) option below.
    current='4h',
    hide=2,
    refresh=2,
    includeAll=false,
    sort=1
  ) + {
    auto: false,
    auto_count: 30,
    auto_min: '10s',
    skipUrlSync: false,
    type: 'interval',
    options: [
      {
        selected: true,
        text: '4h',
        value: '4h',
      },
    ],
  };
// Row titled 'Current Bandwidth' (created without the collapse flag).
local currentBandwidthRow = row.new(title='Current Bandwidth');
// Row titled 'Average Bandwidth', created collapsed; the dashboard body
// attaches the average-rate panels to this row object itself.
local averageBandwidthRow = row.new(collapse=true, title='Average Bandwidth');
// Row heading for the bandwidth-over-time graphs (created without the
// collapse flag). The title previously read 'Bandwidth HIstory'; the stray
// capital is corrected because the string is shown to dashboard users.
local bandwidthHistoryRow =
  row.new(
    title='Bandwidth History',
  );
// Row titled 'Packets', created collapsed; because it is collapsed, the
// packet-rate graphs are attached to the row object rather than the dashboard.
local packetRow = row.new(collapse=true, title='Packets');
// Row titled 'Errors', created collapsed; because it is collapsed, the
// dropped-packet graphs are attached to the row object rather than the dashboard.
local errorRow = row.new(collapse=true, title='Errors');
// Dashboard body: a datasource picker plus the cluster, namespace, workload,
// type, resolution and interval variables, followed by the panels.
// Every panel query is formatted with $._config, uses the subquery range
// [$interval:$resolution], and joins on (namespace,pod) with
// namespace_workload_pod:kube_pod_owner:relabel (filtered by $workload and
// $type) before aggregating by (pod).
dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Workload' % $._config.grafanaK8s,
tags=($._config.grafanaK8s.dashboardTags),
editable=true,
schemaVersion=18,
refresh=($._config.grafanaK8s.refresh),
time_from='now-1h',
time_to='now',
)
// Datasource variable, written as a raw object rather than via template.new.
.addTemplate(
{
current: {
text: 'default',
value: $._config.datasourceName,
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: $._config.datasourceFilterRegex,
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(workloadTemplate)
.addTemplate(typeTemplate)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
.addAnnotation(annotation.default)
// --- Current bandwidth: two bar plots (received / transmitted), summed per pod.
.addPanel(currentBandwidthRow, gridPos={ h: 1, w: 24, x: 0, y: 0 })
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Received',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
// --- Average bandwidth: collapsed row, so its panels are added to the row
// object; same queries as the "current" bar plots but aggregated with avg.
.addPanel(
averageBandwidthRow
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Received',
graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 11 }
)
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 11 }
),
gridPos={ h: 1, w: 24, x: 0, y: 10 },
)
// --- Bandwidth history: time-series graphs added directly to the dashboard.
// NOTE(review): this row is placed at y: 11, the same y as the average-rate
// panels inside the collapsed row above — confirm the intended layout.
.addPanel(
bandwidthHistoryRow, gridPos={ h: 1, w: 24, x: 0, y: 11 }
)
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 12 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 12 }
)
// --- Packets: collapsed row holding the packet-rate graphs (format 'pps').
.addPanel(
packetRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 22 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 22 }
),
gridPos={ h: 1, w: 24, x: 0, y: 21 }
)
// --- Errors: collapsed row holding the dropped-packet graphs (format 'pps').
.addPanel(
errorRow
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 23 }
)
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 23 }
),
gridPos={ h: 1, w: 24, x: 0, y: 22 }
),
},
}

View file

@ -0,0 +1,5 @@
// Object-merges (`+`) the five network-usage libsonnet files into a single
// value. NOTE(review): each file is expected to contribute its own entry under
// grafanaDashboards — confirm for the files not shown here.
(import 'network-usage/cluster-total.libsonnet') +
(import 'network-usage/namespace-by-workload.libsonnet') +
(import 'network-usage/namespace-by-pod.libsonnet') +
(import 'network-usage/pod-total.libsonnet') +
(import 'network-usage/workload-total.libsonnet')

View file

@ -0,0 +1,172 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
local gauge = promgrafonnet.gauge;
{
grafanaDashboards+:: {
'persistentvolumesusage.json':
local sizeGraph =
  (
    // Stacked byte graph for the PVC picked by $namespace / $volume: one
    // series for used space (capacity minus available) and one for free space.
    // NOTE(review): topk(1, ...) combined with `sum without(instance, node)`
    // presumably collapses duplicate reports of the same volume — confirm.
    local usedSpaceTarget = prometheus.target(
      |||
        (
        sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
        -
        sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
        )
      ||| % $._config,
      intervalFactor=1,
      legendFormat='Used Space',
    );
    local freeSpaceTarget = prometheus.target(
      |||
        sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
      ||| % $._config,
      intervalFactor=1,
      legendFormat='Free Space',
    );
    graphPanel.new(
      'Volume Space Usage',
      datasource='$datasource',
      format='bytes',
      span=9,
      min=0,
      stack=true,
      // Legend: shown below the graph as a table with min/max/avg/current.
      legend_show=true,
      legend_alignAsTable=true,
      legend_rightSide=false,
      legend_values=true,
      legend_min=true,
      legend_max=true,
      legend_avg=true,
      legend_current=true,
      legend_total=false,
    )
    .addTarget(usedSpaceTarget)
    .addTarget(freeSpaceTarget)
  );
local sizeGauge =
  (
    // Gauge of used space as a percentage of capacity for the selected PVC:
    // (capacity - available) / capacity * 100.
    local usedPercentExpr =
      |||
        max without(instance,node) (
        (
        topk(1, kubelet_volume_stats_capacity_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})
        -
        topk(1, kubelet_volume_stats_available_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})
        )
        /
        topk(1, kubelet_volume_stats_capacity_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})
        * 100)
      ||| % $._config;
    local panel = gauge.new('Volume Space Usage', usedPercentExpr);
    // NOTE(review): withLowerBeingBetter() comes from promgrafonnet; it
    // presumably flips the threshold colouring — confirm in that library.
    panel.withLowerBeingBetter()
  );
// Stacked graph of used vs. free inodes for the PVC selected by the
// $namespace and $volume template variables.
local inodesGraph = graphPanel.new(
  'Volume inodes Usage',
  datasource='$datasource',
  format='none',
  min=0,
  span=9,
  stack=true,
  legend_show=true,
  legend_values=true,
  legend_min=true,
  legend_max=true,
  legend_current=true,
  legend_total=false,
  legend_avg=true,
  legend_alignAsTable=true,
  legend_rightSide=false,
).addTarget(prometheus.target(
  |||
    sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
  ||| % $._config,
  legendFormat='Used inodes',
  intervalFactor=1,
)).addTarget(prometheus.target(
  // Free inodes = total inodes minus used inodes.
  |||
    (
    sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
    -
    sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})))
    )
  ||| % $._config,
  // Legend label without a stray leading space, consistent with 'Used inodes'.
  legendFormat='Free inodes',
  intervalFactor=1,
));
local inodeGauge =
  (
    // Gauge of inode usage for the selected PVC: used / total * 100.
    local inodesUsedPercentExpr =
      |||
        max without(instance,node) (
        topk(1, kubelet_volume_stats_inodes_used{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})
        /
        topk(1, kubelet_volume_stats_inodes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace", persistentvolumeclaim="$volume"})
        * 100)
      ||| % $._config;
    local panel = gauge.new('Volume inodes Usage', inodesUsedPercentExpr);
    // NOTE(review): withLowerBeingBetter() comes from promgrafonnet; it
    // presumably flips the threshold colouring — confirm in that library.
    panel.withLowerBeingBetter()
  );
// Assemble the dashboard: four template variables (datasource, cluster,
// namespace, volume) followed by one row for space and one row for inodes.

// Datasource picker, written as a raw template object.
local datasourceTemplate = {
  name: 'datasource',
  label: 'Data Source',
  type: 'datasource',
  query: 'prometheus',
  regex: $._config.datasourceFilterRegex,
  current: {
    text: 'default',
    value: $._config.datasourceName,
  },
  options: [],
  refresh: 1,
  hide: 0,
};

// Cluster variable; its `hide` value depends on $._config.showMultiCluster.
local clusterTemplate = template.new(
  'cluster',
  '$datasource',
  'label_values(kubelet_volume_stats_capacity_bytes{%(kubeletSelector)s}, %(clusterLabel)s)' % $._config,
  label='cluster',
  hide=if $._config.showMultiCluster then '' else 'variable',
  refresh='time',
  sort=1,
);

// Namespaces that report volume capacity in the chosen cluster.
local namespaceTemplate = template.new(
  'namespace',
  '$datasource',
  'label_values(kubelet_volume_stats_capacity_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s}, namespace)' % $._config,
  label='Namespace',
  refresh='time',
  sort=1,
);

// PersistentVolumeClaims within the chosen namespace.
local volumeTemplate = template.new(
  'volume',
  '$datasource',
  'label_values(kubelet_volume_stats_capacity_bytes{%(clusterLabel)s="$cluster", %(kubeletSelector)s, namespace="$namespace"}, persistentvolumeclaim)' % $._config,
  label='PersistentVolumeClaim',
  refresh='time',
  sort=1,
);

local spaceRow = row.new().addPanel(sizeGraph).addPanel(sizeGauge);
local inodesRow = row.new().addPanel(inodesGraph).addPanel(inodeGauge);

dashboard.new(
  '%(dashboardNamePrefix)sPersistent Volumes' % $._config.grafanaK8s,
  uid=($._config.grafanaDashboardIDs['persistentvolumesusage.json']),
  tags=($._config.grafanaK8s.dashboardTags),
  time_from='now-7d',
)
.addTemplate(datasourceTemplate)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(volumeTemplate)
.addRow(spaceRow)
.addRow(inodesRow),
},
}

View file

@ -0,0 +1,202 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local singlestat = grafana.singlestat;
{
grafanaDashboards+:: {
'proxy.json':
local upCount =
  (
    // Single-stat panel: sum of `up` over the targets matched by
    // kubeProxySelector in the selected cluster.
    local upQuery = 'sum(up{%(clusterLabel)s="$cluster", %(kubeProxySelector)s})' % $._config;
    singlestat.new(
      'Up',
      valueName='min',
      span=2,
      datasource='$datasource',
    ).addTarget(prometheus.target(upQuery))
  );
local rulesSyncRate =
  (
    // Graph of the summed per-second rate of proxy rule syncs for the
    // selected instances.
    local syncRateQuery = 'sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s]))' % $._config;
    local syncRateTarget = prometheus.target(syncRateQuery, legendFormat='rate');
    graphPanel.new(
      'Rules Sync Rate',
      datasource='$datasource',
      format='ops',
      min=0,
      span=5,
    ).addTarget(syncRateTarget)
  );
local rulesSyncLatency =
  (
    // Graph of the 99th-percentile rule sync duration, one series per instance.
    local syncLatencyQuery = 'histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s]))' % $._config;
    local syncLatencyTarget = prometheus.target(syncLatencyQuery, legendFormat='{{instance}}');
    graphPanel.new(
      'Rule Sync Latency 99th Quantile',
      datasource='$datasource',
      format='s',
      min=0,
      span=5,
      // Legend: right-hand table showing current values.
      legend_show=true,
      legend_alignAsTable=true,
      legend_rightSide=true,
      legend_values=true,
      legend_current=true,
    ).addTarget(syncLatencyTarget)
  );
local networkProgrammingRate =
  (
    // Graph of the summed per-second rate of network programming operations
    // for the selected instances.
    local programmingRateQuery = 'sum(rate(kubeproxy_network_programming_duration_seconds_count{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s]))' % $._config;
    local programmingRateTarget = prometheus.target(programmingRateQuery, legendFormat='rate');
    graphPanel.new(
      'Network Programming Rate',
      datasource='$datasource',
      format='ops',
      min=0,
      span=6,
    ).addTarget(programmingRateTarget)
  );
local networkProgrammingLatency =
  (
    // Graph of the 99th-percentile network programming duration, aggregated
    // by (instance, le) so each instance gets its own series.
    local programmingLatencyQuery = 'histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance"}[%(grafanaIntervalVar)s])) by (instance, le))' % $._config;
    local programmingLatencyTarget = prometheus.target(programmingLatencyQuery, legendFormat='{{instance}}');
    graphPanel.new(
      'Network Programming Latency 99th Quantile',
      datasource='$datasource',
      format='s',
      min=0,
      span=6,
      // Legend: right-hand table showing current values.
      legend_show=true,
      legend_alignAsTable=true,
      legend_rightSide=true,
      legend_values=true,
      legend_current=true,
    ).addTarget(programmingLatencyTarget)
  );
local rpcRate =
  (
    // Graph of the REST client request rate, one series per HTTP status
    // class (2xx, 3xx, 4xx, 5xx). Targets are added in that order.
    local target2xx = prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance",code=~"2.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='2xx');
    local target3xx = prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance",code=~"3.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='3xx');
    local target4xx = prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance",code=~"4.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='4xx');
    local target5xx = prometheus.target('sum(rate(rest_client_requests_total{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance",code=~"5.."}[%(grafanaIntervalVar)s]))' % $._config, legendFormat='5xx');
    graphPanel.new(
      'Kube API Request Rate',
      datasource='$datasource',
      format='ops',
      span=4,
    )
    .addTarget(target2xx)
    .addTarget(target3xx)
    .addTarget(target4xx)
    .addTarget(target5xx)
  );
local postRequestLatency =
  (
    // Graph of the 99th-percentile REST client latency for POST requests,
    // one series per (verb, url).
    local postLatencyQuery = 'histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeProxySelector)s,instance=~"$instance",verb="POST"}[%(grafanaIntervalVar)s])) by (verb, url, le))' % $._config;
    local postLatencyTarget = prometheus.target(postLatencyQuery, legendFormat='{{verb}} {{url}}');
    graphPanel.new(
      'Post Request Latency 99th Quantile',
      datasource='$datasource',
      min=0,
      format='s',
      span=8,
    ).addTarget(postLatencyTarget)
  );
local getRequestLatency =
  (
    // Full-width graph of the 99th-percentile REST client latency for GET
    // requests, one series per (verb, url).
    local getLatencyQuery = 'histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{%(clusterLabel)s="$cluster", %(kubeProxySelector)s, instance=~"$instance", verb="GET"}[%(grafanaIntervalVar)s])) by (verb, url, le))' % $._config;
    local getLatencyTarget = prometheus.target(getLatencyQuery, legendFormat='{{verb}} {{url}}');
    graphPanel.new(
      'Get Request Latency 99th Quantile',
      datasource='$datasource',
      min=0,
      format='s',
      span=12,
      // Legend: right-hand table showing current values.
      legend_show=true,
      legend_alignAsTable=true,
      legend_rightSide=true,
      legend_values=true,
      legend_current=true,
    ).addTarget(getLatencyTarget)
  );
local memory =
  (
    // Graph of resident memory per selected instance.
    local memoryQuery = 'process_resident_memory_bytes{%(clusterLabel)s="$cluster", %(kubeProxySelector)s,instance=~"$instance"}' % $._config;
    local memoryTarget = prometheus.target(memoryQuery, legendFormat='{{instance}}');
    graphPanel.new(
      'Memory',
      datasource='$datasource',
      format='bytes',
      span=4,
    ).addTarget(memoryTarget)
  );
local cpu =
  (
    // Graph of the per-second rate of process CPU seconds per selected instance.
    local cpuQuery = 'rate(process_cpu_seconds_total{%(clusterLabel)s="$cluster", %(kubeProxySelector)s,instance=~"$instance"}[%(grafanaIntervalVar)s])' % $._config;
    local cpuTarget = prometheus.target(cpuQuery, legendFormat='{{instance}}');
    graphPanel.new(
      'CPU usage',
      datasource='$datasource',
      min=0,
      format='short',
      span=4,
    ).addTarget(cpuTarget)
  );
local goroutines =
  (
    // Graph of the goroutine count per selected instance.
    local goroutinesQuery = 'go_goroutines{%(clusterLabel)s="$cluster", %(kubeProxySelector)s,instance=~"$instance"}' % $._config;
    local goroutinesTarget = prometheus.target(goroutinesQuery, legendFormat='{{instance}}');
    graphPanel.new(
      'Goroutines',
      datasource='$datasource',
      format='short',
      span=4,
    ).addTarget(goroutinesTarget)
  );
// Assemble the dashboard: three template variables (datasource, cluster,
// instance) followed by five rows built from the panels defined above.
dashboard.new(
  '%(dashboardNamePrefix)sProxy' % $._config.grafanaK8s,
  time_from='now-1h',
  uid=($._config.grafanaDashboardIDs['proxy.json']),
  tags=($._config.grafanaK8s.dashboardTags),
).addTemplate(
  // Datasource picker, written as a raw template object.
  {
    current: {
      text: 'default',
      value: $._config.datasourceName,
    },
    hide: 0,
    label: 'Data Source',
    name: 'datasource',
    options: [],
    query: 'prometheus',
    refresh: 1,
    regex: $._config.datasourceFilterRegex,
    type: 'datasource',
  },
)
.addTemplate(
  // Cluster variable; its `hide` value depends on $._config.showMultiCluster.
  template.new(
    'cluster',
    '$datasource',
    'label_values(up{%(kubeProxySelector)s}, %(clusterLabel)s)' % $._config,
    label='cluster',
    refresh='time',
    hide=if $._config.showMultiCluster then '' else 'variable',
    sort=1,
  )
)
.addTemplate(
  // Instance variable (multi-value via includeAll). The selector lists
  // kubeProxySelector once, in the same order as the panel queries; the
  // previous query repeated the same matcher twice.
  template.new(
    'instance',
    '$datasource',
    'label_values(up{%(clusterLabel)s="$cluster", %(kubeProxySelector)s}, instance)' % $._config,
    refresh='time',
    includeAll=true,
    sort=1,
  )
)
.addRow(
  // Availability and rule sync behaviour.
  row.new()
  .addPanel(upCount)
  .addPanel(rulesSyncRate)
  .addPanel(rulesSyncLatency)
).addRow(
  // Network programming rate and latency.
  row.new()
  .addPanel(networkProgrammingRate)
  .addPanel(networkProgrammingLatency)
).addRow(
  // REST client request rate and POST latency.
  row.new()
  .addPanel(rpcRate)
  .addPanel(postRequestLatency)
).addRow(
  // REST client GET latency on its own row.
  row.new()
  .addPanel(getRequestLatency)
).addRow(
  // Process-level resource usage.
  row.new()
  .addPanel(memory)
  .addPanel(cpu)
  .addPanel(goroutines)
),
},
}

Some files were not shown because too many files have changed in this diff Show more