update monitoring stack
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Tobias Brunner 2020-12-11 21:32:59 +01:00
parent 5ffa599b0c
commit 1464357954
157 changed files with 9537 additions and 2468 deletions

View File

@ -8,8 +8,8 @@
"subdir": "grafana"
}
},
"version": "d7c1a53462ecd533593c60e5277b92fbf7ea7623",
"sum": "8OnIwMhzWtgoWYHNrDlkzUAMr/CPsWKauYEv0vnH1zs="
"version": "8024f4fdaeb3a3a7d72f77e2ed87deb92c79aeda",
"sum": "WXrJQtWuU5lJVc4jXkJGddPMpPP0+4eMcIB5cauZGgM="
},
{
"source": {
@ -18,8 +18,8 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "e42127658c910d91e7902be958f12d41ac33d54f",
"sum": "L+PGlPK9mykGCJ9TIoEWdhMBjz+9lKuQ4YZ8fOeP9sk="
"version": "ca866c02422ff3f3d1f0876898a30c33dd7bcccf",
"sum": "bLqTqEr0jky9zz5MV/7ucn6H5mph2NlXas0TVnGNB1Y="
},
{
"source": {
@ -28,8 +28,8 @@
"subdir": "grafonnet"
}
},
"version": "8d382c732dbdc839ff07549a3f42d25828f1b268",
"sum": "DRSRw4luAXlBXblo19/T1Jrv+9hyV8ivlS0KEtNANec="
"version": "356bd73e4792ffe107725776ca8946895969c191",
"sum": "CSMZ3dJrpJpwvffie8BqcfrIVVwiKNqdPEN+1XWRBGU="
},
{
"source": {
@ -38,8 +38,8 @@
"subdir": "grafana-builder"
}
},
"version": "b5e45051995755ea373ea67642f8e5f54fcb8dd7",
"sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc="
"version": "216bc806bb512f218e3cf5ed3d4f5699b07f04d6",
"sum": "9/eJqljTTtJeq9QRjabdKWL6yD8a7VzLmGKBK3ir77k="
},
{
"source": {
@ -59,8 +59,8 @@
"subdir": ""
}
},
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
"sum": "ttkPUnv/5bqlOFcZ8fvp2wi/S7ZLKiqAZ4ZdTolX77M="
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
"sum": "3i0NkntlBluDS1NRF+iSc2e727Alkv3ziuVjAP12/kE="
},
{
"source": {
@ -69,7 +69,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
@ -79,8 +79,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
"sum": "ySP+bI2ZMLPt/sguSh9WrwI5H5dasaNFRE8Uo9PcZrI="
"version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309",
"sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI="
},
{
"source": {
@ -89,7 +89,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
"version": "7bdd62593c9273b5179cf3c9d2d819e9d997aaa4",
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
},
{
@ -99,8 +99,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "980e95de011319b88a3b9c0787a81dcdf338a898",
"sum": "BxOXyWCSc9KkgWJXDau2Xtsy3aOYZDHz2VqOSLga7VU="
"version": "7d7d40b4dee70ecd3328dcdee2ed0cc8f806df93",
"sum": "6PhhQPWilq4skfe+z/hXKEg1pRqHnwvMR1Au6W136U0="
},
{
"source": {
@ -109,8 +109,8 @@
"subdir": "jsonnet/mixin"
}
},
"version": "55baf034c431ed2c78d950b187f7d8b34dd06860",
"sum": "+Q45oBC7O8g7KQOaiKhGglwndAMWRlLTR94KUI8Q1Ko="
"version": "117c9a2cd905479022a66ddd92a41f599cccf10d",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U="
},
{
"source": {
@ -119,8 +119,19 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "cd331ce9bb58bb926e391c6ae807621cb12cc29e",
"sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk="
"version": "d8b7d3766225908d0239fd0d78258892cd0fc384",
"sum": "Nl+N/h76bzD9tZ8tx7tuNIKHwCIJ9zyOsAWplH8HvAE="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "193ebba04d1e70d971047e983a0b489112610460",
"sum": "QcftU7gjCQyj7B6M4YJeCAeaPd0kwxd4J4rolo7AnLE=",
"name": "alertmanager"
},
{
"source": {
@ -129,8 +140,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "f81747e608ea85ae44e76454eb63f9cb6484fb9e",
"sum": "VyMzZPxQIjiKQYGjZjXeKNWfLJ9vOl3emp84PWfsrUc="
"version": "8b466360a35581e0301bd22918be7011cf4203c3",
"sum": "rvyiD/yCB4BeYAWqYF53bP8c+aCUt2ipLHW2Ea8ELO8="
},
{
"source": {
@ -139,10 +150,20 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "983ebb4a513302315a8117932ab832815f85e3d2",
"sum": "TBq4SL7YsPInARbJqwz25JaBvvAegcnRCsuz3K9niWc=",
"version": "26d89b4b0776fe4cd5a3656dfa520f119a375273",
"sum": "1VRVMuxAEZ9vdGHFlndmG9iQzDD6AoIXrX80CDpGDaU=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos",
"subdir": "mixin"
}
},
"version": "37e6ef61566c7c70793ba6d128f00c4c66cb2402",
"sum": "OptiWUMOHFrRGTZhSfxV1RCeXZ90qsefGNTD4lDYVG0="
},
{
"source": {
"git": {

View File

@ -1,5 +1,4 @@
apiVersion: v1
data: {}
kind: Secret
metadata:
name: alertmanager-main

View File

@ -0,0 +1,17 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgDHh1Qgrdffl6IFXJuk3ZzNHGARWZLDzbMLKp5Wo/ZYqclCji4T5wx7Fql6FALCvvUBvThxLfiwM2LQHRcWfWnf5AwxipCxpos9oVvlu4aON0WZd/Kjz/ZwDq5pgR/etCrSA2DYNxYq4vnTxUAk29eY5F4dWwRPcCgVZ5/KsTIcNx4x+4einqQbwAhkUtAwQl5fEPmpwNRquJZM29XIEUxZLWne0YmCmEgNGleUU20ByfYMwgtDJGjgr6XNPtTmByAHVrxNuQwAMxgT6GcfDLCNsByaS6CY3wmSTg1GUv/CG8Xx29FwDWyf1Ly2KbmcAAafN5QJGvCCTEt/WB85GtzQisrWFZTykv3Zjuz101p9ShXQZALylaX5h22hHFXuQyiIQZEeM2ixiYQjcPhiPjx1/hkbQ25QRD73/gjalZO8bprDrJxkLlw+hrgJ0LzxWL881U6INLKow+8/GmLleFhMUXRsGqacLreCIAr4uVGEMGMVLhHJKnj597HRnn0UCxVNkDk8QjHyiVgJBrQ3Pz9SFdF7mxvJ9F4rEgGkE4dvfvWxrZFumTLEkVRF9To+rKxsIVkewvoHtN/gMzFMzumP+fz/oB9yAHsxkwVyfqXBg52hNSYIx5Z/67yy3hDRKPBcZgknf9S+F37ET5BABFxazwG8NJjf4td+UsAGuAMzKI/94u7TxuXLPCs/tIGKD7kJnPxAqpalepzABtVCmOrtWwNPb1h4XeuraUS9beJ2zV9oV5nVFJmX94EJ7qpZt0Um7+GGeavQ5SV3XHRolDS5PpZPTAWnc/1rtZ0nsKk8lllEr3aDWveMXma06NKkIXz8+iAonvHsDZuw0W6jUdUUtraIbSua9YkyugqCBGeeXIPLwFxqJTqIX5vedZVMveFiaxtCJjL48SUGxtyugfiYbPa3xpHWWe22BcJyTmAOG9aIq4Tp4nvftLyvWe7c9PotJk/7gdv1IO4RLx//eLtKWw0uADa0ara4hDuI8Yktlti24TlA9XYz00d5WtE+lJsSZN8547BUfFzXSOZSSbfrFLZmEmBTgkbj4szX19bXSctJN3BtOmRfCEPXYQN10HgnhpwqYHbXKUSTZNWLojnFL1/E56wUXGxRg9NGOwSXzTyfoLGxI9NEQpGc0Rj2Wna+JSUhlAUnfYW1eH8yyg5FfkyhQdyZJFvYfF0rk+XG5XNhLumST19uxrAkMWhk+Z9/eWwOaZQMmDcoi2Rs0za+1GGjPW5k56Ip+spwW5cvYmdl1PgkZ4g1mupjiB0FdgZHGR+kGn1lbPtSUd+amh9PXSDWkqfnix62H7374rQ3ZyG7fs9sQNnnRrd/cDCMxAl5Upk8D9dfxRmvuxRd8b89h7EQwUBML7TIriA2Pci5Ftux2R5wyIXjznLC5/kFZg6/Av3uKmKK6dLR2Ooey7/3g14CEjMumdijjySl8Pd2UUxSKVKD7vkq+3xYm0CJZqVvT/iBOccrv0UEiTHBsXrfaugUvqIKTAGYhJy0fUBXKisPdA0HdzrUmx57Du36TGyuEzGtVuDarcWzQYPqKJxOIuofJ+AGTDY53OjdUJ8pwJD6HDz55tu85gaV6ZOvSYqjqeX2FUe7lPhsGUIh/FemfichpypHyFpPYhkwAIO1AinKvsqjUuDXE6n5b7NMbI1gl87fPqT5wUSKXZqwViyFqUA5DFqPTEqvHIGU5Wz0GajEaQ==
template:
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
type: Opaque
status: {}

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@ spec:
- env:
- name: GF_INSTALL_PLUGINS
value: grafana-piechart-panel
image: grafana/grafana:7.1.0
image: grafana/grafana:7.3.5
name: grafana
ports:
- containerPort: 3000

View File

@ -12,3 +12,4 @@ spec:
targetPort: http
selector:
app: grafana
type: NodePort

View File

@ -3,7 +3,7 @@ kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
name: kube-state-metrics
rules:
- apiGroups:
@ -30,6 +30,7 @@ rules:
- daemonsets
- deployments
- replicasets
- ingresses
verbs:
- list
- watch
@ -104,14 +105,6 @@ rules:
- networking.k8s.io
resources:
- networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch

View File

@ -3,7 +3,7 @@ kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
name: kube-state-metrics
namespace: monitoring
spec:
@ -15,7 +15,7 @@ spec:
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
spec:
containers:
- args:
@ -25,32 +25,34 @@ spec:
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.9.7
name: kube-state-metrics
securityContext:
runAsUser: 65534
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8081/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
securityContext:
runAsUser: 65534
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
- args:
- --logtostderr
- --secure-listen-address=:9443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8082/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
securityContext:
runAsUser: 65534
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics

View File

@ -3,7 +3,7 @@ kind: Service
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
name: kube-state-metrics
namespace: monitoring
spec:

View File

@ -3,6 +3,6 @@ kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7
app.kubernetes.io/version: v1.9.7
name: kube-state-metrics
namespace: monitoring

View File

@ -57,7 +57,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.podIP
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy
ports:
- containerPort: 9100
@ -70,6 +70,10 @@ spec:
requests:
cpu: 10m
memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
hostNetwork: true
hostPID: true
nodeSelector:
@ -93,3 +97,4 @@ spec:
updateStrategy:
rollingUpdate:
maxUnavailable: 10%
type: RollingUpdate

View File

@ -25,7 +25,7 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
- --secure-port=6443
image: directxman12/k8s-prometheus-adapter:v0.7.0
image: directxman12/k8s-prometheus-adapter:v0.8.2
name: prometheus-adapter
ports:
- containerPort: 6443

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
namespace: monitoring
spec:
@ -19,4 +19,4 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1

View File

@ -12,7 +12,7 @@ spec:
namespace: monitoring
port: web
externalUrl: http://prometheus-k8s.monitoring:9090
image: quay.io/prometheus/prometheus:v2.20.0
image: quay.io/prometheus/prometheus:v2.22.1
nodeSelector:
kubernetes.io/os: linux
podMonitorNamespaceSelector:
@ -58,4 +58,4 @@ spec:
requests:
storage: 10Gi
storageClassName: local-path
version: v2.20.0
version: v2.22.1

View File

@ -40,10 +40,10 @@ spec:
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
record: instance:node_vmstat_pgmajfault:rate1m
- expr: |
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_seconds:rate1m
- expr: |
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
- expr: |
sum without (device) (
@ -390,11 +390,6 @@ spec:
quantile: "0.99"
verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
/
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
record: cluster:apiserver_request_duration_seconds:mean5m
- expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
@ -571,9 +566,6 @@ spec:
record: code:apiserver_request_total:increase30d
- name: k8s.rules
rules:
- expr: |
sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
@ -605,9 +597,6 @@ spec:
max by(namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_swap
- expr: |
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |
sum by (namespace) (
sum by (namespace, pod) (
@ -716,9 +705,6 @@ spec:
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: node.rules
rules:
- expr: |
sum(min(kube_pod_info{node!=""}) by (cluster, node))
record: ':kube_pod_info_node_count:'
- expr: |
topk by(namespace, pod) (1,
max by (node, namespace, pod) (
@ -762,18 +748,18 @@ spec:
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
(instance)
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
BY (instance)
record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
BY (instance, cpu))
@ -791,7 +777,7 @@ spec:
description: kube-state-metrics is experiencing errors at an elevated rate
in list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
summary: kube-state-metrics is experiencing errors in list operations.
expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
@ -806,7 +792,7 @@ spec:
description: kube-state-metrics is experiencing errors at an elevated rate
in watch operations. This is likely causing it to not be able to expose
metrics about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
summary: kube-state-metrics is experiencing errors in watch operations.
expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
@ -823,7 +809,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling
up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
@ -841,7 +827,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling
up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
@ -858,7 +844,7 @@ spec:
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left.
expr: |
(
@ -873,7 +859,7 @@ spec:
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left.
expr: |
(
@ -889,7 +875,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
@ -907,7 +893,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
@ -924,7 +910,7 @@ spec:
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left.
expr: |
(
@ -939,7 +925,7 @@ spec:
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left.
expr: |
(
@ -954,10 +940,10 @@ spec:
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr: |
increase(node_network_receive_errs_total[2m]) > 10
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h
labels:
severity: warning
@ -965,17 +951,17 @@ spec:
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr: |
increase(node_network_transmit_errs_total[2m]) > 10
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h
labels:
severity: warning
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: |
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
@ -984,7 +970,7 @@ spec:
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector failed to scrape.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
expr: |
node_textfile_scrape_error{job="node-exporter"} == 1
@ -994,7 +980,7 @@ spec:
annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
Ensure NTP is configured correctly on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
summary: Clock skew detected.
expr: |
(
@ -1015,7 +1001,7 @@ spec:
annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0
@ -1029,7 +1015,7 @@ spec:
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
summary: RAID Array is degraded
expr: |
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
@ -1040,19 +1026,142 @@ spec:
annotations:
description: At least one device in RAID array on {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
summary: Failed device in RAID array
expr: |
node_md_disks{state="fail"} > 0
labels:
severity: warning
- name: alertmanager.rules
rules:
- alert: AlertmanagerFailedReload
annotations:
description: Configuration has failed to load for {{ $labels.namespace }}/{{
$labels.pod}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
summary: Reloading an Alertmanager configuration has failed.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: AlertmanagerMembersInconsistent
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
found {{ $value }} members of the {{$labels.job}} cluster.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
summary: A member of an Alertmanager cluster has not found all other cluster
members.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
< on (namespace,service) group_left
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
for: 10m
labels:
severity: critical
- alert: AlertmanagerFailedToSendAlerts
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
summary: An Alertmanager instance failed to send notifications.
expr: |
(
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications.
expr: |
min by (namespace,service) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
)
> 0.01
for: 5m
labels:
severity: critical
- alert: AlertmanagerConfigInconsistent
annotations:
description: Alertmanager instances within the {{$labels.job}} cluster have
different configurations.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
summary: Alertmanager instances within the same cluster have different configurations.
expr: |
count by (namespace,service) (
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
)
!= 1
for: 20m
labels:
severity: critical
- alert: AlertmanagerClusterDown
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have been up for less than half of the
last 5m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
summary: Half or more of the Alertmanager instances within the same cluster
are down.
expr: |
(
count by (namespace,service) (
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
- alert: AlertmanagerClusterCrashlooping
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have restarted at least 5 times in the
last 10m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
summary: Half or more of the Alertmanager instances within the same cluster
are crashlooping.
expr: |
(
count by (namespace,service) (
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
- name: prometheus-operator
rules:
- alert: PrometheusOperatorListErrors
annotations:
description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
summary: Errors while performing list operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
@ -1063,7 +1172,7 @@ spec:
annotations:
description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
@ -1074,7 +1183,7 @@ spec:
annotations:
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
namespace fails to reconcile {{ $value }} objects.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorsyncfailed
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
summary: Last controller reconciliation failed
expr: |
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
@ -1086,7 +1195,7 @@ spec:
description: '{{ $value | humanizePercentage }} of reconciling operations
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
namespace.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
summary: Errors while reconciling controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
@ -1097,7 +1206,7 @@ spec:
annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
Namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
summary: Errors while reconciling Prometheus.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
@ -1108,20 +1217,32 @@ spec:
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
ready to reconcile {{ $labels.controller }} resources.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornotready
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
summary: Prometheus operator not ready
expr: |
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
for: 5m
labels:
severity: warning
- alert: PrometheusOperatorRejectedResources
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
}} resources.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
summary: Resources rejected by Prometheus operator
expr: |
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
for: 5m
labels:
severity: warning
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
summary: Pod is crash looping.
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@ -1132,7 +1253,7 @@ spec:
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
expr: |
sum by (namespace, pod) (
@ -1150,7 +1271,7 @@ spec:
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentgenerationmismatch
summary: Deployment generation mismatch due to possible roll-back
expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"}
@ -1163,7 +1284,7 @@ spec:
annotations:
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas.
expr: |
(
@ -1182,7 +1303,7 @@ spec:
annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetreplicasmismatch
summary: StatefulSet has not matched the expected number of replicas.
expr: |
(
@ -1202,7 +1323,7 @@ spec:
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetgenerationmismatch
summary: StatefulSet generation mismatch due to possible roll-back
expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@ -1215,7 +1336,7 @@ spec:
annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
update has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out.
expr: |
(
@ -1242,7 +1363,7 @@ spec:
annotations:
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has
not finished or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck.
expr: |
(
@ -1275,7 +1396,7 @@ spec:
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -1286,7 +1407,7 @@ spec:
annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled.
expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@ -1299,7 +1420,7 @@ spec:
annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled.
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -1310,7 +1431,7 @@ spec:
annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking
more than 12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobcompletion
summary: Job did not complete in time
expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@ -1321,7 +1442,7 @@ spec:
annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to
complete. Removing failed job after investigation should clear this alert.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobfailed
summary: Job failed to complete.
expr: |
kube_job_failed{job="kube-state-metrics"} > 0
@ -1332,13 +1453,21 @@ spec:
annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched
the desired number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas.
expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
!=
kube_hpa_status_current_replicas{job="kube-state-metrics"})
and
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
>
kube_hpa_spec_min_replicas{job="kube-state-metrics"})
and
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
<
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
and
changes(kube_hpa_status_current_replicas[15m]) == 0
for: 15m
labels:
@ -1347,7 +1476,7 @@ spec:
annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running
at max replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout
summary: HPA is running at max replicas
expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"}
@ -1362,7 +1491,7 @@ spec:
annotations:
description: Cluster has overcommitted CPU resource requests for Pods and
cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@ -1377,7 +1506,7 @@ spec:
annotations:
description: Cluster has overcommitted memory resource requests for Pods and
cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@ -1393,7 +1522,7 @@ spec:
- alert: KubeCPUQuotaOvercommit
annotations:
description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
@ -1406,12 +1535,12 @@ spec:
- alert: KubeMemoryQuotaOvercommit
annotations:
description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/
sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"})
sum(kube_node_status_allocatable_memory_bytes{job="kube-state-metrics"})
> 1.5
for: 5m
labels:
@ -1420,7 +1549,7 @@ spec:
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaalmostfull
summary: Namespace quota is going to be full.
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1434,7 +1563,7 @@ spec:
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotafullyused
summary: Namespace quota is fully used.
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1448,7 +1577,7 @@ spec:
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaexceeded
summary: Namespace quota has exceeded the limits.
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1463,7 +1592,7 @@ spec:
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@ -1480,7 +1609,7 @@ spec:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: |
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
@ -1496,7 +1625,7 @@ spec:
$labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is
expected to fill up within four days. Currently {{ $value | humanizePercentage
}} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: |
(
@ -1513,7 +1642,7 @@ spec:
annotations:
description: The persistent volume {{ $labels.persistentvolume }} has status
{{ $labels.phase }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning.
expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@ -1526,7 +1655,7 @@ spec:
annotations:
description: There are {{ $value }} different semantic versions of Kubernetes
components running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running.
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@ -1537,7 +1666,7 @@ spec:
annotations:
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}' is experiencing {{ $value | humanizePercentage }} errors.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclienterrors
summary: Kubernetes API server client is experiencing errors.
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@ -1552,7 +1681,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: |
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
@ -1566,7 +1695,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: |
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
@ -1580,7 +1709,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: |
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
@ -1594,7 +1723,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: |
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
@ -1611,7 +1740,7 @@ spec:
annotations:
description: A client certificate used to authenticate to the apiserver is
expiring in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: |
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -1621,7 +1750,7 @@ spec:
annotations:
description: A client certificate used to authenticate to the apiserver is
expiring in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: |
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@ -1633,7 +1762,7 @@ spec:
has reported errors. The number of errors have increased for it in the past
five minutes. High values indicate that the availability of the service
changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapierrors
summary: An aggregated API has reported errors.
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@ -1643,7 +1772,7 @@ spec:
annotations:
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }}
has been only {{ $value | humanize }}% available over the last 10m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapidown
summary: An aggregated API is down.
expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
@ -1653,7 +1782,7 @@ spec:
- alert: KubeAPIDown
annotations:
description: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapidown
summary: Target disappeared from Prometheus target discovery.
expr: |
absent(up{job="apiserver"} == 1)
@ -1665,7 +1794,7 @@ spec:
- alert: KubeNodeNotReady
annotations:
description: '{{ $labels.node }} has been unready for more than 15 minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodenotready
summary: Node is not ready.
expr: |
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
@ -1676,7 +1805,7 @@ spec:
annotations:
description: '{{ $labels.node }} is unreachable and some workloads may be
rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodeunreachable
summary: Node is unreachable.
expr: |
(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
@ -1687,7 +1816,7 @@ spec:
annotations:
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubelettoomanypods
summary: Kubelet is running at capacity.
expr: |
count by(node) (
@ -1704,7 +1833,7 @@ spec:
annotations:
description: The readiness status of node {{ $labels.node }} has changed {{
$value }} times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodereadinessflapping
summary: Node readiness status is flapping.
expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@ -1715,7 +1844,7 @@ spec:
annotations:
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile
duration of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletplegdurationhigh
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -1726,7 +1855,7 @@ spec:
annotations:
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high.
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
@ -1737,7 +1866,7 @@ spec:
annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire.
expr: |
kubelet_certificate_manager_client_ttl_seconds < 604800
@ -1747,7 +1876,7 @@ spec:
annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire.
expr: |
kubelet_certificate_manager_client_ttl_seconds < 86400
@ -1757,7 +1886,7 @@ spec:
annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire.
expr: |
kubelet_certificate_manager_server_ttl_seconds < 604800
@ -1767,7 +1896,7 @@ spec:
annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire.
expr: |
kubelet_certificate_manager_server_ttl_seconds < 86400
@ -1777,7 +1906,7 @@ spec:
annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its client
certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificaterenewalerrors
summary: Kubelet has failed to renew its client certificate.
expr: |
increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
@ -1788,7 +1917,7 @@ spec:
annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its server
certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificaterenewalerrors
summary: Kubelet has failed to renew its server certificate.
expr: |
increase(kubelet_server_expiration_renew_errors[5m]) > 0
@ -1798,7 +1927,7 @@ spec:
- alert: KubeletDown
annotations:
description: Kubelet has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletdown
summary: Target disappeared from Prometheus target discovery.
expr: |
absent(up{job="kubelet", metrics_path="/metrics"} == 1)
@ -1810,7 +1939,7 @@ spec:
- alert: KubeSchedulerDown
annotations:
description: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeschedulerdown
summary: Target disappeared from Prometheus target discovery.
expr: |
absent(up{job="kube-scheduler"} == 1)
@ -1823,7 +1952,7 @@ spec:
annotations:
description: KubeControllerManager has disappeared from Prometheus target
discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontrollermanagerdown
summary: Target disappeared from Prometheus target discovery.
expr: |
absent(up{job="kube-controller-manager"} == 1)
@ -1878,22 +2007,6 @@ spec:
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
@ -1932,7 +2045,15 @@ spec:
samples.
summary: Prometheus is not ingesting samples.
expr: |
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
)
)
for: 10m
labels:
severity: warning
@ -1989,7 +2110,7 @@ spec:
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- on(job, instance) group_right
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
)
> 120
@ -2036,37 +2157,32 @@ spec:
for: 15m
labels:
severity: warning
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
- alert: PrometheusTargetLimitHit
annotations:
message: |
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
{{ printf "%.0f" $value }} targets because the number of targets exceeded
the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: |
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
for: 5m
labels:
severity: critical
- alert: AlertmanagerFailedReload
annotations:
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
expr: |
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
for: 10m
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: AlertmanagerMembersInconsistent
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
message: Alertmanager has not found all other members of the cluster.
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"})
for: 5m
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- name: general.rules

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: alertmanagers.monitoring.coreos.com
spec:
@ -644,6 +644,96 @@ spec:
type: array
type: object
type: object
alertmanagerConfigNamespaceSelector:
description: Namespaces to be selected for AlertmanagerConfig discovery.
If nil, only check own namespace.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
alertmanagerConfigSelector:
description: AlertmanagerConfigs to be selected for to merge and configure
Alertmanager with.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
baseImage:
description: 'Base image that is used to deploy pods, without tag.
Deprecated: use ''image'' instead'
@ -653,6 +743,15 @@ spec:
in cluster. Needs to be provided for non RFC1918 [1] (public) addresses.
[1] RFC1918: https://tools.ietf.org/html/rfc1918'
type: string
clusterGossipInterval:
description: Interval between gossip attempts.
type: string
clusterPeerTimeout:
description: Timeout for cluster peering.
type: string
clusterPushpullInterval:
description: Interval between pushpull attempts.
type: string
configMaps:
description: ConfigMaps is a list of ConfigMaps in the same namespace
as the Alertmanager object, which shall be mounted into the Alertmanager
@ -667,9 +766,14 @@ spec:
The secret is mounted into /etc/alertmanager/config.
type: string
containers:
description: Containers allows injecting additional containers. This
description: 'Containers allows injecting additional containers. This
is meant to allow adding an authentication proxy to an Alertmanager
pod.
pod. Containers described here modify an operator generated container
if they share the same name and modifications are done via a strategic
merge patch. The current container names are: `alertmanager` and
`config-reloader`. Overriding containers is entirely outside the
scope of what the maintainers will support and by doing so, you
accept that this behaviour may break at any time without notice.'
items:
description: A single application container that you want to run
within a pod.
@ -771,9 +875,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -1208,6 +1316,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -1215,6 +1324,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -1338,13 +1451,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -1854,9 +1975,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -2291,6 +2416,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -2298,6 +2424,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -2421,13 +2551,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -2812,7 +2950,7 @@ spec:
description: Define which Nodes the Pods are scheduled on.
type: object
paused:
description: If set to true all actions on the underlaying managed
description: If set to true all actions on the underlying managed
objects are not goint to be performed, except for delete actions.
type: boolean
podMetadata:
@ -2861,13 +2999,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
@ -3048,6 +3194,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would
@ -3055,7 +3204,8 @@ spec:
and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets.
@ -3151,13 +3301,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is
@ -3237,7 +3395,11 @@ spec:
type: array
capacity:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying
volume.
type: object
@ -3332,6 +3494,100 @@ spec:
type: string
type: object
type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
version:
description: Version the cluster should be on.
type: string
@ -3704,9 +3960,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -3729,6 +3989,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium
@ -3736,7 +3999,8 @@ spec:
specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
fc:
description: FC represents a Fibre Channel resource that is
@ -4199,10 +4463,14 @@ spec:
for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format
of the exposed resources, defaults
to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to
select'
@ -4587,7 +4855,7 @@ spec:
format: int32
type: integer
paused:
description: Represents whether any actions on the underlaying managed
description: Represents whether any actions on the underlying managed
objects are being performed. Only delete actions will be performed.
type: boolean
replicas:

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
spec:
@ -58,6 +58,69 @@ spec:
description: PodMetricsEndpoint defines a scrapeable endpoint of
a Kubernetes Pod serving Prometheus metrics.
properties:
basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over
basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping
targets. The secret needs to be in the same namespace as the
pod monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
@ -191,6 +254,121 @@ spec:
- type: string
description: 'Deprecated: Use ''port'' instead.'
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
cert:
description: Struct containing the client cert file for
the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file for the
targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
type: array
podTargetLabels:
@ -248,6 +426,11 @@ spec:
are ANDed.
type: object
type: object
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required:
- podMetricsEndpoints
- selector

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: probes.monitoring.coreos.com
spec:

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: prometheuses.monitoring.coreos.com
spec:
@ -765,7 +765,7 @@ spec:
description: TLS Config to use for alertmanager connection.
properties:
ca:
description: Stuct containing the CA cert to use for
description: Struct containing the CA cert to use for
the targets.
properties:
configMap:
@ -972,7 +972,8 @@ spec:
description: TLS Config to use for accessing apiserver.
properties:
ca:
description: Stuct containing the CA cert to use for the targets.
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
@ -1123,11 +1124,10 @@ spec:
the behavior of an operator generated container. Containers described
here modify an operator generated container if they share the same
name and modifications are done via a strategic merge patch. The
current container names are: `prometheus`, `prometheus-config-reloader`,
`rules-configmap-reloader`, and `thanos-sidecar`. Overriding containers
is entirely outside the scope of what the maintainers will support
and by doing so, you accept that this behaviour may break at any
time without notice.'
current container names are: `prometheus`, `config-reloader`, and
`thanos-sidecar`. Overriding containers is entirely outside the
scope of what the maintainers will support and by doing so, you
accept that this behaviour may break at any time without notice.'
items:
description: A single application container that you want to run
within a pod.
@ -1229,9 +1229,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -1666,6 +1670,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -1673,6 +1678,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -1796,13 +1805,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -2196,6 +2213,15 @@ spec:
value will be taken instead.
format: int64
type: integer
enforcedTargetLimit:
description: EnforcedTargetLimit defines a global limit on the number
of scraped targets. This overrides any TargetLimit set per ServiceMonitor
or/and PodMonitor. It is meant to be used by admins to enforce the
TargetLimit to keep overall number of targets under the desired
limit. Note that if TargetLimit is higher that value will be taken
instead.
format: int64
type: integer
evaluationInterval:
description: Interval between consecutive evaluations.
type: string
@ -2347,9 +2373,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -2784,6 +2814,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -2791,6 +2822,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -2914,13 +2949,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -3670,7 +3713,7 @@ spec:
description: TLS Config to use for remote read.
properties:
ca:
description: Stuct containing the CA cert to use for the
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
@ -3907,7 +3950,7 @@ spec:
description: TLS Config to use for remote write.
properties:
ca:
description: Stuct containing the CA cert to use for the
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
@ -4088,7 +4131,9 @@ spec:
will _not_ be added when value is set to empty string (`""`).
type: string
replicas:
description: Number of instances to deploy for a Prometheus deployment.
description: Number of replicas of each shard to deploy for a Prometheus
deployment. Number of replicas multiplied by shards is the total
number of Pods created.
format: int32
type: integer
resources:
@ -4096,13 +4141,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
@ -4479,6 +4532,17 @@ spec:
if SHA is set. Deprecated: use ''image'' instead. The image digest
can be specified as part of the image URL.'
type: string
shards:
description: 'EXPERIMENTAL: Number of shards to distribute targets
onto. Number of replicas multiplied by shards is the total number
of Pods created. Note that scaling down shards will not reshard
data onto remaining instances, it must be manually moved. Increasing
shards will not reshard data either but it will continue to be available
from the same instances. To query globally use Thanos sidecar and
Thanos querier or remote write data to a central location. Sharding
is done on the content of the `__address__` target meta-label.'
format: int32
type: integer
storage:
description: Storage spec to specify how storage shall be used.
properties:
@ -4499,6 +4563,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would
@ -4506,7 +4573,8 @@ spec:
and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets.
@ -4602,13 +4670,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is
@ -4688,7 +4764,11 @@ spec:
type: array
capacity:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying
volume.
type: object
@ -4761,7 +4841,8 @@ spec:
Maps to the ''--grpc-server-tls-*'' CLI args.'
properties:
ca:
description: Stuct containing the CA cert to use for the targets.
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
@ -4907,7 +4988,8 @@ spec:
type: string
objectStorageConfig:
description: ObjectStorageConfig configures object storage in
Thanos.
Thanos. Alternative to ObjectStorageConfigFile, and lower order
priority.
properties:
key:
description: The key of the secret to select from. Must be
@ -4924,6 +5006,11 @@ spec:
required:
- key
type: object
objectStorageConfigFile:
description: ObjectStorageConfigFile specifies the path of the
object storage configuration file. When used alongside with
ObjectStorageConfig, ObjectStorageConfigFile takes precedence.
type: string
resources:
description: Resources defines the resource requirements for the
Thanos sidecar. If not provided, no requests/limits will be
@ -4931,13 +5018,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
@ -5021,6 +5116,100 @@ spec:
type: string
type: object
type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
version:
description: Version of Prometheus to be deployed.
type: string
@ -5393,9 +5582,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -5418,6 +5611,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium
@ -5425,7 +5621,8 @@ spec:
specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
fc:
description: FC represents a Fibre Channel resource that is
@ -5888,10 +6085,14 @@ spec:
for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format
of the exposed resources, defaults
to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to
select'
@ -6268,6 +6469,14 @@ spec:
description: Enable compression of the write-ahead log using Snappy.
This flag is only available in versions of Prometheus >= 2.11.0.
type: boolean
web:
description: WebSpec defines the web command line flags when starting
Prometheus.
properties:
pageTitle:
description: The prometheus web page title
type: string
type: object
type: object
status:
description: 'Most recent observed status of the Prometheus cluster. Read-only.
@ -6280,7 +6489,7 @@ spec:
format: int32
type: integer
paused:
description: Represents whether any actions on the underlaying managed
description: Represents whether any actions on the underlying managed
objects are being performed. Only delete actions will be performed.
type: boolean
replicas:

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: prometheusrules.monitoring.coreos.com
spec:
@ -17,7 +17,8 @@ spec:
- name: v1
schema:
openAPIV3Schema:
description: PrometheusRule defines alerting rules for a Prometheus instance
description: PrometheusRule defines recording and alerting rules for a Prometheus
instance
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
spec:
@ -246,7 +246,7 @@ spec:
description: TLS configuration to use when scraping the endpoint
properties:
ca:
description: Stuct containing the CA cert to use for the
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
@ -449,6 +449,11 @@ spec:
items:
type: string
type: array
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required:
- endpoints
- selector

View File

@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: thanosrulers.monitoring.coreos.com
spec:
@ -672,7 +672,7 @@ spec:
the behavior of an operator generated container. Containers described
here modify an operator generated container if they share the same
name and modifications are done via a strategic merge patch. The
current container names are: `thanos-ruler` and `rules-configmap-reloader`.
current container names are: `thanos-ruler` and `config-reloader`.
Overriding containers is entirely outside the scope of what the
maintainers will support and by doing so, you accept that this behaviour
may break at any time without notice.'
@ -777,9 +777,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -1214,6 +1218,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -1221,6 +1226,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -1344,13 +1353,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -1738,7 +1755,7 @@ spec:
the ''--grpc-server-tls-*'' CLI args.'
properties:
ca:
description: Stuct containing the CA cert to use for the targets.
description: Struct containing the CA cert to use for the targets.
properties:
configMap:
description: ConfigMap containing data to use for the targets.
@ -1979,9 +1996,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -2416,6 +2437,7 @@ spec:
be referred to by services.
type: string
protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP".
type: string
@ -2423,6 +2445,10 @@ spec:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe
@ -2546,13 +2572,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified,
@ -2944,6 +2978,7 @@ spec:
type: object
objectStorageConfig:
description: ObjectStorageConfig configures object storage in Thanos.
Alternative to ObjectStorageConfigFile, and lower order priority.
properties:
key:
description: The key of the secret to select from. Must be a
@ -2959,6 +2994,11 @@ spec:
required:
- key
type: object
objectStorageConfigFile:
description: ObjectStorageConfigFile specifies the path of the object
storage configuration file. When used alongside with ObjectStorageConfig,
ObjectStorageConfigFile takes precedence.
type: string
paused:
description: When a ThanosRuler deployment is paused, no actions except
for deletion will be performed on the underlying objects.
@ -3055,13 +3095,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise
@ -3314,6 +3362,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would
@ -3321,7 +3372,8 @@ spec:
and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets.
@ -3417,13 +3469,21 @@ spec:
properties:
limits:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
requests:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is
@ -3503,7 +3563,11 @@ spec:
type: array
capacity:
additionalProperties:
type: string
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying
volume.
type: object
@ -3592,6 +3656,100 @@ spec:
type: string
type: object
type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
tracingConfig:
description: TracingConfig configures tracing in Thanos. This is an
experimental feature, it may change in any upcoming release in a
@ -3938,9 +4096,13 @@ spec:
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the
exposed resources, defaults to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
@ -3963,6 +4125,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium
@ -3970,7 +4135,8 @@ spec:
specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
fc:
description: FC represents a Fibre Channel resource that is
@ -4433,10 +4599,14 @@ spec:
for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format
of the exposed resources, defaults
to "1"
type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to
select'

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
rules:
- apiGroups:
@ -12,6 +12,7 @@ rules:
resources:
- alertmanagers
- alertmanagers/finalizers
- alertmanagerconfigs
- prometheuses
- prometheuses/finalizers
- thanosrulers
@ -68,6 +69,14 @@ rules:
- get
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
namespace: monitoring
spec:
@ -18,15 +18,13 @@ spec:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=jimmidyson/configmap-reload:v0.4.0
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.1
image: quay.io/prometheus-operator/prometheus-operator:v0.42.1
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.44.1
image: quay.io/prometheus-operator/prometheus-operator:v0.44.1
name: prometheus-operator
ports:
- containerPort: 8080
@ -45,13 +43,15 @@ spec:
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8080/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy
ports:
- containerPort: 8443
name: https
securityContext:
runAsUser: 65534
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
namespace: monitoring
spec:

View File

@ -4,6 +4,6 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1
app.kubernetes.io/version: v0.44.1
name: prometheus-operator
namespace: monitoring

1
monitoring/vendor/alertmanager vendored Symbolic link
View File

@ -0,0 +1 @@
github.com/prometheus/alertmanager/doc/alertmanager-mixin

View File

@ -1,11 +1,9 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
grafana: '6.6.0',
grafana: '7.3.4',
},
imageRepos+:: {
@ -30,12 +28,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
version: 1,
editable: false,
}],
// Forces pod restarts when dashboards are changed
dashboardsChecksum: false,
config: {},
ldap: null,
plugins: [],
env: [],
port: 3000,
container: {
resources: {
requests: { cpu: '100m', memory: '100Mi' },
limits: { cpu: '200m', memory: '200Mi' },
},
@ -45,36 +45,65 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
grafanaDashboards: {},
grafana+: {
[if std.length($._config.grafana.config) > 0 then 'config']:
local secret = k.core.v1.secret;
local grafanaConfig = { 'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))) } +
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {};
secret.new('grafana-config', grafanaConfig) +
secret.mixin.metadata.withNamespace($._config.namespace),
{
apiVersion: 'v1',
kind: 'Secret',
metadata: {
name: 'grafana-config',
namespace: $._config.namespace,
},
type: 'Opaque',
data: {
'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))),
} +
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {},
},
dashboardDefinitions:
local configMap = k.core.v1.configMap;
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') }) +
configMap.mixin.metadata.withNamespace($._config.namespace)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
}
for name in std.objectFields($._config.grafana.dashboards)
] + [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') }) +
configMap.mixin.metadata.withNamespace($._config.namespace)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + if std.length($._config.grafana.rawDashboards) > 0 then
] + (
if std.length($._config.grafana.rawDashboards) > 0 then
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
configMap.new(dashboardName, { [name]: $._config.grafana.rawDashboards[name] }) +
configMap.mixin.metadata.withNamespace($._config.namespace)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: $._config.grafana.rawDashboards[name] },
}
for name in std.objectFields($._config.grafana.rawDashboards)
] else [],
]
else
[]
),
dashboardSources:
local configMap = k.core.v1.configMap;
local dashboardSources = {
apiVersion: 1,
providers:
@ -106,59 +135,80 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
],
};
configMap.new('grafana-dashboards', { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') }) +
configMap.mixin.metadata.withNamespace($._config.namespace),
{
kind: 'ConfigMap',
apiVersion: 'v1',
metadata: {
name: 'grafana-dashboards',
namespace: $._config.namespace,
},
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
},
dashboardDatasources:
local secret = k.core.v1.secret;
secret.new('grafana-datasources', { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
{
apiVersion: 'v1',
kind: 'Secret',
metadata: {
name: 'grafana-datasources',
namespace: $._config.namespace,
},
type: 'Opaque',
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
apiVersion: 1,
datasources: $._config.grafana.datasources,
}, ' '))) }) +
secret.mixin.metadata.withNamespace($._config.namespace),
}, ' '))) },
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local grafanaServiceNodePort = servicePort.newNamed('http', $._config.grafana.port, 'http');
service.new('grafana', $.grafana.deployment.spec.selector.matchLabels, grafanaServiceNodePort) +
service.mixin.metadata.withLabels({ app: 'grafana' }) +
service.mixin.metadata.withNamespace($._config.namespace),
{
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
labels: {
app: 'grafana',
},
},
spec: {
selector: $.grafana.deployment.spec.selector.matchLabels,
type: 'NodePort',
ports: [
{ name: 'http', targetPort: 'http', port: 3000 },
],
},
},
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('grafana') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
{
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
},
},
deployment:
local deployment = k.apps.v1.deployment;
local container = k.apps.v1.deployment.mixin.spec.template.spec.containersType;
local volume = k.apps.v1.deployment.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
local containerVolumeMount = container.volumeMountsType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local env = container.envType;
local targetPort = $._config.grafana.port;
local portName = 'http';
local podLabels = { app: 'grafana' };
local configVolumeName = 'grafana-config';
local configSecretName = 'grafana-config';
local configVolume = volume.withName(configVolumeName) + volume.mixin.secret.withSecretName(configSecretName);
local configVolumeMount = containerVolumeMount.new(configVolumeName, '/etc/grafana');
local configVolume = { name: configVolumeName, secret: { secretName: configSecretName } };
local configVolumeMount = { name: configVolumeName, mountPath: '/etc/grafana', readOnly: false };
local storageVolumeName = 'grafana-storage';
local storageVolume = volume.fromEmptyDir(storageVolumeName);
local storageVolumeMount = containerVolumeMount.new(storageVolumeName, '/var/lib/grafana');
local storageVolume = { name: storageVolumeName, emptyDir: {} };
local storageVolumeMount = { name: storageVolumeName, mountPath: '/var/lib/grafana', readOnly: false };
local datasourcesVolumeName = 'grafana-datasources';
local datasourcesSecretName = 'grafana-datasources';
local datasourcesVolume = volume.withName(datasourcesVolumeName) + volume.mixin.secret.withSecretName(datasourcesSecretName);
local datasourcesVolumeMount = containerVolumeMount.new(datasourcesVolumeName, '/etc/grafana/provisioning/datasources');
local datasourcesVolume = { name: datasourcesVolumeName, secret: { secretName: datasourcesSecretName } };
local datasourcesVolumeMount = { name: datasourcesVolumeName, mountPath: '/etc/grafana/provisioning/datasources', readOnly: false };
local dashboardsVolumeName = 'grafana-dashboards';
local dashboardsConfigMapName = 'grafana-dashboards';
local dashboardsVolume = volume.withName(dashboardsVolumeName) + volume.mixin.configMap.withName(dashboardsConfigMapName);
local dashboardsVolumeMount = containerVolumeMount.new(dashboardsVolumeName, '/etc/grafana/provisioning/dashboards');
local dashboardsVolume = { name: dashboardsVolumeName, configMap: { name: dashboardsConfigMapName } };
local dashboardsVolumeMount = { name: dashboardsVolumeName, mountPath: '/etc/grafana/provisioning/dashboards', readOnly: false };
local volumeMounts =
[
@ -167,23 +217,36 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
dashboardsVolumeMount,
] +
[
local dashboardName = std.strReplace(name, '.json', '');
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.dashboards)
] +
[
local dashboardName = std.strReplace(name, '.json', '');
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/' + folder + '/' + dashboardName)
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
readOnly: false,
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
local dashboardName = std.strReplace(name, '.json', '');
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
for name in std.objectFields($._config.grafana.rawDashboards)
] +
{
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else [];
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.rawDashboards)
] + (
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []
);
local volumes =
[
@ -192,52 +255,82 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
dashboardsVolume,
] +
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
volume.withName(dashboardName) +
volume.mixin.configMap.withName(dashboardName)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.dashboards)
] +
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
volume.withName(dashboardName) +
volume.mixin.configMap.withName(dashboardName)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
volume.withName(dashboardName) +
volume.mixin.configMap.withName(dashboardName)
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.rawDashboards)
] +
if std.length($._config.grafana.config) > 0 then [configVolume] else [];
local plugins = (if std.length($._config.grafana.plugins) == 0 then [] else [env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))]);
local plugins = (
if std.length($._config.grafana.plugins) == 0 then
[]
else
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', $._config.grafana.plugins) }]
);
local c = [
container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) +
container.withEnv($._config.grafana.env + plugins) +
container.withVolumeMounts(volumeMounts) +
container.withPorts(containerPort.newNamed(targetPort, portName)) +
container.mixin.readinessProbe.httpGet.withPath('/api/health') +
container.mixin.readinessProbe.httpGet.withPort(portName) +
container.mixin.resources.withRequests($._config.grafana.container.requests) +
container.mixin.resources.withLimits($._config.grafana.container.limits),
] + $._config.grafana.containers;
local c = [{
name: 'grafana',
image: $._config.imageRepos.grafana + ':' + $._config.versions.grafana,
env: $._config.grafana.env + plugins,
volumeMounts: volumeMounts,
ports: [{ name: portName, containerPort: targetPort }],
readinessProbe: {
httpGet: { path: '/api/health', port: portName },
},
resources: $._config.grafana.resources,
}] + $._config.grafana.containers;
deployment.new('grafana', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.metadata.withAnnotations({
{
apiVersion: 'apps/v1',
kind: 'Deployment',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
labels: podLabels,
},
spec: {
replicas: 1,
selector: {
matchLabels: podLabels,
},
template: {
metadata: {
labels: podLabels,
annotations: {
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
}) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.withVolumes(volumes) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.securityContext.withFsGroup(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('grafana'),
[if $._config.grafana.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString($.grafana.dashboardDefinitions)),
},
},
spec: {
containers: c,
volumes: volumes,
serviceAccountName: $.grafana.serviceAccount.metadata.name,
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
securityContext: { fsGroup: 65534, runAsNonRoot: true, runAsUser: 65534 },
},
},
},
},
},
}

View File

@ -9,16 +9,6 @@
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib.git",
"subdir": ""
}
},
"version": "master",
"name": "ksonnet"
}
],
"legacyImports": false

View File

@ -184,7 +184,7 @@
severity: 'critical',
},
annotations: {
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
},
},
{

View File

@ -27,6 +27,7 @@ local timepickerlib = import 'timepicker.libsonnet';
* @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
* @method addPanels(panels) Appends an array of panels
* @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
* @method addLinks(dashboardLinks) Adds an array of [dashboard links](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
* @method addRequired(type, name, id, version)
* @method addInput(name, label, type, pluginId, pluginName, description, value)
* @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`.
@ -149,6 +150,7 @@ local timepickerlib = import 'timepicker.libsonnet';
addLink(link):: self {
links+: [link],
},
addLinks(dashboardLinks):: std.foldl(function(d, t) d.addLink(t), dashboardLinks, self),
required:: [],
__requires: it.required,
addRequired(type, name, id, version):: self {

View File

@ -42,6 +42,7 @@
* @param links (optional)
* @param tableColumn (default `''`)
* @param maxPerRow (optional)
* @param maxDataPoints (default `100`)
*
* @method addTarget(target) Adds a target object.
*/
@ -100,6 +101,7 @@
links=[],
tableColumn='',
maxPerRow=null,
maxDataPoints=100,
)::
{
[if height != null then 'height']: height,
@ -116,7 +118,7 @@
],
links: links,
[if decimals != null then 'decimals']: decimals,
maxDataPoints: 100,
maxDataPoints: maxDataPoints,
interval: interval,
cacheTimeout: null,
format: format,

View File

@ -367,7 +367,7 @@
},
],
qpsPanel(selector):: {
qpsPanel(selector, statusLabelName='status_code'):: {
aliasColors: {
'1xx': '#EAB839',
'2xx': '#7EB26D',
@ -379,9 +379,13 @@
},
targets: [
{
expr: 'sum by (status) (label_replace(label_replace(rate(' + selector + '[$__interval]),'
+ ' "status", "${1}xx", "status_code", "([0-9]).."),'
+ ' "status", "${1}", "status_code", "([a-z]+)"))',
expr:
|||
sum by (status) (
label_replace(label_replace(rate(%s[$__interval]),
"status", "${1}xx", "%s", "([0-9]).."),
"status", "${1}", "%s", "([a-z]+)"))
||| % [selector, statusLabelName, statusLabelName],
format: 'time_series',
intervalFactor: 2,
legendFormat: '{{status}}',

View File

@ -14,7 +14,8 @@ A set of Grafana dashboards and Prometheus alerts for Kubernetes.
| release-0.3 | v1.17 and before | v2.11.0+ |
| release-0.4 | v1.18 | v2.11.0+ |
| release-0.5 | v1.19 | v2.11.0+ |
| master | v1.19 | v2.11.0+ |
| release-0.6 | v1.19+ | v2.11.0+ |
| master | v1.19+ | v2.11.0+ |
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous version on a best effort basis.
@ -23,6 +24,8 @@ Some alerts now use Prometheus filters made available in Prometheus 2.11.0, whic
Warning: This compatibility matrix was initially created based on experience, we do not guarantee the compatibility, it may be updated based on new learnings.
Warning: By default the expressions will generate *grafana 7.2+* compatible rules using the *$__rate_interval* variable for rate functions. If you need backward compatible rules please set *grafana72: false* in your *_config*
## How to use
This mixin is designed to be vendored into the repo with your infrastructure config.

View File

@ -268,6 +268,14 @@
!=
kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
>
kube_hpa_spec_min_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
<
kube_hpa_spec_max_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
changes(kube_hpa_status_current_replicas[15m]) == 0
||| % $._config,
labels: {

View File

@ -82,7 +82,7 @@
expr: |||
sum(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource="memory"})
/
sum(kube_node_status_allocatable_memory_bytes{%(nodeExporterSelector)s})
sum(kube_node_status_allocatable_memory_bytes{%(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
labels: {

View File

@ -58,6 +58,10 @@
'kubelet.json': 'B1azll2ETo7DTiM8CysrH6g4s5NCgkOz6ZdU8Q0j',
},
// Support for Grafana 7.2+ `$__rate_interval` instead of `$__interval`
grafana72: true,
grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
// Config for the Grafana dashboards in the Kubernetes Mixin
grafanaK8s: {
dashboardNamePrefix: 'Kubernetes / ',
@ -83,7 +87,7 @@
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
// This list of disk device names is referenced in various expressions.
diskDevices: ['nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
},
}

View File

@ -32,7 +32,7 @@ local singlestat = grafana.singlestat;
format='percentunit',
decimals=3,
fill=10,
description='How much error budget is left looking at our %.3f%% availability gurantees?' % $._config.SLOs.apiserver.target,
description='How much error budget is left looking at our %.3f%% availability guarantees?' % $._config.SLOs.apiserver.target,
)
.addTarget(prometheus.target('100 * (apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"} - %f)' % [$._config.SLOs.apiserver.days, $._config.clusterLabel, $._config.SLOs.apiserver.target], legendFormat='errorbudget'));

View File

@ -26,7 +26,8 @@ local singlestat = grafana.singlestat;
span=2,
valueName='min',
)
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_pod_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local runningContainerCount =
singlestat.new(
@ -35,7 +36,8 @@ local singlestat = grafana.singlestat;
span=2,
valueName='min',
)
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_container_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local actualVolumeCount =
singlestat.new(

View File

@ -334,6 +334,14 @@ local singlestat = grafana.singlestat;
title='Errors',
collapse=true,
);
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Cluster' % $._config.grafanaK8s,
@ -366,17 +374,18 @@ local singlestat = grafana.singlestat;
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Received',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
@ -384,14 +393,14 @@ local singlestat = grafana.singlestat;
newTablePanel(
tableTitle='Current Status',
colQueries=[
'sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
'sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
]
),
gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -401,14 +410,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Received',
graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 11 }
)
.addPanel(
newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted',
graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 11 }
),
@ -420,14 +429,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 24, x: 0, y: 12 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
),
gridPos={ h: 9, w: 24, x: 0, y: 21 }
)
@ -436,7 +445,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 24, x: 0, y: 31 }
@ -444,7 +453,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 24, x: 0, y: 40 }
@ -456,7 +465,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 24, x: 0, y: 50 }
@ -464,7 +473,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 24, x: 0, y: 59 }
@ -472,7 +481,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of TCP Retransmits out of all sent segments',
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))',
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
graphFormat='percentunit',
legendFormat='{{instance}}'
) + { links: [
@ -486,7 +495,7 @@ local singlestat = grafana.singlestat;
).addPanel(
newGraphPanel(
graphTitle='Rate of TCP SYN Retransmits out of all retransmits',
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))',
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
graphFormat='percentunit',
legendFormat='{{instance}}'
) + { links: [

View File

@ -227,12 +227,20 @@ local singlestat = grafana.singlestat;
targets: targets,
};
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate =
template.new(
name='namespace',
datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)',
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+',
current='kube-system',
hide='',
@ -243,7 +251,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)',
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false,
};
@ -362,6 +370,7 @@ local singlestat = grafana.singlestat;
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate)
@ -370,14 +379,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))'
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))'
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
@ -385,12 +394,12 @@ local singlestat = grafana.singlestat;
newTablePanel(
tableTitle='Current Status',
colQueries=[
'sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
]
),
gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -399,14 +408,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)'
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 20 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)'
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 20 }
)
@ -415,7 +424,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 30 }
@ -423,7 +432,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 30 }
@ -435,7 +444,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 40 }
@ -443,7 +452,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 40 }

View File

@ -231,11 +231,20 @@ local singlestat = grafana.singlestat;
targets: targets,
};
// Grafana template variable "cluster" for this dashboard: lists values of
// the configurable cluster label taken from kube_pod_info. Hidden
// (hide='2') unless $._config.showMultiCluster is enabled — '2' is assumed
// to be Grafana's hide-variable mode; verify against Grafana docs.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s expands to $._config.clusterLabel so the query matches the
// deployment's chosen cluster label name.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate =
template.new(
name='namespace',
datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)',
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
current='kube-system',
hide='',
refresh=1,
@ -245,7 +254,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)',
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false,
};
@ -253,7 +262,7 @@ local singlestat = grafana.singlestat;
template.new(
name='type',
datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
current='deployment',
hide='',
refresh=1,
@ -263,7 +272,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
skipUrlSync: false,
};
@ -390,6 +399,7 @@ local singlestat = grafana.singlestat;
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(typeTemplate)
.addTemplate(resolutionTemplate)
@ -400,10 +410,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Current Rate of Bytes Received',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
@ -412,10 +422,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
@ -425,45 +435,45 @@ local singlestat = grafana.singlestat;
tableTitle='Current Status',
colQueries=[
|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
]
),
gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -474,10 +484,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Average Rate of Bytes Received',
graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 20 }
@ -486,10 +496,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
legendFormat='{{ workload }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 20 }
@ -503,10 +513,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 38 }
)
@ -514,10 +524,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 38 }
)
@ -527,10 +537,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 40 }
@ -539,10 +549,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 40 }
@ -555,10 +565,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 41 }
@ -567,10 +577,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 41 }

View File

@ -106,11 +106,21 @@ local singlestat = grafana.singlestat;
},
};
// Template variable "cluster": values come from the configured cluster
// label on the kube_pod_info metric. hide switches between '' (shown) and
// '2' (hidden — TODO confirm Grafana hide semantics) based on the
// showMultiCluster flag in the mixin config.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// The cluster label name is injected from the mixin configuration.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate =
template.new(
name='namespace',
datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)',
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+',
current='kube-system',
hide='',
@ -121,7 +131,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)',
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false,
};
@ -129,7 +139,7 @@ local singlestat = grafana.singlestat;
template.new(
name='pod',
datasource='$datasource',
query='label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)',
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
allValues='.+',
current='',
hide='',
@ -140,7 +150,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)',
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
skipUrlSync: false,
};
@ -259,6 +269,7 @@ local singlestat = grafana.singlestat;
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(podTemplate)
.addTemplate(resolutionTemplate)
@ -268,14 +279,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))'
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
)
.addPanel(
newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))'
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
)
@ -283,14 +294,14 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)'
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 11 }
)
.addPanel(
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)'
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 11 }
)
@ -299,7 +310,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 21 }
@ -307,7 +318,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 21 }
@ -319,7 +330,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 0, y: 32 }
@ -327,7 +338,7 @@ local singlestat = grafana.singlestat;
.addPanel(
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps'
),
gridPos={ h: 10, w: 12, x: 12, y: 32 }

View File

@ -96,11 +96,20 @@ local singlestat = grafana.singlestat;
},
};
// Template variable "cluster" shared by this dashboard's queries. The
// label_values() query enumerates the configured cluster label from
// kube_pod_info; the variable is hidden (hide='2' — presumably Grafana's
// hide-variable value, verify) when multi-cluster mode is off.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
// %s is replaced by $._config.clusterLabel at evaluation time.
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate =
template.new(
name='namespace',
datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)',
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+',
current='kube-system',
hide='',
@ -111,7 +120,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)',
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false,
};
@ -119,7 +128,7 @@ local singlestat = grafana.singlestat;
template.new(
name='workload',
datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
current='',
hide='',
refresh=1,
@ -129,7 +138,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
skipUrlSync: false,
};
@ -137,7 +146,7 @@ local singlestat = grafana.singlestat;
template.new(
name='type',
datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
current='deployment',
hide='',
refresh=1,
@ -147,7 +156,7 @@ local singlestat = grafana.singlestat;
auto: false,
auto_count: 30,
auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
skipUrlSync: false,
};
@ -274,6 +283,7 @@ local singlestat = grafana.singlestat;
type: 'datasource',
},
)
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate)
.addTemplate(workloadTemplate)
.addTemplate(typeTemplate)
@ -285,10 +295,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Current Rate of Bytes Received',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 1 }
@ -297,10 +307,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 1 }
@ -311,10 +321,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Average Rate of Bytes Received',
graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 0, y: 11 }
@ -323,10 +333,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted',
graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
legendFormat='{{ pod }}',
),
gridPos={ h: 9, w: 12, x: 12, y: 11 }
@ -340,10 +350,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Receive Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 0, y: 12 }
)
@ -351,10 +361,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Transmit Bandwidth',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
),
gridPos={ h: 9, w: 12, x: 12, y: 12 }
)
@ -364,10 +374,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Received Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 22 }
@ -376,10 +386,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Transmitted Packets',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 22 }
@ -392,10 +402,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Received Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 0, y: 23 }
@ -404,10 +414,10 @@ local singlestat = grafana.singlestat;
newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||,
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
graphFormat='pps'
),
gridPos={ h: 9, w: 12, x: 12, y: 23 }

View File

@ -44,12 +44,12 @@ local template = grafana.template;
];
local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
];
local networkTableStyles = {
@ -96,7 +96,7 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s]))' % $._config) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
.addPanel(
@ -192,7 +192,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -201,7 +201,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -210,7 +210,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Received') +
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -219,7 +219,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -228,7 +228,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -237,7 +237,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -246,7 +246,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -255,7 +255,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)

View File

@ -24,7 +24,7 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config)
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[%(grafanaIntervalVar)s]))' % $._config)
)
.addPanel(
g.panel('CPU Requests Commitment') +

View File

@ -36,12 +36,12 @@ local template = grafana.template;
};
local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
];
local networkTableStyles = {
@ -244,7 +244,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -253,7 +253,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -262,7 +262,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -271,7 +271,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -280,7 +280,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)
@ -289,7 +289,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
)

View File

@ -213,7 +213,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
@ -222,7 +222,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
@ -231,7 +231,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
@ -240,7 +240,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
@ -249,7 +249,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
@ -258,7 +258,7 @@ local template = grafana.template;
g.row('Network')
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)

View File

@ -59,32 +59,32 @@ local template = grafana.template;
local networkColumns = [
|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config,
@ -285,7 +285,7 @@ local template = grafana.template;
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel(|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -298,7 +298,7 @@ local template = grafana.template;
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel(|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -311,7 +311,7 @@ local template = grafana.template;
.addPanel(
g.panel('Average Container Bandwidth by Workload: Received') +
g.queryPanel(|||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -324,7 +324,7 @@ local template = grafana.template;
.addPanel(
g.panel('Average Container Bandwidth by Workload: Transmitted') +
g.queryPanel(|||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -337,7 +337,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel(|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -350,7 +350,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel(|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -363,7 +363,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel(|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
@ -376,7 +376,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +

View File

@ -61,32 +61,32 @@ local template = grafana.template;
local networkColumns = [
|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config,
@ -227,7 +227,7 @@ local template = grafana.template;
.addPanel(
g.panel('Receive Bandwidth') +
g.queryPanel(|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -240,7 +240,7 @@ local template = grafana.template;
.addPanel(
g.panel('Transmit Bandwidth') +
g.queryPanel(|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -253,7 +253,7 @@ local template = grafana.template;
.addPanel(
g.panel('Average Container Bandwidth by Pod: Received') +
g.queryPanel(|||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -266,7 +266,7 @@ local template = grafana.template;
.addPanel(
g.panel('Average Container Bandwidth by Pod: Transmitted') +
g.queryPanel(|||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -279,7 +279,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Received Packets') +
g.queryPanel(|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -292,7 +292,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Transmitted Packets') +
g.queryPanel(|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -305,7 +305,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Received Packets Dropped') +
g.queryPanel(|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +
@ -318,7 +318,7 @@ local template = grafana.template;
.addPanel(
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') +

View File

@ -14,7 +14,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local cpuStat =
numbersinglestat.new(
'CPU',
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
)
.withSpanSize(4)
.withPostfix('cores')
@ -23,7 +23,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local memoryStat =
numbersinglestat.new(
'Memory',
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
)
.withSpanSize(4)
.withPostfix('GB')
@ -32,7 +32,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local networkStat =
numbersinglestat.new(
'Network',
'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
)
.withSpanSize(4)
.withPostfix('Bps')

View File

@ -9,12 +9,6 @@
{
name: 'k8s.rules',
rules: [
{
record: 'namespace:container_cpu_usage_seconds_total:sum_rate',
expr: |||
sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])) by (namespace)
||| % $._config,
},
{
// Reduces cardinality of this timeseries by #cores, which makes it
// more useable in dashboards. Also, allows us to do things like
@ -64,12 +58,6 @@
)
||| % $._config,
},
{
record: 'namespace:container_memory_usage_bytes:sum',
expr: |||
sum(container_memory_usage_bytes{%(cadvisorSelector)s, image!="", container!="POD"}) by (namespace)
||| % $._config,
},
{
record: 'namespace:kube_pod_container_resource_requests_memory_bytes:sum',
expr: |||

View File

@ -116,15 +116,6 @@
},
}
for verb in verbs
] + [
{
record: 'cluster:apiserver_request_duration_seconds:mean5m',
expr: |||
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
/
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
||| % ($._config),
},
] + [
{
record: 'cluster_quantile:apiserver_request_duration_seconds:histogram_quantile',

View File

@ -10,14 +10,6 @@
{
name: 'node.rules',
rules: [
{
// Number of nodes in the cluster
// SINCE 2018-02-08
record: ':kube_pod_info_node_count:',
expr: |||
sum(min(kube_pod_info{node!=""}) by (%(clusterLabel)s, node))
||| % $._config,
},
{
// This rule results in the tuples (node, namespace, instance) => 1.
// It is used to calculate per-node metrics, given namespace & instance.

View File

@ -9,7 +9,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
commonLabels:: {
'app.kubernetes.io/name': 'kube-state-metrics',
'app.kubernetes.io/version': ksm.version,
'app.kubernetes.io/version': 'v' + ksm.version,
},
podLabels:: {
@ -58,6 +58,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'daemonsets',
'deployments',
'replicasets',
'ingresses',
]) +
rulesType.withVerbs(['list', 'watch']),
@ -134,14 +135,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
rulesType.withApiGroups(['networking.k8s.io']) +
rulesType.withResources([
'networkpolicies',
'ingresses',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['coordination.k8s.io']) +
rulesType.withResources([
'leases',
]) +
rulesType.withVerbs(['list', 'watch']),
];
@ -171,8 +164,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.mixin.readinessProbe.httpGet.withPath('/') +
container.mixin.readinessProbe.httpGet.withPort(8081) +
container.mixin.readinessProbe.withInitialDelaySeconds(5) +
container.mixin.readinessProbe.withTimeoutSeconds(5) +
container.mixin.securityContext.withRunAsUser(65534);
container.mixin.readinessProbe.withTimeoutSeconds(5);
deployment.new(ksm.name, 1, c, ksm.commonLabels) +
deployment.mixin.metadata.withNamespace(ksm.namespace) +
@ -228,7 +220,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
roleBinding.new() +
roleBinding.mixin.metadata.withName(ksm.name) +
roleBinding.mixin.metadata.withNamespace(ksm.namespace) +
roleBinding.mixin.metadata.withLabels(ksm.commonLabels) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName(ksm.name) +
@ -245,7 +236,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'--pod=$(POD_NAME)',
'--pod-namespace=$(POD_NAMESPACE)',
]) +
container.mixin.securityContext.withRunAsUser(65534) +
container.withEnv([
containerEnv.new('POD_NAME') +
containerEnv.mixin.valueFrom.fieldRef.withFieldPath('metadata.name'),

View File

@ -1,5 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
@ -42,30 +40,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
repeat_interval: '12h',
receiver: 'Default',
routes: [
{
receiver: 'Watchdog',
match: {
alertname: 'Watchdog',
},
},
{
receiver: 'Critical',
match: {
severity: 'critical',
},
},
{ receiver: 'Watchdog', match: { alertname: 'Watchdog' } },
{ receiver: 'Critical', match: { severity: 'critical' } },
],
},
receivers: [
{
name: 'Default',
},
{
name: 'Watchdog',
},
{
name: 'Critical',
},
{ name: 'Default' },
{ name: 'Watchdog' },
{ name: 'Critical' },
],
},
replicas: 3,
@ -73,37 +55,50 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
alertmanager+:: {
secret:
local secret = k.core.v1.secret;
if std.type($._config.alertmanager.config) == 'object' then
secret.new('alertmanager-' + $._config.alertmanager.name, {})
.withStringData({ 'alertmanager.yaml': std.manifestYamlDoc($._config.alertmanager.config) }) +
secret.mixin.metadata.withNamespace($._config.namespace)
secret: {
apiVersion: 'v1',
kind: 'Secret',
type: 'Opaque',
metadata: {
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: $._config.namespace,
},
stringData: {
'alertmanager.yaml': if std.type($._config.alertmanager.config) == 'object'
then
std.manifestYamlDoc($._config.alertmanager.config)
else
secret.new('alertmanager-' + $._config.alertmanager.name, {})
.withStringData({ 'alertmanager.yaml': $._config.alertmanager.config }) +
secret.mixin.metadata.withNamespace($._config.namespace),
$._config.alertmanager.config,
},
},
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: $._config.namespace,
},
},
serviceAccount.new('alertmanager-' + $._config.alertmanager.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: $._config.namespace,
labels: { alertmanager: $._config.alertmanager.name },
},
spec: {
ports: [
{ name: 'web', targetPort: 'web', port: 9093 },
],
selector: { app: 'alertmanager', alertmanager: $._config.alertmanager.name },
sessionAffinity: 'ClientIP',
},
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local alertmanagerPort = servicePort.newNamed('web', 9093, 'web');
service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }),
serviceMonitor:
{
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
@ -120,16 +115,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
endpoints: [
{
port: 'web',
interval: '30s',
},
{ port: 'web', interval: '30s' },
],
},
},
alertmanager:
{
alertmanager: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Alertmanager',
metadata: {

View File

@ -1,57 +0,0 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'alertmanager.rules',
rules: [
{
alert: 'AlertmanagerConfigInconsistent',
annotations: {
message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
},
expr: |||
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
{
alert: 'AlertmanagerFailedReload',
annotations: {
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
},
expr: |||
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'AlertmanagerMembersInconsistent',
annotations: {
message: 'Alertmanager has not found all other members of the cluster.',
},
expr: |||
alertmanager_cluster_members{%(alertmanagerSelector)s}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
],
},
],
},
}

View File

@ -1,3 +1,2 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') +
(import 'node.libsonnet')

View File

@ -26,7 +26,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.42"
"version": "release-0.44"
},
{
"source": {
@ -37,16 +37,6 @@
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master",
"name": "ksonnet"
},
{
"source": {
"git": {
@ -63,7 +53,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "master"
"version": "release-1.9"
},
{
"source": {
@ -90,8 +80,27 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "release-2.20",
"version": "release-2.23",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "master",
"name": "alertmanager"
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos",
"subdir": "mixin"
}
},
"version": "release-0.17"
}
],
"legacyImports": true

View File

@ -1,14 +1,7 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
versions+:: {
clusterVerticalAutoscaler: "v0.8.1"
},
imageRepos+:: {
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
},
versions+:: { clusterVerticalAutoscaler: '0.8.1' },
imageRepos+:: { clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64' },
kubeStateMetrics+:: {
stepCPU: '1m',
@ -16,103 +9,120 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
ksmAutoscaler+:: {
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local rulesType = clusterRole.rulesType;
clusterRole: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: { name: 'ksm-autoscaler' },
rules: [{
apiGroups: [''],
resources: ['nodes'],
verbs: ['list', 'watch'],
}],
},
local rules = [
rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'nodes',
]) +
rulesType.withVerbs(['list', 'watch']),
];
clusterRoleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: { name: 'ksm-autoscaler' },
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'ksm-autoscaler',
},
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }],
},
clusterRole.new() +
clusterRole.mixin.metadata.withName('ksm-autoscaler') +
clusterRole.withRules(rules),
roleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'ksm-autoscaler',
},
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }],
},
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
role: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
},
rules: [
{
apiGroups: ['extensions'],
resources: ['deployments'],
verbs: ['patch'],
resourceNames: ['kube-state-metrics'],
},
{
apiGroups: ['apps'],
resources: ['deployments'],
verbs: ['patch'],
resourceNames: ['kube-state-metrics'],
},
],
},
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]),
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
},
},
roleBinding:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
role:
local role = k.rbac.v1.role;
local rulesType = role.rulesType;
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local rules = [extensionsRule, appsRule];
role.new() +
role.mixin.metadata.withName('ksm-autoscaler') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRules(rules),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('ksm-autoscaler') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
deployment:
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local podLabels = { app: 'ksm-autoscaler' };
local kubeStateMetricsAutoscaler =
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) +
container.withArgs([
local c = {
name: 'ksm-autoscaler',
image: $._config.imageRepos.clusterVerticalAutoscaler + ':v' + $._config.versions.clusterVerticalAutoscaler,
args: [
'/cpvpa',
'--target=deployment/kube-state-metrics',
'--namespace=' + $._config.namespace,
'--logtostderr=true',
'--poll-period-seconds=10',
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}'
]) +
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'});
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}',
],
resources: {
requests: { cpu: '20m', memory: '10Mi' },
},
};
local c = [kubeStateMetricsAutoscaler];
deployment.new('ksm-autoscaler', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'),
{
apiVersion: 'apps/v1',
kind: 'Deployment',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
labels: podLabels,
},
spec: {
replicas: 1,
selector: { matchLabels: podLabels },
template: {
metadata: {
labels: podLabels,
},
spec: {
containers: [c],
serviceAccount: 'ksm-autoscaler',
nodeSelector: { 'kubernetes.io/os': 'linux' },
securityContext: {
runAsNonRoot: true,
runAsUser: 65534,
},
},
},
},
},
},
}

View File

@ -1,20 +1,11 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
prometheus+:: {
clusterRole+: {
rules+:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local rule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
[rule]
rules+: [{
apiGroups: [''],
resources: ['services', 'endpoints', 'pods'],
verbs: ['get', 'list', 'watch'],
}],
},
},
}
}

View File

@ -1,23 +1,22 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local statefulSet = k.apps.v1.statefulSet;
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{
local antiaffinity(key, values, namespace) = {
affinity: {
podAntiAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [
affinity.new() +
affinity.withWeight(100) +
affinity.mixin.podAffinityTerm.withNamespaces(namespace) +
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
matchExpression.new() +
matchExpression.withKey(key) +
matchExpression.withOperator('In') +
matchExpression.withValues(values),
]),
{
weight: 100,
podAffinityTerm: {
namespaces: [namespace],
topologyKey: 'kubernetes.io/hostname',
labelSelector: {
matchExpressions: [{
key: key,
operator: 'In',
values: values,
}],
},
},
},
],
},
},
@ -30,12 +29,12 @@ local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpres
},
},
prometheus+: {
prometheus+:: {
local p = self,
prometheus+: {
spec+:
antiaffinity('prometheus', [p.name], p.namespace),
antiaffinity('prometheus', [$._config.prometheus.name], $._config.namespace),
},
},
}

View File

@ -1,23 +1,42 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local service(name, namespace, labels, selector, ports) = {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{
prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
kubeControllerManagerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-controller-manager' },
{ 'k8s-app': 'kube-controller-manager' },
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
),
kubeSchedulerPrometheusDiscoveryService: service(
'kube-scheduler-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
),
kubeDnsPrometheusDiscoveryService: service(
'kube-dns-prometheus-discovery',
'kube-syste',
{ 'k8s-app': 'kube-dns' },
{ 'k8s-app': 'kube-dns' },
[{ name: 'http-metrics-skydns', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
),
},
}

View File

@ -9,9 +9,9 @@ local withImageRepository(repository) = {
if repository == null then image else repository + '/' + l.imageName(image),
_config+:: {
imageRepos:: {
[field]: substituteRepository(oldRepos[field], repository),
[field]: substituteRepository(oldRepos[field], repository)
for field in std.objectFields(oldRepos)
}
},
},
};

View File

@ -1,11 +1,10 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
// Custom metrics API allows the HPA v2 to scale based on arbirary metrics.
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
{
_config+:: {
prometheusAdapter+:: {
namespace: $._config.namespace,
// Rules for custom-metrics
config+:: {
rules+: [
@ -14,19 +13,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
seriesFilters: [],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
namespace: { resource: 'namespace' },
pod: { resource: 'pod' },
},
},
name: {
matches: '^container_(.*)_seconds_total$',
as: ""
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
name: { matches: '^container_(.*)_seconds_total$', as: '' },
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
},
{
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
@ -35,19 +27,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
namespace: { resource: 'namespace' },
pod: { resource: 'pod' },
},
},
name: {
matches: '^container_(.*)_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
name: { matches: '^container_(.*)_total$', as: '' },
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
},
{
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
@ -56,60 +41,38 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
namespace: { resource: 'namespace' },
pod: { resource: 'pod' },
},
},
name: {
matches: '^container_(.*)$',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)'
name: { matches: '^container_(.*)$', as: '' },
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)',
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [
{ isNot: '.*_total$' },
],
resources: {
template: '<<.Resource>>'
},
name: {
matches: '',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
resources: { template: '<<.Resource>>' },
name: { matches: '', as: '' },
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [
{ isNot: '.*_seconds_total' },
],
resources: {
template: '<<.Resource>>'
},
name: {
matches: '^(.*)_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
resources: { template: '<<.Resource>>' },
name: { matches: '^(.*)_total$', as: '' },
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [],
resources: {
template: '<<.Resource>>'
resources: { template: '<<.Resource>>' },
name: { matches: '^(.*)_seconds_total$', as: '' },
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
},
name: {
matches: '^(.*)_seconds_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
}
],
},
},
@ -125,7 +88,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
namespace: $._config.prometheusAdapter.namespace,
},
group: 'custom.metrics.k8s.io',
version: 'v1beta1',
@ -143,7 +106,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
namespace: $._config.prometheusAdapter.namespace,
},
group: 'custom.metrics.k8s.io',
version: 'v1beta2',
@ -152,46 +115,51 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
versionPriority: 200,
},
},
customMetricsClusterRoleServerResources:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['custom.metrics.k8s.io']) +
policyRule.withResources(['*']) +
policyRule.withVerbs(['*']);
clusterRole.new() +
clusterRole.mixin.metadata.withName('custom-metrics-server-resources') +
clusterRole.withRules(rules),
customMetricsClusterRoleBindingServerResources:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
customMetricsClusterRoleServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'custom-metrics-server-resources',
},
rules: [{
apiGroups: ['custom.metrics.k8s.io'],
resources: ['*'],
verbs: ['*'],
}],
},
customMetricsClusterRoleBindingServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'custom-metrics-server-resources',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'custom-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
customMetricsClusterRoleBindingHPA:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('hpa-controller-custom-metrics') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
namespace: $._config.prometheusAdapter.namespace,
}],
},
customMetricsClusterRoleBindingHPA: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'hpa-controller-custom-metrics',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'custom-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: 'horizontal-pod-autoscaler',
namespace: 'kube-system',
}]),
}
}],
},
},
}

View File

@ -1,33 +1,40 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
eks: {
minimumAvailableIPs: 10,
minimumAvailableIPsTime: '10m'
}
minimumAvailableIPsTime: '10m',
},
},
prometheus+: {
serviceMonitorCoreDNS+: {
spec+: {
endpoints: [
{
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
interval: "15s",
targetPort: 9153
}
]
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
interval: '15s',
targetPort: 9153,
},
],
},
},
AwsEksCniMetricService:
service.new('aws-node', { 'k8s-app' : 'aws-node' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorAwsEksCNI:
{
AwsEksCniMetricService: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'aws-node',
namespace: 'kube-system',
labels: { 'k8s-app': 'aws-node' },
},
spec: {
ports: [
{ name: 'cni-metrics-port', port: 61678, targetPort: 61678 },
],
selector: { 'k8s-app': 'aws-node' },
clusterIP: 'None',
},
},
serviceMonitorAwsEksCNI: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
@ -65,15 +72,15 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
name: 'kube-prometheus-eks.rules',
rules: [
{
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
labels: {
severity: 'critical',
},
annotations: {
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.',
},
'for': $._config.eks.minimumAvailableIPsTime,
alert: 'EksAvailableIPs'
alert: 'EksAvailableIPs',
},
],
},

View File

@ -0,0 +1,95 @@
// External metrics API allows the HPA v2 to scale based on metrics coming from outside of Kubernetes cluster
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
{
_config+:: {
prometheusAdapter+:: {
namespace: $._config.namespace,
// Rules for external-metrics
config+:: {
externalRules+: [
// {
// seriesQuery: '{__name__=~"^.*_queue$",namespace!=""}',
// seriesFilters: [],
// resources: {
// overrides: {
// namespace: { resource: 'namespace' }
// },
// },
// name: { matches: '^.*_queue$', as: '$0' },
// metricsQuery: 'max(<<.Series>>{<<.LabelMatchers>>})',
// },
],
},
},
},
prometheusAdapter+:: {
externalMetricsApiService: {
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
name: 'v1beta1.external.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.prometheusAdapter.namespace,
},
group: 'external.metrics.k8s.io',
version: 'v1beta1',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 100,
},
},
externalMetricsClusterRoleServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'external-metrics-server-resources',
},
rules: [{
apiGroups: ['external.metrics.k8s.io'],
resources: ['*'],
verbs: ['*'],
}],
},
externalMetricsClusterRoleBindingServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'external-metrics-server-resources',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'external-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.prometheusAdapter.namespace,
}],
},
externalMetricsClusterRoleBindingHPA: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'hpa-controller-external-metrics',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'external-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: 'horizontal-pod-autoscaler',
namespace: 'kube-system',
}],
},
},
}

View File

@ -0,0 +1,13 @@
(import './kube-prometheus-managed-cluster.libsonnet') + {
_config+:: {
prometheusAdapter+:: {
config+: {
resourceRules:: null,
},
},
},
prometheusAdapter+:: {
apiService:: null,
},
}

View File

@ -10,10 +10,7 @@
interval: '30s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
{ sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
],
},
{
@ -24,10 +21,7 @@
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
{ sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see

View File

@ -1,13 +1,20 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+:: {
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
kubeDnsPrometheusDiscoveryService: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'kube-dns-prometheus-discovery',
namespace: 'kube-system',
labels: { 'k8s-app': 'kube-dns' },
},
spec: {
ports: [
{ name: 'metrics', port: 9153, targetPort: 9153 },
],
selector: { 'k8s-app': 'kube-dns' },
clusterIP: 'None',
},
},
},
}

View File

@ -1,23 +1,40 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local service(name, namespace, labels, selector, ports) = {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{
prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
kubeControllerManagerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-controller-manager' },
{ 'k8s-app': 'kube-controller-manager' },
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
),
kubeSchedulerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
),
kubeDnsPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-dns' },
{ 'k8s-app': 'kube-dns' },
[{ name: 'metrics', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
),
},
}

View File

@ -1,18 +1,33 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local service(name, namespace, labels, selector, ports) = {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeControllerManagerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-controller-manager' },
{ 'k8s-app': 'kube-controller-manager' },
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }],
),
kubeSchedulerPrometheusDiscoveryService: service(
'kube-scheduler-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
},
}

View File

@ -1,18 +1,33 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local service(name, namespace, labels, selector, ports) = {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeControllerManagerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-controller-manager' },
{ component: 'kube-controller-manager' },
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
),
kubeSchedulerPrometheusDiscoveryService: service(
'kube-scheduler-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ component: 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
},
}

View File

@ -1,20 +1,36 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local service(name, namespace, labels, selector, ports) = {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'component': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'component': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeControllerManagerPrometheusDiscoveryService: service(
'kube-controller-manager-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-controller-manager' },
{ 'k8s-app': 'kube-controller-manager' },
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
),
kubeSchedulerPrometheusDiscoveryService: service(
'kube-scheduler-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
serviceMonitorKubeScheduler+: {
spec+: {

View File

@ -1,21 +1,18 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local patch(ports) = {
spec+: {
ports: ports,
type: 'NodePort',
},
};
{
prometheus+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) +
service.mixin.spec.withType('NodePort'),
service+: patch([{ name: 'web', port: 9090, targetPort: 'web', nodePort: 30900 }]),
},
alertmanager+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) +
service.mixin.spec.withType('NodePort'),
service+: patch([{ name: 'web', port: 9093, targetPort: 'web', nodePort: 30903 }]),
},
grafana+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) +
service.mixin.spec.withType('NodePort'),
service+: patch([{ name: 'http', port: 3000, targetPort: 'http', nodePort: 30902 }]),
},
}

View File

@ -1,5 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
(import 'github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet') + {
_config+:: {
etcd: {
@ -12,40 +10,40 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
prometheus+:: {
serviceEtcd:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379);
service.new('etcd', null, etcdServicePort) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
service.mixin.spec.withClusterIp('None'),
endpointsEtcd:
local endpoints = k.core.v1.endpoints;
local endpointSubset = endpoints.subsetsType;
local endpointPort = endpointSubset.portsType;
local etcdPort = endpointPort.new() +
endpointPort.withName('metrics') +
endpointPort.withPort(2379) +
endpointPort.withProtocol('TCP');
local subset = endpointSubset.new() +
endpointSubset.withAddresses([
serviceEtcd: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'etcd',
namespace: 'kube-system',
labels: { 'k8s-app': 'etcd' },
},
spec: {
ports: [
{ name: 'metrics', targetPort: 2379, port: 2379 },
],
clusterIP: 'None',
},
},
endpointsEtcd: {
apiVersion: 'v1',
kind: 'Endpoints',
metadata: {
name: 'etcd',
namespace: 'kube-system',
labels: { 'k8s-app': 'etcd' },
},
subsets: [{
addresses: [
{ ip: etcdIP }
for etcdIP in $._config.etcd.ips
]) +
endpointSubset.withPorts(etcdPort);
endpoints.new() +
endpoints.mixin.metadata.withName('etcd') +
endpoints.mixin.metadata.withNamespace('kube-system') +
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
endpoints.withSubsets(subset),
serviceMonitorEtcd:
{
],
ports: [
{ name: 'metrics', port: 2379, protocol: 'TCP' },
],
}],
},
serviceMonitorEtcd: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
@ -79,17 +77,22 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
},
secretEtcdCerts:
secretEtcdCerts: {
// Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod.
local secret = k.core.v1.secret;
secret.new('kube-etcd-client-certs', {
apiVersion: 'v1',
kind: 'Secret',
type: 'Opaque',
metadata: {
name: 'kube-etcd-client-certs',
namespace: $._config.namespace,
},
data: {
'etcd-client-ca.crt': std.base64($._config.etcd.clientCA),
'etcd-client.key': std.base64($._config.etcd.clientKey),
'etcd-client.crt': std.base64($._config.etcd.clientCert),
}) +
secret.mixin.metadata.withNamespace($._config.namespace),
prometheus+:
{
},
},
prometheus+: {
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
spec+: {
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],

View File

@ -24,7 +24,7 @@
spec+: {
local addArgs(c) =
if c.name == 'prometheus-operator'
then c + {args+: ['--config-reloader-cpu=0']}
then c { args+: ['--config-reloader-cpu=0'] }
else c,
containers: std.map(addArgs, super.containers),
},

View File

@ -1,15 +1,8 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
(import 'github.com/thanos-io/thanos/mixin/alerts/sidecar.libsonnet') +
{
_config+:: {
versions+:: {
thanos: 'v0.14.0',
},
imageRepos+:: {
thanos: 'quay.io/thanos/thanos',
},
versions+:: { thanos: 'v0.14.0' },
imageRepos+:: { thanos: 'quay.io/thanos/thanos' },
thanos+:: {
objectStorageConfig: {
key: 'thanos.yaml', // How the file inside the secret is called
@ -18,23 +11,34 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
},
},
prometheus+:: {
local p = self,
// Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier
service+: {
spec+: {
ports+: [
servicePort.newNamed('grpc', 10901, 10901),
{ name: 'grpc', port: 10901, targetPort: 10901 },
],
},
},
// Create a new service that exposes both sidecar's HTTP metrics port and gRPC StoreAPI
serviceThanosSidecar:
local thanosGrpcSidecarPort = servicePort.newNamed('grpc', 10901, 10901);
local thanosHttpSidecarPort = servicePort.newNamed('http', 10902, 10902);
service.new('prometheus-' + $._config.prometheus.name + '-thanos-sidecar', { app: 'prometheus', prometheus: $._config.prometheus.name }) +
service.mixin.spec.withPorts([thanosGrpcSidecarPort, thanosHttpSidecarPort]) +
service.mixin.spec.withClusterIp('None') +
service.mixin.metadata.withLabels({'prometheus': $._config.prometheus.name, 'app': 'thanos-sidecar'}) +
service.mixin.metadata.withNamespace($._config.namespace),
serviceThanosSidecar: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'prometheus-' + p.name + '-thanos-sidecar',
namespace: p.namespace,
labels: { prometheus: p.name, app: 'thanos-sidecar' },
},
spec: {
ports: [
{ name: 'grpc', port: 10901, targetPort: 10901 },
{ name: 'http', port: 10902, targetPort: 10902 },
],
selector: { app: 'prometheus', prometheus: p.name },
clusterIP: 'None',
},
},
prometheus+: {
spec+: {
thanos+: {
@ -50,7 +54,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
kind: 'ServiceMonitor',
metadata: {
name: 'thanos-sidecar',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'prometheus',
},
@ -60,7 +64,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
jobLabel: 'app',
selector: {
matchLabels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
app: 'thanos-sidecar',
},
},

View File

@ -1,14 +1,21 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
serviceWeaveNet:
service.new('weave-net', { 'name': 'weave-net' }, servicePort.newNamed('weave-net-metrics', 6782, 6782)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'weave-net' }) +
service.mixin.spec.withClusterIp('None'),
serviceWeaveNet: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'weave-net',
namespace: 'kube-system',
labels: { 'k8s-app': 'weave-net' },
},
spec: {
ports: [
{ name: 'weave-net-metrics', targetPort: 6782, port: 6782 },
],
selector: { name: 'weave-net' },
clusterIP: 'None',
},
},
serviceMonitorWeaveNet: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',

View File

@ -1,6 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
local configMapList = k3.core.v1.configMapList;
local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') +
@ -9,6 +6,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import './node-exporter/node-exporter.libsonnet') +
(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') +
(import './alertmanager/alertmanager.libsonnet') +
(import 'github.com/prometheus/alertmanager/doc/alertmanager-mixin/mixin.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') +
(import './prometheus/prometheus.libsonnet') +
@ -16,11 +14,19 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +
(import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') +
(import './alerts/alerts.libsonnet') +
(import './rules/rules.libsonnet') + {
(import './rules/rules.libsonnet') +
{
kubePrometheus+:: {
namespace: k.core.v1.namespace.new($._config.namespace),
namespace: {
apiVersion: 'v1',
kind: 'Namespace',
metadata: {
name: $._config.namespace,
},
prometheusOperator+:: {
},
},
prometheusOperator+::
{
service+: {
spec+: {
ports: [
@ -44,7 +50,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
insecureSkipVerify: true,
},
},
]
],
},
},
clusterRole+: {
@ -77,8 +83,14 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
},
}).deploymentMixin,
grafana+:: {
dashboardDefinitions: configMapList.new(super.dashboardDefinitions),
local dashboardDefinitions = super.dashboardDefinitions,
dashboardDefinitions: {
apiVersion: 'v1',
kind: 'ConfigMapList',
items: dashboardDefinitions,
},
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
@ -92,12 +104,10 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
app: 'grafana',
},
},
endpoints: [
{
endpoints: [{
port: 'http',
interval: '15s',
},
],
}],
},
},
},
@ -105,14 +115,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
_config+:: {
namespace: 'default',
versions+:: {
grafana: '7.1.0',
kubeRbacProxy: 'v0.6.0',
},
imageRepos+:: {
kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy',
},
versions+:: { grafana: '7.3.5', kubeRbacProxy: 'v0.8.0' },
imageRepos+:: { kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy' },
tlsCipherSuites: [
'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721
@ -143,6 +147,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305',
],
runbookURLPattern: 'https://github.com/prometheus-operator/kube-prometheus/wiki/%s',
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
@ -155,6 +161,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
coreDNSSelector: 'job="kube-dns"',
podLabel: 'pod',
alertmanagerName: '{{ $labels.namespace }}/{{ $labels.pod}}',
alertmanagerClusterLabels: 'namespace,service',
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
@ -191,13 +199,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
limits: { cpu: '250m', memory: '180Mi' },
},
},
prometheus+:: {
rules: $.prometheusRules + $.prometheusAlerts,
},
grafana+:: {
dashboards: $.grafanaDashboards,
},
prometheus+:: { rules: $.prometheusRules + $.prometheusAlerts },
grafana+:: { dashboards: $.grafanaDashboards },
},
}

View File

@ -1,8 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
{
local krp = self,
config+:: {
@ -33,17 +28,24 @@ local containerPort = container.portsType;
spec+: {
template+: {
spec+: {
containers+: [
container.new(krp.config.kubeRbacProxy.name, krp.config.kubeRbacProxy.image) +
container.mixin.securityContext.withRunAsUser(65534) +
container.withArgs([
containers+: [{
name: krp.config.kubeRbacProxy.name,
image: krp.config.kubeRbacProxy.image,
args: [
'--logtostderr',
'--secure-listen-address=' + krp.config.kubeRbacProxy.secureListenAddress,
'--tls-cipher-suites=' + std.join(',', krp.config.kubeRbacProxy.tlsCipherSuites),
'--upstream=' + krp.config.kubeRbacProxy.upstream,
]) +
container.withPorts(containerPort.newNamed(krp.config.kubeRbacProxy.securePort, krp.config.kubeRbacProxy.securePortName)),
],
ports: [
{ name: krp.config.kubeRbacProxy.securePortName, containerPort: krp.config.kubeRbacProxy.securePort },
],
securityContext: {
runAsUser: 65532,
runAsGroup: 65532,
runAsNonRoot: true,
},
}],
},
},
},

View File

@ -15,7 +15,7 @@ local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-
},
},
kubeStateMetrics+::
ksm + {
ksm {
local version = self.version,
name:: 'kube-state-metrics',
namespace:: $._config.namespace,

View File

@ -5,13 +5,13 @@ local imageName(image) =
local parts = std.split(image, '/');
local len = std.length(parts);
if len == 3 then
# registry.com/org/image
// registry.com/org/image
parts[2]
else if len == 2 then
# org/image
// org/image
parts[1]
else if len == 1 then
# image, ie. busybox
// image, ie. busybox
parts[0]
else
error 'unknown image format: ' + image;

View File

@ -1,16 +1,8 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
nodeExporter: 'v1.0.1',
},
imageRepos+:: {
nodeExporter: 'quay.io/prometheus/node-exporter',
},
versions+:: { nodeExporter: 'v1.0.1' },
imageRepos+:: { nodeExporter: 'quay.io/prometheus/node-exporter' },
nodeExporter+:: {
listenAddress: '127.0.0.1',
@ -28,76 +20,49 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
nodeExporter+:: {
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'node-exporter',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'node-exporter',
},
subjects: [{
kind: 'ServiceAccount',
name: 'node-exporter',
namespace: $._config.namespace,
}],
},
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('node-exporter') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('node-exporter') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'node-exporter', namespace: $._config.namespace }]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local authenticationRole = policyRule.new() +
policyRule.withApiGroups(['authentication.k8s.io']) +
policyRule.withResources([
'tokenreviews',
]) +
policyRule.withVerbs(['create']);
local authorizationRole = policyRule.new() +
policyRule.withApiGroups(['authorization.k8s.io']) +
policyRule.withResources([
'subjectaccessreviews',
]) +
policyRule.withVerbs(['create']);
local rules = [authenticationRole, authorizationRole];
clusterRole.new() +
clusterRole.mixin.metadata.withName('node-exporter') +
clusterRole.withRules(rules),
clusterRole: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'node-exporter',
},
rules: [
{
apiGroups: ['authentication.k8s.io'],
resources: ['tokenreviews'],
verbs: ['create'],
},
{
apiGroups: ['authorization.k8s.io'],
resources: ['subjectaccessreviews'],
verbs: ['create'],
},
],
},
daemonset:
local daemonset = k.apps.v1.daemonSet;
local container = daemonset.mixin.spec.template.spec.containersType;
local volume = daemonset.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
local containerVolumeMount = container.volumeMountsType;
local podSelector = daemonset.mixin.spec.template.spec.selectorType;
local toleration = daemonset.mixin.spec.template.spec.tolerationsType;
local containerEnv = container.envType;
local podLabels = $._config.nodeExporter.labels;
local selectorLabels = $._config.nodeExporter.selectorLabels;
local existsToleration = toleration.new() +
toleration.withOperator('Exists');
local procVolumeName = 'proc';
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc').
withMountPropagation('HostToContainer').
withReadOnly(true);
local sysVolumeName = 'sys';
local sysVolume = volume.fromHostPath(sysVolumeName, '/sys');
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys').
withMountPropagation('HostToContainer').
withReadOnly(true);
local rootVolumeName = 'root';
local rootVolume = volume.fromHostPath(rootVolumeName, '/');
local rootVolumeMount = containerVolumeMount.new(rootVolumeName, '/host/root').
withMountPropagation('HostToContainer').
withReadOnly(true);
local nodeExporter =
container.new('node-exporter', $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter) +
container.withArgs([
local nodeExporter = {
name: 'node-exporter',
image: $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter,
args: [
'--web.listen-address=' + std.join(':', [$._config.nodeExporter.listenAddress, std.toString($._config.nodeExporter.port)]),
'--path.procfs=/host/proc',
'--path.sysfs=/host/sys',
@ -105,20 +70,27 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
'--no-collector.wifi',
'--no-collector.hwmon',
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
]) +
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
container.mixin.resources.withRequests($._config.resources['node-exporter'].requests) +
container.mixin.resources.withLimits($._config.resources['node-exporter'].limits);
],
volumeMounts: [
{ name: 'proc', mountPath: '/host/proc', mountPropagation: 'HostToContainer', readOnly: true },
{ name: 'sys', mountPath: '/host/sys', mountPropagation: 'HostToContainer', readOnly: true },
{ name: 'root', mountPath: '/host/root', mountPropagation: 'HostToContainer', readOnly: true },
],
resources: $._config.resources['node-exporter'],
};
local ip = containerEnv.fromFieldPath('IP', 'status.podIP');
local proxy =
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
container.withArgs([
local proxy = {
name: 'kube-rbac-proxy',
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
args: [
'--logtostderr',
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
]) +
],
env: [
{ name: 'IP', valueFrom: { fieldRef: { fieldPath: 'status.podIP' } } },
],
// Keep `hostPort` here, rather than in the node-exporter container
// because Kubernetes mandates that if you define a `hostPort` then
// `containerPort` must match. In our case, we are splitting the
@ -127,38 +99,66 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
// used by the service is tied to the proxy container. We *could*
// forgo declaring the host port, however it is important to declare
// it so that the scheduler can decide if the pod is schedulable.
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) +
container.withEnv([ip]);
ports: [
{ name: 'https', containerPort: $._config.nodeExporter.port, hostPort: $._config.nodeExporter.port },
],
resources: $._config.resources['kube-rbac-proxy'],
securityContext: {
runAsUser: 65532,
runAsGroup: 65532,
runAsNonRoot: true,
},
};
local c = [nodeExporter, proxy];
daemonset.new() +
daemonset.mixin.metadata.withName('node-exporter') +
daemonset.mixin.metadata.withNamespace($._config.namespace) +
daemonset.mixin.metadata.withLabels(podLabels) +
daemonset.mixin.spec.selector.withMatchLabels(selectorLabels) +
daemonset.mixin.spec.updateStrategy.rollingUpdate.withMaxUnavailable('10%') +
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withContainers(c) +
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
daemonset.mixin.spec.template.spec.withServiceAccountName('node-exporter') +
daemonset.mixin.spec.template.spec.withHostPid(true) +
daemonset.mixin.spec.template.spec.withHostNetwork(true),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('node-exporter') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{
apiVersion: 'apps/v1',
kind: 'DaemonSet',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
labels: $._config.nodeExporter.labels,
},
spec: {
selector: { matchLabels: $._config.nodeExporter.selectorLabels },
updateStrategy: {
type: 'RollingUpdate',
rollingUpdate: { maxUnavailable: '10%' },
},
template: {
metadata: { labels: $._config.nodeExporter.labels },
spec: {
nodeSelector: { 'kubernetes.io/os': 'linux' },
tolerations: [{
operator: 'Exists',
}],
containers: [nodeExporter, proxy],
volumes: [
{ name: 'proc', hostPath: { path: '/proc' } },
{ name: 'sys', hostPath: { path: '/sys' } },
{ name: 'root', hostPath: { path: '/' } },
],
serviceAccountName: 'node-exporter',
securityContext: {
runAsUser: 65534,
runAsNonRoot: true,
},
hostPID: true,
hostNetwork: true,
},
},
},
},
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
},
},
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
@ -171,8 +171,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
selector: {
matchLabels: $._config.nodeExporter.selectorLabels,
},
endpoints: [
{
endpoints: [{
port: 'https',
scheme: 'https',
interval: '15s',
@ -189,20 +188,25 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
tlsConfig: {
insecureSkipVerify: true,
},
}],
},
},
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
labels: $._config.nodeExporter.labels,
},
spec: {
ports: [
{ name: 'https', targetPort: 'https', port: $._config.nodeExporter.port },
],
selector: $._config.nodeExporter.selectorLabels,
clusterIP: 'None',
},
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https');
service.new('node-exporter', $._config.nodeExporter.selectorLabels, nodeExporterPort) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels($._config.nodeExporter.labels) +
service.mixin.spec.withClusterIp('None'),
},
}

View File

@ -1,19 +1,13 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheusAdapter: 'v0.7.0',
},
imageRepos+:: {
prometheusAdapter: 'directxman12/k8s-prometheus-adapter',
},
versions+:: { prometheusAdapter: 'v0.8.2' },
imageRepos+:: { prometheusAdapter: 'directxman12/k8s-prometheus-adapter' },
prometheusAdapter+:: {
name: 'prometheus-adapter',
namespace: $._config.namespace,
labels: { name: $._config.prometheusAdapter.name },
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
config: {
@ -23,46 +17,33 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: {
overrides: {
node: {
resource: 'node'
},
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
node: { resource: 'node' },
namespace: { resource: 'namespace' },
pod: { resource: 'pod' },
},
},
},
containerLabel: 'container'
containerLabel: 'container',
},
memory: {
containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)',
nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: {
overrides: {
instance: {
resource: 'node'
},
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
instance: { resource: 'node' },
namespace: { resource: 'namespace' },
pod: { resource: 'pod' },
},
},
},
containerLabel: 'container'
containerLabel: 'container',
},
window: '5m',
},
}
},
},
},
prometheusAdapter+:: {
apiService:
{
apiService: {
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
@ -71,7 +52,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
namespace: $._config.prometheusAdapter.namespace,
},
group: 'metrics.k8s.io',
version: 'v1beta1',
@ -81,19 +62,22 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
configMap:
local configmap = k.core.v1.configMap;
configmap.new('adapter-config', { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) }) +
configMap: {
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: 'adapter-config',
namespace: $._config.prometheusAdapter.namespace,
},
data: { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) },
},
configmap.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.namespace,
namespace: $._config.prometheusAdapter.namespace,
labels: $._config.prometheusAdapter.labels,
},
spec: {
@ -114,148 +98,180 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
service.new(
$._config.prometheusAdapter.name,
$._config.prometheusAdapter.labels,
servicePort.newNamed('https', 443, 6443),
) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels($._config.prometheusAdapter.labels),
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.prometheusAdapter.namespace,
labels: $._config.prometheusAdapter.labels,
},
spec: {
ports: [
{ name: 'https', targetPort: 6443, port: 443 },
],
selector: $._config.prometheusAdapter.labels,
},
},
deployment:
local deployment = k.apps.v1.deployment;
local volume = deployment.mixin.spec.template.spec.volumesType;
local container = deployment.mixin.spec.template.spec.containersType;
local containerVolumeMount = container.volumeMountsType;
local c =
container.new($._config.prometheusAdapter.name, $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter) +
container.withArgs([
local c = {
name: $._config.prometheusAdapter.name,
image: $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter,
args: [
'--cert-dir=/var/run/serving-cert',
'--config=/etc/adapter/config.yaml',
'--logtostderr=true',
'--metrics-relist-interval=1m',
'--prometheus-url=' + $._config.prometheusAdapter.prometheusURL,
'--secure-port=6443',
]) +
container.withPorts([{ containerPort: 6443 }]) +
container.withVolumeMounts([
containerVolumeMount.new('tmpfs', '/tmp'),
containerVolumeMount.new('volume-serving-cert', '/var/run/serving-cert'),
containerVolumeMount.new('config', '/etc/adapter'),
],);
],
ports: [{ containerPort: 6443 }],
volumeMounts: [
{ name: 'tmpfs', mountPath: '/tmp', readOnly: false },
{ name: 'volume-serving-cert', mountPath: '/var/run/serving-cert', readOnly: false },
{ name: 'config', mountPath: '/etc/adapter', readOnly: false },
],
};
deployment.new($._config.prometheusAdapter.name, 1, c, $._config.prometheusAdapter.labels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) +
deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) +
deployment.mixin.spec.template.spec.withVolumes([
volume.fromEmptyDir(name='tmpfs'),
volume.fromEmptyDir(name='volume-serving-cert'),
{
apiVersion: 'apps/v1',
kind: 'Deployment',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.prometheusAdapter.namespace,
},
spec: {
replicas: 1,
selector: { matchLabels: $._config.prometheusAdapter.labels },
strategy: {
rollingUpdate: {
maxSurge: 1,
maxUnavailable: 0,
},
},
template: {
metadata: { labels: $._config.prometheusAdapter.labels },
spec: {
containers: [c],
serviceAccountName: $.prometheusAdapter.serviceAccount.metadata.name,
nodeSelector: { 'kubernetes.io/os': 'linux' },
volumes: [
{ name: 'tmpfs', emptyDir: {} },
{ name: 'volume-serving-cert', emptyDir: {} },
{ name: 'config', configMap: { name: 'adapter-config' } },
]),
],
},
},
},
},
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.prometheusAdapter.namespace,
},
},
serviceAccount.new($._config.prometheusAdapter.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
clusterRole: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: $._config.prometheusAdapter.name,
},
rules: [{
apiGroups: [''],
resources: ['nodes', 'namespaces', 'pods', 'services'],
verbs: ['get', 'list', 'watch'],
}],
},
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources(['nodes', 'namespaces', 'pods', 'services']) +
policyRule.withVerbs(['get', 'list', 'watch']);
clusterRole.new() +
clusterRole.mixin.metadata.withName($._config.prometheusAdapter.name) +
clusterRole.withRules(rules),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName($._config.prometheusAdapter.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName($.prometheusAdapter.clusterRole.metadata.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
clusterRoleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: $._config.prometheusAdapter.name,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: $.prometheusAdapter.clusterRole.metadata.name,
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
namespace: $._config.prometheusAdapter.namespace,
}],
},
clusterRoleBindingDelegator:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('resource-metrics:system:auth-delegator') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('system:auth-delegator') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
clusterRoleBindingDelegator: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'resource-metrics:system:auth-delegator',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'system:auth-delegator',
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
namespace: $._config.prometheusAdapter.namespace,
}],
},
clusterRoleServerResources:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
clusterRoleServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'resource-metrics-server-resources',
},
rules: [{
apiGroups: ['metrics.k8s.io'],
resources: ['*'],
verbs: ['*'],
}],
},
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['*']) +
policyRule.withVerbs(['*']);
clusterRoleAggregatedMetricsReader: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'system:aggregated-metrics-reader',
labels: {
'rbac.authorization.k8s.io/aggregate-to-admin': 'true',
'rbac.authorization.k8s.io/aggregate-to-edit': 'true',
'rbac.authorization.k8s.io/aggregate-to-view': 'true',
},
},
rules: [{
apiGroups: ['metrics.k8s.io'],
resources: ['pods', 'nodes'],
verbs: ['get', 'list', 'watch'],
}],
},
clusterRole.new() +
clusterRole.mixin.metadata.withName('resource-metrics-server-resources') +
clusterRole.withRules(rules),
clusterRoleAggregatedMetricsReader:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['pods', 'nodes']) +
policyRule.withVerbs(['get','list','watch']);
clusterRole.new() +
clusterRole.mixin.metadata.withName('system:aggregated-metrics-reader') +
clusterRole.mixin.metadata.withLabels({
"rbac.authorization.k8s.io/aggregate-to-admin": "true",
"rbac.authorization.k8s.io/aggregate-to-edit": "true",
"rbac.authorization.k8s.io/aggregate-to-view": "true",
}) +
clusterRole.withRules(rules),
roleBindingAuthReader:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('resource-metrics-auth-reader') +
roleBinding.mixin.metadata.withNamespace('kube-system') +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('extension-apiserver-authentication-reader') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{
roleBindingAuthReader: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: 'resource-metrics-auth-reader',
namespace: 'kube-system',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'extension-apiserver-authentication-reader',
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
namespace: $._config.prometheusAdapter.namespace,
}],
},
},
}

View File

@ -1,21 +1,12 @@
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local relabelings = import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheus: 'v2.20.0',
},
imageRepos+:: {
prometheus: 'quay.io/prometheus/prometheus',
},
alertmanager+:: {
name: 'main',
},
versions+:: { prometheus: 'v2.22.1' },
imageRepos+:: { prometheus: 'quay.io/prometheus/prometheus' },
alertmanager+:: { name: 'main' },
prometheus+:: {
name: 'k8s',
@ -35,24 +26,33 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
prometheusRules:: $._config.prometheus.rules,
alertmanagerName:: $.alertmanager.service.metadata.name,
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount: {
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'prometheus-' + p.name,
namespace: p.namespace,
},
},
serviceAccount.new('prometheus-' + p.name) +
serviceAccount.mixin.metadata.withNamespace(p.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'prometheus-' + p.name,
namespace: p.namespace,
labels: { prometheus: p.name },
},
spec: {
ports: [
{ name: 'web', targetPort: 'web', port: 9090 },
],
selector: { app: 'prometheus', prometheus: p.name },
sessionAffinity: 'ClientIP',
},
},
local prometheusPort = servicePort.newNamed('web', 9090, 'web');
service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace(p.namespace) +
service.mixin.metadata.withLabels({ prometheus: p.name }),
rules:
{
rules: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
@ -69,117 +69,130 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
roleBindingSpecificNamespaces:
local roleBinding = k.rbac.v1.roleBinding;
local newSpecificRoleBinding(namespace) =
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
roleBinding.mixin.metadata.withNamespace(namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
local roleBindingList = k3.rbac.v1.roleBindingList;
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local nodeMetricsRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources(['nodes/metrics']) +
policyRule.withVerbs(['get']);
local metricsRule = policyRule.new() +
policyRule.withNonResourceUrls('/metrics') +
policyRule.withVerbs(['get']);
local rules = [nodeMetricsRule, metricsRule];
clusterRole.new() +
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
clusterRole.withRules(rules),
roleConfig:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local configmapRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'configmaps',
]) +
policyRule.withVerbs(['get']);
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
role.mixin.metadata.withNamespace(p.namespace) +
role.withRules(configmapRule),
roleBindingConfig:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.metadata.withNamespace(p.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
roleSpecificNamespaces:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local coreRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local ingressRule = policyRule.new() +
policyRule.withApiGroups(['extensions']) +
policyRule.withResources([
'ingresses',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local newSpecificRole(namespace) =
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name) +
role.mixin.metadata.withNamespace(namespace) +
role.withRules([coreRule, ingressRule]);
local roleList = k3.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
prometheus:
local statefulSet = k.apps.v1.statefulSet;
local container = statefulSet.mixin.spec.template.spec.containersType;
local resourceRequirements = container.mixin.resourcesType;
local selector = statefulSet.mixin.spec.selectorType;
local resources =
resourceRequirements.new() +
resourceRequirements.withRequests({ memory: '400Mi' });
local newSpecificRoleBinding(namespace) = {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: 'prometheus-' + p.name,
namespace: namespace,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'prometheus-' + p.name,
},
subjects: [{
kind: 'ServiceAccount',
name: 'prometheus-' + p.name,
namespace: p.namespace,
}],
};
{
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBindingList',
items: [newSpecificRoleBinding(x) for x in p.roleBindingNamespaces],
},
clusterRole: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: { name: 'prometheus-' + p.name },
rules: [
{
apiGroups: [''],
resources: ['nodes/metrics'],
verbs: ['get'],
},
{
nonResourceURLs: ['/metrics'],
verbs: ['get'],
},
],
},
roleConfig: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'prometheus-' + p.name + '-config',
namespace: p.namespace,
},
rules: [{
apiGroups: [''],
resources: ['configmaps'],
verbs: ['get'],
}],
},
roleBindingConfig: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: 'prometheus-' + p.name + '-config',
namespace: p.namespace,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'prometheus-' + p.name + '-config',
},
subjects: [{
kind: 'ServiceAccount',
name: 'prometheus-' + p.name,
namespace: p.namespace,
}],
},
clusterRoleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: { name: 'prometheus-' + p.name },
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'prometheus-' + p.name,
},
subjects: [{
kind: 'ServiceAccount',
name: 'prometheus-' + p.name,
namespace: p.namespace,
}],
},
roleSpecificNamespaces:
local newSpecificRole(namespace) = {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'prometheus-' + p.name,
namespace: namespace,
},
rules: [
{
apiGroups: [''],
resources: ['services', 'endpoints', 'pods'],
verbs: ['get', 'list', 'watch'],
},
{
apiGroups: ['extensions'],
resources: ['ingresses'],
verbs: ['get', 'list', 'watch'],
},
],
};
{
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleList',
items: [newSpecificRole(x) for x in p.roleBindingNamespaces],
},
prometheus: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
metadata: {
name: p.name,
namespace: p.namespace,
labels: {
prometheus: p.name,
},
labels: { prometheus: p.name },
},
spec: {
replicas: p.replicas,
@ -193,19 +206,21 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
podMonitorNamespaceSelector: {},
probeNamespaceSelector: {},
nodeSelector: { 'kubernetes.io/os': 'linux' },
ruleSelector: selector.withMatchLabels({
ruleSelector: {
matchLabels: {
role: 'alert-rules',
prometheus: p.name,
}),
resources: resources,
},
},
resources: {
requests: { memory: '400Mi' },
},
alerting: {
alertmanagers: [
{
alertmanagers: [{
namespace: p.namespace,
name: p.alertmanagerName,
port: 'web',
},
],
}],
},
securityContext: {
runAsUser: 1000,
@ -214,77 +229,59 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
},
serviceMonitor:
{
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus',
namespace: p.namespace,
labels: {
'k8s-app': 'prometheus',
},
labels: { 'k8s-app': 'prometheus' },
},
spec: {
selector: {
matchLabels: {
prometheus: p.name,
matchLabels: { prometheus: p.name },
},
},
endpoints: [
{
endpoints: [{
port: 'web',
interval: '30s',
},
],
}],
},
},
serviceMonitorKubeScheduler:
{
serviceMonitorKubeScheduler: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: p.namespace,
labels: {
'k8s-app': 'kube-scheduler',
},
labels: { 'k8s-app': 'kube-scheduler' },
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: "https",
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
tlsConfig: {
insecureSkipVerify: true
}
},
],
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: { insecureSkipVerify: true },
}],
selector: {
matchLabels: {
'k8s-app': 'kube-scheduler',
},
matchLabels: { 'k8s-app': 'kube-scheduler' },
},
namespaceSelector: {
matchNames: [
'kube-system',
],
matchNames: ['kube-system'],
},
},
},
serviceMonitorKubelet:
{
serviceMonitorKubelet: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: p.namespace,
labels: {
'k8s-app': 'kubelet',
},
labels: { 'k8s-app': 'kubelet' },
},
spec: {
jobLabel: 'k8s-app',
@ -294,17 +291,13 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
scheme: 'https',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
tlsConfig: { insecureSkipVerify: true },
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
relabelings: [
{
metricRelabelings: relabelings,
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
}],
},
{
port: 'https-metrics',
@ -317,12 +310,10 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
}],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
@ -339,83 +330,65 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
path: '/metrics/probes',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
tlsConfig: { insecureSkipVerify: true },
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
}],
},
],
selector: {
matchLabels: {
'k8s-app': 'kubelet',
},
matchLabels: { 'k8s-app': 'kubelet' },
},
namespaceSelector: {
matchNames: [
'kube-system',
],
matchNames: ['kube-system'],
},
},
},
serviceMonitorKubeControllerManager:
{
serviceMonitorKubeControllerManager: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-controller-manager',
namespace: p.namespace,
labels: {
'k8s-app': 'kube-controller-manager',
},
labels: { 'k8s-app': 'kube-controller-manager' },
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: "https",
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true
insecureSkipVerify: true,
},
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
metricRelabelings: relabelings + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
},
],
},
],
}],
selector: {
matchLabels: {
'k8s-app': 'kube-controller-manager',
},
matchLabels: { 'k8s-app': 'kube-controller-manager' },
},
namespaceSelector: {
matchNames: [
'kube-system',
],
matchNames: ['kube-system'],
},
},
},
serviceMonitorApiserver:
{
serviceMonitorApiserver: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: p.namespace,
labels: {
'k8s-app': 'apiserver',
},
labels: { 'k8s-app': 'apiserver' },
},
spec: {
jobLabel: 'component',
@ -426,12 +399,9 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
},
},
namespaceSelector: {
matchNames: [
'default',
],
matchNames: ['default'],
},
endpoints: [
{
endpoints: [{
port: 'https',
interval: '30s',
scheme: 'https',
@ -440,7 +410,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
metricRelabelings: relabelings + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|server).*',
@ -462,40 +432,31 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
action: 'drop',
},
],
},
],
}],
},
},
serviceMonitorCoreDNS:
{
serviceMonitorCoreDNS: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: p.namespace,
labels: {
'k8s-app': 'coredns',
},
labels: { 'k8s-app': 'coredns' },
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'kube-dns',
},
matchLabels: { 'k8s-app': 'kube-dns' },
},
namespaceSelector: {
matchNames: [
'kube-system',
],
matchNames: ['kube-system'],
},
endpoints: [
{
endpoints: [{
port: 'metrics',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
},
],
}],
},
},
},

View File

@ -5,7 +5,7 @@
name: 'kube-prometheus-node-recording.rules',
rules: [
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)',
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)',
record: 'instance:node_cpu:rate:sum',
},
{
@ -17,11 +17,11 @@
record: 'instance:node_network_transmit_bytes:rate:sum',
},
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
record: 'instance:node_cpu:ratio',
},
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))',
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))',
record: 'cluster:node_cpu:sum_rate5m',
},
{

View File

@ -88,6 +88,20 @@
},
'for': '5m',
},
{
alert: 'PrometheusOperatorRejectedResources',
expr: |||
min_over_time(prometheus_operator_managed_resources{state="rejected",%(prometheusOperatorSelector)s}[5m]) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.',
summary: 'Resources rejected by Prometheus operator',
},
'for': '5m',
},
],
},
],

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -15,14 +15,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
},
versions+:: {
prometheusOperator: 'v0.42.1',
prometheusOperator: 'v0.44.1',
prometheusConfigReloader: self.prometheusOperator,
configmapReloader: 'v0.4.0',
},
imageRepos+:: {
prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator',
configmapReloader: 'jimmidyson/configmap-reload',
prometheusConfigReloader: 'quay.io/prometheus-operator/prometheus-config-reloader',
},
},
@ -36,13 +34,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
image:: $._config.imageRepos.prometheusOperator,
version:: $._config.versions.prometheusOperator,
configReloaderImage:: $._config.imageRepos.configmapReloader,
configReloaderVersion:: $._config.versions.configmapReloader,
prometheusConfigReloaderImage:: $._config.imageRepos.prometheusConfigReloader,
prometheusConfigReloaderVersion:: $._config.versions.prometheusConfigReloader,
// Prefixing with 0 to ensure these manifests are listed and therefore created first.
'0alertmanagerCustomResourceDefinition': import 'alertmanager-crd.libsonnet',
'0alertmanagerConfigCustomResourceDefinition': import 'alertmanagerconfig-crd.libsonnet',
'0prometheusCustomResourceDefinition': import 'prometheus-crd.libsonnet',
'0servicemonitorCustomResourceDefinition': import 'servicemonitor-crd.libsonnet',
'0podmonitorCustomResourceDefinition': import 'podmonitor-crd.libsonnet',
@ -70,6 +67,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
policyRule.withResources([
'alertmanagers',
'alertmanagers/finalizers',
'alertmanagerconfigs',
'prometheuses',
'prometheuses/finalizers',
'thanosrulers',
@ -126,7 +124,15 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule];
local ingressRule = policyRule.new() +
policyRule.withApiGroups(['networking.k8s.io']) +
policyRule.withResources([
'ingresses',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule, ingressRule];
clusterRole.new() +
clusterRole.mixin.metadata.withLabels(po.commonLabels) +
@ -145,10 +151,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.withPorts(containerPort.newNamed(targetPort, 'http')) +
container.withArgs([
'--kubelet-service=kube-system/kubelet',
// Prometheus Operator is run with a read-only root file system. By
// default glog saves logfiles to /tmp. Make it log to stderr instead.
'--logtostderr=true',
'--config-reloader-image=' + po.configReloaderImage + ':' + po.configReloaderVersion,
'--prometheus-config-reloader=' + po.prometheusConfigReloaderImage + ':' + po.prometheusConfigReloaderVersion,
]) +
container.mixin.securityContext.withAllowPrivilegeEscalation(false) +

View File

@ -1 +1 @@
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.2.4"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. 
More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.4.1"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines recording and alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. 
More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}

Some files were not shown because too many files have changed in this diff Show More