update monitoring stack
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Tobias Brunner 2020-12-11 21:32:59 +01:00
parent 5ffa599b0c
commit 1464357954
157 changed files with 9537 additions and 2468 deletions

View File

@ -8,8 +8,8 @@
"subdir": "grafana" "subdir": "grafana"
} }
}, },
"version": "d7c1a53462ecd533593c60e5277b92fbf7ea7623", "version": "8024f4fdaeb3a3a7d72f77e2ed87deb92c79aeda",
"sum": "8OnIwMhzWtgoWYHNrDlkzUAMr/CPsWKauYEv0vnH1zs=" "sum": "WXrJQtWuU5lJVc4jXkJGddPMpPP0+4eMcIB5cauZGgM="
}, },
{ {
"source": { "source": {
@ -18,8 +18,8 @@
"subdir": "Documentation/etcd-mixin" "subdir": "Documentation/etcd-mixin"
} }
}, },
"version": "e42127658c910d91e7902be958f12d41ac33d54f", "version": "ca866c02422ff3f3d1f0876898a30c33dd7bcccf",
"sum": "L+PGlPK9mykGCJ9TIoEWdhMBjz+9lKuQ4YZ8fOeP9sk=" "sum": "bLqTqEr0jky9zz5MV/7ucn6H5mph2NlXas0TVnGNB1Y="
}, },
{ {
"source": { "source": {
@ -28,8 +28,8 @@
"subdir": "grafonnet" "subdir": "grafonnet"
} }
}, },
"version": "8d382c732dbdc839ff07549a3f42d25828f1b268", "version": "356bd73e4792ffe107725776ca8946895969c191",
"sum": "DRSRw4luAXlBXblo19/T1Jrv+9hyV8ivlS0KEtNANec=" "sum": "CSMZ3dJrpJpwvffie8BqcfrIVVwiKNqdPEN+1XWRBGU="
}, },
{ {
"source": { "source": {
@ -38,8 +38,8 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "b5e45051995755ea373ea67642f8e5f54fcb8dd7", "version": "216bc806bb512f218e3cf5ed3d4f5699b07f04d6",
"sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc=" "sum": "9/eJqljTTtJeq9QRjabdKWL6yD8a7VzLmGKBK3ir77k="
}, },
{ {
"source": { "source": {
@ -59,8 +59,8 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba", "version": "ead45674dba3c8712e422d99223453177aac6bf4",
"sum": "ttkPUnv/5bqlOFcZ8fvp2wi/S7ZLKiqAZ4ZdTolX77M=" "sum": "3i0NkntlBluDS1NRF+iSc2e727Alkv3ziuVjAP12/kE="
}, },
{ {
"source": { "source": {
@ -69,7 +69,7 @@
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba", "version": "ead45674dba3c8712e422d99223453177aac6bf4",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
}, },
{ {
@ -79,8 +79,8 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063", "version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309",
"sum": "ySP+bI2ZMLPt/sguSh9WrwI5H5dasaNFRE8Uo9PcZrI=" "sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI="
}, },
{ {
"source": { "source": {
@ -89,7 +89,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063", "version": "7bdd62593c9273b5179cf3c9d2d819e9d997aaa4",
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo=" "sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
}, },
{ {
@ -99,8 +99,8 @@
"subdir": "jsonnet/kube-prometheus" "subdir": "jsonnet/kube-prometheus"
} }
}, },
"version": "980e95de011319b88a3b9c0787a81dcdf338a898", "version": "7d7d40b4dee70ecd3328dcdee2ed0cc8f806df93",
"sum": "BxOXyWCSc9KkgWJXDau2Xtsy3aOYZDHz2VqOSLga7VU=" "sum": "6PhhQPWilq4skfe+z/hXKEg1pRqHnwvMR1Au6W136U0="
}, },
{ {
"source": { "source": {
@ -109,8 +109,8 @@
"subdir": "jsonnet/mixin" "subdir": "jsonnet/mixin"
} }
}, },
"version": "55baf034c431ed2c78d950b187f7d8b34dd06860", "version": "117c9a2cd905479022a66ddd92a41f599cccf10d",
"sum": "+Q45oBC7O8g7KQOaiKhGglwndAMWRlLTR94KUI8Q1Ko=" "sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U="
}, },
{ {
"source": { "source": {
@ -119,8 +119,19 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "cd331ce9bb58bb926e391c6ae807621cb12cc29e", "version": "d8b7d3766225908d0239fd0d78258892cd0fc384",
"sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk=" "sum": "Nl+N/h76bzD9tZ8tx7tuNIKHwCIJ9zyOsAWplH8HvAE="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "193ebba04d1e70d971047e983a0b489112610460",
"sum": "QcftU7gjCQyj7B6M4YJeCAeaPd0kwxd4J4rolo7AnLE=",
"name": "alertmanager"
}, },
{ {
"source": { "source": {
@ -129,8 +140,8 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "f81747e608ea85ae44e76454eb63f9cb6484fb9e", "version": "8b466360a35581e0301bd22918be7011cf4203c3",
"sum": "VyMzZPxQIjiKQYGjZjXeKNWfLJ9vOl3emp84PWfsrUc=" "sum": "rvyiD/yCB4BeYAWqYF53bP8c+aCUt2ipLHW2Ea8ELO8="
}, },
{ {
"source": { "source": {
@ -139,10 +150,20 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "983ebb4a513302315a8117932ab832815f85e3d2", "version": "26d89b4b0776fe4cd5a3656dfa520f119a375273",
"sum": "TBq4SL7YsPInARbJqwz25JaBvvAegcnRCsuz3K9niWc=", "sum": "1VRVMuxAEZ9vdGHFlndmG9iQzDD6AoIXrX80CDpGDaU=",
"name": "prometheus" "name": "prometheus"
}, },
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos",
"subdir": "mixin"
}
},
"version": "37e6ef61566c7c70793ba6d128f00c4c66cb2402",
"sum": "OptiWUMOHFrRGTZhSfxV1RCeXZ90qsefGNTD4lDYVG0="
},
{ {
"source": { "source": {
"git": { "git": {

View File

@ -1,5 +1,4 @@
apiVersion: v1 apiVersion: v1
data: {}
kind: Secret kind: Secret
metadata: metadata:
name: alertmanager-main name: alertmanager-main

View File

@ -0,0 +1,17 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgDHh1Qgrdffl6IFXJuk3ZzNHGARWZLDzbMLKp5Wo/ZYqclCji4T5wx7Fql6FALCvvUBvThxLfiwM2LQHRcWfWnf5AwxipCxpos9oVvlu4aON0WZd/Kjz/ZwDq5pgR/etCrSA2DYNxYq4vnTxUAk29eY5F4dWwRPcCgVZ5/KsTIcNx4x+4einqQbwAhkUtAwQl5fEPmpwNRquJZM29XIEUxZLWne0YmCmEgNGleUU20ByfYMwgtDJGjgr6XNPtTmByAHVrxNuQwAMxgT6GcfDLCNsByaS6CY3wmSTg1GUv/CG8Xx29FwDWyf1Ly2KbmcAAafN5QJGvCCTEt/WB85GtzQisrWFZTykv3Zjuz101p9ShXQZALylaX5h22hHFXuQyiIQZEeM2ixiYQjcPhiPjx1/hkbQ25QRD73/gjalZO8bprDrJxkLlw+hrgJ0LzxWL881U6INLKow+8/GmLleFhMUXRsGqacLreCIAr4uVGEMGMVLhHJKnj597HRnn0UCxVNkDk8QjHyiVgJBrQ3Pz9SFdF7mxvJ9F4rEgGkE4dvfvWxrZFumTLEkVRF9To+rKxsIVkewvoHtN/gMzFMzumP+fz/oB9yAHsxkwVyfqXBg52hNSYIx5Z/67yy3hDRKPBcZgknf9S+F37ET5BABFxazwG8NJjf4td+UsAGuAMzKI/94u7TxuXLPCs/tIGKD7kJnPxAqpalepzABtVCmOrtWwNPb1h4XeuraUS9beJ2zV9oV5nVFJmX94EJ7qpZt0Um7+GGeavQ5SV3XHRolDS5PpZPTAWnc/1rtZ0nsKk8lllEr3aDWveMXma06NKkIXz8+iAonvHsDZuw0W6jUdUUtraIbSua9YkyugqCBGeeXIPLwFxqJTqIX5vedZVMveFiaxtCJjL48SUGxtyugfiYbPa3xpHWWe22BcJyTmAOG9aIq4Tp4nvftLyvWe7c9PotJk/7gdv1IO4RLx//eLtKWw0uADa0ara4hDuI8Yktlti24TlA9XYz00d5WtE+lJsSZN8547BUfFzXSOZSSbfrFLZmEmBTgkbj4szX19bXSctJN3BtOmRfCEPXYQN10HgnhpwqYHbXKUSTZNWLojnFL1/E56wUXGxRg9NGOwSXzTyfoLGxI9NEQpGc0Rj2Wna+JSUhlAUnfYW1eH8yyg5FfkyhQdyZJFvYfF0rk+XG5XNhLumST19uxrAkMWhk+Z9/eWwOaZQMmDcoi2Rs0za+1GGjPW5k56Ip+spwW5cvYmdl1PgkZ4g1mupjiB0FdgZHGR+kGn1lbPtSUd+amh9PXSDWkqfnix62H7374rQ3ZyG7fs9sQNnnRrd/cDCMxAl5Upk8D9dfxRmvuxRd8b89h7EQwUBML7TIriA2Pci5Ftux2R5wyIXjznLC5/kFZg6/Av3uKmKK6dLR2Ooey7/3g14CEjMumdijjySl8Pd2UUxSKVKD7vkq+3xYm0CJZqVvT/iBOccrv0UEiTHBsXrfaugUvqIKTAGYhJy0fUBXKisPdA0HdzrUmx57Du36TGyuEzGtVuDarcWzQYPqKJxOIuofJ+AGTDY53OjdUJ8pwJD6HDz55tu85gaV6ZOvSYqjqeX2FUe7lPhsGUIh/FemfichpypHyFpPYhkwAIO1AinKvsqjUuDXE6n5b7NMbI1gl87fPqT5wUSKXZqwViyFqUA5DFqPTEqvHIGU5Wz0GajEaQ==
template:
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
type: Opaque
status: {}

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@ spec:
- env: - env:
- name: GF_INSTALL_PLUGINS - name: GF_INSTALL_PLUGINS
value: grafana-piechart-panel value: grafana-piechart-panel
image: grafana/grafana:7.1.0 image: grafana/grafana:7.3.5
name: grafana name: grafana
ports: ports:
- containerPort: 3000 - containerPort: 3000

View File

@ -12,3 +12,4 @@ spec:
targetPort: http targetPort: http
selector: selector:
app: grafana app: grafana
type: NodePort

View File

@ -3,7 +3,7 @@ kind: ClusterRole
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
name: kube-state-metrics name: kube-state-metrics
rules: rules:
- apiGroups: - apiGroups:
@ -30,6 +30,7 @@ rules:
- daemonsets - daemonsets
- deployments - deployments
- replicasets - replicasets
- ingresses
verbs: verbs:
- list - list
- watch - watch
@ -104,14 +105,6 @@ rules:
- networking.k8s.io - networking.k8s.io
resources: resources:
- networkpolicies - networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs: verbs:
- list - list
- watch - watch

View File

@ -3,7 +3,7 @@ kind: ClusterRoleBinding
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
name: kube-state-metrics name: kube-state-metrics
roleRef: roleRef:
apiGroup: rbac.authorization.k8s.io apiGroup: rbac.authorization.k8s.io

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
name: kube-state-metrics name: kube-state-metrics
namespace: monitoring namespace: monitoring
spec: spec:
@ -15,7 +15,7 @@ spec:
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
spec: spec:
containers: containers:
- args: - args:
@ -25,32 +25,34 @@ spec:
- --telemetry-port=8082 - --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.9.7 image: quay.io/coreos/kube-state-metrics:v1.9.7
name: kube-state-metrics name: kube-state-metrics
securityContext:
runAsUser: 65534
- args: - args:
- --logtostderr - --logtostderr
- --secure-listen-address=:8443 - --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8081/ - --upstream=http://127.0.0.1:8081/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0 image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-main name: kube-rbac-proxy-main
ports: ports:
- containerPort: 8443 - containerPort: 8443
name: https-main name: https-main
securityContext: securityContext:
runAsUser: 65534 runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
- args: - args:
- --logtostderr - --logtostderr
- --secure-listen-address=:9443 - --secure-listen-address=:9443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8082/ - --upstream=http://127.0.0.1:8082/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0 image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-self name: kube-rbac-proxy-self
ports: ports:
- containerPort: 9443 - containerPort: 9443
name: https-self name: https-self
securityContext: securityContext:
runAsUser: 65534 runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector: nodeSelector:
kubernetes.io/os: linux kubernetes.io/os: linux
serviceAccountName: kube-state-metrics serviceAccountName: kube-state-metrics

View File

@ -3,7 +3,7 @@ kind: Service
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
name: kube-state-metrics name: kube-state-metrics
namespace: monitoring namespace: monitoring
spec: spec:

View File

@ -3,6 +3,6 @@ kind: ServiceAccount
metadata: metadata:
labels: labels:
app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.7 app.kubernetes.io/version: v1.9.7
name: kube-state-metrics name: kube-state-metrics
namespace: monitoring namespace: monitoring

View File

@ -57,7 +57,7 @@ spec:
valueFrom: valueFrom:
fieldRef: fieldRef:
fieldPath: status.podIP fieldPath: status.podIP
image: quay.io/brancz/kube-rbac-proxy:v0.6.0 image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy name: kube-rbac-proxy
ports: ports:
- containerPort: 9100 - containerPort: 9100
@ -70,6 +70,10 @@ spec:
requests: requests:
cpu: 10m cpu: 10m
memory: 20Mi memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
hostNetwork: true hostNetwork: true
hostPID: true hostPID: true
nodeSelector: nodeSelector:
@ -93,3 +97,4 @@ spec:
updateStrategy: updateStrategy:
rollingUpdate: rollingUpdate:
maxUnavailable: 10% maxUnavailable: 10%
type: RollingUpdate

View File

@ -25,7 +25,7 @@ spec:
- --metrics-relist-interval=1m - --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/ - --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
- --secure-port=6443 - --secure-port=6443
image: directxman12/k8s-prometheus-adapter:v0.7.0 image: directxman12/k8s-prometheus-adapter:v0.8.2
name: prometheus-adapter name: prometheus-adapter
ports: ports:
- containerPort: 6443 - containerPort: 6443

View File

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:
@ -19,4 +19,4 @@ spec:
matchLabels: matchLabels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1

View File

@ -12,7 +12,7 @@ spec:
namespace: monitoring namespace: monitoring
port: web port: web
externalUrl: http://prometheus-k8s.monitoring:9090 externalUrl: http://prometheus-k8s.monitoring:9090
image: quay.io/prometheus/prometheus:v2.20.0 image: quay.io/prometheus/prometheus:v2.22.1
nodeSelector: nodeSelector:
kubernetes.io/os: linux kubernetes.io/os: linux
podMonitorNamespaceSelector: podMonitorNamespaceSelector:
@ -58,4 +58,4 @@ spec:
requests: requests:
storage: 10Gi storage: 10Gi
storageClassName: local-path storageClassName: local-path
version: v2.20.0 version: v2.22.1

View File

@ -40,10 +40,10 @@ spec:
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m]) rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
record: instance:node_vmstat_pgmajfault:rate1m record: instance:node_vmstat_pgmajfault:rate1m
- expr: | - expr: |
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_seconds:rate1m record: instance_device:node_disk_io_time_seconds:rate1m
- expr: | - expr: |
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_weighted_seconds:rate1m record: instance_device:node_disk_io_time_weighted_seconds:rate1m
- expr: | - expr: |
sum without (device) ( sum without (device) (
@ -390,11 +390,6 @@ spec:
quantile: "0.99" quantile: "0.99"
verb: write verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
/
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
record: cluster:apiserver_request_duration_seconds:mean5m
- expr: | - expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
labels: labels:
@ -571,9 +566,6 @@ spec:
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d
- name: k8s.rules - name: k8s.rules
rules: rules:
- expr: |
sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: | - expr: |
sum by (cluster, namespace, pod, container) ( sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
@ -605,9 +597,6 @@ spec:
max by(namespace, pod, node) (kube_pod_info{node!=""}) max by(namespace, pod, node) (kube_pod_info{node!=""})
) )
record: node_namespace_pod_container:container_memory_swap record: node_namespace_pod_container:container_memory_swap
- expr: |
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: | - expr: |
sum by (namespace) ( sum by (namespace) (
sum by (namespace, pod) ( sum by (namespace, pod) (
@ -716,9 +705,6 @@ spec:
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: node.rules - name: node.rules
rules: rules:
- expr: |
sum(min(kube_pod_info{node!=""}) by (cluster, node))
record: ':kube_pod_info_node_count:'
- expr: | - expr: |
topk by(namespace, pod) (1, topk by(namespace, pod) (1,
max by (node, namespace, pod) ( max by (node, namespace, pod) (
@ -762,18 +748,18 @@ spec:
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- name: kube-prometheus-node-recording.rules - name: kube-prometheus-node-recording.rules
rules: rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
(instance) BY (instance)
record: instance:node_cpu:rate:sum record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance) BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance, cpu))
@ -791,7 +777,7 @@ spec:
description: kube-state-metrics is experiencing errors at an elevated rate description: kube-state-metrics is experiencing errors at an elevated rate
in list operations. This is likely causing it to not be able to expose metrics in list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all. about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
summary: kube-state-metrics is experiencing errors in list operations. summary: kube-state-metrics is experiencing errors in list operations.
expr: | expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
@ -806,7 +792,7 @@ spec:
description: kube-state-metrics is experiencing errors at an elevated rate description: kube-state-metrics is experiencing errors at an elevated rate
in watch operations. This is likely causing it to not be able to expose in watch operations. This is likely causing it to not be able to expose
metrics about Kubernetes objects correctly or at all. metrics about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
summary: kube-state-metrics is experiencing errors in watch operations. summary: kube-state-metrics is experiencing errors in watch operations.
expr: | expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
@ -823,7 +809,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling has only {{ printf "%.2f" $value }}% available space left and is filling
up. up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours. summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: | expr: |
( (
@ -841,7 +827,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling has only {{ printf "%.2f" $value }}% available space left and is filling
up fast. up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours. summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: | expr: |
( (
@ -858,7 +844,7 @@ spec:
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left. has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left. summary: Filesystem has less than 5% space left.
expr: | expr: |
( (
@ -873,7 +859,7 @@ spec:
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left. has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left. summary: Filesystem has less than 3% space left.
expr: | expr: |
( (
@ -889,7 +875,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling has only {{ printf "%.2f" $value }}% available inodes left and is filling
up. up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours. summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: | expr: |
( (
@ -907,7 +893,7 @@ spec:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling has only {{ printf "%.2f" $value }}% available inodes left and is filling
up fast. up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours. summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: | expr: |
( (
@ -924,7 +910,7 @@ spec:
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left. has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left. summary: Filesystem has less than 5% inodes left.
expr: | expr: |
( (
@ -939,7 +925,7 @@ spec:
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left. has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left. summary: Filesystem has less than 3% inodes left.
expr: | expr: |
( (
@ -954,10 +940,10 @@ spec:
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.' {{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors. summary: Network interface is reporting many receive errors.
expr: | expr: |
increase(node_network_receive_errs_total[2m]) > 10 rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
@ -965,17 +951,17 @@ spec:
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.' {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors. summary: Network interface is reporting many transmit errors.
expr: | expr: |
increase(node_network_transmit_errs_total[2m]) > 10 rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
- alert: NodeHighNumberConntrackEntriesUsed - alert: NodeHighNumberConntrackEntriesUsed
annotations: annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.' description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit. summary: Number of conntrack are getting close to the limit.
expr: | expr: |
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
@ -984,7 +970,7 @@ spec:
- alert: NodeTextFileCollectorScrapeError - alert: NodeTextFileCollectorScrapeError
annotations: annotations:
description: Node Exporter text file collector failed to scrape. description: Node Exporter text file collector failed to scrape.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape. summary: Node Exporter text file collector failed to scrape.
expr: | expr: |
node_textfile_scrape_error{job="node-exporter"} == 1 node_textfile_scrape_error{job="node-exporter"} == 1
@ -994,7 +980,7 @@ spec:
annotations: annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
Ensure NTP is configured correctly on this host. Ensure NTP is configured correctly on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
summary: Clock skew detected. summary: Clock skew detected.
expr: | expr: |
( (
@ -1015,7 +1001,7 @@ spec:
annotations: annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host. is configured on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
summary: Clock not synchronising. summary: Clock not synchronising.
expr: | expr: |
min_over_time(node_timex_sync_status[5m]) == 0 min_over_time(node_timex_sync_status[5m]) == 0
@ -1029,7 +1015,7 @@ spec:
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically. is insufficient to fix issue automatically.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
summary: RAID Array is degraded summary: RAID Array is degraded
expr: | expr: |
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0 node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
@ -1040,19 +1026,142 @@ spec:
annotations: annotations:
description: At least one device in RAID array on {{ $labels.instance }} failed. description: At least one device in RAID array on {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
summary: Failed device in RAID array summary: Failed device in RAID array
expr: | expr: |
node_md_disks{state="fail"} > 0 node_md_disks{state="fail"} > 0
labels: labels:
severity: warning severity: warning
- name: alertmanager.rules
rules:
- alert: AlertmanagerFailedReload
annotations:
description: Configuration has failed to load for {{ $labels.namespace }}/{{
$labels.pod}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
summary: Reloading an Alertmanager configuration has failed.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: AlertmanagerMembersInconsistent
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
found {{ $value }} members of the {{$labels.job}} cluster.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
summary: A member of an Alertmanager cluster has not found all other cluster
members.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
< on (namespace,service) group_left
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
for: 10m
labels:
severity: critical
- alert: AlertmanagerFailedToSendAlerts
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
summary: An Alertmanager instance failed to send notifications.
expr: |
(
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications.
expr: |
min by (namespace,service) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
)
> 0.01
for: 5m
labels:
severity: critical
- alert: AlertmanagerConfigInconsistent
annotations:
description: Alertmanager instances within the {{$labels.job}} cluster have
different configurations.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
summary: Alertmanager instances within the same cluster have different configurations.
expr: |
count by (namespace,service) (
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
)
!= 1
for: 20m
labels:
severity: critical
- alert: AlertmanagerClusterDown
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have been up for less than half of the
last 5m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
summary: Half or more of the Alertmanager instances within the same cluster
are down.
expr: |
(
count by (namespace,service) (
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
- alert: AlertmanagerClusterCrashlooping
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have restarted at least 5 times in the
last 10m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
summary: Half or more of the Alertmanager instances within the same cluster
are crashlooping.
expr: |
(
count by (namespace,service) (
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
- name: prometheus-operator - name: prometheus-operator
rules: rules:
- alert: PrometheusOperatorListErrors - alert: PrometheusOperatorListErrors
annotations: annotations:
description: Errors while performing List operations in controller {{$labels.controller}} description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace. in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
summary: Errors while performing list operations in controller. summary: Errors while performing list operations in controller.
expr: | expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
@ -1063,7 +1172,7 @@ spec:
annotations: annotations:
description: Errors while performing watch operations in controller {{$labels.controller}} description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace. in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller. summary: Errors while performing watch operations in controller.
expr: | expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
@ -1074,7 +1183,7 @@ spec:
annotations: annotations:
description: Controller {{ $labels.controller }} in {{ $labels.namespace }} description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
namespace fails to reconcile {{ $value }} objects. namespace fails to reconcile {{ $value }} objects.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorsyncfailed runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
summary: Last controller reconciliation failed summary: Last controller reconciliation failed
expr: | expr: |
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0 min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
@ -1086,7 +1195,7 @@ spec:
description: '{{ $value | humanizePercentage }} of reconciling operations description: '{{ $value | humanizePercentage }} of reconciling operations
failed for {{ $labels.controller }} controller in {{ $labels.namespace }} failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
namespace.' namespace.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
summary: Errors while reconciling controller. summary: Errors while reconciling controller.
expr: | expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1 (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
@ -1097,7 +1206,7 @@ spec:
annotations: annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }} description: Errors while reconciling Prometheus in {{ $labels.namespace }}
Namespace. Namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
summary: Errors while reconciling Prometheus. summary: Errors while reconciling Prometheus.
expr: | expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
@ -1108,20 +1217,32 @@ spec:
annotations: annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace isn't description: Prometheus operator in {{ $labels.namespace }} namespace isn't
ready to reconcile {{ $labels.controller }} resources. ready to reconcile {{ $labels.controller }} resources.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornotready runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
summary: Prometheus operator not ready summary: Prometheus operator not ready
expr: | expr: |
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0) min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
- alert: PrometheusOperatorRejectedResources
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
}} resources.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
summary: Resources rejected by Prometheus operator
expr: |
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
for: 5m
labels:
severity: warning
- name: kubernetes-apps - name: kubernetes-apps
rules: rules:
- alert: KubePodCrashLooping - alert: KubePodCrashLooping
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
summary: Pod is crash looping. summary: Pod is crash looping.
expr: | expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0 rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@ -1132,7 +1253,7 @@ spec:
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes. state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes. summary: Pod has been in a non-ready state for more than 15 minutes.
expr: | expr: |
sum by (namespace, pod) ( sum by (namespace, pod) (
@ -1150,7 +1271,7 @@ spec:
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has }} does not match, this indicates that the Deployment has failed but has
not been rolled back. not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentgenerationmismatch
summary: Deployment generation mismatch due to possible roll-back summary: Deployment generation mismatch due to possible roll-back
expr: | expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"} kube_deployment_status_observed_generation{job="kube-state-metrics"}
@ -1163,7 +1284,7 @@ spec:
annotations: annotations:
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
not matched the expected number of replicas for longer than 15 minutes. not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas. summary: Deployment has not matched the expected number of replicas.
expr: | expr: |
( (
@ -1182,7 +1303,7 @@ spec:
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
has not matched the expected number of replicas for longer than 15 minutes. has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetreplicasmismatch
summary: Deployment has not matched the expected number of replicas. summary: Deployment has not matched the expected number of replicas.
expr: | expr: |
( (
@ -1202,7 +1323,7 @@ spec:
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has }} does not match, this indicates that the StatefulSet has failed but has
not been rolled back. not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetgenerationmismatch
summary: StatefulSet generation mismatch due to possible roll-back summary: StatefulSet generation mismatch due to possible roll-back
expr: | expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"} kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@ -1215,7 +1336,7 @@ spec:
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
update has not been rolled out. update has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out. summary: StatefulSet update has not been rolled out.
expr: | expr: |
( (
@ -1242,7 +1363,7 @@ spec:
annotations: annotations:
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has
not finished or progressed for at least 15 minutes. not finished or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck. summary: DaemonSet rollout is stuck.
expr: | expr: |
( (
@ -1275,7 +1396,7 @@ spec:
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour. has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: | expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -1286,7 +1407,7 @@ spec:
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.' }} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled. summary: DaemonSet pods are not scheduled.
expr: | expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@ -1299,7 +1420,7 @@ spec:
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.' }} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled. summary: DaemonSet pods are misscheduled.
expr: | expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0 kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -1310,7 +1431,7 @@ spec:
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking
more than 12 hours to complete. more than 12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobcompletion
summary: Job did not complete in time summary: Job did not complete in time
expr: | expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@ -1321,7 +1442,7 @@ spec:
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to
complete. Removing failed job after investigation should clear this alert. complete. Removing failed job after investigation should clear this alert.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobfailed
summary: Job failed to complete. summary: Job failed to complete.
expr: | expr: |
kube_job_failed{job="kube-state-metrics"} > 0 kube_job_failed{job="kube-state-metrics"} > 0
@ -1332,13 +1453,21 @@ spec:
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched
the desired number of replicas for longer than 15 minutes. the desired number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas. summary: HPA has not matched desired number of replicas.
expr: | expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"} (kube_hpa_status_desired_replicas{job="kube-state-metrics"}
!= !=
kube_hpa_status_current_replicas{job="kube-state-metrics"}) kube_hpa_status_current_replicas{job="kube-state-metrics"})
and and
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
>
kube_hpa_spec_min_replicas{job="kube-state-metrics"})
and
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
<
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
and
changes(kube_hpa_status_current_replicas[15m]) == 0 changes(kube_hpa_status_current_replicas[15m]) == 0
for: 15m for: 15m
labels: labels:
@ -1347,7 +1476,7 @@ spec:
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running
at max replicas for longer than 15 minutes. at max replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout
summary: HPA is running at max replicas summary: HPA is running at max replicas
expr: | expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"} kube_hpa_status_current_replicas{job="kube-state-metrics"}
@ -1362,7 +1491,7 @@ spec:
annotations: annotations:
description: Cluster has overcommitted CPU resource requests for Pods and description: Cluster has overcommitted CPU resource requests for Pods and
cannot tolerate node failure. cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
expr: | expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{}) sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@ -1377,7 +1506,7 @@ spec:
annotations: annotations:
description: Cluster has overcommitted memory resource requests for Pods and description: Cluster has overcommitted memory resource requests for Pods and
cannot tolerate node failure. cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
expr: | expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{}) sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@ -1393,7 +1522,7 @@ spec:
- alert: KubeCPUQuotaOvercommit - alert: KubeCPUQuotaOvercommit
annotations: annotations:
description: Cluster has overcommitted CPU resource requests for Namespaces. description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
expr: | expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
@ -1406,12 +1535,12 @@ spec:
- alert: KubeMemoryQuotaOvercommit - alert: KubeMemoryQuotaOvercommit
annotations: annotations:
description: Cluster has overcommitted memory resource requests for Namespaces. description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
expr: | expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/ /
sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"}) sum(kube_node_status_allocatable_memory_bytes{job="kube-state-metrics"})
> 1.5 > 1.5
for: 5m for: 5m
labels: labels:
@ -1420,7 +1549,7 @@ spec:
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaalmostfull
summary: Namespace quota is going to be full. summary: Namespace quota is going to be full.
expr: | expr: |
kube_resourcequota{job="kube-state-metrics", type="used"} kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1434,7 +1563,7 @@ spec:
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotafullyused
summary: Namespace quota is fully used. summary: Namespace quota is fully used.
expr: | expr: |
kube_resourcequota{job="kube-state-metrics", type="used"} kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1448,7 +1577,7 @@ spec:
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaexceeded
summary: Namespace quota has exceeded the limits. summary: Namespace quota has exceeded the limits.
expr: | expr: |
kube_resourcequota{job="kube-state-metrics", type="used"} kube_resourcequota{job="kube-state-metrics", type="used"}
@ -1463,7 +1592,7 @@ spec:
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ {{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.' $labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/cputhrottlinghigh
summary: Processes experience elevated CPU throttling. summary: Processes experience elevated CPU throttling.
expr: | expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace) sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@ -1480,7 +1609,7 @@ spec:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free. }} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: | expr: |
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"} kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
@ -1496,7 +1625,7 @@ spec:
$labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is
expected to fill up within four days. Currently {{ $value | humanizePercentage expected to fill up within four days. Currently {{ $value | humanizePercentage
}} is available. }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: | expr: |
( (
@ -1513,7 +1642,7 @@ spec:
annotations: annotations:
description: The persistent volume {{ $labels.persistentvolume }} has status description: The persistent volume {{ $labels.persistentvolume }} has status
{{ $labels.phase }}. {{ $labels.phase }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning. summary: PersistentVolume is having issues with provisioning.
expr: | expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@ -1526,7 +1655,7 @@ spec:
annotations: annotations:
description: There are {{ $value }} different semantic versions of Kubernetes description: There are {{ $value }} different semantic versions of Kubernetes
components running. components running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running. summary: Different semantic versions of Kubernetes components running.
expr: | expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1 count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@ -1537,7 +1666,7 @@ spec:
annotations: annotations:
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ $value | humanizePercentage }} errors. }}' is experiencing {{ $value | humanizePercentage }} errors.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclienterrors
summary: Kubernetes API server client is experiencing errors. summary: Kubernetes API server client is experiencing errors.
expr: | expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@ -1552,7 +1681,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn - alert: KubeAPIErrorBudgetBurn
annotations: annotations:
description: The API server is burning too much error budget. description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget. summary: The API server is burning too much error budget.
expr: | expr: |
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
@ -1566,7 +1695,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn - alert: KubeAPIErrorBudgetBurn
annotations: annotations:
description: The API server is burning too much error budget. description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget. summary: The API server is burning too much error budget.
expr: | expr: |
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
@ -1580,7 +1709,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn - alert: KubeAPIErrorBudgetBurn
annotations: annotations:
description: The API server is burning too much error budget. description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget. summary: The API server is burning too much error budget.
expr: | expr: |
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
@ -1594,7 +1723,7 @@ spec:
- alert: KubeAPIErrorBudgetBurn - alert: KubeAPIErrorBudgetBurn
annotations: annotations:
description: The API server is burning too much error budget. description: The API server is burning too much error budget.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
summary: The API server is burning too much error budget. summary: The API server is burning too much error budget.
expr: | expr: |
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
@ -1611,7 +1740,7 @@ spec:
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is description: A client certificate used to authenticate to the apiserver is
expiring in less than 7.0 days. expiring in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -1621,7 +1750,7 @@ spec:
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is description: A client certificate used to authenticate to the apiserver is
expiring in less than 24.0 hours. expiring in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@ -1633,7 +1762,7 @@ spec:
has reported errors. The number of errors has increased for it in the past has reported errors. The number of errors has increased for it in the past
five minutes. High values indicate that the availability of the service five minutes. High values indicate that the availability of the service
changes too often. changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapierrors
summary: An aggregated API has reported errors. summary: An aggregated API has reported errors.
expr: | expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2 sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@ -1643,7 +1772,7 @@ spec:
annotations: annotations:
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }}
has been only {{ $value | humanize }}% available over the last 10m. has been only {{ $value | humanize }}% available over the last 10m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapidown
summary: An aggregated API is down. summary: An aggregated API is down.
expr: | expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85 (1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
@ -1653,7 +1782,7 @@ spec:
- alert: KubeAPIDown - alert: KubeAPIDown
annotations: annotations:
description: KubeAPI has disappeared from Prometheus target discovery. description: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapidown
summary: Target disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery.
expr: | expr: |
absent(up{job="apiserver"} == 1) absent(up{job="apiserver"} == 1)
@ -1665,7 +1794,7 @@ spec:
- alert: KubeNodeNotReady - alert: KubeNodeNotReady
annotations: annotations:
description: '{{ $labels.node }} has been unready for more than 15 minutes.' description: '{{ $labels.node }} has been unready for more than 15 minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodenotready
summary: Node is not ready. summary: Node is not ready.
expr: | expr: |
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
@ -1676,7 +1805,7 @@ spec:
annotations: annotations:
description: '{{ $labels.node }} is unreachable and some workloads may be description: '{{ $labels.node }} is unreachable and some workloads may be
rescheduled.' rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodeunreachable
summary: Node is unreachable. summary: Node is unreachable.
expr: | expr: |
(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
@ -1687,7 +1816,7 @@ spec:
annotations: annotations:
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity. }} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubelettoomanypods
summary: Kubelet is running at capacity. summary: Kubelet is running at capacity.
expr: | expr: |
count by(node) ( count by(node) (
@ -1704,7 +1833,7 @@ spec:
annotations: annotations:
description: The readiness status of node {{ $labels.node }} has changed {{ description: The readiness status of node {{ $labels.node }} has changed {{
$value }} times in the last 15 minutes. $value }} times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodereadinessflapping
summary: Node readiness status is flapping. summary: Node readiness status is flapping.
expr: | expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@ -1715,7 +1844,7 @@ spec:
annotations: annotations:
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile
duration of {{ $value }} seconds on node {{ $labels.node }}. duration of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletplegdurationhigh
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
expr: | expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -1726,7 +1855,7 @@ spec:
annotations: annotations:
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}. on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high. summary: Kubelet Pod startup latency is too high.
expr: | expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
@ -1737,7 +1866,7 @@ spec:
annotations: annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires description: Client certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}. in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire. summary: Kubelet client certificate is about to expire.
expr: | expr: |
kubelet_certificate_manager_client_ttl_seconds < 604800 kubelet_certificate_manager_client_ttl_seconds < 604800
@ -1747,7 +1876,7 @@ spec:
annotations: annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires description: Client certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}. in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire. summary: Kubelet client certificate is about to expire.
expr: | expr: |
kubelet_certificate_manager_client_ttl_seconds < 86400 kubelet_certificate_manager_client_ttl_seconds < 86400
@ -1757,7 +1886,7 @@ spec:
annotations: annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires description: Server certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}. in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire. summary: Kubelet server certificate is about to expire.
expr: | expr: |
kubelet_certificate_manager_server_ttl_seconds < 604800 kubelet_certificate_manager_server_ttl_seconds < 604800
@ -1767,7 +1896,7 @@ spec:
annotations: annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires description: Server certificate for Kubelet on node {{ $labels.node }} expires
in {{ $value | humanizeDuration }}. in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire. summary: Kubelet server certificate is about to expire.
expr: | expr: |
kubelet_certificate_manager_server_ttl_seconds < 86400 kubelet_certificate_manager_server_ttl_seconds < 86400
@ -1777,7 +1906,7 @@ spec:
annotations: annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its client description: Kubelet on node {{ $labels.node }} has failed to renew its client
certificate ({{ $value | humanize }} errors in the last 5 minutes). certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificaterenewalerrors
summary: Kubelet has failed to renew its client certificate. summary: Kubelet has failed to renew its client certificate.
expr: | expr: |
increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
@ -1788,7 +1917,7 @@ spec:
annotations: annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its server description: Kubelet on node {{ $labels.node }} has failed to renew its server
certificate ({{ $value | humanize }} errors in the last 5 minutes). certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificaterenewalerrors
summary: Kubelet has failed to renew its server certificate. summary: Kubelet has failed to renew its server certificate.
expr: | expr: |
increase(kubelet_server_expiration_renew_errors[5m]) > 0 increase(kubelet_server_expiration_renew_errors[5m]) > 0
@ -1798,7 +1927,7 @@ spec:
- alert: KubeletDown - alert: KubeletDown
annotations: annotations:
description: Kubelet has disappeared from Prometheus target discovery. description: Kubelet has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletdown
summary: Target disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery.
expr: | expr: |
absent(up{job="kubelet", metrics_path="/metrics"} == 1) absent(up{job="kubelet", metrics_path="/metrics"} == 1)
@ -1810,7 +1939,7 @@ spec:
- alert: KubeSchedulerDown - alert: KubeSchedulerDown
annotations: annotations:
description: KubeScheduler has disappeared from Prometheus target discovery. description: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeschedulerdown
summary: Target disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery.
expr: | expr: |
absent(up{job="kube-scheduler"} == 1) absent(up{job="kube-scheduler"} == 1)
@ -1823,7 +1952,7 @@ spec:
annotations: annotations:
description: KubeControllerManager has disappeared from Prometheus target description: KubeControllerManager has disappeared from Prometheus target
discovery. discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontrollermanagerdown
summary: Target disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery.
expr: | expr: |
absent(up{job="kube-controller-manager"} == 1) absent(up{job="kube-controller-manager"} == 1)
@ -1878,22 +2007,6 @@ spec:
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- alert: PrometheusNotConnectedToAlertmanagers - alert: PrometheusNotConnectedToAlertmanagers
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
@ -1932,7 +2045,15 @@ spec:
samples. samples.
summary: Prometheus is not ingesting samples. summary: Prometheus is not ingesting samples.
expr: | expr: |
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 (
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
)
)
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
@ -1989,7 +2110,7 @@ spec:
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
( (
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- on(job, instance) group_right - ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
) )
> 120 > 120
@ -2036,37 +2157,32 @@ spec:
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- name: alertmanager.rules - alert: PrometheusTargetLimitHit
rules:
- alert: AlertmanagerConfigInconsistent
annotations: annotations:
message: | description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync. {{ printf "%.0f" $value }} targets because the number of targets exceeded
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }} the configured target_limit.
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}" summary: Prometheus has dropped targets because some scrape configs have exceeded
{{ end }} the targets limit.
expr: | expr: |
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1 increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 5m for: 15m
labels:
severity: critical
- alert: AlertmanagerFailedReload
annotations:
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
expr: |
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
for: 10m
labels: labels:
severity: warning severity: warning
- alert: AlertmanagerMembersInconsistent - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations: annotations:
message: Alertmanager has not found all other members of the cluster. description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: | expr: |
alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"} min without (alertmanager) (
!= on (service) GROUP_LEFT() rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}) /
for: 5m rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels: labels:
severity: critical severity: critical
- name: general.rules - name: general.rules
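The rule changes above (runbook URLs now pointing at the kube-prometheus wiki, the tightened KubeHpaReplicasMismatch expression, and the relocated Prometheus alerts) can be checked offline with promtool's rule unit tests before rolling the update out. Below is a minimal sketch of such a test for KubeJobFailed; the rule file path, the namespace/job_name labels, and the assumed "for: 15m" / "severity: warning" values are placeholders, since those fields are not visible in the hunks above.

# promtool test rules kubejobfailed-test.yaml
rule_files:
  - kubernetes-rules.yaml        # placeholder: path to the rendered PrometheusRule groups
evaluation_interval: 1m
tests:
  - interval: 1m
    input_series:
      # hypothetical failed Job as reported by kube-state-metrics
      - series: 'kube_job_failed{job="kube-state-metrics", namespace="default", job_name="backup"}'
        values: '1+0x30'
    alert_rule_test:
      - eval_time: 20m
        alertname: KubeJobFailed
        exp_alerts:
          - exp_labels:
              severity: warning        # assumed; not shown in the hunk above
              job: kube-state-metrics
              namespace: default
              job_name: backup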
View File
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: alertmanagers.monitoring.coreos.com name: alertmanagers.monitoring.coreos.com
spec: spec:
@ -644,6 +644,96 @@ spec:
type: array type: array
type: object type: object
type: object type: object
alertmanagerConfigNamespaceSelector:
description: Namespaces to be selected for AlertmanagerConfig discovery.
If nil, only check own namespace.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
alertmanagerConfigSelector:
description: AlertmanagerConfigs to be selected and merged into the Alertmanager
configuration.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
baseImage: baseImage:
description: 'Base image that is used to deploy pods, without tag. description: 'Base image that is used to deploy pods, without tag.
Deprecated: use ''image'' instead' Deprecated: use ''image'' instead'
@ -653,6 +743,15 @@ spec:
in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. in cluster. Needs to be provided for non RFC1918 [1] (public) addresses.
[1] RFC1918: https://tools.ietf.org/html/rfc1918' [1] RFC1918: https://tools.ietf.org/html/rfc1918'
type: string type: string
clusterGossipInterval:
description: Interval between gossip attempts.
type: string
clusterPeerTimeout:
description: Timeout for cluster peering.
type: string
clusterPushpullInterval:
description: Interval between pushpull attempts.
type: string
configMaps: configMaps:
description: ConfigMaps is a list of ConfigMaps in the same namespace description: ConfigMaps is a list of ConfigMaps in the same namespace
as the Alertmanager object, which shall be mounted into the Alertmanager as the Alertmanager object, which shall be mounted into the Alertmanager
@ -667,9 +766,14 @@ spec:
The secret is mounted into /etc/alertmanager/config. The secret is mounted into /etc/alertmanager/config.
type: string type: string
containers: containers:
description: Containers allows injecting additional containers. This description: 'Containers allows injecting additional containers. This
is meant to allow adding an authentication proxy to an Alertmanager is meant to allow adding an authentication proxy to an Alertmanager
pod. pod. Containers described here modify an operator generated container
if they share the same name and modifications are done via a strategic
merge patch. The current container names are: `alertmanager` and
`config-reloader`. Overriding containers is entirely outside the
scope of what the maintainers will support and by doing so, you
accept that this behaviour may break at any time without notice.'
items: items:
description: A single application container that you want to run description: A single application container that you want to run
within a pod. within a pod.
@ -771,9 +875,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -1208,6 +1316,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -1215,6 +1324,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -1338,13 +1451,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -1854,9 +1975,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -2291,6 +2416,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -2298,6 +2424,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -2421,13 +2551,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -2812,7 +2950,7 @@ spec:
description: Define which Nodes the Pods are scheduled on. description: Define which Nodes the Pods are scheduled on.
type: object type: object
paused: paused:
description: If set to true all actions on the underlaying managed description: If set to true all actions on the underlying managed
objects are not going to be performed, except for delete actions. objects are not going to be performed, except for delete actions.
type: boolean type: boolean
podMetadata: podMetadata:
@ -2861,13 +2999,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise it defaults to Limits if that is explicitly specified, otherwise
@ -3048,6 +3194,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would medium. The maximum usage on memory medium EmptyDir would
@ -3055,7 +3204,8 @@ spec:
and the sum of memory limits of all containers in a pod. and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined. The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
volumeClaimTemplate: volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets. description: A PVC spec to be used by the Prometheus StatefulSets.
@ -3151,13 +3301,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is for a container, it defaults to Limits if that is
@ -3237,7 +3395,11 @@ spec:
type: array type: array
capacity: capacity:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying description: Represents the actual resources of the underlying
volume. volume.
type: object type: object
@ -3332,6 +3494,100 @@ spec:
type: string type: string
type: object type: object
type: array type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
version: version:
description: Version the cluster should be on. description: Version the cluster should be on.
type: string type: string
@ -3704,9 +3960,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
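The anyOf integer-or-string change above (repeated for every resource-quantity field in this commit) means a divisor may now be written either as a bare integer or as a quantity string matching the pattern. A minimal sketch of a resourceFieldRef using it; the environment variable name is a placeholder:

  env:
  - name: MEMORY_LIMIT_MIB        # hypothetical variable name
    valueFrom:
      resourceFieldRef:
        resource: limits.memory
        divisor: 1Mi              # quantity string; a plain integer such as 1048576 also validates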
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -3729,6 +3989,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium for memory medium. The maximum usage on memory medium
@ -3736,7 +3999,8 @@ spec:
specified here and the sum of memory limits of all containers specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
fc: fc:
description: FC represents a Fibre Channel resource that is description: FC represents a Fibre Channel resource that is
@ -4199,10 +4463,14 @@ spec:
for volumes, optional for env vars' for volumes, optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format description: Specifies the output format
of the exposed resources, defaults of the exposed resources, defaults
to "1" to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to description: 'Required: resource to
select' select'
@ -4587,7 +4855,7 @@ spec:
format: int32 format: int32
type: integer type: integer
paused: paused:
description: Represents whether any actions on the underlaying managed description: Represents whether any actions on the underlying managed
objects are being performed. Only delete actions will be performed. objects are being performed. Only delete actions will be performed.
type: boolean type: boolean
replicas: replicas:
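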
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: podmonitors.monitoring.coreos.com name: podmonitors.monitoring.coreos.com
spec: spec:
@ -58,6 +58,69 @@ spec:
description: PodMetricsEndpoint defines a scrapeable endpoint of description: PodMetricsEndpoint defines a scrapeable endpoint of
a Kubernetes Pod serving Prometheus metrics. a Kubernetes Pod serving Prometheus metrics.
properties: properties:
basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over
basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping
targets. The secret needs to be in the same namespace as the
pod monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
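A minimal sketch of the two new authentication fields on a PodMonitor endpoint; the Secret names, keys, and port names are placeholders, not taken from this commit:

  podMetricsEndpoints:
  - port: metrics
    basicAuth:
      username:
        name: scrape-credentials   # hypothetical Secret
        key: username
      password:
        name: scrape-credentials
        key: password
  - port: admin
    bearerTokenSecret:
      name: scrape-token           # hypothetical Secret
      key: token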
honorLabels: honorLabels:
description: HonorLabels chooses the metric's labels on collisions description: HonorLabels chooses the metric's labels on collisions
with target labels. with target labels.
@ -191,6 +254,121 @@ spec:
- type: string - type: string
description: 'Deprecated: Use ''port'' instead.' description: 'Deprecated: Use ''port'' instead.'
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
cert:
description: Struct containing the client cert file for
the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file for the
targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
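Similarly, a sketch of the new per-endpoint tlsConfig, again with placeholder ConfigMap and Secret names:

  podMetricsEndpoints:
  - port: metrics
    scheme: https
    tlsConfig:
      ca:
        configMap:
          name: scrape-ca            # hypothetical ConfigMap holding the CA certificate
          key: ca.crt
      cert:
        secret:
          name: scrape-client-tls    # hypothetical Secret with the client certificate
          key: tls.crt
      keySecret:
        name: scrape-client-tls
        key: tls.key
      serverName: metrics.example.internal
      insecureSkipVerify: false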
type: object type: object
type: array type: array
podTargetLabels: podTargetLabels:
@ -248,6 +426,11 @@ spec:
are ANDed. are ANDed.
type: object type: object
type: object type: object
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required: required:
- podMetricsEndpoints - podMetricsEndpoints
- selector - selector
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: probes.monitoring.coreos.com name: probes.monitoring.coreos.com
spec: spec:
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: prometheuses.monitoring.coreos.com name: prometheuses.monitoring.coreos.com
spec: spec:
@ -765,7 +765,7 @@ spec:
description: TLS Config to use for alertmanager connection. description: TLS Config to use for alertmanager connection.
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for description: Struct containing the CA cert to use for
the targets. the targets.
properties: properties:
configMap: configMap:
@ -972,7 +972,8 @@ spec:
description: TLS Config to use for accessing apiserver. description: TLS Config to use for accessing apiserver.
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the targets. description: Struct containing the CA cert to use for the
targets.
properties: properties:
configMap: configMap:
description: ConfigMap containing data to use for the description: ConfigMap containing data to use for the
@ -1123,11 +1124,10 @@ spec:
the behavior of an operator generated container. Containers described the behavior of an operator generated container. Containers described
here modify an operator generated container if they share the same here modify an operator generated container if they share the same
name and modifications are done via a strategic merge patch. The name and modifications are done via a strategic merge patch. The
current container names are: `prometheus`, `prometheus-config-reloader`, current container names are: `prometheus`, `config-reloader`, and
`rules-configmap-reloader`, and `thanos-sidecar`. Overriding containers `thanos-sidecar`. Overriding containers is entirely outside the
is entirely outside the scope of what the maintainers will support scope of what the maintainers will support and by doing so, you
and by doing so, you accept that this behaviour may break at any accept that this behaviour may break at any time without notice.'
time without notice.'
items: items:
description: A single application container that you want to run description: A single application container that you want to run
within a pod. within a pod.
@ -1229,9 +1229,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -1666,6 +1670,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -1673,6 +1678,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -1796,13 +1805,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -2196,6 +2213,15 @@ spec:
value will be taken instead. value will be taken instead.
format: int64 format: int64
type: integer type: integer
enforcedTargetLimit:
description: EnforcedTargetLimit defines a global limit on the number
of scraped targets. This overrides any TargetLimit set per ServiceMonitor
or/and PodMonitor. It is meant to be used by admins to enforce the
TargetLimit to keep overall number of targets under the desired
limit. Note that if TargetLimit is higher that value will be taken
instead.
format: int64
type: integer
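A minimal sketch of the admin-level override described above, set on the Prometheus resource itself (the value is arbitrary):

  enforcedTargetLimit: 500   # global cap; overrides the per-ServiceMonitor/PodMonitor targetLimit added in this commit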
evaluationInterval: evaluationInterval:
description: Interval between consecutive evaluations. description: Interval between consecutive evaluations.
type: string type: string
@ -2347,9 +2373,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -2784,6 +2814,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -2791,6 +2822,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -2914,13 +2949,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -3670,7 +3713,7 @@ spec:
description: TLS Config to use for remote read. description: TLS Config to use for remote read.
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the description: Struct containing the CA cert to use for the
targets. targets.
properties: properties:
configMap: configMap:
@ -3907,7 +3950,7 @@ spec:
description: TLS Config to use for remote write. description: TLS Config to use for remote write.
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the description: Struct containing the CA cert to use for the
targets. targets.
properties: properties:
configMap: configMap:
@ -4088,7 +4131,9 @@ spec:
will _not_ be added when value is set to empty string (`""`). will _not_ be added when value is set to empty string (`""`).
type: string type: string
replicas: replicas:
description: Number of instances to deploy for a Prometheus deployment. description: Number of replicas of each shard to deploy for a Prometheus
deployment. Number of replicas multiplied by shards is the total
number of Pods created.
format: int32 format: int32
type: integer type: integer
resources: resources:
@ -4096,13 +4141,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise it defaults to Limits if that is explicitly specified, otherwise
@ -4479,6 +4532,17 @@ spec:
if SHA is set. Deprecated: use ''image'' instead. The image digest if SHA is set. Deprecated: use ''image'' instead. The image digest
can be specified as part of the image URL.' can be specified as part of the image URL.'
type: string type: string
shards:
description: 'EXPERIMENTAL: Number of shards to distribute targets
onto. Number of replicas multiplied by shards is the total number
of Pods created. Note that scaling down shards will not reshard
data onto remaining instances, it must be manually moved. Increasing
shards will not reshard data either but it will continue to be available
from the same instances. To query globally use Thanos sidecar and
Thanos querier or remote write data to a central location. Sharding
is done on the content of the `__address__` target meta-label.'
format: int32
type: integer
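To make the arithmetic in the replicas and shards descriptions concrete, a hedged sketch with arbitrary values:

  replicas: 2   # pods per shard
  shards: 3     # EXPERIMENTAL; targets are distributed by the __address__ meta-label
                # total pods created: replicas * shards = 6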
storage: storage:
description: Storage spec to specify how storage shall be used. description: Storage spec to specify how storage shall be used.
properties: properties:
@ -4499,6 +4563,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would medium. The maximum usage on memory medium EmptyDir would
@ -4506,7 +4573,8 @@ spec:
and the sum of memory limits of all containers in a pod. and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined. The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
volumeClaimTemplate: volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets. description: A PVC spec to be used by the Prometheus StatefulSets.
@ -4602,13 +4670,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is for a container, it defaults to Limits if that is
@ -4688,7 +4764,11 @@ spec:
type: array type: array
capacity: capacity:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying description: Represents the actual resources of the underlying
volume. volume.
type: object type: object
@ -4761,7 +4841,8 @@ spec:
Maps to the ''--grpc-server-tls-*'' CLI args.' Maps to the ''--grpc-server-tls-*'' CLI args.'
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the targets. description: Struct containing the CA cert to use for the
targets.
properties: properties:
configMap: configMap:
description: ConfigMap containing data to use for the description: ConfigMap containing data to use for the
@ -4907,7 +4988,8 @@ spec:
type: string type: string
objectStorageConfig: objectStorageConfig:
description: ObjectStorageConfig configures object storage in description: ObjectStorageConfig configures object storage in
Thanos. Thanos. Alternative to ObjectStorageConfigFile, and lower order
priority.
properties: properties:
key: key:
description: The key of the secret to select from. Must be description: The key of the secret to select from. Must be
@ -4924,6 +5006,11 @@ spec:
required: required:
- key - key
type: object type: object
objectStorageConfigFile:
description: ObjectStorageConfigFile specifies the path of the
object storage configuration file. When used alongside with
ObjectStorageConfig, ObjectStorageConfigFile takes precedence.
type: string
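A sketch of the two object-storage options on the Thanos sidecar section of a Prometheus spec; the Secret name and file path are placeholders, and per the descriptions above the file variant wins when both are set:

  thanos:
    objectStorageConfig:
      name: thanos-objstore          # hypothetical Secret holding the objstore configuration
      key: objstore.yml
    objectStorageConfigFile: /etc/thanos/objstore.yml   # takes precedence over objectStorageConfig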
resources: resources:
description: Resources defines the resource requirements for the description: Resources defines the resource requirements for the
Thanos sidecar. If not provided, no requests/limits will be Thanos sidecar. If not provided, no requests/limits will be
@ -4931,13 +5018,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise it defaults to Limits if that is explicitly specified, otherwise
@ -5021,6 +5116,100 @@ spec:
type: string type: string
type: object type: object
type: array type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
version: version:
description: Version of Prometheus to be deployed. description: Version of Prometheus to be deployed.
type: string type: string
@ -5393,9 +5582,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -5418,6 +5611,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium for memory medium. The maximum usage on memory medium
@ -5425,7 +5621,8 @@ spec:
specified here and the sum of memory limits of all containers specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
fc: fc:
description: FC represents a Fibre Channel resource that is description: FC represents a Fibre Channel resource that is
@ -5888,10 +6085,14 @@ spec:
for volumes, optional for env vars' for volumes, optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format description: Specifies the output format
of the exposed resources, defaults of the exposed resources, defaults
to "1" to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to description: 'Required: resource to
select' select'
@ -6268,6 +6469,14 @@ spec:
description: Enable compression of the write-ahead log using Snappy. description: Enable compression of the write-ahead log using Snappy.
This flag is only available in versions of Prometheus >= 2.11.0. This flag is only available in versions of Prometheus >= 2.11.0.
type: boolean type: boolean
web:
description: WebSpec defines the web command line flags when starting
Prometheus.
properties:
pageTitle:
description: The prometheus web page title
type: string
type: object
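A sketch of the new web block (the title is a placeholder):

  web:
    pageTitle: "Example Corp Prometheus"   # hypothetical page title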
type: object type: object
status: status:
description: 'Most recent observed status of the Prometheus cluster. Read-only. description: 'Most recent observed status of the Prometheus cluster. Read-only.
@ -6280,7 +6489,7 @@ spec:
format: int32 format: int32
type: integer type: integer
paused: paused:
description: Represents whether any actions on the underlaying managed description: Represents whether any actions on the underlying managed
objects are being performed. Only delete actions will be performed. objects are being performed. Only delete actions will be performed.
type: boolean type: boolean
replicas: replicas:
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: prometheusrules.monitoring.coreos.com name: prometheusrules.monitoring.coreos.com
spec: spec:
@ -17,7 +17,8 @@ spec:
- name: v1 - name: v1
schema: schema:
openAPIV3Schema: openAPIV3Schema:
description: PrometheusRule defines alerting rules for a Prometheus instance description: PrometheusRule defines recording and alerting rules for a Prometheus
instance
properties: properties:
apiVersion: apiVersion:
description: 'APIVersion defines the versioned schema of this representation description: 'APIVersion defines the versioned schema of this representation
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: servicemonitors.monitoring.coreos.com name: servicemonitors.monitoring.coreos.com
spec: spec:
@ -246,7 +246,7 @@ spec:
description: TLS configuration to use when scraping the endpoint description: TLS configuration to use when scraping the endpoint
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the description: Struct containing the CA cert to use for the
targets. targets.
properties: properties:
configMap: configMap:
@ -449,6 +449,11 @@ spec:
items: items:
type: string type: string
type: array type: array
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required: required:
- endpoints - endpoints
- selector - selector
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
metadata: metadata:
annotations: annotations:
controller-gen.kubebuilder.io/version: v0.2.4 controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null creationTimestamp: null
name: thanosrulers.monitoring.coreos.com name: thanosrulers.monitoring.coreos.com
spec: spec:
@ -672,7 +672,7 @@ spec:
the behavior of an operator generated container. Containers described the behavior of an operator generated container. Containers described
here modify an operator generated container if they share the same here modify an operator generated container if they share the same
name and modifications are done via a strategic merge patch. The name and modifications are done via a strategic merge patch. The
current container names are: `thanos-ruler` and `rules-configmap-reloader`. current container names are: `thanos-ruler` and `config-reloader`.
Overriding containers is entirely outside the scope of what the Overriding containers is entirely outside the scope of what the
maintainers will support and by doing so, you accept that this behaviour maintainers will support and by doing so, you accept that this behaviour
may break at any time without notice.' may break at any time without notice.'
@ -777,9 +777,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -1214,6 +1218,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -1221,6 +1226,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -1344,13 +1353,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -1738,7 +1755,7 @@ spec:
the ''--grpc-server-tls-*'' CLI args.' the ''--grpc-server-tls-*'' CLI args.'
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the targets. description: Struct containing the CA cert to use for the targets.
properties: properties:
configMap: configMap:
description: ConfigMap containing data to use for the targets. description: ConfigMap containing data to use for the targets.
@ -1979,9 +1996,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -2416,6 +2437,7 @@ spec:
be referred to by services. be referred to by services.
type: string type: string
protocol: protocol:
default: TCP
description: Protocol for port. Must be UDP, TCP, or SCTP. description: Protocol for port. Must be UDP, TCP, or SCTP.
Defaults to "TCP". Defaults to "TCP".
type: string type: string
@ -2423,6 +2445,10 @@ spec:
- containerPort - containerPort
type: object type: object
type: array type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe: readinessProbe:
description: 'Periodic probe of container service readiness. description: 'Periodic probe of container service readiness.
Container will be removed from service endpoints if the probe Container will be removed from service endpoints if the probe
@ -2546,13 +2572,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute description: 'Limits describes the maximum amount of compute
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, it defaults to Limits if that is explicitly specified,
@ -2944,6 +2978,7 @@ spec:
type: object type: object
objectStorageConfig: objectStorageConfig:
description: ObjectStorageConfig configures object storage in Thanos. description: ObjectStorageConfig configures object storage in Thanos.
Alternative to ObjectStorageConfigFile, and lower order priority.
properties: properties:
key: key:
description: The key of the secret to select from. Must be a description: The key of the secret to select from. Must be a
@ -2959,6 +2994,11 @@ spec:
required: required:
- key - key
type: object type: object
objectStorageConfigFile:
description: ObjectStorageConfigFile specifies the path of the object
storage configuration file. When used alongside with ObjectStorageConfig,
ObjectStorageConfigFile takes precedence.
type: string
paused: paused:
description: When a ThanosRuler deployment is paused, no actions except description: When a ThanosRuler deployment is paused, no actions except
for deletion will be performed on the underlying objects. for deletion will be performed on the underlying objects.
@ -3055,13 +3095,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount of compute resources description: 'Limits describes the maximum amount of compute resources
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount of compute description: 'Requests describes the minimum amount of compute
resources required. If Requests is omitted for a container, resources required. If Requests is omitted for a container,
it defaults to Limits if that is explicitly specified, otherwise it defaults to Limits if that is explicitly specified, otherwise
@ -3314,6 +3362,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for this description: 'Total amount of local storage required for this
EmptyDir volume. The size limit is also applicable for memory EmptyDir volume. The size limit is also applicable for memory
medium. The maximum usage on memory medium EmptyDir would medium. The maximum usage on memory medium EmptyDir would
@ -3321,7 +3372,8 @@ spec:
and the sum of memory limits of all containers in a pod. and the sum of memory limits of all containers in a pod.
The default is nil which means that the limit is undefined. The default is nil which means that the limit is undefined.
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
volumeClaimTemplate: volumeClaimTemplate:
description: A PVC spec to be used by the Prometheus StatefulSets. description: A PVC spec to be used by the Prometheus StatefulSets.
@ -3417,13 +3469,21 @@ spec:
properties: properties:
limits: limits:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Limits describes the maximum amount description: 'Limits describes the maximum amount
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object type: object
requests: requests:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: 'Requests describes the minimum amount description: 'Requests describes the minimum amount
of compute resources required. If Requests is omitted of compute resources required. If Requests is omitted
for a container, it defaults to Limits if that is for a container, it defaults to Limits if that is
@ -3503,7 +3563,11 @@ spec:
type: array type: array
capacity: capacity:
additionalProperties: additionalProperties:
type: string anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: Represents the actual resources of the underlying description: Represents the actual resources of the underlying
volume. volume.
type: object type: object
@ -3592,6 +3656,100 @@ spec:
type: string type: string
type: object type: object
type: array type: array
topologySpreadConstraints:
description: If specified, the pod's topology spread constraints.
items:
description: TopologySpreadConstraint specifies how to spread matching
pods among the given topology.
properties:
labelSelector:
description: LabelSelector is used to find matching pods. Pods
that match this label selector are counted to determine the
number of pods in their corresponding topology domain.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. It''s the maximum permitted difference
between the number of matching pods in any two topology domains
of a given topology type. For example, in a 3-zone cluster,
MaxSkew is set to 1, and pods with the same labelSelector
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
to become 1/1/1; scheduling it onto zone1(zone2) would make
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
It''s a required field. Default value is 1 and 0 is not allowed.'
format: int32
type: integer
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
to be in the same topology. We consider each <key, value>
as a "bucket", and try to put balanced number of pods into
each bucket. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
(default) tells the scheduler not to schedule it - ScheduleAnyway
tells the scheduler to still schedule it It''s considered
as "Unsatisfiable" if and only if placing incoming pod on
any topology violates "MaxSkew". For example, in a 3-zone
cluster, MaxSkew is set to 1, and pods with the same labelSelector
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
other words, the cluster can still be imbalanced, but scheduler
won''t make it *more* imbalanced. It''s a required field.'
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
tracingConfig: tracingConfig:
description: TracingConfig configures tracing in Thanos. This is an description: TracingConfig configures tracing in Thanos. This is an
experimental feature, it may change in any upcoming release in a experimental feature, it may change in any upcoming release in a
@ -3938,9 +4096,13 @@ spec:
optional for env vars' optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the description: Specifies the output format of the
exposed resources, defaults to "1" exposed resources, defaults to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to select' description: 'Required: resource to select'
type: string type: string
@ -3963,6 +4125,9 @@ spec:
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string type: string
sizeLimit: sizeLimit:
anyOf:
- type: integer
- type: string
description: 'Total amount of local storage required for description: 'Total amount of local storage required for
this EmptyDir volume. The size limit is also applicable this EmptyDir volume. The size limit is also applicable
for memory medium. The maximum usage on memory medium for memory medium. The maximum usage on memory medium
@ -3970,7 +4135,8 @@ spec:
specified here and the sum of memory limits of all containers specified here and the sum of memory limits of all containers
in a pod. The default is nil which means that the limit in a pod. The default is nil which means that the limit
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object type: object
fc: fc:
description: FC represents a Fibre Channel resource that is description: FC represents a Fibre Channel resource that is
@ -4433,10 +4599,14 @@ spec:
for volumes, optional for env vars' for volumes, optional for env vars'
type: string type: string
divisor: divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format description: Specifies the output format
of the exposed resources, defaults of the exposed resources, defaults
to "1" to "1"
type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource: resource:
description: 'Required: resource to description: 'Required: resource to
select' select'
@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
rules: rules:
- apiGroups: - apiGroups:
@ -12,6 +12,7 @@ rules:
resources: resources:
- alertmanagers - alertmanagers
- alertmanagers/finalizers - alertmanagers/finalizers
- alertmanagerconfigs
- prometheuses - prometheuses
- prometheuses/finalizers - prometheuses/finalizers
- thanosrulers - thanosrulers
@ -68,6 +69,14 @@ rules:
- get - get
- list - list
- watch - watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups: - apiGroups:
- authentication.k8s.io - authentication.k8s.io
resources: resources:

View File

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
roleRef: roleRef:
apiGroup: rbac.authorization.k8s.io apiGroup: rbac.authorization.k8s.io

View File

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:
@ -18,15 +18,13 @@ spec:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
spec: spec:
containers: containers:
- args: - args:
- --kubelet-service=kube-system/kubelet - --kubelet-service=kube-system/kubelet
- --logtostderr=true - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.44.1
- --config-reloader-image=jimmidyson/configmap-reload:v0.4.0 image: quay.io/prometheus-operator/prometheus-operator:v0.44.1
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.1
image: quay.io/prometheus-operator/prometheus-operator:v0.42.1
name: prometheus-operator name: prometheus-operator
ports: ports:
- containerPort: 8080 - containerPort: 8080
@ -45,13 +43,15 @@ spec:
- --secure-listen-address=:8443 - --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8080/ - --upstream=http://127.0.0.1:8080/
image: quay.io/brancz/kube-rbac-proxy:v0.6.0 image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy name: kube-rbac-proxy
ports: ports:
- containerPort: 8443 - containerPort: 8443
name: https name: https
securityContext: securityContext:
runAsUser: 65534 runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector: nodeSelector:
beta.kubernetes.io/os: linux beta.kubernetes.io/os: linux
securityContext: securityContext:

View File

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:

View File

@ -4,6 +4,6 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.42.1 app.kubernetes.io/version: v0.44.1
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring

monitoring/vendor/alertmanager vendored Symbolic link
View File

@ -0,0 +1 @@
github.com/prometheus/alertmanager/doc/alertmanager-mixin

View File

@ -1,11 +1,9 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
_config+:: { _config+:: {
namespace: 'default', namespace: 'default',
versions+:: { versions+:: {
grafana: '6.6.0', grafana: '7.3.4',
}, },
imageRepos+:: { imageRepos+:: {
@ -30,12 +28,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
version: 1, version: 1,
editable: false, editable: false,
}], }],
// Forces pod restarts when dashboards are changed
dashboardsChecksum: false,
config: {}, config: {},
ldap: null, ldap: null,
plugins: [], plugins: [],
env: [], env: [],
port: 3000, port: 3000,
container: { resources: {
requests: { cpu: '100m', memory: '100Mi' }, requests: { cpu: '100m', memory: '100Mi' },
limits: { cpu: '200m', memory: '200Mi' }, limits: { cpu: '200m', memory: '200Mi' },
}, },
@ -45,36 +45,65 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
grafanaDashboards: {}, grafanaDashboards: {},
grafana+: { grafana+: {
[if std.length($._config.grafana.config) > 0 then 'config']: [if std.length($._config.grafana.config) > 0 then 'config']:
local secret = k.core.v1.secret; {
local grafanaConfig = { 'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))) } + apiVersion: 'v1',
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {}; kind: 'Secret',
secret.new('grafana-config', grafanaConfig) + metadata: {
secret.mixin.metadata.withNamespace($._config.namespace), name: 'grafana-config',
namespace: $._config.namespace,
},
type: 'Opaque',
data: {
'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))),
} +
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {},
},
dashboardDefinitions: dashboardDefinitions:
local configMap = k.core.v1.configMap;
[ [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); {
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') }) + local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
configMap.mixin.metadata.withNamespace($._config.namespace) apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
}
for name in std.objectFields($._config.grafana.dashboards) for name in std.objectFields($._config.grafana.dashboards)
] + [ ] + [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); {
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') }) + local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
configMap.mixin.metadata.withNamespace($._config.namespace) apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
}
for folder in std.objectFields($._config.grafana.folderDashboards) for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder]) for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + if std.length($._config.grafana.rawDashboards) > 0 then ] + (
[ if std.length($._config.grafana.rawDashboards) > 0 then
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); [
configMap.new(dashboardName, { [name]: $._config.grafana.rawDashboards[name] }) +
configMap.mixin.metadata.withNamespace($._config.namespace)
for name in std.objectFields($._config.grafana.rawDashboards) {
] else [], local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
},
data: { [name]: $._config.grafana.rawDashboards[name] },
}
for name in std.objectFields($._config.grafana.rawDashboards)
]
else
[]
),
dashboardSources: dashboardSources:
local configMap = k.core.v1.configMap;
local dashboardSources = { local dashboardSources = {
apiVersion: 1, apiVersion: 1,
providers: providers:
@ -106,59 +135,80 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
], ],
}; };
configMap.new('grafana-dashboards', { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') }) + {
configMap.mixin.metadata.withNamespace($._config.namespace), kind: 'ConfigMap',
apiVersion: 'v1',
metadata: {
name: 'grafana-dashboards',
namespace: $._config.namespace,
},
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
},
dashboardDatasources: dashboardDatasources:
local secret = k.core.v1.secret; {
secret.new('grafana-datasources', { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({ apiVersion: 'v1',
apiVersion: 1, kind: 'Secret',
datasources: $._config.grafana.datasources, metadata: {
}, ' '))) }) + name: 'grafana-datasources',
secret.mixin.metadata.withNamespace($._config.namespace), namespace: $._config.namespace,
},
type: 'Opaque',
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
apiVersion: 1,
datasources: $._config.grafana.datasources,
}, ' '))) },
},
service: service:
local service = k.core.v1.service; {
local servicePort = k.core.v1.service.mixin.spec.portsType; apiVersion: 'v1',
kind: 'Service',
local grafanaServiceNodePort = servicePort.newNamed('http', $._config.grafana.port, 'http'); metadata: {
name: 'grafana',
service.new('grafana', $.grafana.deployment.spec.selector.matchLabels, grafanaServiceNodePort) + namespace: $._config.namespace,
service.mixin.metadata.withLabels({ app: 'grafana' }) + labels: {
service.mixin.metadata.withNamespace($._config.namespace), app: 'grafana',
},
},
spec: {
selector: $.grafana.deployment.spec.selector.matchLabels,
type: 'NodePort',
ports: [
{ name: 'http', targetPort: 'http', port: 3000 },
],
},
},
serviceAccount: serviceAccount:
local serviceAccount = k.core.v1.serviceAccount; {
serviceAccount.new('grafana') + apiVersion: 'v1',
serviceAccount.mixin.metadata.withNamespace($._config.namespace), kind: 'ServiceAccount',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
},
},
deployment: deployment:
local deployment = k.apps.v1.deployment;
local container = k.apps.v1.deployment.mixin.spec.template.spec.containersType;
local volume = k.apps.v1.deployment.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
local containerVolumeMount = container.volumeMountsType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local env = container.envType;
local targetPort = $._config.grafana.port; local targetPort = $._config.grafana.port;
local portName = 'http'; local portName = 'http';
local podLabels = { app: 'grafana' }; local podLabels = { app: 'grafana' };
local configVolumeName = 'grafana-config'; local configVolumeName = 'grafana-config';
local configSecretName = 'grafana-config'; local configSecretName = 'grafana-config';
local configVolume = volume.withName(configVolumeName) + volume.mixin.secret.withSecretName(configSecretName); local configVolume = { name: configVolumeName, secret: { secretName: configSecretName } };
local configVolumeMount = containerVolumeMount.new(configVolumeName, '/etc/grafana'); local configVolumeMount = { name: configVolumeName, mountPath: '/etc/grafana', readOnly: false };
local storageVolumeName = 'grafana-storage'; local storageVolumeName = 'grafana-storage';
local storageVolume = volume.fromEmptyDir(storageVolumeName); local storageVolume = { name: storageVolumeName, emptyDir: {} };
local storageVolumeMount = containerVolumeMount.new(storageVolumeName, '/var/lib/grafana'); local storageVolumeMount = { name: storageVolumeName, mountPath: '/var/lib/grafana', readOnly: false };
local datasourcesVolumeName = 'grafana-datasources'; local datasourcesVolumeName = 'grafana-datasources';
local datasourcesSecretName = 'grafana-datasources'; local datasourcesSecretName = 'grafana-datasources';
local datasourcesVolume = volume.withName(datasourcesVolumeName) + volume.mixin.secret.withSecretName(datasourcesSecretName); local datasourcesVolume = { name: datasourcesVolumeName, secret: { secretName: datasourcesSecretName } };
local datasourcesVolumeMount = containerVolumeMount.new(datasourcesVolumeName, '/etc/grafana/provisioning/datasources'); local datasourcesVolumeMount = { name: datasourcesVolumeName, mountPath: '/etc/grafana/provisioning/datasources', readOnly: false };
local dashboardsVolumeName = 'grafana-dashboards'; local dashboardsVolumeName = 'grafana-dashboards';
local dashboardsConfigMapName = 'grafana-dashboards'; local dashboardsConfigMapName = 'grafana-dashboards';
local dashboardsVolume = volume.withName(dashboardsVolumeName) + volume.mixin.configMap.withName(dashboardsConfigMapName); local dashboardsVolume = { name: dashboardsVolumeName, configMap: { name: dashboardsConfigMapName } };
local dashboardsVolumeMount = containerVolumeMount.new(dashboardsVolumeName, '/etc/grafana/provisioning/dashboards'); local dashboardsVolumeMount = { name: dashboardsVolumeName, mountPath: '/etc/grafana/provisioning/dashboards', readOnly: false };
local volumeMounts = local volumeMounts =
[ [
@ -167,23 +217,36 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
dashboardsVolumeMount, dashboardsVolumeMount,
] + ] +
[ [
local dashboardName = std.strReplace(name, '.json', ''); {
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName) local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.dashboards) for name in std.objectFields($._config.grafana.dashboards)
] + ] +
[ [
local dashboardName = std.strReplace(name, '.json', ''); {
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/' + folder + '/' + dashboardName) local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
readOnly: false,
}
for folder in std.objectFields($._config.grafana.folderDashboards) for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder]) for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + ] +
[ [
local dashboardName = std.strReplace(name, '.json', ''); {
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
for name in std.objectFields($._config.grafana.rawDashboards)
] +
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []; local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.rawDashboards)
] + (
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []
);
local volumes = local volumes =
[ [
@ -192,52 +255,82 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
dashboardsVolume, dashboardsVolume,
] + ] +
[ [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); {
volume.withName(dashboardName) + local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
volume.mixin.configMap.withName(dashboardName) name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.dashboards) for name in std.objectFields($._config.grafana.dashboards)
] + ] +
[ [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); {
volume.withName(dashboardName) + local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
volume.mixin.configMap.withName(dashboardName) name: dashboardName,
configMap: { name: dashboardName },
}
for folder in std.objectFields($._config.grafana.folderDashboards) for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder]) for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + ] +
[ [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''); {
volume.withName(dashboardName) + local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
volume.mixin.configMap.withName(dashboardName) name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.rawDashboards) for name in std.objectFields($._config.grafana.rawDashboards)
] + ] +
if std.length($._config.grafana.config) > 0 then [configVolume] else []; if std.length($._config.grafana.config) > 0 then [configVolume] else [];
local plugins = (if std.length($._config.grafana.plugins) == 0 then [] else [env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))]); local plugins = (
if std.length($._config.grafana.plugins) == 0 then
[]
else
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', $._config.grafana.plugins) }]
);
local c = [ local c = [{
container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) + name: 'grafana',
container.withEnv($._config.grafana.env + plugins) + image: $._config.imageRepos.grafana + ':' + $._config.versions.grafana,
container.withVolumeMounts(volumeMounts) + env: $._config.grafana.env + plugins,
container.withPorts(containerPort.newNamed(targetPort, portName)) + volumeMounts: volumeMounts,
container.mixin.readinessProbe.httpGet.withPath('/api/health') + ports: [{ name: portName, containerPort: targetPort }],
container.mixin.readinessProbe.httpGet.withPort(portName) + readinessProbe: {
container.mixin.resources.withRequests($._config.grafana.container.requests) + httpGet: { path: '/api/health', port: portName },
container.mixin.resources.withLimits($._config.grafana.container.limits), },
] + $._config.grafana.containers; resources: $._config.grafana.resources,
}] + $._config.grafana.containers;
deployment.new('grafana', 1, c, podLabels) + {
deployment.mixin.metadata.withNamespace($._config.namespace) + apiVersion: 'apps/v1',
deployment.mixin.metadata.withLabels(podLabels) + kind: 'Deployment',
deployment.mixin.spec.selector.withMatchLabels(podLabels) + metadata: {
deployment.mixin.spec.template.metadata.withAnnotations({ name: 'grafana',
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)), namespace: $._config.namespace,
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)), labels: podLabels,
}) + },
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) + spec: {
deployment.mixin.spec.template.spec.withVolumes(volumes) + replicas: 1,
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) + selector: {
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) + matchLabels: podLabels,
deployment.mixin.spec.template.spec.securityContext.withFsGroup(65534) + },
deployment.mixin.spec.template.spec.withServiceAccountName('grafana'), template: {
metadata: {
labels: podLabels,
annotations: {
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
[if $._config.grafana.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString($.grafana.dashboardDefinitions)),
},
},
spec: {
containers: c,
volumes: volumes,
serviceAccountName: $.grafana.serviceAccount.metadata.name,
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
securityContext: { fsGroup: 65534, runAsNonRoot: true, runAsUser: 65534 },
},
},
},
},
}, },
} }
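
A hedged illustration of the two options this rewrite introduces or renames; the import path and values are placeholders, and only dashboardsChecksum and the container-to-resources rename come from the change above.

local kubernetesGrafana = import 'grafana/grafana.libsonnet';  // illustrative import path

kubernetesGrafana {
  _config+:: {
    grafana+: {
      // New in this change: roll the Grafana pod when dashboard ConfigMaps change.
      dashboardsChecksum: true,
      // Renamed from `container` to `resources` in this change; values below are placeholders.
      resources+: {
        limits: { cpu: '300m', memory: '300Mi' },
      },
    },
  },
}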

View File

@ -9,16 +9,6 @@
} }
}, },
"version": "master" "version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib.git",
"subdir": ""
}
},
"version": "master",
"name": "ksonnet"
} }
], ],
"legacyImports": false "legacyImports": false

View File

@ -184,7 +184,7 @@
severity: 'critical', severity: 'critical',
}, },
annotations: { annotations: {
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.', message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
}, },
}, },
{ {

View File

@ -27,6 +27,7 @@ local timepickerlib = import 'timepicker.libsonnet';
* @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}` * @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
* @method addPanels(panels) Appends an array of panels * @method addPanels(panels) Appends an array of panels
* @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/) * @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
* @method addLinks(dashboardLink) Adds an array of [dashboard links](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
* @method addRequired(type, name, id, version) * @method addRequired(type, name, id, version)
* @method addInput(name, label, type, pluginId, pluginName, description, value) * @method addInput(name, label, type, pluginId, pluginName, description, value)
* @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`. * @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`.
@ -149,6 +150,7 @@ local timepickerlib = import 'timepicker.libsonnet';
addLink(link):: self { addLink(link):: self {
links+: [link], links+: [link],
}, },
addLinks(dashboardLinks):: std.foldl(function(d, t) d.addLink(t), dashboardLinks, self),
required:: [], required:: [],
__requires: it.required, __requires: it.required,
addRequired(type, name, id, version):: self { addRequired(type, name, id, version):: self {
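
A minimal usage sketch of the new addLinks helper; the import path, dashboard title, and link objects are illustrative, and the helper simply folds addLink over the array as defined above.

local grafana = import 'grafonnet/grafana.libsonnet';  // illustrative import path
local dashboard = grafana.dashboard;

dashboard.new('Example dashboard')
.addLinks([
  // addLinks folds addLink over the array, so plain Grafana link objects work here.
  { title: 'Runbooks', type: 'link', url: 'https://example.com/runbooks' },
  { title: 'Related dashboards', type: 'dashboards', tags: ['kubernetes-mixin'] },
])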

View File

@ -42,6 +42,7 @@
* @param links (optional) * @param links (optional)
* @param tableColumn (default `''`) * @param tableColumn (default `''`)
* @param maxPerRow (optional) * @param maxPerRow (optional)
* @param maxDataPoints (default `100`)
* *
* @method addTarget(target) Adds a target object. * @method addTarget(target) Adds a target object.
*/ */
@ -100,6 +101,7 @@
links=[], links=[],
tableColumn='', tableColumn='',
maxPerRow=null, maxPerRow=null,
maxDataPoints=100,
):: )::
{ {
[if height != null then 'height']: height, [if height != null then 'height']: height,
@ -116,7 +118,7 @@
], ],
links: links, links: links,
[if decimals != null then 'decimals']: decimals, [if decimals != null then 'decimals']: decimals,
maxDataPoints: 100, maxDataPoints: maxDataPoints,
interval: interval, interval: interval,
cacheTimeout: null, cacheTimeout: null,
format: format, format: format,
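
A short sketch of the new maxDataPoints parameter in use; the import path, panel title, and query are illustrative.

local grafana = import 'grafonnet/grafana.libsonnet';  // illustrative import path
local singlestat = grafana.singlestat;
local prometheus = grafana.prometheus;

singlestat.new(
  'API server availability',  // illustrative title
  format='percentunit',
  // Previously hard-coded to 100 data points; now adjustable per panel.
  maxDataPoints=500,
)
.addTarget(prometheus.target('avg(up{job="apiserver"})'))  // illustrative query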

View File

@ -367,7 +367,7 @@
}, },
], ],
qpsPanel(selector):: { qpsPanel(selector, statusLabelName='status_code'):: {
aliasColors: { aliasColors: {
'1xx': '#EAB839', '1xx': '#EAB839',
'2xx': '#7EB26D', '2xx': '#7EB26D',
@ -379,9 +379,13 @@
}, },
targets: [ targets: [
{ {
expr: 'sum by (status) (label_replace(label_replace(rate(' + selector + '[$__interval]),' expr:
+ ' "status", "${1}xx", "status_code", "([0-9]).."),' |||
+ ' "status", "${1}", "status_code", "([a-z]+)"))', sum by (status) (
label_replace(label_replace(rate(%s[$__interval]),
"status", "${1}xx", "%s", "([0-9]).."),
"status", "${1}", "%s", "([a-z]+)"))
||| % [selector, statusLabelName, statusLabelName],
format: 'time_series', format: 'time_series',
intervalFactor: 2, intervalFactor: 2,
legendFormat: '{{status}}', legendFormat: '{{status}}',
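
A hedged usage sketch of the new statusLabelName parameter, for a metric whose status is exposed in a `code` label instead of the default `status_code`; the selector and the surrounding panel helper are assumptions, not taken from this diff.

local g = import 'grafana-builder/grafana.libsonnet';  // illustrative import path

// Splits request rates by status class using the `code` label of the selected metric.
g.panel('Requests per second') +
g.qpsPanel('http_requests_total{job="example"}', statusLabelName='code')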

View File

@ -14,7 +14,8 @@ A set of Grafana dashboards and Prometheus alerts for Kubernetes.
| release-0.3 | v1.17 and before | v2.11.0+ | | release-0.3 | v1.17 and before | v2.11.0+ |
| release-0.4 | v1.18 | v2.11.0+ | | release-0.4 | v1.18 | v2.11.0+ |
| release-0.5 | v1.19 | v2.11.0+ | | release-0.5 | v1.19 | v2.11.0+ |
| master | v1.19 | v2.11.0+ | | release-0.6 | v1.19+ | v2.11.0+ |
| master | v1.19+ | v2.11.0+ |
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented. In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis. Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis.
@ -23,6 +24,8 @@ Some alerts now use Prometheus filters made available in Prometheus 2.11.0, whic
Warning: This compatibility matrix was initially created based on experience, we do not guarantee the compatibility, it may be updated based on new learnings. Warning: This compatibility matrix was initially created based on experience, we do not guarantee the compatibility, it may be updated based on new learnings.
Warning: By default the expressions will generate *grafana 7.2+* compatible rules using the *$__rate_interval* variable for rate functions. If you need backward-compatible rules, please set *grafana72: false* in your *_config*.
## How to use ## How to use
This mixin is designed to be vendored into the repo with your infrastructure config. This mixin is designed to be vendored into the repo with your infrastructure config.

View File

@ -268,6 +268,14 @@
!= !=
kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}) kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and and
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
>
kube_hpa_spec_min_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
<
kube_hpa_spec_max_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
and
changes(kube_hpa_status_current_replicas[15m]) == 0 changes(kube_hpa_status_current_replicas[15m]) == 0
||| % $._config, ||| % $._config,
labels: { labels: {

View File

@ -82,7 +82,7 @@
expr: ||| expr: |||
sum(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource="memory"}) sum(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource="memory"})
/ /
sum(kube_node_status_allocatable_memory_bytes{%(nodeExporterSelector)s}) sum(kube_node_status_allocatable_memory_bytes{%(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s > %(namespaceOvercommitFactor)s
||| % $._config, ||| % $._config,
labels: { labels: {

View File

@ -58,6 +58,10 @@
'kubelet.json': 'B1azll2ETo7DTiM8CysrH6g4s5NCgkOz6ZdU8Q0j', 'kubelet.json': 'B1azll2ETo7DTiM8CysrH6g4s5NCgkOz6ZdU8Q0j',
}, },
// Support for Grafana 7.2+ `$__rate_interval` instead of `$__interval`
grafana72: true,
grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
// Config for the Grafana dashboards in the Kubernetes Mixin // Config for the Grafana dashboards in the Kubernetes Mixin
grafanaK8s: { grafanaK8s: {
dashboardNamePrefix: 'Kubernetes / ', dashboardNamePrefix: 'Kubernetes / ',
@ -83,7 +87,7 @@
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes), fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
// This list of disk device names is referenced in various expressions. // This list of disk device names is referenced in various expressions.
diskDevices: ['nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'], diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices), diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
}, },
} }
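
For context, a hedged sketch of how grafanaIntervalVar is meant to be interpolated into dashboard expressions; the metric below is illustrative and not taken from this diff.

local c = {
  grafana72: true,
  grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
};

{
  // Expands to [...$__rate_interval] on Grafana 7.2+, or [...$__interval] otherwise.
  // node_cpu_seconds_total is only an illustrative metric here.
  cpuUsageExpr: 'sum(rate(node_cpu_seconds_total[%(grafanaIntervalVar)s]))' % c,
}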

View File

@ -32,7 +32,7 @@ local singlestat = grafana.singlestat;
format='percentunit', format='percentunit',
decimals=3, decimals=3,
fill=10, fill=10,
description='How much error budget is left looking at our %.3f%% availability gurantees?' % $._config.SLOs.apiserver.target, description='How much error budget is left looking at our %.3f%% availability guarantees?' % $._config.SLOs.apiserver.target,
) )
.addTarget(prometheus.target('100 * (apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"} - %f)' % [$._config.SLOs.apiserver.days, $._config.clusterLabel, $._config.SLOs.apiserver.target], legendFormat='errorbudget')); .addTarget(prometheus.target('100 * (apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"} - %f)' % [$._config.SLOs.apiserver.days, $._config.clusterLabel, $._config.SLOs.apiserver.target], legendFormat='errorbudget'));

View File

@ -26,7 +26,8 @@ local singlestat = grafana.singlestat;
span=2, span=2,
valueName='min', valueName='min',
) )
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}')); // TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_pod_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local runningContainerCount = local runningContainerCount =
singlestat.new( singlestat.new(
@ -35,7 +36,8 @@ local singlestat = grafana.singlestat;
span=2, span=2,
valueName='min', valueName='min',
) )
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}')); // TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_container_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
local actualVolumeCount = local actualVolumeCount =
singlestat.new( singlestat.new(

View File

@ -334,6 +334,14 @@ local singlestat = grafana.singlestat;
title='Errors', title='Errors',
collapse=true, collapse=true,
); );
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
dashboard.new( dashboard.new(
title='%(dashboardNamePrefix)sNetworking / Cluster' % $._config.grafanaK8s, title='%(dashboardNamePrefix)sNetworking / Cluster' % $._config.grafanaK8s,
@ -366,17 +374,18 @@ local singlestat = grafana.singlestat;
type: 'datasource', type: 'datasource',
}, },
) )
.addTemplate(clusterTemplate)
.addPanel( .addPanel(
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Received', graphTitle='Current Rate of Bytes Received',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 1 } gridPos={ h: 9, w: 12, x: 0, y: 1 }
) )
.addPanel( .addPanel(
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted', graphTitle='Current Rate of Bytes Transmitted',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 1 } gridPos={ h: 9, w: 12, x: 12, y: 1 }
) )
@ -384,14 +393,14 @@ local singlestat = grafana.singlestat;
newTablePanel( newTablePanel(
tableTitle='Current Status', tableTitle='Current Status',
colQueries=[ colQueries=[
'sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', 'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
] ]
), ),
gridPos={ h: 9, w: 24, x: 0, y: 10 } gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -401,14 +410,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Received', graphTitle='Average Rate of Bytes Received',
graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 11 } gridPos={ h: 9, w: 12, x: 0, y: 11 }
) )
.addPanel( .addPanel(
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted', graphTitle='Average Rate of Bytes Transmitted',
graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 11 } gridPos={ h: 9, w: 12, x: 12, y: 11 }
), ),
@ -420,14 +429,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Receive Bandwidth', graphTitle='Receive Bandwidth',
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 24, x: 0, y: 12 } gridPos={ h: 9, w: 24, x: 0, y: 12 }
) )
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Transmit Bandwidth', graphTitle='Transmit Bandwidth',
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
), ),
gridPos={ h: 9, w: 24, x: 0, y: 21 } gridPos={ h: 9, w: 24, x: 0, y: 21 }
) )
@ -436,7 +445,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets', graphTitle='Rate of Received Packets',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 24, x: 0, y: 31 } gridPos={ h: 9, w: 24, x: 0, y: 31 }
@ -444,7 +453,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets', graphTitle='Rate of Transmitted Packets',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 24, x: 0, y: 40 } gridPos={ h: 9, w: 24, x: 0, y: 40 }
@ -456,7 +465,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets Dropped', graphTitle='Rate of Received Packets Dropped',
graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 24, x: 0, y: 50 } gridPos={ h: 9, w: 24, x: 0, y: 50 }
@ -464,7 +473,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped', graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))', graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 24, x: 0, y: 59 } gridPos={ h: 9, w: 24, x: 0, y: 59 }
@ -472,7 +481,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of TCP Retransmits out of all sent segments', graphTitle='Rate of TCP Retransmits out of all sent segments',
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))', graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
graphFormat='percentunit', graphFormat='percentunit',
legendFormat='{{instance}}' legendFormat='{{instance}}'
) + { links: [ ) + { links: [
@ -486,7 +495,7 @@ local singlestat = grafana.singlestat;
).addPanel( ).addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of TCP SYN Retransmits out of all retransmits', graphTitle='Rate of TCP SYN Retransmits out of all retransmits',
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))', graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
graphFormat='percentunit', graphFormat='percentunit',
legendFormat='{{instance}}' legendFormat='{{instance}}'
) + { links: [ ) + { links: [

View File

@ -227,12 +227,20 @@ local singlestat = grafana.singlestat;
targets: targets, targets: targets,
}; };
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate = local namespaceTemplate =
template.new( template.new(
name='namespace', name='namespace',
datasource='$datasource', datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)', query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+', allValues='.+',
current='kube-system', current='kube-system',
hide='', hide='',
@ -243,7 +251,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)', definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -362,6 +370,7 @@ local singlestat = grafana.singlestat;
type: 'datasource', type: 'datasource',
}, },
) )
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate) .addTemplate(namespaceTemplate)
.addTemplate(resolutionTemplate) .addTemplate(resolutionTemplate)
.addTemplate(intervalTemplate) .addTemplate(intervalTemplate)
@ -370,14 +379,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGaugePanel( newGaugePanel(
gaugeTitle='Current Rate of Bytes Received', gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))' gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 1 } gridPos={ h: 9, w: 12, x: 0, y: 1 }
) )
.addPanel( .addPanel(
newGaugePanel( newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted', gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))' gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 1 } gridPos={ h: 9, w: 12, x: 12, y: 1 }
) )
@ -385,12 +394,12 @@ local singlestat = grafana.singlestat;
newTablePanel( newTablePanel(
tableTitle='Current Status', tableTitle='Current Status',
colQueries=[ colQueries=[
'sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
] ]
), ),
gridPos={ h: 9, w: 24, x: 0, y: 10 } gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -399,14 +408,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Receive Bandwidth', graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)' graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 20 } gridPos={ h: 9, w: 12, x: 0, y: 20 }
) )
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Transmit Bandwidth', graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)' graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 20 } gridPos={ h: 9, w: 12, x: 12, y: 20 }
) )
@ -415,7 +424,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets', graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 0, y: 30 } gridPos={ h: 10, w: 12, x: 0, y: 30 }
@ -423,7 +432,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets', graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 12, y: 30 } gridPos={ h: 10, w: 12, x: 12, y: 30 }
@ -435,7 +444,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets Dropped', graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 0, y: 40 } gridPos={ h: 10, w: 12, x: 0, y: 40 }
@ -443,7 +452,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped', graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 12, y: 40 } gridPos={ h: 10, w: 12, x: 12, y: 40 }

View File

@ -231,11 +231,20 @@ local singlestat = grafana.singlestat;
targets: targets, targets: targets,
}; };
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate = local namespaceTemplate =
template.new( template.new(
name='namespace', name='namespace',
datasource='$datasource', datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)', query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
current='kube-system', current='kube-system',
hide='', hide='',
refresh=1, refresh=1,
@ -245,7 +254,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)', definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -253,7 +262,7 @@ local singlestat = grafana.singlestat;
template.new( template.new(
name='type', name='type',
datasource='$datasource', datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)', query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
current='deployment', current='deployment',
hide='', hide='',
refresh=1, refresh=1,
@ -263,7 +272,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)', definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -390,6 +399,7 @@ local singlestat = grafana.singlestat;
type: 'datasource', type: 'datasource',
}, },
) )
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate) .addTemplate(namespaceTemplate)
.addTemplate(typeTemplate) .addTemplate(typeTemplate)
.addTemplate(resolutionTemplate) .addTemplate(resolutionTemplate)
@ -400,10 +410,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Received', graphTitle='Current Rate of Bytes Received',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
legendFormat='{{ workload }}', legendFormat='{{ workload }}',
), ),
gridPos={ h: 9, w: 12, x: 0, y: 1 } gridPos={ h: 9, w: 12, x: 0, y: 1 }
@ -412,10 +422,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted', graphTitle='Current Rate of Bytes Transmitted',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
legendFormat='{{ workload }}', legendFormat='{{ workload }}',
), ),
gridPos={ h: 9, w: 12, x: 12, y: 1 } gridPos={ h: 9, w: 12, x: 12, y: 1 }
@ -425,45 +435,45 @@ local singlestat = grafana.singlestat;
tableTitle='Current Status', tableTitle='Current Status',
colQueries=[ colQueries=[
||| |||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
||| |||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
] ]
), ),
gridPos={ h: 9, w: 24, x: 0, y: 10 } gridPos={ h: 9, w: 24, x: 0, y: 10 }
@ -474,10 +484,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Received', graphTitle='Average Rate of Bytes Received',
graphQuery=||| graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
legendFormat='{{ workload }}', legendFormat='{{ workload }}',
), ),
gridPos={ h: 9, w: 12, x: 0, y: 20 } gridPos={ h: 9, w: 12, x: 0, y: 20 }
@ -486,10 +496,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted', graphTitle='Average Rate of Bytes Transmitted',
graphQuery=||| graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
legendFormat='{{ workload }}', legendFormat='{{ workload }}',
), ),
gridPos={ h: 9, w: 12, x: 12, y: 20 } gridPos={ h: 9, w: 12, x: 12, y: 20 }
@ -503,10 +513,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Receive Bandwidth', graphTitle='Receive Bandwidth',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 38 } gridPos={ h: 9, w: 12, x: 0, y: 38 }
) )
@ -514,10 +524,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Transmit Bandwidth', graphTitle='Transmit Bandwidth',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 38 } gridPos={ h: 9, w: 12, x: 12, y: 38 }
) )
@ -527,10 +537,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets', graphTitle='Rate of Received Packets',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 0, y: 40 } gridPos={ h: 9, w: 12, x: 0, y: 40 }
@ -539,10 +549,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets', graphTitle='Rate of Transmitted Packets',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 12, y: 40 } gridPos={ h: 9, w: 12, x: 12, y: 40 }
@ -555,10 +565,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets Dropped', graphTitle='Rate of Received Packets Dropped',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 0, y: 41 } gridPos={ h: 9, w: 12, x: 0, y: 41 }
@ -567,10 +577,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped', graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 12, y: 41 } gridPos={ h: 9, w: 12, x: 12, y: 41 }

View File

@ -106,11 +106,21 @@ local singlestat = grafana.singlestat;
}, },
}; };
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
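The new $cluster template reads two _config fields; a minimal sketch of what the consuming environment is assumed to provide (the values are illustrative, not this repo's defaults):

{
  _config+:: {
    clusterLabel: 'cluster',   // metric label used in label_values(kube_pod_info, ...) to identify the cluster
    showMultiCluster: true,    // when false the template is created with hide='2', i.e. not shown in the dashboard
  },
}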
local namespaceTemplate = local namespaceTemplate =
template.new( template.new(
name='namespace', name='namespace',
datasource='$datasource', datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)', query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+', allValues='.+',
current='kube-system', current='kube-system',
hide='', hide='',
@ -121,7 +131,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)', definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -129,7 +139,7 @@ local singlestat = grafana.singlestat;
template.new( template.new(
name='pod', name='pod',
datasource='$datasource', datasource='$datasource',
query='label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)', query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
allValues='.+', allValues='.+',
current='', current='',
hide='', hide='',
@ -140,7 +150,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)', definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -259,6 +269,7 @@ local singlestat = grafana.singlestat;
type: 'datasource', type: 'datasource',
}, },
) )
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate) .addTemplate(namespaceTemplate)
.addTemplate(podTemplate) .addTemplate(podTemplate)
.addTemplate(resolutionTemplate) .addTemplate(resolutionTemplate)
@ -268,14 +279,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGaugePanel( newGaugePanel(
gaugeTitle='Current Rate of Bytes Received', gaugeTitle='Current Rate of Bytes Received',
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 1 } gridPos={ h: 9, w: 12, x: 0, y: 1 }
) )
.addPanel( .addPanel(
newGaugePanel( newGaugePanel(
gaugeTitle='Current Rate of Bytes Transmitted', gaugeTitle='Current Rate of Bytes Transmitted',
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 1 } gridPos={ h: 9, w: 12, x: 12, y: 1 }
) )
@ -283,14 +294,14 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Receive Bandwidth', graphTitle='Receive Bandwidth',
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 11 } gridPos={ h: 9, w: 12, x: 0, y: 11 }
) )
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Transmit Bandwidth', graphTitle='Transmit Bandwidth',
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 11 } gridPos={ h: 9, w: 12, x: 12, y: 11 }
) )
@ -299,7 +310,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets', graphTitle='Rate of Received Packets',
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 0, y: 21 } gridPos={ h: 10, w: 12, x: 0, y: 21 }
@ -307,7 +318,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets', graphTitle='Rate of Transmitted Packets',
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 12, y: 21 } gridPos={ h: 10, w: 12, x: 12, y: 21 }
@ -319,7 +330,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets Dropped', graphTitle='Rate of Received Packets Dropped',
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 0, y: 32 } gridPos={ h: 10, w: 12, x: 0, y: 32 }
@ -327,7 +338,7 @@ local singlestat = grafana.singlestat;
.addPanel( .addPanel(
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped', graphTitle='Rate of Transmitted Packets Dropped',
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)', graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 10, w: 12, x: 12, y: 32 } gridPos={ h: 10, w: 12, x: 12, y: 32 }

View File

@ -96,11 +96,20 @@ local singlestat = grafana.singlestat;
}, },
}; };
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
hide=if $._config.showMultiCluster then '' else '2',
refresh=1
);
local namespaceTemplate = local namespaceTemplate =
template.new( template.new(
name='namespace', name='namespace',
datasource='$datasource', datasource='$datasource',
query='label_values(container_network_receive_packets_total, namespace)', query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
allValues='.+', allValues='.+',
current='kube-system', current='kube-system',
hide='', hide='',
@ -111,7 +120,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(container_network_receive_packets_total, namespace)', definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -119,7 +128,7 @@ local singlestat = grafana.singlestat;
template.new( template.new(
name='workload', name='workload',
datasource='$datasource', datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)', query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
current='', current='',
hide='', hide='',
refresh=1, refresh=1,
@ -129,7 +138,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)', definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -137,7 +146,7 @@ local singlestat = grafana.singlestat;
template.new( template.new(
name='type', name='type',
datasource='$datasource', datasource='$datasource',
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)', query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
current='deployment', current='deployment',
hide='', hide='',
refresh=1, refresh=1,
@ -147,7 +156,7 @@ local singlestat = grafana.singlestat;
auto: false, auto: false,
auto_count: 30, auto_count: 30,
auto_min: '10s', auto_min: '10s',
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)', definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
skipUrlSync: false, skipUrlSync: false,
}; };
@ -274,6 +283,7 @@ local singlestat = grafana.singlestat;
type: 'datasource', type: 'datasource',
}, },
) )
.addTemplate(clusterTemplate)
.addTemplate(namespaceTemplate) .addTemplate(namespaceTemplate)
.addTemplate(workloadTemplate) .addTemplate(workloadTemplate)
.addTemplate(typeTemplate) .addTemplate(typeTemplate)
@ -285,10 +295,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Received', graphTitle='Current Rate of Bytes Received',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
legendFormat='{{ pod }}', legendFormat='{{ pod }}',
), ),
gridPos={ h: 9, w: 12, x: 0, y: 1 } gridPos={ h: 9, w: 12, x: 0, y: 1 }
@ -297,10 +307,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Current Rate of Bytes Transmitted', graphTitle='Current Rate of Bytes Transmitted',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
legendFormat='{{ pod }}', legendFormat='{{ pod }}',
), ),
gridPos={ h: 9, w: 12, x: 12, y: 1 } gridPos={ h: 9, w: 12, x: 12, y: 1 }
@ -311,10 +321,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Received', graphTitle='Average Rate of Bytes Received',
graphQuery=||| graphQuery=|||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
legendFormat='{{ pod }}', legendFormat='{{ pod }}',
), ),
gridPos={ h: 9, w: 12, x: 0, y: 11 } gridPos={ h: 9, w: 12, x: 0, y: 11 }
@ -323,10 +333,10 @@ local singlestat = grafana.singlestat;
newBarplotPanel( newBarplotPanel(
graphTitle='Average Rate of Bytes Transmitted', graphTitle='Average Rate of Bytes Transmitted',
graphQuery=||| graphQuery=|||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
legendFormat='{{ pod }}', legendFormat='{{ pod }}',
), ),
gridPos={ h: 9, w: 12, x: 12, y: 11 } gridPos={ h: 9, w: 12, x: 12, y: 11 }
@ -340,10 +350,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Receive Bandwidth', graphTitle='Receive Bandwidth',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
), ),
gridPos={ h: 9, w: 12, x: 0, y: 12 } gridPos={ h: 9, w: 12, x: 0, y: 12 }
) )
@ -351,10 +361,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Transmit Bandwidth', graphTitle='Transmit Bandwidth',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
), ),
gridPos={ h: 9, w: 12, x: 12, y: 12 } gridPos={ h: 9, w: 12, x: 12, y: 12 }
) )
@ -364,10 +374,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets', graphTitle='Rate of Received Packets',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 0, y: 22 } gridPos={ h: 9, w: 12, x: 0, y: 22 }
@ -376,10 +386,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets', graphTitle='Rate of Transmitted Packets',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 12, y: 22 } gridPos={ h: 9, w: 12, x: 12, y: 22 }
@ -392,10 +402,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Received Packets Dropped', graphTitle='Rate of Received Packets Dropped',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 0, y: 23 } gridPos={ h: 9, w: 12, x: 0, y: 23 }
@ -404,10 +414,10 @@ local singlestat = grafana.singlestat;
newGraphPanel( newGraphPanel(
graphTitle='Rate of Transmitted Packets Dropped', graphTitle='Rate of Transmitted Packets Dropped',
graphQuery=||| graphQuery=|||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution]) sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|||, ||| % $._config,
graphFormat='pps' graphFormat='pps'
), ),
gridPos={ h: 9, w: 12, x: 12, y: 23 } gridPos={ h: 9, w: 12, x: 12, y: 23 }

View File

@ -44,12 +44,12 @@ local template = grafana.template;
]; ];
local networkColumns = [ local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
]; ];
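Same substitution mechanism, now also covering the namespace label and the Grafana interval variable. A minimal sketch of how one networkColumns entry expands, with assumed values (the actual defaults live in the mixin's config):

local config = {
  clusterLabel: 'cluster',                 // assumed
  namespaceLabel: 'namespace',             // assumed
  grafanaIntervalVar: '$__rate_interval',  // assumed; could equally be '$__interval'
};

'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % config
// -> 'sum(irate(container_network_receive_bytes_total{cluster="$cluster", namespace=~".+"}[$__rate_interval])) by (namespace)'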
local networkTableStyles = { local networkTableStyles = {
@ -96,7 +96,7 @@ local template = grafana.template;
}) })
.addPanel( .addPanel(
g.panel('CPU Utilisation') + g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) + g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s]))' % $._config) +
{ interval: $._config.grafanaK8s.minimumTimeInterval }, { interval: $._config.grafanaK8s.minimumTimeInterval },
) )
.addPanel( .addPanel(
@ -192,7 +192,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -201,7 +201,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -210,7 +210,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Namespace: Received') + g.panel('Average Container Bandwidth by Namespace: Received') +
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -219,7 +219,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Namespace: Transmitted') + g.panel('Average Container Bandwidth by Namespace: Transmitted') +
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -228,7 +228,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -237,7 +237,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -246,7 +246,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -255,7 +255,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )

View File

@ -24,7 +24,7 @@ local template = grafana.template;
}) })
.addPanel( .addPanel(
g.panel('CPU Utilisation') + g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config) g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[%(grafanaIntervalVar)s]))' % $._config)
) )
.addPanel( .addPanel(
g.panel('CPU Requests Commitment') + g.panel('CPU Requests Commitment') +

View File

@ -36,12 +36,12 @@ local template = grafana.template;
}; };
local networkColumns = [ local networkColumns = [
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
]; ];
local networkTableStyles = { local networkTableStyles = {
@ -244,7 +244,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -253,7 +253,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -262,7 +262,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -271,7 +271,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -280,7 +280,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )
@ -289,7 +289,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps') },
) )

View File

@ -213,7 +213,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
@ -222,7 +222,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
@ -231,7 +231,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
@ -240,7 +240,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
@ -249,7 +249,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
@ -258,7 +258,7 @@ local template = grafana.template;
g.row('Network') g.row('Network')
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
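These panels also pin interval to $._config.grafanaK8s.minimumTimeInterval, which acts as a lower bound for Grafana's $__interval on the panel. A minimal sketch of the assumed config shape ('1m' is illustrative, not necessarily the mixin's default):

{
  _config+:: {
    grafanaK8s+:: {
      minimumTimeInterval: '1m',  // smallest interval Grafana will use when computing $__interval for these panels
    },
  },
}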

View File

@ -59,32 +59,32 @@ local template = grafana.template;
local networkColumns = [ local networkColumns = [
||| |||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
||| % $._config, ||| % $._config,
@ -285,7 +285,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -298,7 +298,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -311,7 +311,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Workload: Received') + g.panel('Average Container Bandwidth by Workload: Received') +
g.queryPanel(||| g.queryPanel(|||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -324,7 +324,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Workload: Transmitted') + g.panel('Average Container Bandwidth by Workload: Transmitted') +
g.queryPanel(||| g.queryPanel(|||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -337,7 +337,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -350,7 +350,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -363,7 +363,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
@ -376,7 +376,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
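The change running through all of the panels above is mechanical: the hard-coded $__interval range selector is replaced by the %(grafanaIntervalVar)s placeholder, so the rate window becomes configurable from the mixin's _config. A minimal sketch of how one of these templated queries evaluates once % $._config is applied, using illustrative config values (clusterLabel: 'cluster', namespaceLabel: 'namespace', grafanaIntervalVar: '$__rate_interval' are assumptions here; the real defaults live in the vendored kubernetes-mixin and may differ):

    local config = {
      clusterLabel: 'cluster',                 // assumed default
      namespaceLabel: 'namespace',             // assumed default
      grafanaIntervalVar: '$__rate_interval',  // assumed default
    };
    |||
      (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
      * on (namespace,pod)
      group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
    ||| % config
    // evaluates to the PromQL string:
    //   (sum(irate(container_network_receive_bytes_total{cluster="$cluster", namespace=~"$namespace"}[$__rate_interval])
    //   * on (namespace,pod)
    //   group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace=~"$namespace", workload_type="$type"}) by (workload))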

View File

@ -61,32 +61,32 @@ local template = grafana.template;
local networkColumns = [ local networkColumns = [
||| |||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
||| |||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, ||| % $._config,
@ -227,7 +227,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -240,7 +240,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -253,7 +253,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Pod: Received') + g.panel('Average Container Bandwidth by Pod: Received') +
g.queryPanel(||| g.queryPanel(|||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -266,7 +266,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Average Container Bandwidth by Pod: Transmitted') + g.panel('Average Container Bandwidth by Pod: Transmitted') +
g.queryPanel(||| g.queryPanel(|||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -279,7 +279,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -292,7 +292,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -305,7 +305,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +
@ -318,7 +318,7 @@ local template = grafana.template;
.addPanel( .addPanel(
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod)) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
||| % $._config, '{{pod}}') + ||| % $._config, '{{pod}}') +

View File

@ -14,7 +14,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local cpuStat = local cpuStat =
numbersinglestat.new( numbersinglestat.new(
'CPU', 'CPU',
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config, 'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
) )
.withSpanSize(4) .withSpanSize(4)
.withPostfix('cores') .withPostfix('cores')
@ -23,7 +23,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local memoryStat = local memoryStat =
numbersinglestat.new( numbersinglestat.new(
'Memory', 'Memory',
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config, 'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
) )
.withSpanSize(4) .withSpanSize(4)
.withPostfix('GB') .withPostfix('GB')
@ -32,7 +32,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
local networkStat = local networkStat =
numbersinglestat.new( numbersinglestat.new(
'Network', 'Network',
'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config, 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
) )
.withSpanSize(4) .withSpanSize(4)
.withPostfix('Bps') .withPostfix('Bps')

View File

@ -9,12 +9,6 @@
{ {
name: 'k8s.rules', name: 'k8s.rules',
rules: [ rules: [
{
record: 'namespace:container_cpu_usage_seconds_total:sum_rate',
expr: |||
sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])) by (namespace)
||| % $._config,
},
{ {
// Reduces cardinality of this timeseries by #cores, which makes it // Reduces cardinality of this timeseries by #cores, which makes it
// more useable in dashboards. Also, allows us to do things like // more useable in dashboards. Also, allows us to do things like
@ -64,12 +58,6 @@
) )
||| % $._config, ||| % $._config,
}, },
{
record: 'namespace:container_memory_usage_bytes:sum',
expr: |||
sum(container_memory_usage_bytes{%(cadvisorSelector)s, image!="", container!="POD"}) by (namespace)
||| % $._config,
},
{ {
record: 'namespace:kube_pod_container_resource_requests_memory_bytes:sum', record: 'namespace:kube_pod_container_resource_requests_memory_bytes:sum',
expr: ||| expr: |||

View File

@ -116,15 +116,6 @@
}, },
} }
for verb in verbs for verb in verbs
] + [
{
record: 'cluster:apiserver_request_duration_seconds:mean5m',
expr: |||
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
/
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
||| % ($._config),
},
] + [ ] + [
{ {
record: 'cluster_quantile:apiserver_request_duration_seconds:histogram_quantile', record: 'cluster_quantile:apiserver_request_duration_seconds:histogram_quantile',

View File

@ -10,14 +10,6 @@
{ {
name: 'node.rules', name: 'node.rules',
rules: [ rules: [
{
// Number of nodes in the cluster
// SINCE 2018-02-08
record: ':kube_pod_info_node_count:',
expr: |||
sum(min(kube_pod_info{node!=""}) by (%(clusterLabel)s, node))
||| % $._config,
},
{ {
// This rule results in the tuples (node, namespace, instance) => 1. // This rule results in the tuples (node, namespace, instance) => 1.
// It is used to calculate per-node metrics, given namespace & instance. // It is used to calculate per-node metrics, given namespace & instance.

View File

@ -9,7 +9,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
commonLabels:: { commonLabels:: {
'app.kubernetes.io/name': 'kube-state-metrics', 'app.kubernetes.io/name': 'kube-state-metrics',
'app.kubernetes.io/version': ksm.version, 'app.kubernetes.io/version': 'v' + ksm.version,
}, },
podLabels:: { podLabels:: {
@ -58,6 +58,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'daemonsets', 'daemonsets',
'deployments', 'deployments',
'replicasets', 'replicasets',
'ingresses',
]) + ]) +
rulesType.withVerbs(['list', 'watch']), rulesType.withVerbs(['list', 'watch']),
@ -134,14 +135,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
rulesType.withApiGroups(['networking.k8s.io']) + rulesType.withApiGroups(['networking.k8s.io']) +
rulesType.withResources([ rulesType.withResources([
'networkpolicies', 'networkpolicies',
'ingresses',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['coordination.k8s.io']) +
rulesType.withResources([
'leases',
]) + ]) +
rulesType.withVerbs(['list', 'watch']), rulesType.withVerbs(['list', 'watch']),
]; ];
@ -171,8 +164,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.mixin.readinessProbe.httpGet.withPath('/') + container.mixin.readinessProbe.httpGet.withPath('/') +
container.mixin.readinessProbe.httpGet.withPort(8081) + container.mixin.readinessProbe.httpGet.withPort(8081) +
container.mixin.readinessProbe.withInitialDelaySeconds(5) + container.mixin.readinessProbe.withInitialDelaySeconds(5) +
container.mixin.readinessProbe.withTimeoutSeconds(5) + container.mixin.readinessProbe.withTimeoutSeconds(5);
container.mixin.securityContext.withRunAsUser(65534);
deployment.new(ksm.name, 1, c, ksm.commonLabels) + deployment.new(ksm.name, 1, c, ksm.commonLabels) +
deployment.mixin.metadata.withNamespace(ksm.namespace) + deployment.mixin.metadata.withNamespace(ksm.namespace) +
@ -228,7 +220,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
roleBinding.new() + roleBinding.new() +
roleBinding.mixin.metadata.withName(ksm.name) + roleBinding.mixin.metadata.withName(ksm.name) +
roleBinding.mixin.metadata.withNamespace(ksm.namespace) +
roleBinding.mixin.metadata.withLabels(ksm.commonLabels) + roleBinding.mixin.metadata.withLabels(ksm.commonLabels) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName(ksm.name) + roleBinding.mixin.roleRef.withName(ksm.name) +
@ -245,7 +236,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'--pod=$(POD_NAME)', '--pod=$(POD_NAME)',
'--pod-namespace=$(POD_NAMESPACE)', '--pod-namespace=$(POD_NAMESPACE)',
]) + ]) +
container.mixin.securityContext.withRunAsUser(65534) +
container.withEnv([ container.withEnv([
containerEnv.new('POD_NAME') + containerEnv.new('POD_NAME') +
containerEnv.mixin.valueFrom.fieldRef.withFieldPath('metadata.name'), containerEnv.mixin.valueFrom.fieldRef.withFieldPath('metadata.name'),

View File

@ -1,5 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
_config+:: { _config+:: {
namespace: 'default', namespace: 'default',
@ -42,30 +40,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
repeat_interval: '12h', repeat_interval: '12h',
receiver: 'Default', receiver: 'Default',
routes: [ routes: [
{ { receiver: 'Watchdog', match: { alertname: 'Watchdog' } },
receiver: 'Watchdog', { receiver: 'Critical', match: { severity: 'critical' } },
match: {
alertname: 'Watchdog',
},
},
{
receiver: 'Critical',
match: {
severity: 'critical',
},
},
], ],
}, },
receivers: [ receivers: [
{ { name: 'Default' },
name: 'Default', { name: 'Watchdog' },
}, { name: 'Critical' },
{
name: 'Watchdog',
},
{
name: 'Critical',
},
], ],
}, },
replicas: 3, replicas: 3,
@ -73,84 +55,93 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
}, },
alertmanager+:: { alertmanager+:: {
secret: secret: {
local secret = k.core.v1.secret; apiVersion: 'v1',
kind: 'Secret',
type: 'Opaque',
metadata: {
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: $._config.namespace,
},
stringData: {
'alertmanager.yaml': if std.type($._config.alertmanager.config) == 'object'
then
std.manifestYamlDoc($._config.alertmanager.config)
else
$._config.alertmanager.config,
},
},
if std.type($._config.alertmanager.config) == 'object' then serviceAccount: {
secret.new('alertmanager-' + $._config.alertmanager.name, {}) apiVersion: 'v1',
.withStringData({ 'alertmanager.yaml': std.manifestYamlDoc($._config.alertmanager.config) }) + kind: 'ServiceAccount',
secret.mixin.metadata.withNamespace($._config.namespace) metadata: {
else name: 'alertmanager-' + $._config.alertmanager.name,
secret.new('alertmanager-' + $._config.alertmanager.name, {}) namespace: $._config.namespace,
.withStringData({ 'alertmanager.yaml': $._config.alertmanager.config }) + },
secret.mixin.metadata.withNamespace($._config.namespace), },
serviceAccount: service: {
local serviceAccount = k.core.v1.serviceAccount; apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: $._config.namespace,
labels: { alertmanager: $._config.alertmanager.name },
},
spec: {
ports: [
{ name: 'web', targetPort: 'web', port: 9093 },
],
selector: { app: 'alertmanager', alertmanager: $._config.alertmanager.name },
sessionAffinity: 'ClientIP',
},
},
serviceAccount.new('alertmanager-' + $._config.alertmanager.name) + serviceMonitor: {
serviceAccount.mixin.metadata.withNamespace($._config.namespace), apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
service: metadata: {
local service = k.core.v1.service; name: 'alertmanager',
local servicePort = k.core.v1.service.mixin.spec.portsType; namespace: $._config.namespace,
labels: {
local alertmanagerPort = servicePort.newNamed('web', 9093, 'web'); 'k8s-app': 'alertmanager',
service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'alertmanager',
namespace: $._config.namespace,
labels: {
'k8s-app': 'alertmanager',
},
},
spec: {
selector: {
matchLabels: {
alertmanager: $._config.alertmanager.name,
},
},
endpoints: [
{
port: 'web',
interval: '30s',
},
],
}, },
}, },
spec: {
alertmanager: selector: {
{ matchLabels: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Alertmanager',
metadata: {
name: $._config.alertmanager.name,
namespace: $._config.namespace,
labels: {
alertmanager: $._config.alertmanager.name, alertmanager: $._config.alertmanager.name,
}, },
}, },
spec: { endpoints: [
replicas: $._config.alertmanager.replicas, { port: 'web', interval: '30s' },
version: $._config.versions.alertmanager, ],
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager, },
nodeSelector: { 'kubernetes.io/os': 'linux' }, },
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
securityContext: { alertmanager: {
runAsUser: 1000, apiVersion: 'monitoring.coreos.com/v1',
runAsNonRoot: true, kind: 'Alertmanager',
fsGroup: 2000, metadata: {
}, name: $._config.alertmanager.name,
namespace: $._config.namespace,
labels: {
alertmanager: $._config.alertmanager.name,
}, },
}, },
spec: {
replicas: $._config.alertmanager.replicas,
version: $._config.versions.alertmanager,
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager,
nodeSelector: { 'kubernetes.io/os': 'linux' },
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
securityContext: {
runAsUser: 1000,
runAsNonRoot: true,
fsGroup: 2000,
},
},
},
}, },
} }
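The inlined Secret above keeps the behaviour of the old ksonnet-based version: when $._config.alertmanager.config is a jsonnet object it is serialized to YAML with std.manifestYamlDoc, otherwise the raw string is written as-is. A minimal standalone sketch of that branch, with an illustrative config object rather than the one shipped in this repo:

    local exampleConfig = {  // illustrative only
      route: { receiver: 'Default', group_by: ['namespace'] },
      receivers: [{ name: 'Default' }],
    };
    {
      // same conditional as the stringData field in the Secret above
      'alertmanager.yaml': if std.type(exampleConfig) == 'object'
                           then std.manifestYamlDoc(exampleConfig)
                           else exampleConfig,
    }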

View File

@ -1,57 +0,0 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'alertmanager.rules',
rules: [
{
alert: 'AlertmanagerConfigInconsistent',
annotations: {
message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
},
expr: |||
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
{
alert: 'AlertmanagerFailedReload',
annotations: {
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
},
expr: |||
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'AlertmanagerMembersInconsistent',
annotations: {
message: 'Alertmanager has not found all other members of the cluster.',
},
expr: |||
alertmanager_cluster_members{%(alertmanagerSelector)s}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
],
},
],
},
}

View File

@ -1,3 +1,2 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') + (import 'general.libsonnet') +
(import 'node.libsonnet') (import 'node.libsonnet')

View File

@ -26,7 +26,7 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "release-0.42" "version": "release-0.44"
}, },
{ {
"source": { "source": {
@ -37,16 +37,6 @@
}, },
"version": "master" "version": "master"
}, },
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master",
"name": "ksonnet"
},
{ {
"source": { "source": {
"git": { "git": {
@ -63,7 +53,7 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "master" "version": "release-1.9"
}, },
{ {
"source": { "source": {
@ -90,8 +80,27 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "release-2.20", "version": "release-2.23",
"name": "prometheus" "name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "master",
"name": "alertmanager"
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos",
"subdir": "mixin"
}
},
"version": "release-0.17"
} }
], ],
"legacyImports": true "legacyImports": true

View File

@ -1,14 +1,7 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
_config+:: { _config+:: {
versions+:: { versions+:: { clusterVerticalAutoscaler: '0.8.1' },
clusterVerticalAutoscaler: "v0.8.1" imageRepos+:: { clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64' },
},
imageRepos+:: {
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
},
kubeStateMetrics+:: { kubeStateMetrics+:: {
stepCPU: '1m', stepCPU: '1m',
@ -16,103 +9,120 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
}, },
}, },
ksmAutoscaler+:: { ksmAutoscaler+:: {
clusterRole: clusterRole: {
local clusterRole = k.rbac.v1.clusterRole; apiVersion: 'rbac.authorization.k8s.io/v1',
local rulesType = clusterRole.rulesType; kind: 'ClusterRole',
metadata: { name: 'ksm-autoscaler' },
rules: [{
apiGroups: [''],
resources: ['nodes'],
verbs: ['list', 'watch'],
}],
},
local rules = [ clusterRoleBinding: {
rulesType.new() + apiVersion: 'rbac.authorization.k8s.io/v1',
rulesType.withApiGroups(['']) + kind: 'ClusterRoleBinding',
rulesType.withResources([ metadata: { name: 'ksm-autoscaler' },
'nodes', roleRef: {
]) + apiGroup: 'rbac.authorization.k8s.io',
rulesType.withVerbs(['list', 'watch']), kind: 'ClusterRole',
]; name: 'ksm-autoscaler',
},
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }],
},
clusterRole.new() + roleBinding: {
clusterRole.mixin.metadata.withName('ksm-autoscaler') + apiVersion: 'rbac.authorization.k8s.io/v1',
clusterRole.withRules(rules), kind: 'RoleBinding',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'ksm-autoscaler',
},
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }],
},
clusterRoleBinding: role: {
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'ksm-autoscaler',
namespace: $._config.namespace,
},
rules: [
{
apiGroups: ['extensions'],
resources: ['deployments'],
verbs: ['patch'],
resourceNames: ['kube-state-metrics'],
},
{
apiGroups: ['apps'],
resources: ['deployments'],
verbs: ['patch'],
resourceNames: ['kube-state-metrics'],
},
],
},
clusterRoleBinding.new() + serviceAccount: {
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') + apiVersion: 'v1',
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + kind: 'ServiceAccount',
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') + metadata: {
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + name: 'ksm-autoscaler',
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]), namespace: $._config.namespace,
},
},
roleBinding:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
role:
local role = k.rbac.v1.role;
local rulesType = role.rulesType;
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local rules = [extensionsRule, appsRule];
role.new() +
role.mixin.metadata.withName('ksm-autoscaler') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRules(rules),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('ksm-autoscaler') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
deployment: deployment:
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local podLabels = { app: 'ksm-autoscaler' }; local podLabels = { app: 'ksm-autoscaler' };
local c = {
local kubeStateMetricsAutoscaler = name: 'ksm-autoscaler',
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) + image: $._config.imageRepos.clusterVerticalAutoscaler + ':v' + $._config.versions.clusterVerticalAutoscaler,
container.withArgs([ args: [
'/cpvpa', '/cpvpa',
'--target=deployment/kube-state-metrics', '--target=deployment/kube-state-metrics',
'--namespace=' + $._config.namespace, '--namespace=' + $._config.namespace,
'--logtostderr=true', '--logtostderr=true',
'--poll-period-seconds=10', '--poll-period-seconds=10',
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}' '--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}',
]) + ],
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'}); resources: {
requests: { cpu: '20m', memory: '10Mi' },
local c = [kubeStateMetricsAutoscaler]; },
};
deployment.new('ksm-autoscaler', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) + {
deployment.mixin.metadata.withLabels(podLabels) + apiVersion: 'apps/v1',
deployment.mixin.spec.selector.withMatchLabels(podLabels) + kind: 'Deployment',
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) + metadata: {
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) + name: 'ksm-autoscaler',
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) + namespace: $._config.namespace,
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'), labels: podLabels,
},
spec: {
replicas: 1,
selector: { matchLabels: podLabels },
template: {
metadata: {
labels: podLabels,
},
spec: {
containers: [c],
serviceAccount: 'ksm-autoscaler',
nodeSelector: { 'kubernetes.io/os': 'linux' },
securityContext: {
runAsNonRoot: true,
runAsUser: 65534,
},
},
},
},
},
}, },
} }
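The --default-config flag above packs the kube-state-metrics request/limit scaling policy into one JSON string built by concatenation. As a readability aid, a sketch of the equivalent structure rendered with std.manifestJsonEx; only stepCPU: '1m' is visible in this diff, the other base/step values are placeholders:

    local ksm = {
      baseCPU: '100m',      // placeholder
      stepCPU: '1m',        // from _config.kubeStateMetrics above
      baseMemory: '150Mi',  // placeholder
      stepMemory: '30Mi',   // placeholder
    };
    std.manifestJsonEx({
      'kube-state-metrics': {
        requests: {
          cpu: { base: ksm.baseCPU, step: ksm.stepCPU, nodesPerStep: 1 },
          memory: { base: ksm.baseMemory, step: ksm.stepMemory, nodesPerStep: 1 },
        },
        limits: {
          cpu: { base: ksm.baseCPU, step: ksm.stepCPU, nodesPerStep: 1 },
          memory: { base: ksm.baseMemory, step: ksm.stepMemory, nodesPerStep: 1 },
        },
      },
    }, '  ')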

View File

@ -1,20 +1,11 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
prometheus+:: { prometheus+:: {
clusterRole+: { clusterRole+: {
rules+: rules+: [{
local role = k.rbac.v1.role; apiGroups: [''],
local policyRule = role.rulesType; resources: ['services', 'endpoints', 'pods'],
local rule = policyRule.new() + verbs: ['get', 'list', 'watch'],
policyRule.withApiGroups(['']) + }],
policyRule.withResources([ },
'services', },
'endpoints', }
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
[rule]
},
}
}

View File

@ -1,23 +1,22 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local statefulSet = k.apps.v1.statefulSet;
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{ {
local antiaffinity(key, values, namespace) = { local antiaffinity(key, values, namespace) = {
affinity: { affinity: {
podAntiAffinity: { podAntiAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [ preferredDuringSchedulingIgnoredDuringExecution: [
affinity.new() + {
affinity.withWeight(100) + weight: 100,
affinity.mixin.podAffinityTerm.withNamespaces(namespace) + podAffinityTerm: {
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') + namespaces: [namespace],
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([ topologyKey: 'kubernetes.io/hostname',
matchExpression.new() + labelSelector: {
matchExpression.withKey(key) + matchExpressions: [{
matchExpression.withOperator('In') + key: key,
matchExpression.withValues(values), operator: 'In',
]), values: values,
}],
},
},
},
], ],
}, },
}, },
@ -30,12 +29,12 @@ local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpres
}, },
}, },
prometheus+: { prometheus+:: {
local p = self, local p = self,
prometheus+: { prometheus+: {
spec+: spec+:
antiaffinity('prometheus', [p.name], p.namespace), antiaffinity('prometheus', [$._config.prometheus.name], $._config.namespace),
}, },
}, },
} }

View File

@ -1,23 +1,42 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local service(name, namespace, labels, selector, ports) = {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{ {
prometheus+:: { prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService: kubeControllerManagerPrometheusDiscoveryService: service(
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) + 'kube-controller-manager-prometheus-discovery',
service.mixin.metadata.withNamespace('kube-system') + 'kube-system',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + { 'k8s-app': 'kube-controller-manager' },
service.mixin.spec.withClusterIp('None'), { 'k8s-app': 'kube-controller-manager' },
kubeSchedulerPrometheusDiscoveryService: [{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) + ),
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + kubeSchedulerPrometheusDiscoveryService: service(
service.mixin.spec.withClusterIp('None'), 'kube-scheduler-prometheus-discovery',
kubeDnsPrometheusDiscoveryService: 'kube-system',
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + { 'k8s-app': 'kube-scheduler' },
service.mixin.metadata.withNamespace('kube-system') + { 'k8s-app': 'kube-scheduler' },
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + [{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
service.mixin.spec.withClusterIp('None'), ),
kubeDnsPrometheusDiscoveryService: service(
'kube-dns-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-dns' },
{ 'k8s-app': 'kube-dns' },
[{ name: 'http-metrics-skydns', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
),
}, },
} }

View File

@ -9,9 +9,9 @@ local withImageRepository(repository) = {
if repository == null then image else repository + '/' + l.imageName(image), if repository == null then image else repository + '/' + l.imageName(image),
_config+:: { _config+:: {
imageRepos:: { imageRepos:: {
[field]: substituteRepository(oldRepos[field], repository), [field]: substituteRepository(oldRepos[field], repository)
for field in std.objectFields(oldRepos) for field in std.objectFields(oldRepos)
} },
}, },
}; };

View File

@ -1,11 +1,10 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
// Custom metrics API allows the HPA v2 to scale based on arbitrary metrics. // Custom metrics API allows the HPA v2 to scale based on arbitrary metrics.
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links // For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
{ {
_config+:: { _config+:: {
prometheusAdapter+:: { prometheusAdapter+:: {
namespace: $._config.namespace,
// Rules for custom-metrics // Rules for custom-metrics
config+:: { config+:: {
rules+: [ rules+: [
@ -14,19 +13,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
seriesFilters: [], seriesFilters: [],
resources: { resources: {
overrides: { overrides: {
namespace: { namespace: { resource: 'namespace' },
resource: 'namespace' pod: { resource: 'pod' },
},
pod: {
resource: 'pod'
}
}, },
}, },
name: { name: { matches: '^container_(.*)_seconds_total$', as: '' },
matches: '^container_(.*)_seconds_total$', metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
as: ""
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
}, },
{ {
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}', seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
@ -35,19 +27,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
], ],
resources: { resources: {
overrides: { overrides: {
namespace: { namespace: { resource: 'namespace' },
resource: 'namespace' pod: { resource: 'pod' },
},
pod: {
resource: 'pod'
}
}, },
}, },
name: { name: { matches: '^container_(.*)_total$', as: '' },
matches: '^container_(.*)_total$', metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
}, },
{ {
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}', seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
@ -56,60 +41,38 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
], ],
resources: { resources: {
overrides: { overrides: {
namespace: { namespace: { resource: 'namespace' },
resource: 'namespace' pod: { resource: 'pod' },
},
pod: {
resource: 'pod'
}
}, },
}, },
name: { name: { matches: '^container_(.*)$', as: '' },
matches: '^container_(.*)$', metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)'
}, },
{ {
seriesQuery: '{namespace!="",__name__!~"^container_.*"}', seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [ seriesFilters: [
{ isNot: '.*_total$' }, { isNot: '.*_total$' },
], ],
resources: { resources: { template: '<<.Resource>>' },
template: '<<.Resource>>' name: { matches: '', as: '' },
}, metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
name: {
matches: '',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
}, },
{ {
seriesQuery: '{namespace!="",__name__!~"^container_.*"}', seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [ seriesFilters: [
{ isNot: '.*_seconds_total' }, { isNot: '.*_seconds_total' },
], ],
resources: { resources: { template: '<<.Resource>>' },
template: '<<.Resource>>' name: { matches: '^(.*)_total$', as: '' },
}, metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
name: {
matches: '^(.*)_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
}, },
{ {
seriesQuery: '{namespace!="",__name__!~"^container_.*"}', seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [], seriesFilters: [],
resources: { resources: { template: '<<.Resource>>' },
template: '<<.Resource>>' name: { matches: '^(.*)_seconds_total$', as: '' },
}, metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
name: { },
matches: '^(.*)_seconds_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
}
], ],
}, },
}, },
@ -125,7 +88,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
spec: { spec: {
service: { service: {
name: $.prometheusAdapter.service.metadata.name, name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace, namespace: $._config.prometheusAdapter.namespace,
}, },
group: 'custom.metrics.k8s.io', group: 'custom.metrics.k8s.io',
version: 'v1beta1', version: 'v1beta1',
@ -143,7 +106,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
spec: { spec: {
service: { service: {
name: $.prometheusAdapter.service.metadata.name, name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace, namespace: $._config.prometheusAdapter.namespace,
}, },
group: 'custom.metrics.k8s.io', group: 'custom.metrics.k8s.io',
version: 'v1beta2', version: 'v1beta2',
@ -152,46 +115,51 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
versionPriority: 200, versionPriority: 200,
}, },
}, },
customMetricsClusterRoleServerResources: customMetricsClusterRoleServerResources: {
local clusterRole = k.rbac.v1.clusterRole; apiVersion: 'rbac.authorization.k8s.io/v1',
local policyRule = clusterRole.rulesType; kind: 'ClusterRole',
metadata: {
local rules = name: 'custom-metrics-server-resources',
policyRule.new() + },
policyRule.withApiGroups(['custom.metrics.k8s.io']) + rules: [{
policyRule.withResources(['*']) + apiGroups: ['custom.metrics.k8s.io'],
policyRule.withVerbs(['*']); resources: ['*'],
verbs: ['*'],
clusterRole.new() + }],
clusterRole.mixin.metadata.withName('custom-metrics-server-resources') + },
clusterRole.withRules(rules), customMetricsClusterRoleBindingServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
customMetricsClusterRoleBindingServerResources: kind: 'ClusterRoleBinding',
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; metadata: {
name: 'custom-metrics-server-resources',
clusterRoleBinding.new() + },
clusterRoleBinding.mixin.metadata.withName('custom-metrics-server-resources') + roleRef: {
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + apiGroup: 'rbac.authorization.k8s.io',
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') + kind: 'ClusterRole',
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + name: 'custom-metrics-server-resources',
clusterRoleBinding.withSubjects([{ },
subjects: [{
kind: 'ServiceAccount', kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name, name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace, namespace: $._config.prometheusAdapter.namespace,
}]), }],
},
customMetricsClusterRoleBindingHPA: customMetricsClusterRoleBindingHPA: {
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
clusterRoleBinding.new() + metadata: {
clusterRoleBinding.mixin.metadata.withName('hpa-controller-custom-metrics') + name: 'hpa-controller-custom-metrics',
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + },
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') + roleRef: {
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + apiGroup: 'rbac.authorization.k8s.io',
clusterRoleBinding.withSubjects([{ kind: 'ClusterRole',
name: 'custom-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount', kind: 'ServiceAccount',
name: 'horizontal-pod-autoscaler', name: 'horizontal-pod-autoscaler',
namespace: 'kube-system', namespace: 'kube-system',
}]), }],
} },
},
} }
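For orientation, the metricsQuery fields above are Go templates that the adapter fills in per request: <<.Series>> is the matched series name, <<.LabelMatchers>> the selector built from the requested Kubernetes objects, and <<.GroupBy>> the grouping label. A sketch of one such expansion, reproduced with plain jsonnet string formatting and purely illustrative values:

    // the '^container_(.*)_seconds_total$' rule, applied to an illustrative series
    local metricsQuery = 'sum(rate(%(series)s{%(labelMatchers)s,container!="POD"}[1m])) by (%(groupBy)s)';
    metricsQuery % {
      series: 'container_cpu_usage_seconds_total',             // illustrative
      labelMatchers: 'namespace="default",pod=~"example-.*"',  // filled in by the adapter per request
      groupBy: 'pod',
    }
    // renders: sum(rate(container_cpu_usage_seconds_total{namespace="default",pod=~"example-.*",container!="POD"}[1m])) by (pod)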

View File

@ -1,63 +1,70 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{ {
_config+:: { _config+:: {
eks: { eks: {
minimumAvailableIPs: 10, minimumAvailableIPs: 10,
minimumAvailableIPsTime: '10m' minimumAvailableIPsTime: '10m',
} },
}, },
prometheus+: { prometheus+: {
serviceMonitorCoreDNS+: { serviceMonitorCoreDNS+: {
spec+: { spec+: {
endpoints: [ endpoints: [
{ {
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
interval: "15s", interval: '15s',
targetPort: 9153 targetPort: 9153,
} },
] ],
},
},
AwsEksCniMetricService: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'aws-node',
namespace: 'kube-system',
labels: { 'k8s-app': 'aws-node' },
},
spec: {
ports: [
{ name: 'cni-metrics-port', port: 61678, targetPort: 61678 },
],
selector: { 'k8s-app': 'aws-node' },
clusterIP: 'None',
},
},
serviceMonitorAwsEksCNI: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'awsekscni',
namespace: $._config.namespace,
labels: {
'k8s-app': 'eks-cni',
}, },
}, },
AwsEksCniMetricService: spec: {
service.new('aws-node', { 'k8s-app' : 'aws-node' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) + jobLabel: 'k8s-app',
service.mixin.metadata.withNamespace('kube-system') + selector: {
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) + matchLabels: {
service.mixin.spec.withClusterIp('None'), 'k8s-app': 'aws-node',
serviceMonitorAwsEksCNI:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'awsekscni',
namespace: $._config.namespace,
labels: {
'k8s-app': 'eks-cni',
}, },
}, },
spec: { namespaceSelector: {
jobLabel: 'k8s-app', matchNames: [
selector: { 'kube-system',
matchLabels: {
'k8s-app': 'aws-node',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'cni-metrics-port',
interval: '30s',
path: '/metrics',
},
], ],
}, },
endpoints: [
{
port: 'cni-metrics-port',
interval: '30s',
path: '/metrics',
},
],
}, },
},
}, },
prometheusRules+: { prometheusRules+: {
groups+: [ groups+: [
@ -65,15 +72,15 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
name: 'kube-prometheus-eks.rules', name: 'kube-prometheus-eks.rules',
rules: [ rules: [
{ {
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs, expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
labels: { labels: {
severity: 'critical', severity: 'critical',
}, },
annotations: { annotations: {
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.' message: 'Instance {{ $labels.instance }} has less than 10 IPs available.',
}, },
'for': $._config.eks.minimumAvailableIPsTime, 'for': $._config.eks.minimumAvailableIPsTime,
alert: 'EksAvailableIPs' alert: 'EksAvailableIPs',
}, },
], ],
}, },

View File

@ -0,0 +1,95 @@
// External metrics API allows the HPA v2 to scale based on metrics coming from outside of the Kubernetes cluster.
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
{
_config+:: {
prometheusAdapter+:: {
namespace: $._config.namespace,
// Rules for external-metrics
config+:: {
externalRules+: [
// {
// seriesQuery: '{__name__=~"^.*_queue$",namespace!=""}',
// seriesFilters: [],
// resources: {
// overrides: {
// namespace: { resource: 'namespace' }
// },
// },
// name: { matches: '^.*_queue$', as: '$0' },
// metricsQuery: 'max(<<.Series>>{<<.LabelMatchers>>})',
// },
],
},
},
},
prometheusAdapter+:: {
externalMetricsApiService: {
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
name: 'v1beta1.external.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.prometheusAdapter.namespace,
},
group: 'external.metrics.k8s.io',
version: 'v1beta1',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 100,
},
},
externalMetricsClusterRoleServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: {
name: 'external-metrics-server-resources',
},
rules: [{
apiGroups: ['external.metrics.k8s.io'],
resources: ['*'],
verbs: ['*'],
}],
},
externalMetricsClusterRoleBindingServerResources: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'external-metrics-server-resources',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'external-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.prometheusAdapter.namespace,
}],
},
externalMetricsClusterRoleBindingHPA: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'hpa-controller-external-metrics',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'external-metrics-server-resources',
},
subjects: [{
kind: 'ServiceAccount',
name: 'horizontal-pod-autoscaler',
namespace: 'kube-system',
}],
},
},
}
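To actually serve an external metric, an overlay has to add at least one entry to `externalRules`; the sketch below simply enables the commented example from the file above (series name and query are illustrative) so the adapter exposes it through the external.metrics.k8s.io APIService registered here:

// Hypothetical overlay enabling one external metric for HPA consumption.
{
  _config+:: {
    prometheusAdapter+:: {
      config+:: {
        externalRules+: [
          {
            seriesQuery: '{__name__=~"^.*_queue$",namespace!=""}',
            seriesFilters: [],
            resources: { overrides: { namespace: { resource: 'namespace' } } },
            name: { matches: '^.*_queue$', as: '$0' },
            metricsQuery: 'max(<<.Series>>{<<.LabelMatchers>>})',
          },
        ],
      },
    },
  },
}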

View File

@ -0,0 +1,13 @@
(import './kube-prometheus-managed-cluster.libsonnet') + {
_config+:: {
prometheusAdapter+:: {
config+: {
resourceRules:: null,
},
},
},
prometheusAdapter+:: {
apiService:: null,
},
}

View File

@ -10,10 +10,7 @@
interval: '30s', interval: '30s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [ relabelings: [
{ { sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
], ],
}, },
{ {
@ -24,10 +21,7 @@
honorLabels: true, honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [ relabelings: [
{ { sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
], ],
metricRelabelings: [ metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see // Drop a bunch of metrics which are disabled but still sent, see

View File

@ -1,13 +1,20 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{ {
prometheus+:: { prometheus+:: {
kubeDnsPrometheusDiscoveryService: kubeDnsPrometheusDiscoveryService: {
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) + apiVersion: 'v1',
service.mixin.metadata.withNamespace('kube-system') + kind: 'Service',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + metadata: {
service.mixin.spec.withClusterIp('None'), name: 'kube-dns-prometheus-discovery',
namespace: 'kube-system',
labels: { 'k8s-app': 'kube-dns' },
},
spec: {
ports: [
{ name: 'metrics', port: 9153, targetPort: 9153 },
],
selector: { 'k8s-app': 'kube-dns' },
clusterIP: 'None',
},
},
}, },
} }

View File

@ -1,23 +1,40 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local service(name, namespace, labels, selector, ports) = {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{ {
prometheus+:: { prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService: kubeControllerManagerPrometheusDiscoveryService: service(
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) + 'kube-controller-manager-prometheus-discovery',
service.mixin.metadata.withNamespace('kube-system') + 'kube-system',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + { 'k8s-app': 'kube-controller-manager' },
service.mixin.spec.withClusterIp('None'), { 'k8s-app': 'kube-controller-manager' },
kubeSchedulerPrometheusDiscoveryService: [{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) + ),
service.mixin.metadata.withNamespace('kube-system') + kubeSchedulerPrometheusDiscoveryService: service(
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 'kube-scheduler-prometheus-discovery',
service.mixin.spec.withClusterIp('None'), 'kube-system',
kubeDnsPrometheusDiscoveryService: { 'k8s-app': 'kube-scheduler' },
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) + { 'k8s-app': 'kube-scheduler' },
service.mixin.metadata.withNamespace('kube-system') + [{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) + ),
service.mixin.spec.withClusterIp('None'), kubeDnsPrometheusDiscoveryService: service(
'kube-dns-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-dns' },
{ 'k8s-app': 'kube-dns' },
[{ name: 'metrics', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
),
}, },
} }

View File

@ -1,18 +1,33 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local service(name, namespace, labels, selector, ports) = {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{ {
prometheus+: { prometheus+: {
kubeControllerManagerPrometheusDiscoveryService: kubeControllerManagerPrometheusDiscoveryService: service(
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) + 'kube-controller-manager-prometheus-discovery',
service.mixin.metadata.withNamespace('kube-system') + 'kube-system',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + { 'k8s-app': 'kube-controller-manager' },
service.mixin.spec.withClusterIp('None'), { 'k8s-app': 'kube-controller-manager' },
kubeSchedulerPrometheusDiscoveryService: [{ name: 'https-metrics', port: 10257, targetPort: 10257 }],
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) + ),
service.mixin.metadata.withNamespace('kube-system') + kubeSchedulerPrometheusDiscoveryService: service(
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 'kube-scheduler-prometheus-discovery',
service.mixin.spec.withClusterIp('None'), 'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
}, },
} }

View File

@ -1,18 +1,33 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local service(name, namespace, labels, selector, ports) = {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{ {
prometheus+: { prometheus+: {
kubeControllerManagerPrometheusDiscoveryService: kubeControllerManagerPrometheusDiscoveryService: service(
service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) + 'kube-controller-manager-prometheus-discovery',
service.mixin.metadata.withNamespace('kube-system') + 'kube-system',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + { 'k8s-app': 'kube-controller-manager' },
service.mixin.spec.withClusterIp('None'), { component: 'kube-controller-manager' },
kubeSchedulerPrometheusDiscoveryService: [{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) + ),
service.mixin.metadata.withNamespace('kube-system') + kubeSchedulerPrometheusDiscoveryService: service(
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + 'kube-scheduler-prometheus-discovery',
service.mixin.spec.withClusterIp('None'), 'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ component: 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
}, },
} }

View File

@ -1,20 +1,36 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local service(name, namespace, labels, selector, ports) = {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
name: name,
namespace: namespace,
labels: labels,
},
spec: {
ports+: ports,
selector: selector,
clusterIP: 'None',
},
};
{ {
prometheus+: { prometheus+: {
kubeControllerManagerPrometheusDiscoveryService: kubeControllerManagerPrometheusDiscoveryService: service(
service.new('kube-controller-manager-prometheus-discovery', { 'component': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) + 'kube-controller-manager-prometheus-discovery',
service.mixin.metadata.withNamespace('kube-system') + 'kube-system',
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) + { 'k8s-app': 'kube-controller-manager' },
service.mixin.spec.withClusterIp('None'), { 'k8s-app': 'kube-controller-manager' },
kubeSchedulerPrometheusDiscoveryService: [{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
service.new('kube-scheduler-prometheus-discovery', { 'component': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) + ),
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) + kubeSchedulerPrometheusDiscoveryService: service(
service.mixin.spec.withClusterIp('None'), 'kube-scheduler-prometheus-discovery',
'kube-system',
{ 'k8s-app': 'kube-scheduler' },
{ 'k8s-app': 'kube-scheduler' },
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
),
serviceMonitorKubeScheduler+: { serviceMonitorKubeScheduler+: {
spec+: { spec+: {

View File

@ -1,21 +1,18 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local patch(ports) = {
local service = k.core.v1.service; spec+: {
local servicePort = k.core.v1.service.mixin.spec.portsType; ports: ports,
type: 'NodePort',
},
};
{ {
prometheus+: { prometheus+: {
service+: service+: patch([{ name: 'web', port: 9090, targetPort: 'web', nodePort: 30900 }]),
service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) +
service.mixin.spec.withType('NodePort'),
}, },
alertmanager+: { alertmanager+: {
service+: service+: patch([{ name: 'web', port: 9093, targetPort: 'web', nodePort: 30903 }]),
service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) +
service.mixin.spec.withType('NodePort'),
}, },
grafana+: { grafana+: {
service+: service+: patch([{ name: 'http', port: 3000, targetPort: 'http', nodePort: 30902 }]),
service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) +
service.mixin.spec.withType('NodePort'),
}, },
} }

View File

@ -1,5 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
(import 'github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet') + { (import 'github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet') + {
_config+:: { _config+:: {
etcd: { etcd: {
@ -12,88 +10,93 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
}, },
}, },
prometheus+:: { prometheus+:: {
serviceEtcd: serviceEtcd: {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379); name: 'etcd',
namespace: 'kube-system',
service.new('etcd', null, etcdServicePort) + labels: { 'k8s-app': 'etcd' },
service.mixin.metadata.withNamespace('kube-system') + },
service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + spec: {
service.mixin.spec.withClusterIp('None'), ports: [
endpointsEtcd: { name: 'metrics', targetPort: 2379, port: 2379 },
local endpoints = k.core.v1.endpoints; ],
local endpointSubset = endpoints.subsetsType; clusterIP: 'None',
local endpointPort = endpointSubset.portsType; },
},
local etcdPort = endpointPort.new() + endpointsEtcd: {
endpointPort.withName('metrics') + apiVersion: 'v1',
endpointPort.withPort(2379) + kind: 'Endpoints',
endpointPort.withProtocol('TCP'); metadata: {
name: 'etcd',
local subset = endpointSubset.new() + namespace: 'kube-system',
endpointSubset.withAddresses([ labels: { 'k8s-app': 'etcd' },
{ ip: etcdIP } },
for etcdIP in $._config.etcd.ips subsets: [{
]) + addresses: [
endpointSubset.withPorts(etcdPort); { ip: etcdIP }
for etcdIP in $._config.etcd.ips
endpoints.new() + ],
endpoints.mixin.metadata.withName('etcd') + ports: [
endpoints.mixin.metadata.withNamespace('kube-system') + { name: 'metrics', port: 2379, protocol: 'TCP' },
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) + ],
endpoints.withSubsets(subset), }],
serviceMonitorEtcd: },
{ serviceMonitorEtcd: {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor', kind: 'ServiceMonitor',
metadata: { metadata: {
name: 'etcd', name: 'etcd',
namespace: 'kube-system', namespace: 'kube-system',
labels: { labels: {
'k8s-app': 'etcd',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'metrics',
interval: '30s',
scheme: 'https',
// Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required as most likely your etcd metrics endpoints are secured.
tlsConfig: {
caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt',
keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key',
certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt',
[if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName,
[if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify,
},
},
],
selector: {
matchLabels: {
'k8s-app': 'etcd', 'k8s-app': 'etcd',
}, },
}, },
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'metrics',
interval: '30s',
scheme: 'https',
// Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required as most likely your etcd metrics endpoints are secured.
tlsConfig: {
caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt',
keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key',
certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt',
[if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName,
[if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify,
},
},
],
selector: {
matchLabels: {
'k8s-app': 'etcd',
},
},
},
}, },
secretEtcdCerts: },
secretEtcdCerts: {
// Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod. // Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod.
local secret = k.core.v1.secret; apiVersion: 'v1',
secret.new('kube-etcd-client-certs', { kind: 'Secret',
type: 'Opaque',
metadata: {
name: 'kube-etcd-client-certs',
namespace: $._config.namespace,
},
data: {
'etcd-client-ca.crt': std.base64($._config.etcd.clientCA), 'etcd-client-ca.crt': std.base64($._config.etcd.clientCA),
'etcd-client.key': std.base64($._config.etcd.clientKey), 'etcd-client.key': std.base64($._config.etcd.clientKey),
'etcd-client.crt': std.base64($._config.etcd.clientCert), 'etcd-client.crt': std.base64($._config.etcd.clientCert),
}) +
secret.mixin.metadata.withNamespace($._config.namespace),
prometheus+:
{
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
spec+: {
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],
},
}, },
},
prometheus+: {
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
spec+: {
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],
},
},
}, },
} }
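A minimal sketch of how a cluster-specific overlay might feed the fields consumed above (etcd IPs, client certificates, TLS options); the import path and certificate file names are assumptions:

// Hypothetical usage of the static etcd add-on shown above.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +  // assumed path of this file
{
  _config+:: {
    etcd+:: {
      ips: ['10.0.0.10', '10.0.0.11', '10.0.0.12'],
      clientCA: importstr 'etcd-client-ca.crt',
      clientKey: importstr 'etcd-client.key',
      clientCert: importstr 'etcd-client.crt',
      serverName: 'etcd.kube-system.svc.cluster.local',
    },
  },
}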

View File

@ -23,9 +23,9 @@
template+: { template+: {
spec+: { spec+: {
local addArgs(c) = local addArgs(c) =
if c.name == 'prometheus-operator' if c.name == 'prometheus-operator'
then c + {args+: ['--config-reloader-cpu=0']} then c { args+: ['--config-reloader-cpu=0'] }
else c, else c,
containers: std.map(addArgs, super.containers), containers: std.map(addArgs, super.containers),
}, },
}, },

View File

@ -1,15 +1,8 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet'; (import 'github.com/thanos-io/thanos/mixin/alerts/sidecar.libsonnet') +
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{ {
_config+:: { _config+:: {
versions+:: { versions+:: { thanos: 'v0.14.0' },
thanos: 'v0.14.0', imageRepos+:: { thanos: 'quay.io/thanos/thanos' },
},
imageRepos+:: {
thanos: 'quay.io/thanos/thanos',
},
thanos+:: { thanos+:: {
objectStorageConfig: { objectStorageConfig: {
key: 'thanos.yaml', // How the file inside the secret is called key: 'thanos.yaml', // How the file inside the secret is called
@ -18,23 +11,34 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
}, },
}, },
prometheus+:: { prometheus+:: {
local p = self,
// Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier // Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier
service+: { service+: {
spec+: { spec+: {
ports+: [ ports+: [
servicePort.newNamed('grpc', 10901, 10901), { name: 'grpc', port: 10901, targetPort: 10901 },
], ],
}, },
}, },
// Create a new service that exposes both sidecar's HTTP metrics port and gRPC StoreAPI // Create a new service that exposes both sidecar's HTTP metrics port and gRPC StoreAPI
serviceThanosSidecar: serviceThanosSidecar: {
local thanosGrpcSidecarPort = servicePort.newNamed('grpc', 10901, 10901); apiVersion: 'v1',
local thanosHttpSidecarPort = servicePort.newNamed('http', 10902, 10902); kind: 'Service',
service.new('prometheus-' + $._config.prometheus.name + '-thanos-sidecar', { app: 'prometheus', prometheus: $._config.prometheus.name }) + metadata: {
service.mixin.spec.withPorts([thanosGrpcSidecarPort, thanosHttpSidecarPort]) + name: 'prometheus-' + p.name + '-thanos-sidecar',
service.mixin.spec.withClusterIp('None') + namespace: p.namespace,
service.mixin.metadata.withLabels({'prometheus': $._config.prometheus.name, 'app': 'thanos-sidecar'}) + labels: { prometheus: p.name, app: 'thanos-sidecar' },
service.mixin.metadata.withNamespace($._config.namespace), },
spec: {
ports: [
{ name: 'grpc', port: 10901, targetPort: 10901 },
{ name: 'http', port: 10902, targetPort: 10902 },
],
selector: { app: 'prometheus', prometheus: p.name },
clusterIP: 'None',
},
},
prometheus+: { prometheus+: {
spec+: { spec+: {
thanos+: { thanos+: {
@ -50,7 +54,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
kind: 'ServiceMonitor', kind: 'ServiceMonitor',
metadata: { metadata: {
name: 'thanos-sidecar', name: 'thanos-sidecar',
namespace: $._config.namespace, namespace: p.namespace,
labels: { labels: {
'k8s-app': 'prometheus', 'k8s-app': 'prometheus',
}, },
@ -60,7 +64,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
jobLabel: 'app', jobLabel: 'app',
selector: { selector: {
matchLabels: { matchLabels: {
prometheus: $._config.prometheus.name, prometheus: p.name,
app: 'thanos-sidecar', app: 'thanos-sidecar',
}, },
}, },
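The sidecar reads its bucket configuration from the secret referenced by `objectStorageConfig`; a hedged sketch of wiring that up (only `key: 'thanos.yaml'` appears above, the secret name is an assumption):

// Hypothetical overlay pointing the Thanos sidecar at an object-storage secret.
{
  _config+:: {
    thanos+:: {
      objectStorageConfig: {
        key: 'thanos.yaml',            // file name inside the secret, as above
        name: 'thanos-objectstorage',  // assumed name of the Kubernetes secret
      },
    },
  },
}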

View File

@ -1,14 +1,21 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{ {
prometheus+: { prometheus+: {
serviceWeaveNet: serviceWeaveNet: {
service.new('weave-net', { 'name': 'weave-net' }, servicePort.newNamed('weave-net-metrics', 6782, 6782)) + apiVersion: 'v1',
service.mixin.metadata.withNamespace('kube-system') + kind: 'Service',
service.mixin.metadata.withLabels({ 'k8s-app': 'weave-net' }) + metadata: {
service.mixin.spec.withClusterIp('None'), name: 'weave-net',
namespace: 'kube-system',
labels: { 'k8s-app': 'weave-net' },
},
spec: {
ports: [
{ name: 'weave-net-metrics', targetPort: 6782, port: 6782 },
],
selector: { name: 'weave-net' },
clusterIP: 'None',
},
},
serviceMonitorWeaveNet: { serviceMonitorWeaveNet: {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor', kind: 'ServiceMonitor',

View File

@ -1,6 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
local configMapList = k3.core.v1.configMapList;
local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet'; local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') + (import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') +
@ -9,6 +6,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import './node-exporter/node-exporter.libsonnet') + (import './node-exporter/node-exporter.libsonnet') +
(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') + (import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') +
(import './alertmanager/alertmanager.libsonnet') + (import './alertmanager/alertmanager.libsonnet') +
(import 'github.com/prometheus/alertmanager/doc/alertmanager-mixin/mixin.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') +
(import './prometheus/prometheus.libsonnet') + (import './prometheus/prometheus.libsonnet') +
@ -16,69 +14,83 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') + (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +
(import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') + (import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') +
(import './alerts/alerts.libsonnet') + (import './alerts/alerts.libsonnet') +
(import './rules/rules.libsonnet') + { (import './rules/rules.libsonnet') +
{
kubePrometheus+:: { kubePrometheus+:: {
namespace: k.core.v1.namespace.new($._config.namespace), namespace: {
apiVersion: 'v1',
kind: 'Namespace',
metadata: {
name: $._config.namespace,
},
},
}, },
prometheusOperator+:: { prometheusOperator+::
service+: { {
spec+: { service+: {
ports: [ spec+: {
ports: [
{
name: 'https',
port: 8443,
targetPort: 'https',
},
],
},
},
serviceMonitor+: {
spec+: {
endpoints: [
{
port: 'https',
scheme: 'https',
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
},
},
],
},
},
clusterRole+: {
rules+: [
{ {
name: 'https', apiGroups: ['authentication.k8s.io'],
port: 8443, resources: ['tokenreviews'],
targetPort: 'https', verbs: ['create'],
},
{
apiGroups: ['authorization.k8s.io'],
resources: ['subjectaccessreviews'],
verbs: ['create'],
}, },
], ],
}, },
}, } +
serviceMonitor+: { (kubeRbacProxyContainer {
spec+: { config+:: {
endpoints: [ kubeRbacProxy: {
{ local cfg = self,
port: 'https', image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
scheme: 'https', name: 'kube-rbac-proxy',
honorLabels: true, securePortName: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', securePort: 8443,
tlsConfig: { secureListenAddress: ':%d' % self.securePort,
insecureSkipVerify: true, upstream: 'http://127.0.0.1:8080/',
}, tlsCipherSuites: $._config.tlsCipherSuites,
}, },
] },
}, }).deploymentMixin,
},
clusterRole+: {
rules+: [
{
apiGroups: ['authentication.k8s.io'],
resources: ['tokenreviews'],
verbs: ['create'],
},
{
apiGroups: ['authorization.k8s.io'],
resources: ['subjectaccessreviews'],
verbs: ['create'],
},
],
},
} +
(kubeRbacProxyContainer {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy',
securePortName: 'https',
securePort: 8443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8080/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin,
grafana+:: { grafana+:: {
dashboardDefinitions: configMapList.new(super.dashboardDefinitions), local dashboardDefinitions = super.dashboardDefinitions,
dashboardDefinitions: {
apiVersion: 'v1',
kind: 'ConfigMapList',
items: dashboardDefinitions,
},
serviceMonitor: { serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor', kind: 'ServiceMonitor',
@ -92,12 +104,10 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
app: 'grafana', app: 'grafana',
}, },
}, },
endpoints: [ endpoints: [{
{ port: 'http',
port: 'http', interval: '15s',
interval: '15s', }],
},
],
}, },
}, },
}, },
@ -105,14 +115,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
_config+:: { _config+:: {
namespace: 'default', namespace: 'default',
versions+:: { versions+:: { grafana: '7.3.5', kubeRbacProxy: 'v0.8.0' },
grafana: '7.1.0', imageRepos+:: { kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy' },
kubeRbacProxy: 'v0.6.0',
},
imageRepos+:: {
kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy',
},
tlsCipherSuites: [ tlsCipherSuites: [
'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721 'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721
@ -143,6 +147,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305',
], ],
runbookURLPattern: 'https://github.com/prometheus-operator/kube-prometheus/wiki/%s',
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"', cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"', kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"', kubeStateMetricsSelector: 'job="kube-state-metrics"',
@ -155,6 +161,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
coreDNSSelector: 'job="kube-dns"', coreDNSSelector: 'job="kube-dns"',
podLabel: 'pod', podLabel: 'pod',
alertmanagerName: '{{ $labels.namespace }}/{{ $labels.pod}}',
alertmanagerClusterLabels: 'namespace,service',
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"', alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"', prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
prometheusName: '{{$labels.namespace}}/{{$labels.pod}}', prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
@ -191,13 +199,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
limits: { cpu: '250m', memory: '180Mi' }, limits: { cpu: '250m', memory: '180Mi' },
}, },
}, },
prometheus+:: { prometheus+:: { rules: $.prometheusRules + $.prometheusAlerts },
rules: $.prometheusRules + $.prometheusAlerts, grafana+:: { dashboards: $.grafanaDashboards },
},
grafana+:: {
dashboards: $.grafanaDashboards,
},
}, },
} }
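For orientation, a hedged example of consuming this top-level file from a build jsonnet; the import path and output naming are assumptions patterned on the project's example files:

// Hypothetical example.jsonnet rendering a subset of the stack.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
  },
};

{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }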

View File

@ -1,8 +1,3 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
{ {
local krp = self, local krp = self,
config+:: { config+:: {
@ -33,17 +28,24 @@ local containerPort = container.portsType;
spec+: { spec+: {
template+: { template+: {
spec+: { spec+: {
containers+: [ containers+: [{
container.new(krp.config.kubeRbacProxy.name, krp.config.kubeRbacProxy.image) + name: krp.config.kubeRbacProxy.name,
container.mixin.securityContext.withRunAsUser(65534) + image: krp.config.kubeRbacProxy.image,
container.withArgs([ args: [
'--logtostderr', '--logtostderr',
'--secure-listen-address=' + krp.config.kubeRbacProxy.secureListenAddress, '--secure-listen-address=' + krp.config.kubeRbacProxy.secureListenAddress,
'--tls-cipher-suites=' + std.join(',', krp.config.kubeRbacProxy.tlsCipherSuites), '--tls-cipher-suites=' + std.join(',', krp.config.kubeRbacProxy.tlsCipherSuites),
'--upstream=' + krp.config.kubeRbacProxy.upstream, '--upstream=' + krp.config.kubeRbacProxy.upstream,
]) + ],
container.withPorts(containerPort.newNamed(krp.config.kubeRbacProxy.securePort, krp.config.kubeRbacProxy.securePortName)), ports: [
], { name: krp.config.kubeRbacProxy.securePortName, containerPort: krp.config.kubeRbacProxy.securePort },
],
securityContext: {
runAsUser: 65532,
runAsGroup: 65532,
runAsNonRoot: true,
},
}],
}, },
}, },
}, },

View File

@ -15,7 +15,7 @@ local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-
}, },
}, },
kubeStateMetrics+:: kubeStateMetrics+::
ksm + { ksm {
local version = self.version, local version = self.version,
name:: 'kube-state-metrics', name:: 'kube-state-metrics',
namespace:: $._config.namespace, namespace:: $._config.namespace,
@ -100,33 +100,33 @@ local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-
], ],
}, },
}, },
} + } +
(kubeRbacProxyContainer { (kubeRbacProxyContainer {
config+:: { config+:: {
kubeRbacProxy: { kubeRbacProxy: {
local cfg = self, local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy, image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-main', name: 'kube-rbac-proxy-main',
securePortName: 'https-main', securePortName: 'https-main',
securePort: 8443, securePort: 8443,
secureListenAddress: ':%d' % self.securePort, secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8081/', upstream: 'http://127.0.0.1:8081/',
tlsCipherSuites: $._config.tlsCipherSuites, tlsCipherSuites: $._config.tlsCipherSuites,
}, },
}, },
}).deploymentMixin + }).deploymentMixin +
(kubeRbacProxyContainer { (kubeRbacProxyContainer {
config+:: { config+:: {
kubeRbacProxy: { kubeRbacProxy: {
local cfg = self, local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy, image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-self', name: 'kube-rbac-proxy-self',
securePortName: 'https-self', securePortName: 'https-self',
securePort: 9443, securePort: 9443,
secureListenAddress: ':%d' % self.securePort, secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8082/', upstream: 'http://127.0.0.1:8082/',
tlsCipherSuites: $._config.tlsCipherSuites, tlsCipherSuites: $._config.tlsCipherSuites,
}, },
}, },
}).deploymentMixin, }).deploymentMixin,
} }

View File

@ -5,16 +5,16 @@ local imageName(image) =
local parts = std.split(image, '/'); local parts = std.split(image, '/');
local len = std.length(parts); local len = std.length(parts);
if len == 3 then if len == 3 then
# registry.com/org/image // registry.com/org/image
parts[2] parts[2]
else if len == 2 then else if len == 2 then
# org/image // org/image
parts[1] parts[1]
else if len == 1 then else if len == 1 then
# image, ie. busybox // image, ie. busybox
parts[0] parts[0]
else else
error 'unknown image format: ' + image; error 'unknown image format: ' + image;
{ {
imageName:: imageName, imageName:: imageName,
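A hedged usage sketch of the helper exported above (the import path is assumed):

// Hypothetical consumer of the imageName helper.
local lib = import 'kube-prometheus/lib/image.libsonnet';  // assumed path
{
  three_part: lib.imageName('registry.example.com/org/app'),  // -> 'app'
  two_part: lib.imageName('org/app'),                         // -> 'app'
  one_part: lib.imageName('busybox'),                         // -> 'busybox'
}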

View File

@ -1,16 +1,8 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
_config+:: { _config+:: {
namespace: 'default', namespace: 'default',
versions+:: { nodeExporter: 'v1.0.1' },
versions+:: { imageRepos+:: { nodeExporter: 'quay.io/prometheus/node-exporter' },
nodeExporter: 'v1.0.1',
},
imageRepos+:: {
nodeExporter: 'quay.io/prometheus/node-exporter',
},
nodeExporter+:: { nodeExporter+:: {
listenAddress: '127.0.0.1', listenAddress: '127.0.0.1',
@ -28,76 +20,49 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
}, },
nodeExporter+:: { nodeExporter+:: {
clusterRoleBinding: clusterRoleBinding: {
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding; apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: {
name: 'node-exporter',
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'node-exporter',
},
subjects: [{
kind: 'ServiceAccount',
name: 'node-exporter',
namespace: $._config.namespace,
}],
},
clusterRoleBinding.new() + clusterRole: {
clusterRoleBinding.mixin.metadata.withName('node-exporter') + apiVersion: 'rbac.authorization.k8s.io/v1',
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + kind: 'ClusterRole',
clusterRoleBinding.mixin.roleRef.withName('node-exporter') + metadata: {
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) + name: 'node-exporter',
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'node-exporter', namespace: $._config.namespace }]), },
rules: [
clusterRole: {
local clusterRole = k.rbac.v1.clusterRole; apiGroups: ['authentication.k8s.io'],
local policyRule = clusterRole.rulesType; resources: ['tokenreviews'],
verbs: ['create'],
local authenticationRole = policyRule.new() + },
policyRule.withApiGroups(['authentication.k8s.io']) + {
policyRule.withResources([ apiGroups: ['authorization.k8s.io'],
'tokenreviews', resources: ['subjectaccessreviews'],
]) + verbs: ['create'],
policyRule.withVerbs(['create']); },
],
local authorizationRole = policyRule.new() + },
policyRule.withApiGroups(['authorization.k8s.io']) +
policyRule.withResources([
'subjectaccessreviews',
]) +
policyRule.withVerbs(['create']);
local rules = [authenticationRole, authorizationRole];
clusterRole.new() +
clusterRole.mixin.metadata.withName('node-exporter') +
clusterRole.withRules(rules),
daemonset: daemonset:
local daemonset = k.apps.v1.daemonSet; local nodeExporter = {
local container = daemonset.mixin.spec.template.spec.containersType; name: 'node-exporter',
local volume = daemonset.mixin.spec.template.spec.volumesType; image: $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter,
local containerPort = container.portsType; args: [
local containerVolumeMount = container.volumeMountsType;
local podSelector = daemonset.mixin.spec.template.spec.selectorType;
local toleration = daemonset.mixin.spec.template.spec.tolerationsType;
local containerEnv = container.envType;
local podLabels = $._config.nodeExporter.labels;
local selectorLabels = $._config.nodeExporter.selectorLabels;
local existsToleration = toleration.new() +
toleration.withOperator('Exists');
local procVolumeName = 'proc';
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc').
withMountPropagation('HostToContainer').
withReadOnly(true);
local sysVolumeName = 'sys';
local sysVolume = volume.fromHostPath(sysVolumeName, '/sys');
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys').
withMountPropagation('HostToContainer').
withReadOnly(true);
local rootVolumeName = 'root';
local rootVolume = volume.fromHostPath(rootVolumeName, '/');
local rootVolumeMount = containerVolumeMount.new(rootVolumeName, '/host/root').
withMountPropagation('HostToContainer').
withReadOnly(true);
local nodeExporter =
container.new('node-exporter', $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter) +
container.withArgs([
'--web.listen-address=' + std.join(':', [$._config.nodeExporter.listenAddress, std.toString($._config.nodeExporter.port)]), '--web.listen-address=' + std.join(':', [$._config.nodeExporter.listenAddress, std.toString($._config.nodeExporter.port)]),
'--path.procfs=/host/proc', '--path.procfs=/host/proc',
'--path.sysfs=/host/sys', '--path.sysfs=/host/sys',
@ -105,20 +70,27 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
'--no-collector.wifi', '--no-collector.wifi',
'--no-collector.hwmon', '--no-collector.hwmon',
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)', '--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
]) + ],
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) + volumeMounts: [
container.mixin.resources.withRequests($._config.resources['node-exporter'].requests) + { name: 'proc', mountPath: '/host/proc', mountPropagation: 'HostToContainer', readOnly: true },
container.mixin.resources.withLimits($._config.resources['node-exporter'].limits); { name: 'sys', mountPath: '/host/sys', mountPropagation: 'HostToContainer', readOnly: true },
{ name: 'root', mountPath: '/host/root', mountPropagation: 'HostToContainer', readOnly: true },
],
resources: $._config.resources['node-exporter'],
};
local ip = containerEnv.fromFieldPath('IP', 'status.podIP'); local proxy = {
local proxy = name: 'kube-rbac-proxy',
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) + image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
container.withArgs([ args: [
'--logtostderr', '--logtostderr',
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port, '--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites), '--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/', '--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
]) + ],
env: [
{ name: 'IP', valueFrom: { fieldRef: { fieldPath: 'status.podIP' } } },
],
// Keep `hostPort` here, rather than in the node-exporter container // Keep `hostPort` here, rather than in the node-exporter container
// because Kubernetes mandates that if you define a `hostPort` then // because Kubernetes mandates that if you define a `hostPort` then
// `containerPort` must match. In our case, we are splitting the // `containerPort` must match. In our case, we are splitting the
@ -127,82 +99,114 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
// used by the service is tied to the proxy container. We *could* // used by the service is tied to the proxy container. We *could*
// forgo declaring the host port, however it is important to declare // forgo declaring the host port, however it is important to declare
// it so that the scheduler can decide if the pod is schedulable. // it so that the scheduler can decide if the pod is schedulable.
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) + ports: [
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) + { name: 'https', containerPort: $._config.nodeExporter.port, hostPort: $._config.nodeExporter.port },
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) + ],
container.withEnv([ip]); resources: $._config.resources['kube-rbac-proxy'],
securityContext: {
runAsUser: 65532,
runAsGroup: 65532,
runAsNonRoot: true,
},
};
local c = [nodeExporter, proxy];
daemonset.new() +
daemonset.mixin.metadata.withName('node-exporter') +
daemonset.mixin.metadata.withNamespace($._config.namespace) +
daemonset.mixin.metadata.withLabels(podLabels) +
daemonset.mixin.spec.selector.withMatchLabels(selectorLabels) +
daemonset.mixin.spec.updateStrategy.rollingUpdate.withMaxUnavailable('10%') +
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withContainers(c) +
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
daemonset.mixin.spec.template.spec.withServiceAccountName('node-exporter') +
daemonset.mixin.spec.template.spec.withHostPid(true) +
daemonset.mixin.spec.template.spec.withHostNetwork(true),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('node-exporter') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{ {
apiVersion: 'monitoring.coreos.com/v1', apiVersion: 'apps/v1',
kind: 'ServiceMonitor', kind: 'DaemonSet',
metadata: { metadata: {
name: 'node-exporter', name: 'node-exporter',
namespace: $._config.namespace, namespace: $._config.namespace,
labels: $._config.nodeExporter.labels, labels: $._config.nodeExporter.labels,
}, },
spec: { spec: {
jobLabel: 'app.kubernetes.io/name', selector: { matchLabels: $._config.nodeExporter.selectorLabels },
selector: { updateStrategy: {
matchLabels: $._config.nodeExporter.selectorLabels, type: 'RollingUpdate',
rollingUpdate: { maxUnavailable: '10%' },
}, },
endpoints: [ template: {
{ metadata: { labels: $._config.nodeExporter.labels },
port: 'https', spec: {
scheme: 'https', nodeSelector: { 'kubernetes.io/os': 'linux' },
interval: '15s', tolerations: [{
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', operator: 'Exists',
relabelings: [ }],
{ containers: [nodeExporter, proxy],
action: 'replace', volumes: [
regex: '(.*)', { name: 'proc', hostPath: { path: '/proc' } },
replacement: '$1', { name: 'sys', hostPath: { path: '/sys' } },
sourceLabels: ['__meta_kubernetes_pod_node_name'], { name: 'root', hostPath: { path: '/' } },
targetLabel: 'instance',
},
], ],
tlsConfig: { serviceAccountName: 'node-exporter',
insecureSkipVerify: true, securityContext: {
runAsUser: 65534,
runAsNonRoot: true,
}, },
hostPID: true,
hostNetwork: true,
}, },
], },
}, },
}, },
service: serviceAccount: {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'ServiceAccount',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
},
},
local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https'); serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
labels: $._config.nodeExporter.labels,
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: $._config.nodeExporter.selectorLabels,
},
endpoints: [{
port: 'https',
scheme: 'https',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
action: 'replace',
regex: '(.*)',
replacement: '$1',
sourceLabels: ['__meta_kubernetes_pod_node_name'],
targetLabel: 'instance',
},
],
tlsConfig: {
insecureSkipVerify: true,
},
}],
},
},
service.new('node-exporter', $._config.nodeExporter.selectorLabels, nodeExporterPort) + service: {
service.mixin.metadata.withNamespace($._config.namespace) + apiVersion: 'v1',
service.mixin.metadata.withLabels($._config.nodeExporter.labels) + kind: 'Service',
service.mixin.spec.withClusterIp('None'), metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
labels: $._config.nodeExporter.labels,
},
spec: {
ports: [
{ name: 'https', targetPort: 'https', port: $._config.nodeExporter.port },
],
selector: $._config.nodeExporter.selectorLabels,
clusterIP: 'None',
},
},
}, },
} }
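Both the kube-rbac-proxy `hostPort` and its `--secure-listen-address` above derive from `$._config.nodeExporter`; a minimal sketch of overriding those fields (values illustrative):

// Hypothetical overlay adjusting the node-exporter listen settings.
{
  _config+:: {
    nodeExporter+:: {
      port: 9100,
      listenAddress: '127.0.0.1',
    },
  },
}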

View File

@ -1,19 +1,13 @@
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{ {
_config+:: { _config+:: {
namespace: 'default', namespace: 'default',
versions+:: { versions+:: { prometheusAdapter: 'v0.8.2' },
prometheusAdapter: 'v0.7.0', imageRepos+:: { prometheusAdapter: 'directxman12/k8s-prometheus-adapter' },
},
imageRepos+:: {
prometheusAdapter: 'directxman12/k8s-prometheus-adapter',
},
prometheusAdapter+:: { prometheusAdapter+:: {
name: 'prometheus-adapter', name: 'prometheus-adapter',
namespace: $._config.namespace,
labels: { name: $._config.prometheusAdapter.name }, labels: { name: $._config.prometheusAdapter.name },
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/', prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
config: { config: {
@ -23,239 +17,261 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)', nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: { resources: {
overrides: { overrides: {
node: { node: { resource: 'node' },
resource: 'node' namespace: { resource: 'namespace' },
}, pod: { resource: 'pod' },
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
},
}, },
}, },
containerLabel: 'container' containerLabel: 'container',
}, },
memory: { memory: {
containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)', containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)',
nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)', nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: { resources: {
overrides: { overrides: {
instance: { instance: { resource: 'node' },
resource: 'node' namespace: { resource: 'namespace' },
}, pod: { resource: 'pod' },
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
},
}, },
}, },
containerLabel: 'container' containerLabel: 'container',
}, },
window: '5m', window: '5m',
}, },
} },
}, },
}, },
prometheusAdapter+:: { prometheusAdapter+:: {
apiService: apiService: {
{ apiVersion: 'apiregistration.k8s.io/v1',
apiVersion: 'apiregistration.k8s.io/v1', kind: 'APIService',
kind: 'APIService', metadata: {
metadata: { name: 'v1beta1.metrics.k8s.io',
name: 'v1beta1.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
},
group: 'metrics.k8s.io',
version: 'v1beta1',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 100,
},
}, },
spec: {
configMap: service: {
local configmap = k.core.v1.configMap; name: $.prometheusAdapter.service.metadata.name,
configmap.new('adapter-config', { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) }) + namespace: $._config.prometheusAdapter.namespace,
configmap.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.namespace,
labels: $._config.prometheusAdapter.labels,
}, },
spec: { group: 'metrics.k8s.io',
selector: { version: 'v1beta1',
matchLabels: $._config.prometheusAdapter.labels, insecureSkipTLSVerify: true,
}, groupPriorityMinimum: 100,
endpoints: [ versionPriority: 100,
{ },
port: 'https', },
interval: '30s',
scheme: 'https', configMap: {
tlsConfig: { apiVersion: 'v1',
insecureSkipVerify: true, kind: 'ConfigMap',
}, metadata: {
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', name: 'adapter-config',
namespace: $._config.prometheusAdapter.namespace,
},
data: { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) },
},
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.prometheusAdapter.namespace,
labels: $._config.prometheusAdapter.labels,
},
spec: {
selector: {
matchLabels: $._config.prometheusAdapter.labels,
},
endpoints: [
{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
insecureSkipVerify: true,
}, },
], bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
}, },
],
}, },
},
service: service: {
local service = k.core.v1.service; apiVersion: 'v1',
local servicePort = k.core.v1.service.mixin.spec.portsType; kind: 'Service',
metadata: {
service.new( name: $._config.prometheusAdapter.name,
$._config.prometheusAdapter.name, namespace: $._config.prometheusAdapter.namespace,
$._config.prometheusAdapter.labels, labels: $._config.prometheusAdapter.labels,
servicePort.newNamed('https', 443, 6443), },
) + spec: {
service.mixin.metadata.withNamespace($._config.namespace) + ports: [
service.mixin.metadata.withLabels($._config.prometheusAdapter.labels), { name: 'https', targetPort: 6443, port: 443 },
],
selector: $._config.prometheusAdapter.labels,
},
},
deployment: deployment:
local deployment = k.apps.v1.deployment; local c = {
local volume = deployment.mixin.spec.template.spec.volumesType; name: $._config.prometheusAdapter.name,
local container = deployment.mixin.spec.template.spec.containersType; image: $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter,
local containerVolumeMount = container.volumeMountsType; args: [
local c =
container.new($._config.prometheusAdapter.name, $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter) +
container.withArgs([
'--cert-dir=/var/run/serving-cert', '--cert-dir=/var/run/serving-cert',
'--config=/etc/adapter/config.yaml', '--config=/etc/adapter/config.yaml',
'--logtostderr=true', '--logtostderr=true',
'--metrics-relist-interval=1m', '--metrics-relist-interval=1m',
'--prometheus-url=' + $._config.prometheusAdapter.prometheusURL, '--prometheus-url=' + $._config.prometheusAdapter.prometheusURL,
'--secure-port=6443', '--secure-port=6443',
]) + ],
container.withPorts([{ containerPort: 6443 }]) + ports: [{ containerPort: 6443 }],
container.withVolumeMounts([ volumeMounts: [
containerVolumeMount.new('tmpfs', '/tmp'), { name: 'tmpfs', mountPath: '/tmp', readOnly: false },
containerVolumeMount.new('volume-serving-cert', '/var/run/serving-cert'), { name: 'volume-serving-cert', mountPath: '/var/run/serving-cert', readOnly: false },
containerVolumeMount.new('config', '/etc/adapter'), { name: 'config', mountPath: '/etc/adapter', readOnly: false },
],); ],
};
deployment.new($._config.prometheusAdapter.name, 1, c, $._config.prometheusAdapter.labels) + {
deployment.mixin.metadata.withNamespace($._config.namespace) + apiVersion: 'apps/v1',
deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) + kind: 'Deployment',
deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) + metadata: {
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) + name: $._config.prometheusAdapter.name,
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) + namespace: $._config.prometheusAdapter.namespace,
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) + },
deployment.mixin.spec.template.spec.withVolumes([ spec: {
volume.fromEmptyDir(name='tmpfs'), replicas: 1,
volume.fromEmptyDir(name='volume-serving-cert'), selector: { matchLabels: $._config.prometheusAdapter.labels },
{ name: 'config', configMap: { name: 'adapter-config' } }, strategy: {
]), rollingUpdate: {
maxSurge: 1,
maxUnavailable: 0,
},
},
template: {
metadata: { labels: $._config.prometheusAdapter.labels },
spec: {
containers: [c],
serviceAccountName: $.prometheusAdapter.serviceAccount.metadata.name,
nodeSelector: { 'kubernetes.io/os': 'linux' },
volumes: [
{ name: 'tmpfs', emptyDir: {} },
{ name: 'volume-serving-cert', emptyDir: {} },
{ name: 'config', configMap: { name: 'adapter-config' } },
],
},
},
},
},
    serviceAccount:
      local serviceAccount = k.core.v1.serviceAccount;
      serviceAccount.new($._config.prometheusAdapter.name) +
      serviceAccount.mixin.metadata.withNamespace($._config.namespace),
    clusterRole:
      local clusterRole = k.rbac.v1.clusterRole;
      local policyRule = clusterRole.rulesType;
      local rules =
        policyRule.new() +
        policyRule.withApiGroups(['']) +
        policyRule.withResources(['nodes', 'namespaces', 'pods', 'services']) +
        policyRule.withVerbs(['get', 'list', 'watch']);
      clusterRole.new() +
      clusterRole.mixin.metadata.withName($._config.prometheusAdapter.name) +
      clusterRole.withRules(rules),
    clusterRoleBinding:
      local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
      clusterRoleBinding.new() +
      clusterRoleBinding.mixin.metadata.withName($._config.prometheusAdapter.name) +
      clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
      clusterRoleBinding.mixin.roleRef.withName($.prometheusAdapter.clusterRole.metadata.name) +
      clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
      clusterRoleBinding.withSubjects([{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.namespace,
      }]),
    clusterRoleBindingDelegator:
      local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
      clusterRoleBinding.new() +
      clusterRoleBinding.mixin.metadata.withName('resource-metrics:system:auth-delegator') +
      clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
      clusterRoleBinding.mixin.roleRef.withName('system:auth-delegator') +
      clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
      clusterRoleBinding.withSubjects([{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.namespace,
      }]),
    clusterRoleServerResources:
      local clusterRole = k.rbac.v1.clusterRole;
      local policyRule = clusterRole.rulesType;
      local rules =
        policyRule.new() +
        policyRule.withApiGroups(['metrics.k8s.io']) +
        policyRule.withResources(['*']) +
        policyRule.withVerbs(['*']);
      clusterRole.new() +
      clusterRole.mixin.metadata.withName('resource-metrics-server-resources') +
      clusterRole.withRules(rules),
    clusterRoleAggregatedMetricsReader:
      local clusterRole = k.rbac.v1.clusterRole;
      local policyRule = clusterRole.rulesType;
      local rules =
        policyRule.new() +
        policyRule.withApiGroups(['metrics.k8s.io']) +
        policyRule.withResources(['pods', 'nodes']) +
        policyRule.withVerbs(['get','list','watch']);
      clusterRole.new() +
      clusterRole.mixin.metadata.withName('system:aggregated-metrics-reader') +
      clusterRole.mixin.metadata.withLabels({
        "rbac.authorization.k8s.io/aggregate-to-admin": "true",
        "rbac.authorization.k8s.io/aggregate-to-edit": "true",
        "rbac.authorization.k8s.io/aggregate-to-view": "true",
      }) +
      clusterRole.withRules(rules),
    roleBindingAuthReader:
      local roleBinding = k.rbac.v1.roleBinding;
      roleBinding.new() +
      roleBinding.mixin.metadata.withName('resource-metrics-auth-reader') +
      roleBinding.mixin.metadata.withNamespace('kube-system') +
      roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
      roleBinding.mixin.roleRef.withName('extension-apiserver-authentication-reader') +
      roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
      roleBinding.withSubjects([{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.namespace,
      }]),
  },
}

    serviceAccount: {
      apiVersion: 'v1',
      kind: 'ServiceAccount',
      metadata: {
        name: $._config.prometheusAdapter.name,
        namespace: $._config.prometheusAdapter.namespace,
      },
    },
    clusterRole: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'ClusterRole',
      metadata: {
        name: $._config.prometheusAdapter.name,
      },
      rules: [{
        apiGroups: [''],
        resources: ['nodes', 'namespaces', 'pods', 'services'],
        verbs: ['get', 'list', 'watch'],
      }],
    },
    clusterRoleBinding: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'ClusterRoleBinding',
      metadata: {
        name: $._config.prometheusAdapter.name,
      },
      roleRef: {
        apiGroup: 'rbac.authorization.k8s.io',
        kind: 'ClusterRole',
        name: $.prometheusAdapter.clusterRole.metadata.name,
      },
      subjects: [{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.prometheusAdapter.namespace,
      }],
    },
    clusterRoleBindingDelegator: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'ClusterRoleBinding',
      metadata: {
        name: 'resource-metrics:system:auth-delegator',
      },
      roleRef: {
        apiGroup: 'rbac.authorization.k8s.io',
        kind: 'ClusterRole',
        name: 'system:auth-delegator',
      },
      subjects: [{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.prometheusAdapter.namespace,
      }],
    },
    clusterRoleServerResources: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'ClusterRole',
      metadata: {
        name: 'resource-metrics-server-resources',
      },
      rules: [{
        apiGroups: ['metrics.k8s.io'],
        resources: ['*'],
        verbs: ['*'],
      }],
    },
    clusterRoleAggregatedMetricsReader: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'ClusterRole',
      metadata: {
        name: 'system:aggregated-metrics-reader',
        labels: {
          'rbac.authorization.k8s.io/aggregate-to-admin': 'true',
          'rbac.authorization.k8s.io/aggregate-to-edit': 'true',
          'rbac.authorization.k8s.io/aggregate-to-view': 'true',
        },
      },
      rules: [{
        apiGroups: ['metrics.k8s.io'],
        resources: ['pods', 'nodes'],
        verbs: ['get', 'list', 'watch'],
      }],
    },
    roleBindingAuthReader: {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'RoleBinding',
      metadata: {
        name: 'resource-metrics-auth-reader',
        namespace: 'kube-system',
      },
      roleRef: {
        apiGroup: 'rbac.authorization.k8s.io',
        kind: 'Role',
        name: 'extension-apiserver-authentication-reader',
      },
      subjects: [{
        kind: 'ServiceAccount',
        name: $.prometheusAdapter.serviceAccount.metadata.name,
        namespace: $._config.prometheusAdapter.namespace,
      }],
    },
  },
}
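
The file above is representative of the whole commit: ksonnet-lib builder chains are replaced by plain Jsonnet objects. A minimal, self-contained sketch of how a downstream overlay can still customize such an object with Jsonnet's `+:` merge; the resource and namespace values here are illustrative, not the upstream defaults:

local adapter = {
  clusterRoleBinding: {
    apiVersion: 'rbac.authorization.k8s.io/v1',
    kind: 'ClusterRoleBinding',
    metadata: { name: 'prometheus-adapter' },
    roleRef: {
      apiGroup: 'rbac.authorization.k8s.io',
      kind: 'ClusterRole',
      name: 'prometheus-adapter',
    },
    subjects: [{ kind: 'ServiceAccount', name: 'prometheus-adapter', namespace: 'monitoring' }],
  },
};

// Overlay: patch one field without re-stating the rest of the object.
adapter + {
  clusterRoleBinding+: {
    subjects: [s + { namespace: 'custom-monitoring' } for s in super.subjects],
  },
}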

View File

@ -1,21 +1,12 @@
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
{
  _config+:: {
    namespace: 'default',
    versions+:: {
      prometheus: 'v2.20.0',
    },
    imageRepos+:: {
      prometheus: 'quay.io/prometheus/prometheus',
    },
    alertmanager+:: {
      name: 'main',
    },
    prometheus+:: {
      name: 'k8s',

local relabelings = import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet';
{
  _config+:: {
    namespace: 'default',
    versions+:: { prometheus: 'v2.22.1' },
    imageRepos+:: { prometheus: 'quay.io/prometheus/prometheus' },
    alertmanager+:: { name: 'main' },
    prometheus+:: {
      name: 'k8s',
@ -35,468 +26,438 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
    prometheusRules:: $._config.prometheus.rules,
    alertmanagerName:: $.alertmanager.service.metadata.name,

    serviceAccount:
      local serviceAccount = k.core.v1.serviceAccount;
      serviceAccount.new('prometheus-' + p.name) +
      serviceAccount.mixin.metadata.withNamespace(p.namespace),
    service:
      local service = k.core.v1.service;
      local servicePort = k.core.v1.service.mixin.spec.portsType;
      local prometheusPort = servicePort.newNamed('web', 9090, 'web');
      service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
      service.mixin.spec.withSessionAffinity('ClientIP') +
      service.mixin.metadata.withNamespace(p.namespace) +
      service.mixin.metadata.withLabels({ prometheus: p.name }),
    rules:
      {
        apiVersion: 'monitoring.coreos.com/v1',
        kind: 'PrometheusRule',
        metadata: {
          labels: {
            prometheus: p.name,
            role: 'alert-rules',
          },
          name: 'prometheus-' + p.name + '-rules',
          namespace: p.namespace,
        },
        spec: {
          groups: p.prometheusRules.groups,
        },
      },

    serviceAccount: {
      apiVersion: 'v1',
      kind: 'ServiceAccount',
      metadata: {
        name: 'prometheus-' + p.name,
        namespace: p.namespace,
      },
},
service: {
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'prometheus-' + p.name,
namespace: p.namespace,
labels: { prometheus: p.name },
},
spec: {
ports: [
{ name: 'web', targetPort: 'web', port: 9090 },
],
selector: { app: 'prometheus', prometheus: p.name },
sessionAffinity: 'ClientIP',
},
},
rules: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: {
prometheus: p.name,
role: 'alert-rules',
},
name: 'prometheus-' + p.name + '-rules',
namespace: p.namespace,
},
spec: {
groups: p.prometheusRules.groups,
},
},
    roleBindingSpecificNamespaces:
      local roleBinding = k.rbac.v1.roleBinding;
      local newSpecificRoleBinding(namespace) =
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
roleBinding.mixin.metadata.withNamespace(namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
local roleBindingList = k3.rbac.v1.roleBindingList;
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local nodeMetricsRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources(['nodes/metrics']) +
policyRule.withVerbs(['get']);
local metricsRule = policyRule.new() +
policyRule.withNonResourceUrls('/metrics') +
policyRule.withVerbs(['get']);
local rules = [nodeMetricsRule, metricsRule];
clusterRole.new() +
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
clusterRole.withRules(rules),
roleConfig:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local configmapRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'configmaps',
]) +
policyRule.withVerbs(['get']);
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
role.mixin.metadata.withNamespace(p.namespace) +
role.withRules(configmapRule),
roleBindingConfig:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.metadata.withNamespace(p.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
roleSpecificNamespaces:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local coreRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local ingressRule = policyRule.new() +
policyRule.withApiGroups(['extensions']) +
policyRule.withResources([
'ingresses',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local newSpecificRole(namespace) =
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name) +
role.mixin.metadata.withNamespace(namespace) +
role.withRules([coreRule, ingressRule]);
local roleList = k3.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
prometheus:
local statefulSet = k.apps.v1.statefulSet;
local container = statefulSet.mixin.spec.template.spec.containersType;
local resourceRequirements = container.mixin.resourcesType;
local selector = statefulSet.mixin.spec.selectorType;
local resources =
resourceRequirements.new() +
resourceRequirements.withRequests({ memory: '400Mi' });
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
        metadata: {
          name: p.name,
          namespace: p.namespace,
          labels: {
            prometheus: p.name,
          },
        },
        spec: {
          replicas: p.replicas,
          version: $._config.versions.prometheus,
          image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
          serviceAccountName: 'prometheus-' + p.name,
          serviceMonitorSelector: {},
          podMonitorSelector: {},
          probeSelector: {},
          serviceMonitorNamespaceSelector: {},
          podMonitorNamespaceSelector: {},
          probeNamespaceSelector: {},
          nodeSelector: { 'kubernetes.io/os': 'linux' },
          ruleSelector: selector.withMatchLabels({

    roleBindingSpecificNamespaces:
      local newSpecificRoleBinding(namespace) = {
        apiVersion: 'rbac.authorization.k8s.io/v1',
        kind: 'RoleBinding',
        metadata: {
          name: 'prometheus-' + p.name,
          namespace: namespace,
        },
        roleRef: {
          apiGroup: 'rbac.authorization.k8s.io',
          kind: 'Role',
          name: 'prometheus-' + p.name,
        },
        subjects: [{
          kind: 'ServiceAccount',
          name: 'prometheus-' + p.name,
          namespace: p.namespace,
        }],
      };
      {
        apiVersion: 'rbac.authorization.k8s.io/v1',
        kind: 'RoleBindingList',
        items: [newSpecificRoleBinding(x) for x in p.roleBindingNamespaces],
      },
clusterRole: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRole',
metadata: { name: 'prometheus-' + p.name },
rules: [
{
apiGroups: [''],
resources: ['nodes/metrics'],
verbs: ['get'],
},
{
nonResourceURLs: ['/metrics'],
verbs: ['get'],
},
],
},
roleConfig: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'prometheus-' + p.name + '-config',
namespace: p.namespace,
},
rules: [{
apiGroups: [''],
resources: ['configmaps'],
verbs: ['get'],
}],
},
roleBindingConfig: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: 'prometheus-' + p.name + '-config',
namespace: p.namespace,
},
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: 'prometheus-' + p.name + '-config',
},
subjects: [{
kind: 'ServiceAccount',
name: 'prometheus-' + p.name,
namespace: p.namespace,
}],
},
clusterRoleBinding: {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'ClusterRoleBinding',
metadata: { name: 'prometheus-' + p.name },
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'ClusterRole',
name: 'prometheus-' + p.name,
},
subjects: [{
kind: 'ServiceAccount',
name: 'prometheus-' + p.name,
namespace: p.namespace,
}],
},
roleSpecificNamespaces:
local newSpecificRole(namespace) = {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: 'prometheus-' + p.name,
namespace: namespace,
},
rules: [
{
apiGroups: [''],
resources: ['services', 'endpoints', 'pods'],
verbs: ['get', 'list', 'watch'],
},
{
apiGroups: ['extensions'],
resources: ['ingresses'],
verbs: ['get', 'list', 'watch'],
},
],
};
{
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleList',
items: [newSpecificRole(x) for x in p.roleBindingNamespaces],
},
prometheus: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
metadata: {
name: p.name,
namespace: p.namespace,
labels: { prometheus: p.name },
},
spec: {
replicas: p.replicas,
version: $._config.versions.prometheus,
image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
serviceAccountName: 'prometheus-' + p.name,
serviceMonitorSelector: {},
podMonitorSelector: {},
probeSelector: {},
serviceMonitorNamespaceSelector: {},
podMonitorNamespaceSelector: {},
probeNamespaceSelector: {},
nodeSelector: { 'kubernetes.io/os': 'linux' },
ruleSelector: {
matchLabels: {
            role: 'alert-rules',
            prometheus: p.name,
          },
        },
        resources: {
          requests: { memory: '400Mi' },
},
alerting: {
alertmanagers: [{
namespace: p.namespace,
name: p.alertmanagerName,
port: 'web',
}],
},
securityContext: {
runAsUser: 1000,
runAsNonRoot: true,
fsGroup: 2000,
},
},
},
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus',
namespace: p.namespace,
labels: { 'k8s-app': 'prometheus' },
},
spec: {
selector: {
matchLabels: { prometheus: p.name },
},
endpoints: [{
port: 'web',
interval: '30s',
}],
},
},
serviceMonitorKubeScheduler: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: p.namespace,
labels: { 'k8s-app': 'kube-scheduler' },
},
spec: {
jobLabel: 'k8s-app',
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: { insecureSkipVerify: true },
}],
selector: {
matchLabels: { 'k8s-app': 'kube-scheduler' },
},
namespaceSelector: {
matchNames: ['kube-system'],
},
},
},
serviceMonitorKubelet: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: p.namespace,
labels: { 'k8s-app': 'kubelet' },
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'https-metrics',
scheme: 'https',
interval: '30s',
honorLabels: true,
tlsConfig: { insecureSkipVerify: true },
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: relabelings,
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
}],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/cadvisor',
interval: '30s',
honorLabels: true,
honorTimestamps: false,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
}],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
            {
              sourceLabels: ['__name__'],
              regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
              action: 'drop',
            },
          ],
        },
        {
          port: 'https-metrics',
          scheme: 'https',
          path: '/metrics/probes',
          interval: '30s',
          honorLabels: true,
          tlsConfig: { insecureSkipVerify: true },
          bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
          relabelings: [{
            sourceLabels: ['__metrics_path__'],
            targetLabel: 'metrics_path',
          }],
        },
      ],
      selector: {
        matchLabels: { 'k8s-app': 'kubelet' },
      },
      namespaceSelector: {
        matchNames: ['kube-system'],
      },
    },
  },

          role: 'alert-rules',
          prometheus: p.name,
        }),
        resources: resources,
        alerting: {
          alertmanagers: [
            {
              namespace: p.namespace,
              name: p.alertmanagerName,
              port: 'web',
            },
          ],
        },
        securityContext: {
          runAsUser: 1000,
          runAsNonRoot: true,
          fsGroup: 2000,
        },
      },
    },
    serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1', serviceMonitorKubeControllerManager: {
kind: 'ServiceMonitor', apiVersion: 'monitoring.coreos.com/v1',
metadata: { kind: 'ServiceMonitor',
name: 'prometheus', metadata: {
namespace: p.namespace, name: 'kube-controller-manager',
labels: { namespace: p.namespace,
'k8s-app': 'prometheus', labels: { 'k8s-app': 'kube-controller-manager' },
},
spec: {
jobLabel: 'k8s-app',
endpoints: [{
port: 'https-metrics',
interval: '30s',
scheme: 'https',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
}, },
}, metricRelabelings: relabelings + [
spec: {
selector: {
matchLabels: {
prometheus: p.name,
},
},
endpoints: [
{ {
port: 'web', sourceLabels: ['__name__'],
interval: '30s', regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
}, },
], ],
}],
selector: {
matchLabels: { 'k8s-app': 'kube-controller-manager' },
},
namespaceSelector: {
matchNames: ['kube-system'],
}, },
}, },
serviceMonitorKubeScheduler: },
{
apiVersion: 'monitoring.coreos.com/v1', serviceMonitorApiserver: {
kind: 'ServiceMonitor', apiVersion: 'monitoring.coreos.com/v1',
metadata: { kind: 'ServiceMonitor',
name: 'kube-scheduler', metadata: {
namespace: p.namespace, name: 'kube-apiserver',
labels: { namespace: p.namespace,
'k8s-app': 'kube-scheduler', labels: { 'k8s-app': 'apiserver' },
},
spec: {
jobLabel: 'component',
selector: {
matchLabels: {
component: 'apiserver',
provider: 'kubernetes',
}, },
}, },
spec: { namespaceSelector: {
jobLabel: 'k8s-app', matchNames: ['default'],
endpoints: [ },
endpoints: [{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: relabelings + [
{ {
port: 'https-metrics', sourceLabels: ['__name__'],
interval: '30s', regex: 'etcd_(debugging|disk|server).*',
scheme: "https", action: 'drop',
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", },
tlsConfig: { {
insecureSkipVerify: true sourceLabels: ['__name__'],
} regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
}, },
], ],
selector: { }],
matchLabels: {
'k8s-app': 'kube-scheduler',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
}, },
serviceMonitorKubelet: },
{
apiVersion: 'monitoring.coreos.com/v1', serviceMonitorCoreDNS: {
kind: 'ServiceMonitor', apiVersion: 'monitoring.coreos.com/v1',
metadata: { kind: 'ServiceMonitor',
name: 'kubelet', metadata: {
namespace: p.namespace, name: 'coredns',
labels: { namespace: p.namespace,
'k8s-app': 'kubelet', labels: { 'k8s-app': 'coredns' },
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'https-metrics',
scheme: 'https',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/cadvisor',
interval: '30s',
honorLabels: true,
honorTimestamps: false,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/probes',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
],
selector: {
matchLabels: {
'k8s-app': 'kubelet',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
}, },
serviceMonitorKubeControllerManager: spec: {
{ jobLabel: 'k8s-app',
apiVersion: 'monitoring.coreos.com/v1', selector: {
kind: 'ServiceMonitor', matchLabels: { 'k8s-app': 'kube-dns' },
metadata: {
name: 'kube-controller-manager',
namespace: p.namespace,
labels: {
'k8s-app': 'kube-controller-manager',
},
}, },
spec: { namespaceSelector: {
jobLabel: 'k8s-app', matchNames: ['kube-system'],
endpoints: [
{
port: 'https-metrics',
interval: '30s',
scheme: "https",
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
tlsConfig: {
insecureSkipVerify: true
},
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
},
],
},
],
selector: {
matchLabels: {
'k8s-app': 'kube-controller-manager',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
},
serviceMonitorApiserver:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: p.namespace,
labels: {
'k8s-app': 'apiserver',
},
},
spec: {
jobLabel: 'component',
selector: {
matchLabels: {
component: 'apiserver',
provider: 'kubernetes',
},
},
namespaceSelector: {
matchNames: [
'default',
],
},
endpoints: [
{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|server).*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
},
],
},
],
},
},
serviceMonitorCoreDNS:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: p.namespace,
labels: {
'k8s-app': 'coredns',
},
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'kube-dns',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'metrics',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
},
],
}, },
endpoints: [{
port: 'metrics',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
}],
}, },
},
}, },
} }
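
A self-contained sketch of the pattern used throughout the file above: values from _config are concatenated into the Prometheus custom resource, so bumping versions.prometheus changes the image tag in one place. Field values below are examples, not authoritative defaults:

local config = {
  namespace: 'default',
  versions: { prometheus: 'v2.22.1' },
  imageRepos: { prometheus: 'quay.io/prometheus/prometheus' },
};

{
  prometheus: {
    apiVersion: 'monitoring.coreos.com/v1',
    kind: 'Prometheus',
    metadata: { name: 'k8s', namespace: config.namespace },
    spec: {
      version: config.versions.prometheus,
      // The image tag is derived from the repo and version fields, as in the library code.
      image: config.imageRepos.prometheus + ':' + config.versions.prometheus,
      serviceAccountName: 'prometheus-k8s',
    },
  },
}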

View File

@ -5,7 +5,7 @@
      name: 'kube-prometheus-node-recording.rules',
      rules: [
        {
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)',
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)',
          record: 'instance:node_cpu:rate:sum',
        },
        {
@ -17,11 +17,11 @@
          record: 'instance:node_network_transmit_bytes:rate:sum',
        },
        {
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
          record: 'instance:node_cpu:ratio',
        },
        {
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))',
          expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))',
          record: 'cluster:node_cpu:sum_rate5m',
        },
        {

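The recording rules above now also exclude steal time, so "busy" CPU means everything except idle, iowait and steal. An illustrative, standalone rule group using the updated expression (group and record names are examples only, not part of the upstream mixin):

{
  groups: [{
    name: 'example-node-recording.rules',
    rules: [{
      record: 'instance:node_cpu:rate:sum',
      expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)',
    }],
  }],
}
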
View File

@ -88,6 +88,20 @@
},
'for': '5m',
},
{
alert: 'PrometheusOperatorRejectedResources',
expr: |||
min_over_time(prometheus_operator_managed_resources{state="rejected",%(prometheusOperatorSelector)s}[5m]) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.',
summary: 'Resources rejected by Prometheus operator',
},
'for': '5m',
},
],
},
],

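A sketch of how the %(prometheusOperatorSelector)s placeholder in the new alert is resolved: Jsonnet's % operator substitutes fields from _config into the expr string. The selector value below is an assumed example, not the project default:

local _config = {
  prometheusOperatorSelector: 'job="prometheus-operator",namespace="monitoring"',
};

{
  // After formatting, the label matchers from _config appear inside the PromQL selector.
  expr: |||
    min_over_time(prometheus_operator_managed_resources{state="rejected",%(prometheusOperatorSelector)s}[5m]) > 0
  ||| % _config,
}
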
File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -15,14 +15,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
  },
  versions+:: {
    prometheusOperator: 'v0.42.1',
    prometheusOperator: 'v0.44.1',
    prometheusConfigReloader: self.prometheusOperator,
    configmapReloader: 'v0.4.0',
  },
  imageRepos+:: {
    prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator',
    configmapReloader: 'jimmidyson/configmap-reload',
    prometheusConfigReloader: 'quay.io/prometheus-operator/prometheus-config-reloader',
  },
},
@ -36,13 +34,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
  image:: $._config.imageRepos.prometheusOperator,
  version:: $._config.versions.prometheusOperator,
  configReloaderImage:: $._config.imageRepos.configmapReloader,
  configReloaderVersion:: $._config.versions.configmapReloader,
  prometheusConfigReloaderImage:: $._config.imageRepos.prometheusConfigReloader,
  prometheusConfigReloaderVersion:: $._config.versions.prometheusConfigReloader,
  // Prefixing with 0 to ensure these manifests are listed and therefore created first.
  '0alertmanagerCustomResourceDefinition': import 'alertmanager-crd.libsonnet',
  '0alertmanagerConfigCustomResourceDefinition': import 'alertmanagerconfig-crd.libsonnet',
  '0prometheusCustomResourceDefinition': import 'prometheus-crd.libsonnet',
  '0servicemonitorCustomResourceDefinition': import 'servicemonitor-crd.libsonnet',
  '0podmonitorCustomResourceDefinition': import 'podmonitor-crd.libsonnet',
@ -70,6 +67,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
  policyRule.withResources([
    'alertmanagers',
    'alertmanagers/finalizers',
    'alertmanagerconfigs',
    'prometheuses',
    'prometheuses/finalizers',
    'thanosrulers',
@ -126,7 +124,15 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
  ]) +
  policyRule.withVerbs(['get', 'list', 'watch']);

local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule];

local ingressRule = policyRule.new() +
                    policyRule.withApiGroups(['networking.k8s.io']) +
                    policyRule.withResources([
                      'ingresses',
                    ]) +
                    policyRule.withVerbs(['get', 'list', 'watch']);

local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule, ingressRule];

clusterRole.new() +
clusterRole.mixin.metadata.withLabels(po.commonLabels) +
@ -145,10 +151,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.withPorts(containerPort.newNamed(targetPort, 'http')) +
container.withArgs([
  '--kubelet-service=kube-system/kubelet',
  // Prometheus Operator is run with a read-only root file system. By
  // default glog saves logfiles to /tmp. Make it log to stderr instead.
  '--logtostderr=true',
  '--config-reloader-image=' + po.configReloaderImage + ':' + po.configReloaderVersion,
  '--prometheus-config-reloader=' + po.prometheusConfigReloaderImage + ':' + po.prometheusConfigReloaderVersion,
]) +
container.mixin.securityContext.withAllowPrivilegeEscalation(false) +

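A self-contained sketch of the version wiring changed above: prometheusConfigReloader follows prometheusOperator via self, so bumping one field moves both image tags, while the separate configmap-reload image and its --config-reloader-image flag are dropped:

local config = {
  versions: {
    prometheusOperator: 'v0.44.1',
    // The reloader tracks the operator version automatically.
    prometheusConfigReloader: self.prometheusOperator,
  },
  imageRepos: {
    prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator',
    prometheusConfigReloader: 'quay.io/prometheus-operator/prometheus-config-reloader',
  },
};

{
  operatorImage: config.imageRepos.prometheusOperator + ':' + config.versions.prometheusOperator,
  reloaderFlag: '--prometheus-config-reloader=' + config.imageRepos.prometheusConfigReloader + ':' + config.versions.prometheusConfigReloader,
}
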
View File

@ -1 +1 @@
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.2.4"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}} {"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.4.1"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines recording and alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}

Some files were not shown because too many files have changed in this diff.