update monitoring stack
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
5ffa599b0c
commit
1464357954
|
@ -8,8 +8,8 @@
|
|||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "d7c1a53462ecd533593c60e5277b92fbf7ea7623",
|
||||
"sum": "8OnIwMhzWtgoWYHNrDlkzUAMr/CPsWKauYEv0vnH1zs="
|
||||
"version": "8024f4fdaeb3a3a7d72f77e2ed87deb92c79aeda",
|
||||
"sum": "WXrJQtWuU5lJVc4jXkJGddPMpPP0+4eMcIB5cauZGgM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -18,8 +18,8 @@
|
|||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "e42127658c910d91e7902be958f12d41ac33d54f",
|
||||
"sum": "L+PGlPK9mykGCJ9TIoEWdhMBjz+9lKuQ4YZ8fOeP9sk="
|
||||
"version": "ca866c02422ff3f3d1f0876898a30c33dd7bcccf",
|
||||
"sum": "bLqTqEr0jky9zz5MV/7ucn6H5mph2NlXas0TVnGNB1Y="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -28,8 +28,8 @@
|
|||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "8d382c732dbdc839ff07549a3f42d25828f1b268",
|
||||
"sum": "DRSRw4luAXlBXblo19/T1Jrv+9hyV8ivlS0KEtNANec="
|
||||
"version": "356bd73e4792ffe107725776ca8946895969c191",
|
||||
"sum": "CSMZ3dJrpJpwvffie8BqcfrIVVwiKNqdPEN+1XWRBGU="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -38,8 +38,8 @@
|
|||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "b5e45051995755ea373ea67642f8e5f54fcb8dd7",
|
||||
"sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc="
|
||||
"version": "216bc806bb512f218e3cf5ed3d4f5699b07f04d6",
|
||||
"sum": "9/eJqljTTtJeq9QRjabdKWL6yD8a7VzLmGKBK3ir77k="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -59,8 +59,8 @@
|
|||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
|
||||
"sum": "ttkPUnv/5bqlOFcZ8fvp2wi/S7ZLKiqAZ4ZdTolX77M="
|
||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
||||
"sum": "3i0NkntlBluDS1NRF+iSc2e727Alkv3ziuVjAP12/kE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -69,7 +69,7 @@
|
|||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
|
||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||
},
|
||||
{
|
||||
|
@ -79,8 +79,8 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
|
||||
"sum": "ySP+bI2ZMLPt/sguSh9WrwI5H5dasaNFRE8Uo9PcZrI="
|
||||
"version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309",
|
||||
"sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -89,7 +89,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
|
||||
"version": "7bdd62593c9273b5179cf3c9d2d819e9d997aaa4",
|
||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
||||
},
|
||||
{
|
||||
|
@ -99,8 +99,8 @@
|
|||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "980e95de011319b88a3b9c0787a81dcdf338a898",
|
||||
"sum": "BxOXyWCSc9KkgWJXDau2Xtsy3aOYZDHz2VqOSLga7VU="
|
||||
"version": "7d7d40b4dee70ecd3328dcdee2ed0cc8f806df93",
|
||||
"sum": "6PhhQPWilq4skfe+z/hXKEg1pRqHnwvMR1Au6W136U0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -109,8 +109,8 @@
|
|||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "55baf034c431ed2c78d950b187f7d8b34dd06860",
|
||||
"sum": "+Q45oBC7O8g7KQOaiKhGglwndAMWRlLTR94KUI8Q1Ko="
|
||||
"version": "117c9a2cd905479022a66ddd92a41f599cccf10d",
|
||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -119,8 +119,19 @@
|
|||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "cd331ce9bb58bb926e391c6ae807621cb12cc29e",
|
||||
"sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk="
|
||||
"version": "d8b7d3766225908d0239fd0d78258892cd0fc384",
|
||||
"sum": "Nl+N/h76bzD9tZ8tx7tuNIKHwCIJ9zyOsAWplH8HvAE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/alertmanager",
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "193ebba04d1e70d971047e983a0b489112610460",
|
||||
"sum": "QcftU7gjCQyj7B6M4YJeCAeaPd0kwxd4J4rolo7AnLE=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -129,8 +140,8 @@
|
|||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "f81747e608ea85ae44e76454eb63f9cb6484fb9e",
|
||||
"sum": "VyMzZPxQIjiKQYGjZjXeKNWfLJ9vOl3emp84PWfsrUc="
|
||||
"version": "8b466360a35581e0301bd22918be7011cf4203c3",
|
||||
"sum": "rvyiD/yCB4BeYAWqYF53bP8c+aCUt2ipLHW2Ea8ELO8="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -139,10 +150,20 @@
|
|||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "983ebb4a513302315a8117932ab832815f85e3d2",
|
||||
"sum": "TBq4SL7YsPInARbJqwz25JaBvvAegcnRCsuz3K9niWc=",
|
||||
"version": "26d89b4b0776fe4cd5a3656dfa520f119a375273",
|
||||
"sum": "1VRVMuxAEZ9vdGHFlndmG9iQzDD6AoIXrX80CDpGDaU=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/thanos-io/thanos",
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "37e6ef61566c7c70793ba6d128f00c4c66cb2402",
|
||||
"sum": "OptiWUMOHFrRGTZhSfxV1RCeXZ90qsefGNTD4lDYVG0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
apiVersion: v1
|
||||
data: {}
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: alertmanager-main
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
apiVersion: bitnami.com/v1alpha1
|
||||
kind: SealedSecret
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: alertmanager-tbrnt-config
|
||||
namespace: monitoring
|
||||
spec:
|
||||
encryptedData:
|
||||
alertmanager.yaml: AgDHh1Qgrdffl6IFXJuk3ZzNHGARWZLDzbMLKp5Wo/ZYqclCji4T5wx7Fql6FALCvvUBvThxLfiwM2LQHRcWfWnf5AwxipCxpos9oVvlu4aON0WZd/Kjz/ZwDq5pgR/etCrSA2DYNxYq4vnTxUAk29eY5F4dWwRPcCgVZ5/KsTIcNx4x+4einqQbwAhkUtAwQl5fEPmpwNRquJZM29XIEUxZLWne0YmCmEgNGleUU20ByfYMwgtDJGjgr6XNPtTmByAHVrxNuQwAMxgT6GcfDLCNsByaS6CY3wmSTg1GUv/CG8Xx29FwDWyf1Ly2KbmcAAafN5QJGvCCTEt/WB85GtzQisrWFZTykv3Zjuz101p9ShXQZALylaX5h22hHFXuQyiIQZEeM2ixiYQjcPhiPjx1/hkbQ25QRD73/gjalZO8bprDrJxkLlw+hrgJ0LzxWL881U6INLKow+8/GmLleFhMUXRsGqacLreCIAr4uVGEMGMVLhHJKnj597HRnn0UCxVNkDk8QjHyiVgJBrQ3Pz9SFdF7mxvJ9F4rEgGkE4dvfvWxrZFumTLEkVRF9To+rKxsIVkewvoHtN/gMzFMzumP+fz/oB9yAHsxkwVyfqXBg52hNSYIx5Z/67yy3hDRKPBcZgknf9S+F37ET5BABFxazwG8NJjf4td+UsAGuAMzKI/94u7TxuXLPCs/tIGKD7kJnPxAqpalepzABtVCmOrtWwNPb1h4XeuraUS9beJ2zV9oV5nVFJmX94EJ7qpZt0Um7+GGeavQ5SV3XHRolDS5PpZPTAWnc/1rtZ0nsKk8lllEr3aDWveMXma06NKkIXz8+iAonvHsDZuw0W6jUdUUtraIbSua9YkyugqCBGeeXIPLwFxqJTqIX5vedZVMveFiaxtCJjL48SUGxtyugfiYbPa3xpHWWe22BcJyTmAOG9aIq4Tp4nvftLyvWe7c9PotJk/7gdv1IO4RLx//eLtKWw0uADa0ara4hDuI8Yktlti24TlA9XYz00d5WtE+lJsSZN8547BUfFzXSOZSSbfrFLZmEmBTgkbj4szX19bXSctJN3BtOmRfCEPXYQN10HgnhpwqYHbXKUSTZNWLojnFL1/E56wUXGxRg9NGOwSXzTyfoLGxI9NEQpGc0Rj2Wna+JSUhlAUnfYW1eH8yyg5FfkyhQdyZJFvYfF0rk+XG5XNhLumST19uxrAkMWhk+Z9/eWwOaZQMmDcoi2Rs0za+1GGjPW5k56Ip+spwW5cvYmdl1PgkZ4g1mupjiB0FdgZHGR+kGn1lbPtSUd+amh9PXSDWkqfnix62H7374rQ3ZyG7fs9sQNnnRrd/cDCMxAl5Upk8D9dfxRmvuxRd8b89h7EQwUBML7TIriA2Pci5Ftux2R5wyIXjznLC5/kFZg6/Av3uKmKK6dLR2Ooey7/3g14CEjMumdijjySl8Pd2UUxSKVKD7vkq+3xYm0CJZqVvT/iBOccrv0UEiTHBsXrfaugUvqIKTAGYhJy0fUBXKisPdA0HdzrUmx57Du36TGyuEzGtVuDarcWzQYPqKJxOIuofJ+AGTDY53OjdUJ8pwJD6HDz55tu85gaV6ZOvSYqjqeX2FUe7lPhsGUIh/FemfichpypHyFpPYhkwAIO1AinKvsqjUuDXE6n5b7NMbI1gl87fPqT5wUSKXZqwViyFqUA5DFqPTEqvHIGU5Wz0GajEaQ==
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: alertmanager-tbrnt-config
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
status: {}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -21,7 +21,7 @@ spec:
|
|||
- env:
|
||||
- name: GF_INSTALL_PLUGINS
|
||||
value: grafana-piechart-panel
|
||||
image: grafana/grafana:7.1.0
|
||||
image: grafana/grafana:7.3.5
|
||||
name: grafana
|
||||
ports:
|
||||
- containerPort: 3000
|
||||
|
|
|
@ -12,3 +12,4 @@ spec:
|
|||
targetPort: http
|
||||
selector:
|
||||
app: grafana
|
||||
type: NodePort
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: ClusterRole
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
@ -30,6 +30,7 @@ rules:
|
|||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
@ -104,14 +105,6 @@ rules:
|
|||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: ClusterRoleBinding
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: Deployment
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -15,7 +15,7 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
|
@ -25,32 +25,34 @@ spec:
|
|||
- --telemetry-port=8082
|
||||
image: quay.io/coreos/kube-state-metrics:v1.9.7
|
||||
name: kube-state-metrics
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https-main
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:9443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-self
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: Service
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -3,6 +3,6 @@ kind: ServiceAccount
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.7
|
||||
app.kubernetes.io/version: v1.9.7
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
|||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
|
@ -70,6 +70,10 @@ spec:
|
|||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeSelector:
|
||||
|
@ -93,3 +97,4 @@ spec:
|
|||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 10%
|
||||
type: RollingUpdate
|
||||
|
|
|
@ -25,7 +25,7 @@ spec:
|
|||
- --metrics-relist-interval=1m
|
||||
- --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
|
||||
- --secure-port=6443
|
||||
image: directxman12/k8s-prometheus-adapter:v0.7.0
|
||||
image: directxman12/k8s-prometheus-adapter:v0.8.2
|
||||
name: prometheus-adapter
|
||||
ports:
|
||||
- containerPort: 6443
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -19,4 +19,4 @@ spec:
|
|||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
|
|
|
@ -12,7 +12,7 @@ spec:
|
|||
namespace: monitoring
|
||||
port: web
|
||||
externalUrl: http://prometheus-k8s.monitoring:9090
|
||||
image: quay.io/prometheus/prometheus:v2.20.0
|
||||
image: quay.io/prometheus/prometheus:v2.22.1
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
podMonitorNamespaceSelector:
|
||||
|
@ -58,4 +58,4 @@ spec:
|
|||
requests:
|
||||
storage: 10Gi
|
||||
storageClassName: local-path
|
||||
version: v2.20.0
|
||||
version: v2.22.1
|
||||
|
|
|
@ -40,10 +40,10 @@ spec:
|
|||
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
|
||||
record: instance:node_vmstat_pgmajfault:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
|
@ -390,11 +390,6 @@ spec:
|
|||
quantile: "0.99"
|
||||
verb: write
|
||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
- expr: |
|
||||
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
|
||||
/
|
||||
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
|
||||
record: cluster:apiserver_request_duration_seconds:mean5m
|
||||
- expr: |
|
||||
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
|
||||
labels:
|
||||
|
@ -571,9 +566,6 @@ spec:
|
|||
record: code:apiserver_request_total:increase30d
|
||||
- name: k8s.rules
|
||||
rules:
|
||||
- expr: |
|
||||
sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
|
||||
record: namespace:container_cpu_usage_seconds_total:sum_rate
|
||||
- expr: |
|
||||
sum by (cluster, namespace, pod, container) (
|
||||
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
||||
|
@ -605,9 +597,6 @@ spec:
|
|||
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||
)
|
||||
record: node_namespace_pod_container:container_memory_swap
|
||||
- expr: |
|
||||
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
|
||||
record: namespace:container_memory_usage_bytes:sum
|
||||
- expr: |
|
||||
sum by (namespace) (
|
||||
sum by (namespace, pod) (
|
||||
|
@ -716,9 +705,6 @@ spec:
|
|||
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
|
||||
- name: node.rules
|
||||
rules:
|
||||
- expr: |
|
||||
sum(min(kube_pod_info{node!=""}) by (cluster, node))
|
||||
record: ':kube_pod_info_node_count:'
|
||||
- expr: |
|
||||
topk by(namespace, pod) (1,
|
||||
max by (node, namespace, pod) (
|
||||
|
@ -762,18 +748,18 @@ spec:
|
|||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: kube-prometheus-node-recording.rules
|
||||
rules:
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
|
||||
(instance)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
|
||||
BY (instance)
|
||||
record: instance:node_cpu:rate:sum
|
||||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_receive_bytes:rate:sum
|
||||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_transmit_bytes:rate:sum
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT
|
||||
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
||||
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu)) BY (instance)
|
||||
record: instance:node_cpu:ratio
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
||||
record: cluster:node_cpu:sum_rate5m
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu))
|
||||
|
@ -791,7 +777,7 @@ spec:
|
|||
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||
in list operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
|
||||
summary: kube-state-metrics is experiencing errors in list operations.
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
|
@ -806,7 +792,7 @@ spec:
|
|||
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||
in watch operations. This is likely causing it to not be able to expose
|
||||
metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
|
||||
summary: kube-state-metrics is experiencing errors in watch operations.
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
|
@ -823,7 +809,7 @@ spec:
|
|||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
|
@ -841,7 +827,7 @@ spec:
|
|||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
|
@ -858,7 +844,7 @@ spec:
|
|||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
(
|
||||
|
@ -873,7 +859,7 @@ spec:
|
|||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
(
|
||||
|
@ -889,7 +875,7 @@ spec:
|
|||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
|
@ -907,7 +893,7 @@ spec:
|
|||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
|
@ -924,7 +910,7 @@ spec:
|
|||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
(
|
||||
|
@ -939,7 +925,7 @@ spec:
|
|||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
(
|
||||
|
@ -954,10 +940,10 @@ spec:
|
|||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
increase(node_network_receive_errs_total[2m]) > 10
|
||||
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -965,17 +951,17 @@ spec:
|
|||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
increase(node_network_transmit_errs_total[2m]) > 10
|
||||
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: |
|
||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||
|
@ -984,7 +970,7 @@ spec:
|
|||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description: Node Exporter text file collector failed to scrape.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: |
|
||||
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
|
@ -994,7 +980,7 @@ spec:
|
|||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1015,7 +1001,7 @@ spec:
|
|||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
min_over_time(node_timex_sync_status[5m]) == 0
|
||||
|
@ -1029,7 +1015,7 @@ spec:
|
|||
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
|
||||
in degraded state due to one or more disks failures. Number of spare drives
|
||||
is insufficient to fix issue automatically.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
|
||||
summary: RAID Array is degraded
|
||||
expr: |
|
||||
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
|
||||
|
@ -1040,19 +1026,142 @@ spec:
|
|||
annotations:
|
||||
description: At least one device in RAID array on {{ $labels.instance }} failed.
|
||||
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
|
||||
summary: Failed device in RAID array
|
||||
expr: |
|
||||
node_md_disks{state="fail"} > 0
|
||||
labels:
|
||||
severity: warning
|
||||
- name: alertmanager.rules
|
||||
rules:
|
||||
- alert: AlertmanagerFailedReload
|
||||
annotations:
|
||||
description: Configuration has failed to load for {{ $labels.namespace }}/{{
|
||||
$labels.pod}}.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
|
||||
summary: Reloading an Alertmanager configuration has failed.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerMembersInconsistent
|
||||
annotations:
|
||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
|
||||
found {{ $value }} members of the {{$labels.job}} cluster.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
|
||||
summary: A member of an Alertmanager cluster has not found all other cluster
|
||||
members.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
< on (namespace,service) group_left
|
||||
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerFailedToSendAlerts
|
||||
annotations:
|
||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
|
||||
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
|
||||
}}.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
|
||||
summary: An Alertmanager instance failed to send notifications.
|
||||
expr: |
|
||||
(
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||
annotations:
|
||||
description: The minimum notification failure rate to {{ $labels.integration
|
||||
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
|
||||
humanizePercentage }}.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications.
|
||||
expr: |
|
||||
min by (namespace,service) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
annotations:
|
||||
description: Alertmanager instances within the {{$labels.job}} cluster have
|
||||
different configurations.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
|
||||
summary: Alertmanager instances within the same cluster have different configurations.
|
||||
expr: |
|
||||
count by (namespace,service) (
|
||||
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
|
||||
)
|
||||
!= 1
|
||||
for: 20m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterDown
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
||||
within the {{$labels.job}} cluster have been up for less than half of the
|
||||
last 5m.'
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
|
||||
summary: Half or more of the Alertmanager instances within the same cluster
|
||||
are down.
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterCrashlooping
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
||||
within the {{$labels.job}} cluster have restarted at least 5 times in the
|
||||
last 10m.'
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
|
||||
summary: Half or more of the Alertmanager instances within the same cluster
|
||||
are crashlooping.
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: prometheus-operator
|
||||
rules:
|
||||
- alert: PrometheusOperatorListErrors
|
||||
annotations:
|
||||
description: Errors while performing List operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
|
||||
summary: Errors while performing list operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
|
@ -1063,7 +1172,7 @@ spec:
|
|||
annotations:
|
||||
description: Errors while performing watch operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
|
||||
summary: Errors while performing watch operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
|
@ -1074,7 +1183,7 @@ spec:
|
|||
annotations:
|
||||
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
|
||||
namespace fails to reconcile {{ $value }} objects.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorsyncfailed
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
|
||||
summary: Last controller reconciliation failed
|
||||
expr: |
|
||||
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
|
||||
|
@ -1086,7 +1195,7 @@ spec:
|
|||
description: '{{ $value | humanizePercentage }} of reconciling operations
|
||||
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
|
||||
namespace.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
|
||||
summary: Errors while reconciling controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
|
||||
|
@ -1097,7 +1206,7 @@ spec:
|
|||
annotations:
|
||||
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
|
||||
Namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
|
||||
summary: Errors while reconciling Prometheus.
|
||||
expr: |
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
|
@ -1108,20 +1217,32 @@ spec:
|
|||
annotations:
|
||||
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
|
||||
ready to reconcile {{ $labels.controller }} resources.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornotready
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
|
||||
summary: Prometheus operator not ready
|
||||
expr: |
|
||||
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorRejectedResources
|
||||
annotations:
|
||||
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
|
||||
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
|
||||
}} resources.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
|
||||
summary: Resources rejected by Prometheus operator
|
||||
expr: |
|
||||
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kubernetes-apps
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||
|
@ -1132,7 +1253,7 @@ spec:
|
|||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
state for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodnotready
|
||||
summary: Pod has been in a non-ready state for more than 15 minutes.
|
||||
expr: |
|
||||
sum by (namespace, pod) (
|
||||
|
@ -1150,7 +1271,7 @@ spec:
|
|||
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} does not match, this indicates that the Deployment has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentgenerationmismatch
|
||||
summary: Deployment generation mismatch due to possible roll-back
|
||||
expr: |
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||
|
@ -1163,7 +1284,7 @@ spec:
|
|||
annotations:
|
||||
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedeploymentreplicasmismatch
|
||||
summary: Deployment has not matched the expected number of replicas.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1182,7 +1303,7 @@ spec:
|
|||
annotations:
|
||||
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
|
||||
has not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetreplicasmismatch
|
||||
summary: StatefulSet has not matched the expected number of replicas.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1202,7 +1323,7 @@ spec:
|
|||
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
}} does not match, this indicates that the StatefulSet has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetgenerationmismatch
|
||||
summary: StatefulSet generation mismatch due to possible roll-back
|
||||
expr: |
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||
|
@ -1215,7 +1336,7 @@ spec:
|
|||
annotations:
|
||||
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
|
||||
update has not been rolled out.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatefulsetupdatenotrolledout
|
||||
summary: StatefulSet update has not been rolled out.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1242,7 +1363,7 @@ spec:
|
|||
annotations:
|
||||
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has
|
||||
not finished or progressed for at least 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetrolloutstuck
|
||||
summary: DaemonSet rollout is stuck.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1275,7 +1396,7 @@ spec:
|
|||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: |
|
||||
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
|
@ -1286,7 +1407,7 @@ spec:
|
|||
annotations:
|
||||
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are not scheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetnotscheduled
|
||||
summary: DaemonSet pods are not scheduled.
|
||||
expr: |
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||
|
@ -1299,7 +1420,7 @@ spec:
|
|||
annotations:
|
||||
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are running where they are not supposed to run.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubedaemonsetmisscheduled
|
||||
summary: DaemonSet pods are misscheduled.
|
||||
expr: |
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
|
@ -1310,7 +1431,7 @@ spec:
|
|||
annotations:
|
||||
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking
|
||||
more than 12 hours to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobcompletion
|
||||
summary: Job did not complete in time
|
||||
expr: |
|
||||
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||
|
@ -1321,7 +1442,7 @@ spec:
|
|||
annotations:
|
||||
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to
|
||||
complete. Removing failed job after investigation should clear this alert.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: |
|
||||
kube_job_failed{job="kube-state-metrics"} > 0
|
||||
|
@ -1332,13 +1453,21 @@ spec:
|
|||
annotations:
|
||||
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched
|
||||
the desired number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: |
|
||||
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
>
|
||||
kube_hpa_spec_min_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
<
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
|
@ -1347,7 +1476,7 @@ spec:
|
|||
annotations:
|
||||
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running
|
||||
at max replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: |
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
|
@ -1362,7 +1491,7 @@ spec:
|
|||
annotations:
|
||||
description: Cluster has overcommitted CPU resource requests for Pods and
|
||||
cannot tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
|
||||
|
@ -1377,7 +1506,7 @@ spec:
|
|||
annotations:
|
||||
description: Cluster has overcommitted memory resource requests for Pods and
|
||||
cannot tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
|
||||
|
@ -1393,7 +1522,7 @@ spec:
|
|||
- alert: KubeCPUQuotaOvercommit
|
||||
annotations:
|
||||
description: Cluster has overcommitted CPU resource requests for Namespaces.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecpuquotaovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
|
||||
|
@ -1406,12 +1535,12 @@ spec:
|
|||
- alert: KubeMemoryQuotaOvercommit
|
||||
annotations:
|
||||
description: Cluster has overcommitted memory resource requests for Namespaces.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubememoryquotaovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
|
||||
/
|
||||
sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"})
|
||||
sum(kube_node_status_allocatable_memory_bytes{job="kube-state-metrics"})
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
|
@ -1420,7 +1549,7 @@ spec:
|
|||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaalmostfull
|
||||
summary: Namespace quota is going to be full.
|
||||
expr: |
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
|
@ -1434,7 +1563,7 @@ spec:
|
|||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotafullyused
|
||||
summary: Namespace quota is fully used.
|
||||
expr: |
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
|
@ -1448,7 +1577,7 @@ spec:
|
|||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubequotaexceeded
|
||||
summary: Namespace quota has exceeded the limits.
|
||||
expr: |
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
|
@ -1463,7 +1592,7 @@ spec:
|
|||
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
|
||||
$labels.pod }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
|
||||
|
@ -1480,7 +1609,7 @@ spec:
|
|||
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
|
||||
}} free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
|
@ -1496,7 +1625,7 @@ spec:
|
|||
$labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is
|
||||
expected to fill up within four days. Currently {{ $value | humanizePercentage
|
||||
}} is available.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1513,7 +1642,7 @@ spec:
|
|||
annotations:
|
||||
description: The persistent volume {{ $labels.persistentvolume }} has status
|
||||
{{ $labels.phase }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumeerrors
|
||||
summary: PersistentVolume is having issues with provisioning.
|
||||
expr: |
|
||||
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
|
||||
|
@ -1526,7 +1655,7 @@ spec:
|
|||
annotations:
|
||||
description: There are {{ $value }} different semantic versions of Kubernetes
|
||||
components running.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeversionmismatch
|
||||
summary: Different semantic versions of Kubernetes components running.
|
||||
expr: |
|
||||
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
|
||||
|
@ -1537,7 +1666,7 @@ spec:
|
|||
annotations:
|
||||
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ $value | humanizePercentage }} errors.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclienterrors
|
||||
summary: Kubernetes API server client is experiencing errors.
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
|
||||
|
@ -1552,7 +1681,7 @@ spec:
|
|||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
|
||||
|
@ -1566,7 +1695,7 @@ spec:
|
|||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
|
||||
|
@ -1580,7 +1709,7 @@ spec:
|
|||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
|
||||
|
@ -1594,7 +1723,7 @@ spec:
|
|||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
|
||||
|
@ -1611,7 +1740,7 @@ spec:
|
|||
annotations:
|
||||
description: A client certificate used to authenticate to the apiserver is
|
||||
expiring in less than 7.0 days.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||
|
@ -1621,7 +1750,7 @@ spec:
|
|||
annotations:
|
||||
description: A client certificate used to authenticate to the apiserver is
|
||||
expiring in less than 24.0 hours.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||
|
@ -1633,7 +1762,7 @@ spec:
|
|||
has reported errors. The number of errors have increased for it in the past
|
||||
five minutes. High values indicate that the availability of the service
|
||||
changes too often.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapierrors
|
||||
summary: An aggregated API has reported errors.
|
||||
expr: |
|
||||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
|
||||
|
@ -1643,7 +1772,7 @@ spec:
|
|||
annotations:
|
||||
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }}
|
||||
has been only {{ $value | humanize }}% available over the last 10m.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapidown
|
||||
summary: An aggregated API is down.
|
||||
expr: |
|
||||
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
|
||||
|
@ -1653,7 +1782,7 @@ spec:
|
|||
- alert: KubeAPIDown
|
||||
annotations:
|
||||
description: KubeAPI has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapidown
|
||||
summary: Target disappeared from Prometheus target discovery.
|
||||
expr: |
|
||||
absent(up{job="apiserver"} == 1)
|
||||
|
@ -1665,7 +1794,7 @@ spec:
|
|||
- alert: KubeNodeNotReady
|
||||
annotations:
|
||||
description: '{{ $labels.node }} has been unready for more than 15 minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodenotready
|
||||
summary: Node is not ready.
|
||||
expr: |
|
||||
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||
|
@ -1676,7 +1805,7 @@ spec:
|
|||
annotations:
|
||||
description: '{{ $labels.node }} is unreachable and some workloads may be
|
||||
rescheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodeunreachable
|
||||
summary: Node is unreachable.
|
||||
expr: |
|
||||
(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
|
||||
|
@ -1687,7 +1816,7 @@ spec:
|
|||
annotations:
|
||||
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
|
||||
}} of its Pod capacity.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubelettoomanypods
|
||||
summary: Kubelet is running at capacity.
|
||||
expr: |
|
||||
count by(node) (
|
||||
|
@ -1704,7 +1833,7 @@ spec:
|
|||
annotations:
|
||||
description: The readiness status of node {{ $labels.node }} has changed {{
|
||||
$value }} times in the last 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubenodereadinessflapping
|
||||
summary: Node readiness status is flapping.
|
||||
expr: |
|
||||
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
|
||||
|
@ -1715,7 +1844,7 @@ spec:
|
|||
annotations:
|
||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile
|
||||
duration of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletplegdurationhigh
|
||||
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
|
||||
expr: |
|
||||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
|
@ -1726,7 +1855,7 @@ spec:
|
|||
annotations:
|
||||
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||
on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletpodstartuplatencyhigh
|
||||
summary: Kubelet Pod startup latency is too high.
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
|
@ -1737,7 +1866,7 @@ spec:
|
|||
annotations:
|
||||
description: Client certificate for Kubelet on node {{ $labels.node }} expires
|
||||
in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: |
|
||||
kubelet_certificate_manager_client_ttl_seconds < 604800
|
||||
|
@ -1747,7 +1876,7 @@ spec:
|
|||
annotations:
|
||||
description: Client certificate for Kubelet on node {{ $labels.node }} expires
|
||||
in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: |
|
||||
kubelet_certificate_manager_client_ttl_seconds < 86400
|
||||
|
@ -1757,7 +1886,7 @@ spec:
|
|||
annotations:
|
||||
description: Server certificate for Kubelet on node {{ $labels.node }} expires
|
||||
in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: |
|
||||
kubelet_certificate_manager_server_ttl_seconds < 604800
|
||||
|
@ -1767,7 +1896,7 @@ spec:
|
|||
annotations:
|
||||
description: Server certificate for Kubelet on node {{ $labels.node }} expires
|
||||
in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: |
|
||||
kubelet_certificate_manager_server_ttl_seconds < 86400
|
||||
|
@ -1777,7 +1906,7 @@ spec:
|
|||
annotations:
|
||||
description: Kubelet on node {{ $labels.node }} has failed to renew its client
|
||||
certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletclientcertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its client certificate.
|
||||
expr: |
|
||||
increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
|
||||
|
@ -1788,7 +1917,7 @@ spec:
|
|||
annotations:
|
||||
description: Kubelet on node {{ $labels.node }} has failed to renew its server
|
||||
certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletservercertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its server certificate.
|
||||
expr: |
|
||||
increase(kubelet_server_expiration_renew_errors[5m]) > 0
|
||||
|
@ -1798,7 +1927,7 @@ spec:
|
|||
- alert: KubeletDown
|
||||
annotations:
|
||||
description: Kubelet has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeletdown
|
||||
summary: Target disappeared from Prometheus target discovery.
|
||||
expr: |
|
||||
absent(up{job="kubelet", metrics_path="/metrics"} == 1)
|
||||
|
@ -1810,7 +1939,7 @@ spec:
|
|||
- alert: KubeSchedulerDown
|
||||
annotations:
|
||||
description: KubeScheduler has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeschedulerdown
|
||||
summary: Target disappeared from Prometheus target discovery.
|
||||
expr: |
|
||||
absent(up{job="kube-scheduler"} == 1)
|
||||
|
@ -1823,7 +1952,7 @@ spec:
|
|||
annotations:
|
||||
description: KubeControllerManager has disappeared from Prometheus target
|
||||
discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubecontrollermanagerdown
|
||||
summary: Target disappeared from Prometheus target discovery.
|
||||
expr: |
|
||||
absent(up{job="kube-controller-manager"} == 1)
|
||||
|
@ -1878,22 +2007,6 @@ spec:
|
|||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
||||
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without(alertmanager) (
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
* 100
|
||||
> 3
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusNotConnectedToAlertmanagers
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
|
||||
|
@ -1932,7 +2045,15 @@ spec:
|
|||
samples.
|
||||
summary: Prometheus is not ingesting samples.
|
||||
expr: |
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
(
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
and
|
||||
(
|
||||
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
or
|
||||
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
)
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -1989,7 +2110,7 @@ spec:
|
|||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
- on(job, instance) group_right
|
||||
- ignoring(remote_name, url) group_right
|
||||
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 120
|
||||
|
@ -2036,37 +2157,32 @@ spec:
|
|||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: alertmanager.rules
|
||||
rules:
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
- alert: PrometheusTargetLimitHit
|
||||
annotations:
|
||||
message: |
|
||||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
|
||||
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
|
||||
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
|
||||
{{ end }}
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
|
||||
{{ printf "%.0f" $value }} targets because the number of targets exceeded
|
||||
the configured target_limit.
|
||||
summary: Prometheus has dropped targets because some scrape configs have exceeded
|
||||
the targets limit.
|
||||
expr: |
|
||||
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerFailedReload
|
||||
annotations:
|
||||
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
||||
}}/{{ $labels.pod}}.
|
||||
expr: |
|
||||
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
|
||||
for: 10m
|
||||
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AlertmanagerMembersInconsistent
|
||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||
annotations:
|
||||
message: Alertmanager has not found all other members of the cluster.
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
||||
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}
|
||||
!= on (service) GROUP_LEFT()
|
||||
count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"})
|
||||
for: 5m
|
||||
min without (alertmanager) (
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
* 100
|
||||
> 3
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: general.rules
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: alertmanagers.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -644,6 +644,96 @@ spec:
|
|||
type: array
|
||||
type: object
|
||||
type: object
|
||||
alertmanagerConfigNamespaceSelector:
|
||||
description: Namespaces to be selected for AlertmanagerConfig discovery.
|
||||
If nil, only check own namespace.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
alertmanagerConfigSelector:
|
||||
description: AlertmanagerConfigs to be selected for to merge and configure
|
||||
Alertmanager with.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
baseImage:
|
||||
description: 'Base image that is used to deploy pods, without tag.
|
||||
Deprecated: use ''image'' instead'
|
||||
|
@ -653,6 +743,15 @@ spec:
|
|||
in cluster. Needs to be provided for non RFC1918 [1] (public) addresses.
|
||||
[1] RFC1918: https://tools.ietf.org/html/rfc1918'
|
||||
type: string
|
||||
clusterGossipInterval:
|
||||
description: Interval between gossip attempts.
|
||||
type: string
|
||||
clusterPeerTimeout:
|
||||
description: Timeout for cluster peering.
|
||||
type: string
|
||||
clusterPushpullInterval:
|
||||
description: Interval between pushpull attempts.
|
||||
type: string
|
||||
configMaps:
|
||||
description: ConfigMaps is a list of ConfigMaps in the same namespace
|
||||
as the Alertmanager object, which shall be mounted into the Alertmanager
|
||||
|
@ -667,9 +766,14 @@ spec:
|
|||
The secret is mounted into /etc/alertmanager/config.
|
||||
type: string
|
||||
containers:
|
||||
description: Containers allows injecting additional containers. This
|
||||
description: 'Containers allows injecting additional containers. This
|
||||
is meant to allow adding an authentication proxy to an Alertmanager
|
||||
pod.
|
||||
pod. Containers described here modify an operator generated container
|
||||
if they share the same name and modifications are done via a strategic
|
||||
merge patch. The current container names are: `alertmanager` and
|
||||
`config-reloader`. Overriding containers is entirely outside the
|
||||
scope of what the maintainers will support and by doing so, you
|
||||
accept that this behaviour may break at any time without notice.'
|
||||
items:
|
||||
description: A single application container that you want to run
|
||||
within a pod.
|
||||
|
@ -771,9 +875,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -1208,6 +1316,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -1215,6 +1324,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -1338,13 +1451,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -1854,9 +1975,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -2291,6 +2416,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -2298,6 +2424,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -2421,13 +2551,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -2812,7 +2950,7 @@ spec:
|
|||
description: Define which Nodes the Pods are scheduled on.
|
||||
type: object
|
||||
paused:
|
||||
description: If set to true all actions on the underlaying managed
|
||||
description: If set to true all actions on the underlying managed
|
||||
objects are not goint to be performed, except for delete actions.
|
||||
type: boolean
|
||||
podMetadata:
|
||||
|
@ -2861,13 +2999,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute resources
|
||||
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified, otherwise
|
||||
|
@ -3048,6 +3194,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for this
|
||||
EmptyDir volume. The size limit is also applicable for memory
|
||||
medium. The maximum usage on memory medium EmptyDir would
|
||||
|
@ -3055,7 +3204,8 @@ spec:
|
|||
and the sum of memory limits of all containers in a pod.
|
||||
The default is nil which means that the limit is undefined.
|
||||
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
volumeClaimTemplate:
|
||||
description: A PVC spec to be used by the Prometheus StatefulSets.
|
||||
|
@ -3151,13 +3301,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount
|
||||
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount
|
||||
of compute resources required. If Requests is omitted
|
||||
for a container, it defaults to Limits if that is
|
||||
|
@ -3237,7 +3395,11 @@ spec:
|
|||
type: array
|
||||
capacity:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: Represents the actual resources of the underlying
|
||||
volume.
|
||||
type: object
|
||||
|
@ -3332,6 +3494,100 @@ spec:
|
|||
type: string
|
||||
type: object
|
||||
type: array
|
||||
topologySpreadConstraints:
|
||||
description: If specified, the pod's topology spread constraints.
|
||||
items:
|
||||
description: TopologySpreadConstraint specifies how to spread matching
|
||||
pods among the given topology.
|
||||
properties:
|
||||
labelSelector:
|
||||
description: LabelSelector is used to find matching pods. Pods
|
||||
that match this label selector are counted to determine the
|
||||
number of pods in their corresponding topology domain.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector
|
||||
requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector
|
||||
that contains values, a key, and an operator that relates
|
||||
the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector
|
||||
applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship
|
||||
to a set of values. Valid operators are In, NotIn,
|
||||
Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values.
|
||||
If the operator is In or NotIn, the values array
|
||||
must be non-empty. If the operator is Exists or
|
||||
DoesNotExist, the values array must be empty. This
|
||||
array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs.
|
||||
A single {key,value} in the matchLabels map is equivalent
|
||||
to an element of matchExpressions, whose key field is
|
||||
"key", the operator is "In", and the values array contains
|
||||
only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
maxSkew:
|
||||
description: 'MaxSkew describes the degree to which pods may
|
||||
be unevenly distributed. It''s the maximum permitted difference
|
||||
between the number of matching pods in any two topology domains
|
||||
of a given topology type. For example, in a 3-zone cluster,
|
||||
MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
|
||||
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
|
||||
to become 1/1/1; scheduling it onto zone1(zone2) would make
|
||||
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
|
||||
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
|
||||
It''s a required field. Default value is 1 and 0 is not allowed.'
|
||||
format: int32
|
||||
type: integer
|
||||
topologyKey:
|
||||
description: TopologyKey is the key of node labels. Nodes that
|
||||
have a label with this key and identical values are considered
|
||||
to be in the same topology. We consider each <key, value>
|
||||
as a "bucket", and try to put balanced number of pods into
|
||||
each bucket. It's a required field.
|
||||
type: string
|
||||
whenUnsatisfiable:
|
||||
description: 'WhenUnsatisfiable indicates how to deal with a
|
||||
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
|
||||
(default) tells the scheduler not to schedule it - ScheduleAnyway
|
||||
tells the scheduler to still schedule it It''s considered
|
||||
as "Unsatisfiable" if and only if placing incoming pod on
|
||||
any topology violates "MaxSkew". For example, in a 3-zone
|
||||
cluster, MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
|
||||
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
|
||||
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
|
||||
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
|
||||
other words, the cluster can still be imbalanced, but scheduler
|
||||
won''t make it *more* imbalanced. It''s a required field.'
|
||||
type: string
|
||||
required:
|
||||
- maxSkew
|
||||
- topologyKey
|
||||
- whenUnsatisfiable
|
||||
type: object
|
||||
type: array
|
||||
version:
|
||||
description: Version the cluster should be on.
|
||||
type: string
|
||||
|
@ -3704,9 +3960,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -3729,6 +3989,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for
|
||||
this EmptyDir volume. The size limit is also applicable
|
||||
for memory medium. The maximum usage on memory medium
|
||||
|
@ -3736,7 +3999,8 @@ spec:
|
|||
specified here and the sum of memory limits of all containers
|
||||
in a pod. The default is nil which means that the limit
|
||||
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
fc:
|
||||
description: FC represents a Fibre Channel resource that is
|
||||
|
@ -4199,10 +4463,14 @@ spec:
|
|||
for volumes, optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format
|
||||
of the exposed resources, defaults
|
||||
to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to
|
||||
select'
|
||||
|
@ -4587,7 +4855,7 @@ spec:
|
|||
format: int32
|
||||
type: integer
|
||||
paused:
|
||||
description: Represents whether any actions on the underlaying managed
|
||||
description: Represents whether any actions on the underlying managed
|
||||
objects are being performed. Only delete actions will be performed.
|
||||
type: boolean
|
||||
replicas:
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: podmonitors.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -58,6 +58,69 @@ spec:
|
|||
description: PodMetricsEndpoint defines a scrapeable endpoint of
|
||||
a Kubernetes Pod serving Prometheus metrics.
|
||||
properties:
|
||||
basicAuth:
|
||||
description: 'BasicAuth allows an endpoint to authenticate over
|
||||
basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||
properties:
|
||||
password:
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the password for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
username:
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the username for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
bearerTokenSecret:
|
||||
description: Secret to mount to read bearer token for scraping
|
||||
targets. The secret needs to be in the same namespace as the
|
||||
pod monitor and accessible by the Prometheus Operator.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions
|
||||
with target labels.
|
||||
|
@ -191,6 +254,121 @@ spec:
|
|||
- type: string
|
||||
description: 'Deprecated: Use ''port'' instead.'
|
||||
x-kubernetes-int-or-string: true
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint.
|
||||
properties:
|
||||
ca:
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
cert:
|
||||
description: Struct containing the client cert file for
|
||||
the targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
insecureSkipVerify:
|
||||
description: Disable target certificate validation.
|
||||
type: boolean
|
||||
keySecret:
|
||||
description: Secret containing the client key file for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
serverName:
|
||||
description: Used to verify the hostname for the targets.
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
type: array
|
||||
podTargetLabels:
|
||||
|
@ -248,6 +426,11 @@ spec:
|
|||
are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
targetLimit:
|
||||
description: TargetLimit defines a limit on the number of scraped
|
||||
targets that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- podMetricsEndpoints
|
||||
- selector
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: probes.monitoring.coreos.com
|
||||
spec:
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: prometheuses.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -765,7 +765,7 @@ spec:
|
|||
description: TLS Config to use for alertmanager connection.
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for
|
||||
description: Struct containing the CA cert to use for
|
||||
the targets.
|
||||
properties:
|
||||
configMap:
|
||||
|
@ -972,7 +972,8 @@ spec:
|
|||
description: TLS Config to use for accessing apiserver.
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
|
@ -1123,11 +1124,10 @@ spec:
|
|||
the behavior of an operator generated container. Containers described
|
||||
here modify an operator generated container if they share the same
|
||||
name and modifications are done via a strategic merge patch. The
|
||||
current container names are: `prometheus`, `prometheus-config-reloader`,
|
||||
`rules-configmap-reloader`, and `thanos-sidecar`. Overriding containers
|
||||
is entirely outside the scope of what the maintainers will support
|
||||
and by doing so, you accept that this behaviour may break at any
|
||||
time without notice.'
|
||||
current container names are: `prometheus`, `config-reloader`, and
|
||||
`thanos-sidecar`. Overriding containers is entirely outside the
|
||||
scope of what the maintainers will support and by doing so, you
|
||||
accept that this behaviour may break at any time without notice.'
|
||||
items:
|
||||
description: A single application container that you want to run
|
||||
within a pod.
|
||||
|
@ -1229,9 +1229,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -1666,6 +1670,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -1673,6 +1678,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -1796,13 +1805,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -2196,6 +2213,15 @@ spec:
|
|||
value will be taken instead.
|
||||
format: int64
|
||||
type: integer
|
||||
enforcedTargetLimit:
|
||||
description: EnforcedTargetLimit defines a global limit on the number
|
||||
of scraped targets. This overrides any TargetLimit set per ServiceMonitor
|
||||
and/or PodMonitor. It is meant to be used by admins to enforce the
|
||||
TargetLimit to keep overall number of targets under the desired
|
||||
limit. Note that if TargetLimit is higher that value will be taken
|
||||
instead.
|
||||
format: int64
|
||||
type: integer
|
||||
evaluationInterval:
|
||||
description: Interval between consecutive evaluations.
|
||||
type: string
|
||||
|
@ -2347,9 +2373,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -2784,6 +2814,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -2791,6 +2822,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -2914,13 +2949,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -3670,7 +3713,7 @@ spec:
|
|||
description: TLS Config to use for remote read.
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
|
@ -3907,7 +3950,7 @@ spec:
|
|||
description: TLS Config to use for remote write.
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
|
@ -4088,7 +4131,9 @@ spec:
|
|||
will _not_ be added when value is set to empty string (`""`).
|
||||
type: string
|
||||
replicas:
|
||||
description: Number of instances to deploy for a Prometheus deployment.
|
||||
description: Number of replicas of each shard to deploy for a Prometheus
|
||||
deployment. Number of replicas multiplied by shards is the total
|
||||
number of Pods created.
|
||||
format: int32
|
||||
type: integer
|
||||
resources:
|
||||
|
@ -4096,13 +4141,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute resources
|
||||
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified, otherwise
|
||||
|
@ -4479,6 +4532,17 @@ spec:
|
|||
if SHA is set. Deprecated: use ''image'' instead. The image digest
|
||||
can be specified as part of the image URL.'
|
||||
type: string
|
||||
shards:
|
||||
description: 'EXPERIMENTAL: Number of shards to distribute targets
|
||||
onto. Number of replicas multiplied by shards is the total number
|
||||
of Pods created. Note that scaling down shards will not reshard
|
||||
data onto remaining instances, it must be manually moved. Increasing
|
||||
shards will not reshard data either but it will continue to be available
|
||||
from the same instances. To query globally use Thanos sidecar and
|
||||
Thanos querier or remote write data to a central location. Sharding
|
||||
is done on the content of the `__address__` target meta-label.'
|
||||
format: int32
|
||||
type: integer
|
||||
storage:
|
||||
description: Storage spec to specify how storage shall be used.
|
||||
properties:
|
||||
|
@ -4499,6 +4563,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for this
|
||||
EmptyDir volume. The size limit is also applicable for memory
|
||||
medium. The maximum usage on memory medium EmptyDir would
|
||||
|
@ -4506,7 +4573,8 @@ spec:
|
|||
and the sum of memory limits of all containers in a pod.
|
||||
The default is nil which means that the limit is undefined.
|
||||
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
volumeClaimTemplate:
|
||||
description: A PVC spec to be used by the Prometheus StatefulSets.
|
||||
|
@ -4602,13 +4670,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount
|
||||
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount
|
||||
of compute resources required. If Requests is omitted
|
||||
for a container, it defaults to Limits if that is
|
||||
|
@ -4688,7 +4764,11 @@ spec:
|
|||
type: array
|
||||
capacity:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: Represents the actual resources of the underlying
|
||||
volume.
|
||||
type: object
|
||||
|
@ -4761,7 +4841,8 @@ spec:
|
|||
Maps to the ''--grpc-server-tls-*'' CLI args.'
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
|
@ -4907,7 +4988,8 @@ spec:
|
|||
type: string
|
||||
objectStorageConfig:
|
||||
description: ObjectStorageConfig configures object storage in
|
||||
Thanos.
|
||||
Thanos. Alternative to ObjectStorageConfigFile, and lower order
|
||||
priority.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be
|
||||
|
@ -4924,6 +5006,11 @@ spec:
|
|||
required:
|
||||
- key
|
||||
type: object
|
||||
objectStorageConfigFile:
|
||||
description: ObjectStorageConfigFile specifies the path of the
|
||||
object storage configuration file. When used alongside
|
||||
ObjectStorageConfig, ObjectStorageConfigFile takes precedence.
|
||||
type: string
|
||||
resources:
|
||||
description: Resources defines the resource requirements for the
|
||||
Thanos sidecar. If not provided, no requests/limits will be
|
||||
|
@ -4931,13 +5018,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified, otherwise
|
||||
|
@ -5021,6 +5116,100 @@ spec:
|
|||
type: string
|
||||
type: object
|
||||
type: array
|
||||
topologySpreadConstraints:
|
||||
description: If specified, the pod's topology spread constraints.
|
||||
items:
|
||||
description: TopologySpreadConstraint specifies how to spread matching
|
||||
pods among the given topology.
|
||||
properties:
|
||||
labelSelector:
|
||||
description: LabelSelector is used to find matching pods. Pods
|
||||
that match this label selector are counted to determine the
|
||||
number of pods in their corresponding topology domain.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector
|
||||
requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector
|
||||
that contains values, a key, and an operator that relates
|
||||
the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector
|
||||
applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship
|
||||
to a set of values. Valid operators are In, NotIn,
|
||||
Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values.
|
||||
If the operator is In or NotIn, the values array
|
||||
must be non-empty. If the operator is Exists or
|
||||
DoesNotExist, the values array must be empty. This
|
||||
array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs.
|
||||
A single {key,value} in the matchLabels map is equivalent
|
||||
to an element of matchExpressions, whose key field is
|
||||
"key", the operator is "In", and the values array contains
|
||||
only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
maxSkew:
|
||||
description: 'MaxSkew describes the degree to which pods may
|
||||
be unevenly distributed. It''s the maximum permitted difference
|
||||
between the number of matching pods in any two topology domains
|
||||
of a given topology type. For example, in a 3-zone cluster,
|
||||
MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
|
||||
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
|
||||
to become 1/1/1; scheduling it onto zone1(zone2) would make
|
||||
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
|
||||
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
|
||||
It''s a required field. Default value is 1 and 0 is not allowed.'
|
||||
format: int32
|
||||
type: integer
|
||||
topologyKey:
|
||||
description: TopologyKey is the key of node labels. Nodes that
|
||||
have a label with this key and identical values are considered
|
||||
to be in the same topology. We consider each <key, value>
|
||||
as a "bucket", and try to put a balanced number of pods into
|
||||
each bucket. It's a required field.
|
||||
type: string
|
||||
whenUnsatisfiable:
|
||||
description: 'WhenUnsatisfiable indicates how to deal with a
|
||||
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
|
||||
(default) tells the scheduler not to schedule it - ScheduleAnyway
|
||||
tells the scheduler to still schedule it. It''s considered
|
||||
as "Unsatisfiable" if and only if placing incoming pod on
|
||||
any topology violates "MaxSkew". For example, in a 3-zone
|
||||
cluster, MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
|
||||
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
|
||||
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
|
||||
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
|
||||
other words, the cluster can still be imbalanced, but scheduler
|
||||
won''t make it *more* imbalanced. It''s a required field.'
|
||||
type: string
|
||||
required:
|
||||
- maxSkew
|
||||
- topologyKey
|
||||
- whenUnsatisfiable
|
||||
type: object
|
||||
type: array
|
||||
version:
|
||||
description: Version of Prometheus to be deployed.
|
||||
type: string
|
||||
|
@ -5393,9 +5582,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -5418,6 +5611,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for
|
||||
this EmptyDir volume. The size limit is also applicable
|
||||
for memory medium. The maximum usage on memory medium
|
||||
|
@ -5425,7 +5621,8 @@ spec:
|
|||
specified here and the sum of memory limits of all containers
|
||||
in a pod. The default is nil which means that the limit
|
||||
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
fc:
|
||||
description: FC represents a Fibre Channel resource that is
|
||||
|
@ -5888,10 +6085,14 @@ spec:
|
|||
for volumes, optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format
|
||||
of the exposed resources, defaults
|
||||
to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to
|
||||
select'
|
||||
|
@ -6268,6 +6469,14 @@ spec:
|
|||
description: Enable compression of the write-ahead log using Snappy.
|
||||
This flag is only available in versions of Prometheus >= 2.11.0.
|
||||
type: boolean
|
||||
web:
|
||||
description: WebSpec defines the web command line flags when starting
|
||||
Prometheus.
|
||||
properties:
|
||||
pageTitle:
|
||||
description: The prometheus web page title
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
status:
|
||||
description: 'Most recent observed status of the Prometheus cluster. Read-only.
|
||||
|
@ -6280,7 +6489,7 @@ spec:
|
|||
format: int32
|
||||
type: integer
|
||||
paused:
|
||||
description: Represents whether any actions on the underlaying managed
|
||||
description: Represents whether any actions on the underlying managed
|
||||
objects are being performed. Only delete actions will be performed.
|
||||
type: boolean
|
||||
replicas:
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: prometheusrules.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -17,7 +17,8 @@ spec:
|
|||
- name: v1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: PrometheusRule defines alerting rules for a Prometheus instance
|
||||
description: PrometheusRule defines recording and alerting rules for a Prometheus
|
||||
instance
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: servicemonitors.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -246,7 +246,7 @@ spec:
|
|||
description: TLS configuration to use when scraping the endpoint
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the
|
||||
description: Struct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
|
@ -449,6 +449,11 @@ spec:
|
|||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLimit:
|
||||
description: TargetLimit defines a limit on the number of scraped
|
||||
targets that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- endpoints
|
||||
- selector
|
||||
|
|
|
@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
|
|||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: thanosrulers.monitoring.coreos.com
|
||||
spec:
|
||||
|
@ -672,7 +672,7 @@ spec:
|
|||
the behavior of an operator generated container. Containers described
|
||||
here modify an operator generated container if they share the same
|
||||
name and modifications are done via a strategic merge patch. The
|
||||
current container names are: `thanos-ruler` and `rules-configmap-reloader`.
|
||||
current container names are: `thanos-ruler` and `config-reloader`.
|
||||
Overriding containers is entirely outside the scope of what the
|
||||
maintainers will support and by doing so, you accept that this behaviour
|
||||
may break at any time without notice.'
|
||||
|
@ -777,9 +777,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -1214,6 +1218,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -1221,6 +1226,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -1344,13 +1353,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -1738,7 +1755,7 @@ spec:
|
|||
the ''--grpc-server-tls-*'' CLI args.'
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
description: Struct containing the CA cert to use for the targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
|
@ -1979,9 +1996,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -2416,6 +2437,7 @@ spec:
|
|||
be referred to by services.
|
||||
type: string
|
||||
protocol:
|
||||
default: TCP
|
||||
description: Protocol for port. Must be UDP, TCP, or SCTP.
|
||||
Defaults to "TCP".
|
||||
type: string
|
||||
|
@ -2423,6 +2445,10 @@ spec:
|
|||
- containerPort
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- containerPort
|
||||
- protocol
|
||||
x-kubernetes-list-type: map
|
||||
readinessProbe:
|
||||
description: 'Periodic probe of container service readiness.
|
||||
Container will be removed from service endpoints if the probe
|
||||
|
@ -2546,13 +2572,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute
|
||||
resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified,
|
||||
|
@ -2944,6 +2978,7 @@ spec:
|
|||
type: object
|
||||
objectStorageConfig:
|
||||
description: ObjectStorageConfig configures object storage in Thanos.
|
||||
Alternative to ObjectStorageConfigFile, and lower order priority.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a
|
||||
|
@ -2959,6 +2994,11 @@ spec:
|
|||
required:
|
||||
- key
|
||||
type: object
|
||||
objectStorageConfigFile:
|
||||
description: ObjectStorageConfigFile specifies the path of the object
|
||||
storage configuration file. When used alongside with ObjectStorageConfig,
|
||||
ObjectStorageConfigFile takes precedence.
|
||||
type: string
|
||||
paused:
|
||||
description: When a ThanosRuler deployment is paused, no actions except
|
||||
for deletion will be performed on the underlying objects.
|
||||
|
@ -3055,13 +3095,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount of compute resources
|
||||
allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount of compute
|
||||
resources required. If Requests is omitted for a container,
|
||||
it defaults to Limits if that is explicitly specified, otherwise
|
||||
|
@ -3314,6 +3362,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for this
|
||||
EmptyDir volume. The size limit is also applicable for memory
|
||||
medium. The maximum usage on memory medium EmptyDir would
|
||||
|
@ -3321,7 +3372,8 @@ spec:
|
|||
and the sum of memory limits of all containers in a pod.
|
||||
The default is nil which means that the limit is undefined.
|
||||
More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
volumeClaimTemplate:
|
||||
description: A PVC spec to be used by the Prometheus StatefulSets.
|
||||
|
@ -3417,13 +3469,21 @@ spec:
|
|||
properties:
|
||||
limits:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Limits describes the maximum amount
|
||||
of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
|
||||
type: object
|
||||
requests:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: 'Requests describes the minimum amount
|
||||
of compute resources required. If Requests is omitted
|
||||
for a container, it defaults to Limits if that is
|
||||
|
@ -3503,7 +3563,11 @@ spec:
|
|||
type: array
|
||||
capacity:
|
||||
additionalProperties:
|
||||
type: string
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
description: Represents the actual resources of the underlying
|
||||
volume.
|
||||
type: object
|
||||
|
@ -3592,6 +3656,100 @@ spec:
|
|||
type: string
|
||||
type: object
|
||||
type: array
|
||||
topologySpreadConstraints:
|
||||
description: If specified, the pod's topology spread constraints.
|
||||
items:
|
||||
description: TopologySpreadConstraint specifies how to spread matching
|
||||
pods among the given topology.
|
||||
properties:
|
||||
labelSelector:
|
||||
description: LabelSelector is used to find matching pods. Pods
|
||||
that match this label selector are counted to determine the
|
||||
number of pods in their corresponding topology domain.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector
|
||||
requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector
|
||||
that contains values, a key, and an operator that relates
|
||||
the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector
|
||||
applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship
|
||||
to a set of values. Valid operators are In, NotIn,
|
||||
Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values.
|
||||
If the operator is In or NotIn, the values array
|
||||
must be non-empty. If the operator is Exists or
|
||||
DoesNotExist, the values array must be empty. This
|
||||
array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs.
|
||||
A single {key,value} in the matchLabels map is equivalent
|
||||
to an element of matchExpressions, whose key field is
|
||||
"key", the operator is "In", and the values array contains
|
||||
only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
maxSkew:
|
||||
description: 'MaxSkew describes the degree to which pods may
|
||||
be unevenly distributed. It''s the maximum permitted difference
|
||||
between the number of matching pods in any two topology domains
|
||||
of a given topology type. For example, in a 3-zone cluster,
|
||||
MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | |
|
||||
- if MaxSkew is 1, incoming pod can only be scheduled to zone3
|
||||
to become 1/1/1; scheduling it onto zone1(zone2) would make
|
||||
the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). -
|
||||
if MaxSkew is 2, incoming pod can be scheduled onto any zone.
|
||||
It''s a required field. Default value is 1 and 0 is not allowed.'
|
||||
format: int32
|
||||
type: integer
|
||||
topologyKey:
|
||||
description: TopologyKey is the key of node labels. Nodes that
|
||||
have a label with this key and identical values are considered
|
||||
to be in the same topology. We consider each <key, value>
|
||||
as a "bucket", and try to put balanced number of pods into
|
||||
each bucket. It's a required field.
|
||||
type: string
|
||||
whenUnsatisfiable:
|
||||
description: 'WhenUnsatisfiable indicates how to deal with a
|
||||
pod if it doesn''t satisfy the spread constraint. - DoNotSchedule
|
||||
(default) tells the scheduler not to schedule it - ScheduleAnyway
|
||||
tells the scheduler to still schedule it It''s considered
|
||||
as "Unsatisfiable" if and only if placing incoming pod on
|
||||
any topology violates "MaxSkew". For example, in a 3-zone
|
||||
cluster, MaxSkew is set to 1, and pods with the same labelSelector
|
||||
spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P |
|
||||
If WhenUnsatisfiable is set to DoNotSchedule, incoming pod
|
||||
can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2)
|
||||
as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In
|
||||
other words, the cluster can still be imbalanced, but scheduler
|
||||
won''t make it *more* imbalanced. It''s a required field.'
|
||||
type: string
|
||||
required:
|
||||
- maxSkew
|
||||
- topologyKey
|
||||
- whenUnsatisfiable
|
||||
type: object
|
||||
type: array
|
||||
tracingConfig:
|
||||
description: TracingConfig configures tracing in Thanos. This is an
|
||||
experimental feature, it may change in any upcoming release in a
|
||||
|
@ -3938,9 +4096,13 @@ spec:
|
|||
optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format of the
|
||||
exposed resources, defaults to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to select'
|
||||
type: string
|
||||
|
@ -3963,6 +4125,9 @@ spec:
|
|||
More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
|
||||
type: string
|
||||
sizeLimit:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: 'Total amount of local storage required for
|
||||
this EmptyDir volume. The size limit is also applicable
|
||||
for memory medium. The maximum usage on memory medium
|
||||
|
@ -3970,7 +4135,8 @@ spec:
|
|||
specified here and the sum of memory limits of all containers
|
||||
in a pod. The default is nil which means that the limit
|
||||
is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir'
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
type: object
|
||||
fc:
|
||||
description: FC represents a Fibre Channel resource that is
|
||||
|
@ -4433,10 +4599,14 @@ spec:
|
|||
for volumes, optional for env vars'
|
||||
type: string
|
||||
divisor:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Specifies the output format
|
||||
of the exposed resources, defaults
|
||||
to "1"
|
||||
type: string
|
||||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
|
||||
x-kubernetes-int-or-string: true
|
||||
resource:
|
||||
description: 'Required: resource to
|
||||
select'
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
@ -12,6 +12,7 @@ rules:
|
|||
resources:
|
||||
- alertmanagers
|
||||
- alertmanagers/finalizers
|
||||
- alertmanagerconfigs
|
||||
- prometheuses
|
||||
- prometheuses/finalizers
|
||||
- thanosrulers
|
||||
|
@ -68,6 +69,14 @@ rules:
|
|||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -18,15 +18,13 @@ spec:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --kubelet-service=kube-system/kubelet
|
||||
- --logtostderr=true
|
||||
- --config-reloader-image=jimmidyson/configmap-reload:v0.4.0
|
||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.1
|
||||
image: quay.io/prometheus-operator/prometheus-operator:v0.42.1
|
||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.44.1
|
||||
image: quay.io/prometheus-operator/prometheus-operator:v0.44.1
|
||||
name: prometheus-operator
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
|
@ -45,13 +43,15 @@ spec:
|
|||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8080/
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.6.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
securityContext:
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -4,6 +4,6 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
app.kubernetes.io/version: v0.44.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
github.com/prometheus/alertmanager/doc/alertmanager-mixin
|
301
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
301
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
|
@ -1,11 +1,9 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
grafana: '6.6.0',
|
||||
grafana: '7.3.4',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
@ -30,12 +28,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
version: 1,
|
||||
editable: false,
|
||||
}],
|
||||
// Forces pod restarts when dashboards are changed
|
||||
dashboardsChecksum: false,
|
||||
config: {},
|
||||
ldap: null,
|
||||
plugins: [],
|
||||
env: [],
|
||||
port: 3000,
|
||||
container: {
|
||||
resources: {
|
||||
requests: { cpu: '100m', memory: '100Mi' },
|
||||
limits: { cpu: '200m', memory: '200Mi' },
|
||||
},
|
||||
|
@ -45,36 +45,65 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
grafanaDashboards: {},
|
||||
grafana+: {
|
||||
[if std.length($._config.grafana.config) > 0 then 'config']:
|
||||
local secret = k.core.v1.secret;
|
||||
local grafanaConfig = { 'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))) } +
|
||||
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {};
|
||||
secret.new('grafana-config', grafanaConfig) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
{
|
||||
apiVersion: 'v1',
|
||||
kind: 'Secret',
|
||||
metadata: {
|
||||
name: 'grafana-config',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
type: 'Opaque',
|
||||
data: {
|
||||
'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))),
|
||||
} +
|
||||
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {},
|
||||
},
|
||||
dashboardDefinitions:
|
||||
local configMap = k.core.v1.configMap;
|
||||
[
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') }) +
|
||||
configMap.mixin.metadata.withNamespace($._config.namespace)
|
||||
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
apiVersion: 'v1',
|
||||
kind: 'ConfigMap',
|
||||
metadata: {
|
||||
name: dashboardName,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.dashboards)
|
||||
] + [
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') }) +
|
||||
configMap.mixin.metadata.withNamespace($._config.namespace)
|
||||
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
apiVersion: 'v1',
|
||||
kind: 'ConfigMap',
|
||||
metadata: {
|
||||
name: dashboardName,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
|
||||
}
|
||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
||||
] + if std.length($._config.grafana.rawDashboards) > 0 then
|
||||
[
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
configMap.new(dashboardName, { [name]: $._config.grafana.rawDashboards[name] }) +
|
||||
configMap.mixin.metadata.withNamespace($._config.namespace)
|
||||
] + (
|
||||
if std.length($._config.grafana.rawDashboards) > 0 then
|
||||
[
|
||||
|
||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
||||
] else [],
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
apiVersion: 'v1',
|
||||
kind: 'ConfigMap',
|
||||
metadata: {
|
||||
name: dashboardName,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
data: { [name]: $._config.grafana.rawDashboards[name] },
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
||||
]
|
||||
else
|
||||
[]
|
||||
),
|
||||
dashboardSources:
|
||||
local configMap = k.core.v1.configMap;
|
||||
local dashboardSources = {
|
||||
apiVersion: 1,
|
||||
providers:
|
||||
|
@ -106,59 +135,80 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
],
|
||||
};
|
||||
|
||||
configMap.new('grafana-dashboards', { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') }) +
|
||||
configMap.mixin.metadata.withNamespace($._config.namespace),
|
||||
{
|
||||
kind: 'ConfigMap',
|
||||
apiVersion: 'v1',
|
||||
metadata: {
|
||||
name: 'grafana-dashboards',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
|
||||
},
|
||||
dashboardDatasources:
|
||||
local secret = k.core.v1.secret;
|
||||
secret.new('grafana-datasources', { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
|
||||
apiVersion: 1,
|
||||
datasources: $._config.grafana.datasources,
|
||||
}, ' '))) }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
{
|
||||
apiVersion: 'v1',
|
||||
kind: 'Secret',
|
||||
metadata: {
|
||||
name: 'grafana-datasources',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
type: 'Opaque',
|
||||
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
|
||||
apiVersion: 1,
|
||||
datasources: $._config.grafana.datasources,
|
||||
}, ' '))) },
|
||||
},
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local grafanaServiceNodePort = servicePort.newNamed('http', $._config.grafana.port, 'http');
|
||||
|
||||
service.new('grafana', $.grafana.deployment.spec.selector.matchLabels, grafanaServiceNodePort) +
|
||||
service.mixin.metadata.withLabels({ app: 'grafana' }) +
|
||||
service.mixin.metadata.withNamespace($._config.namespace),
|
||||
{
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'grafana',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
app: 'grafana',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
selector: $.grafana.deployment.spec.selector.matchLabels,
|
||||
type: 'NodePort',
|
||||
ports: [
|
||||
{ name: 'http', targetPort: 'http', port: 3000 },
|
||||
],
|
||||
},
|
||||
},
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
serviceAccount.new('grafana') +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
{
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: 'grafana',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
},
|
||||
deployment:
|
||||
local deployment = k.apps.v1.deployment;
|
||||
local container = k.apps.v1.deployment.mixin.spec.template.spec.containersType;
|
||||
local volume = k.apps.v1.deployment.mixin.spec.template.spec.volumesType;
|
||||
local containerPort = container.portsType;
|
||||
local containerVolumeMount = container.volumeMountsType;
|
||||
local podSelector = deployment.mixin.spec.template.spec.selectorType;
|
||||
local env = container.envType;
|
||||
|
||||
local targetPort = $._config.grafana.port;
|
||||
local portName = 'http';
|
||||
local podLabels = { app: 'grafana' };
|
||||
|
||||
local configVolumeName = 'grafana-config';
|
||||
local configSecretName = 'grafana-config';
|
||||
local configVolume = volume.withName(configVolumeName) + volume.mixin.secret.withSecretName(configSecretName);
|
||||
local configVolumeMount = containerVolumeMount.new(configVolumeName, '/etc/grafana');
|
||||
local configVolume = { name: configVolumeName, secret: { secretName: configSecretName } };
|
||||
local configVolumeMount = { name: configVolumeName, mountPath: '/etc/grafana', readOnly: false };
|
||||
|
||||
local storageVolumeName = 'grafana-storage';
|
||||
local storageVolume = volume.fromEmptyDir(storageVolumeName);
|
||||
local storageVolumeMount = containerVolumeMount.new(storageVolumeName, '/var/lib/grafana');
|
||||
local storageVolume = { name: storageVolumeName, emptyDir: {} };
|
||||
local storageVolumeMount = { name: storageVolumeName, mountPath: '/var/lib/grafana', readOnly: false };
|
||||
|
||||
local datasourcesVolumeName = 'grafana-datasources';
|
||||
local datasourcesSecretName = 'grafana-datasources';
|
||||
local datasourcesVolume = volume.withName(datasourcesVolumeName) + volume.mixin.secret.withSecretName(datasourcesSecretName);
|
||||
local datasourcesVolumeMount = containerVolumeMount.new(datasourcesVolumeName, '/etc/grafana/provisioning/datasources');
|
||||
local datasourcesVolume = { name: datasourcesVolumeName, secret: { secretName: datasourcesSecretName } };
|
||||
local datasourcesVolumeMount = { name: datasourcesVolumeName, mountPath: '/etc/grafana/provisioning/datasources', readOnly: false };
|
||||
|
||||
local dashboardsVolumeName = 'grafana-dashboards';
|
||||
local dashboardsConfigMapName = 'grafana-dashboards';
|
||||
local dashboardsVolume = volume.withName(dashboardsVolumeName) + volume.mixin.configMap.withName(dashboardsConfigMapName);
|
||||
local dashboardsVolumeMount = containerVolumeMount.new(dashboardsVolumeName, '/etc/grafana/provisioning/dashboards');
|
||||
local dashboardsVolume = { name: dashboardsVolumeName, configMap: { name: dashboardsConfigMapName } };
|
||||
local dashboardsVolumeMount = { name: dashboardsVolumeName, mountPath: '/etc/grafana/provisioning/dashboards', readOnly: false };
|
||||
|
||||
local volumeMounts =
|
||||
[
|
||||
|
@ -167,23 +217,36 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
dashboardsVolumeMount,
|
||||
] +
|
||||
[
|
||||
local dashboardName = std.strReplace(name, '.json', '');
|
||||
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
|
||||
{
|
||||
local dashboardName = std.strReplace(name, '.json', ''),
|
||||
name: 'grafana-dashboard-' + dashboardName,
|
||||
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
|
||||
readOnly: false,
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.dashboards)
|
||||
] +
|
||||
[
|
||||
local dashboardName = std.strReplace(name, '.json', '');
|
||||
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/' + folder + '/' + dashboardName)
|
||||
{
|
||||
local dashboardName = std.strReplace(name, '.json', ''),
|
||||
name: 'grafana-dashboard-' + dashboardName,
|
||||
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
|
||||
readOnly: false,
|
||||
}
|
||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
||||
] +
|
||||
[
|
||||
local dashboardName = std.strReplace(name, '.json', '');
|
||||
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
|
||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
||||
] +
|
||||
{
|
||||
|
||||
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else [];
|
||||
local dashboardName = std.strReplace(name, '.json', ''),
|
||||
name: 'grafana-dashboard-' + dashboardName,
|
||||
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
|
||||
readOnly: false,
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
||||
] + (
|
||||
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []
|
||||
);
|
||||
|
||||
local volumes =
|
||||
[
|
||||
|
@ -192,52 +255,82 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
dashboardsVolume,
|
||||
] +
|
||||
[
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
volume.withName(dashboardName) +
|
||||
volume.mixin.configMap.withName(dashboardName)
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
name: dashboardName,
|
||||
configMap: { name: dashboardName },
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.dashboards)
|
||||
] +
|
||||
[
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
volume.withName(dashboardName) +
|
||||
volume.mixin.configMap.withName(dashboardName)
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
name: dashboardName,
|
||||
configMap: { name: dashboardName },
|
||||
}
|
||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
||||
] +
|
||||
[
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
|
||||
volume.withName(dashboardName) +
|
||||
volume.mixin.configMap.withName(dashboardName)
|
||||
{
|
||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
||||
name: dashboardName,
|
||||
configMap: { name: dashboardName },
|
||||
}
|
||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
||||
] +
|
||||
if std.length($._config.grafana.config) > 0 then [configVolume] else [];
|
||||
|
||||
local plugins = (if std.length($._config.grafana.plugins) == 0 then [] else [env.new('GF_INSTALL_PLUGINS', std.join(',', $._config.grafana.plugins))]);
|
||||
local plugins = (
|
||||
if std.length($._config.grafana.plugins) == 0 then
|
||||
[]
|
||||
else
|
||||
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', $._config.grafana.plugins) }]
|
||||
);
|
||||
|
||||
local c = [
|
||||
container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) +
|
||||
container.withEnv($._config.grafana.env + plugins) +
|
||||
container.withVolumeMounts(volumeMounts) +
|
||||
container.withPorts(containerPort.newNamed(targetPort, portName)) +
|
||||
container.mixin.readinessProbe.httpGet.withPath('/api/health') +
|
||||
container.mixin.readinessProbe.httpGet.withPort(portName) +
|
||||
container.mixin.resources.withRequests($._config.grafana.container.requests) +
|
||||
container.mixin.resources.withLimits($._config.grafana.container.limits),
|
||||
] + $._config.grafana.containers;
|
||||
local c = [{
|
||||
name: 'grafana',
|
||||
image: $._config.imageRepos.grafana + ':' + $._config.versions.grafana,
|
||||
env: $._config.grafana.env + plugins,
|
||||
volumeMounts: volumeMounts,
|
||||
ports: [{ name: portName, containerPort: targetPort }],
|
||||
readinessProbe: {
|
||||
httpGet: { path: '/api/health', port: portName },
|
||||
},
|
||||
resources: $._config.grafana.resources,
|
||||
}] + $._config.grafana.containers;
|
||||
|
||||
deployment.new('grafana', 1, c, podLabels) +
|
||||
deployment.mixin.metadata.withNamespace($._config.namespace) +
|
||||
deployment.mixin.metadata.withLabels(podLabels) +
|
||||
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
|
||||
deployment.mixin.spec.template.metadata.withAnnotations({
|
||||
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
|
||||
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
|
||||
}) +
|
||||
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
|
||||
deployment.mixin.spec.template.spec.withVolumes(volumes) +
|
||||
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
|
||||
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
|
||||
deployment.mixin.spec.template.spec.securityContext.withFsGroup(65534) +
|
||||
deployment.mixin.spec.template.spec.withServiceAccountName('grafana'),
|
||||
{
|
||||
apiVersion: 'apps/v1',
|
||||
kind: 'Deployment',
|
||||
metadata: {
|
||||
name: 'grafana',
|
||||
namespace: $._config.namespace,
|
||||
labels: podLabels,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
selector: {
|
||||
matchLabels: podLabels,
|
||||
},
|
||||
template: {
|
||||
metadata: {
|
||||
labels: podLabels,
|
||||
annotations: {
|
||||
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
|
||||
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
|
||||
[if $._config.grafana.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString($.grafana.dashboardDefinitions)),
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: c,
|
||||
volumes: volumes,
|
||||
serviceAccountName: $.grafana.serviceAccount.metadata.name,
|
||||
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
|
||||
securityContext: { fsGroup: 65534, runAsNonRoot: true, runAsUser: 65534 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
10
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/jsonnetfile.json
generated
vendored
10
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/jsonnetfile.json
generated
vendored
|
@ -9,16 +9,6 @@
|
|||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "ksonnet"
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
|
|
2
monitoring/vendor/github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet
generated
vendored
2
monitoring/vendor/github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet
generated
vendored
|
@ -184,7 +184,7 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
2
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashboard.libsonnet
generated
vendored
2
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashboard.libsonnet
generated
vendored
|
@ -27,6 +27,7 @@ local timepickerlib = import 'timepicker.libsonnet';
|
|||
* @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
|
||||
* @method addPanels(panels) Appends an array of panels
|
||||
* @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
|
||||
* @method addLinks(dashboardLink) Adds an array of [dashboard links](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
|
||||
* @method addRequired(type, name, id, version)
|
||||
* @method addInput(name, label, type, pluginId, pluginName, description, value)
|
||||
* @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`.
|
||||
|
@ -149,6 +150,7 @@ local timepickerlib = import 'timepicker.libsonnet';
|
|||
addLink(link):: self {
|
||||
links+: [link],
|
||||
},
|
||||
addLinks(dashboardLinks):: std.foldl(function(d, t) d.addLink(t), dashboardLinks, self),
|
||||
required:: [],
|
||||
__requires: it.required,
|
||||
addRequired(type, name, id, version):: self {
|
||||
|
|
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/singlestat.libsonnet
generated
vendored
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/singlestat.libsonnet
generated
vendored
|
@ -42,6 +42,7 @@
|
|||
* @param links (optional)
|
||||
* @param tableColumn (default `''`)
|
||||
* @param maxPerRow (optional)
|
||||
* @param maxDataPoints (default `100`)
|
||||
*
|
||||
* @method addTarget(target) Adds a target object.
|
||||
*/
|
||||
|
@ -100,6 +101,7 @@
|
|||
links=[],
|
||||
tableColumn='',
|
||||
maxPerRow=null,
|
||||
maxDataPoints=100,
|
||||
)::
|
||||
{
|
||||
[if height != null then 'height']: height,
|
||||
|
@ -116,7 +118,7 @@
|
|||
],
|
||||
links: links,
|
||||
[if decimals != null then 'decimals']: decimals,
|
||||
maxDataPoints: 100,
|
||||
maxDataPoints: maxDataPoints,
|
||||
interval: interval,
|
||||
cacheTimeout: null,
|
||||
format: format,
|
||||
|
|
12
monitoring/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
generated
vendored
12
monitoring/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
generated
vendored
|
@ -367,7 +367,7 @@
|
|||
},
|
||||
],
|
||||
|
||||
qpsPanel(selector):: {
|
||||
qpsPanel(selector, statusLabelName='status_code'):: {
|
||||
aliasColors: {
|
||||
'1xx': '#EAB839',
|
||||
'2xx': '#7EB26D',
|
||||
|
@ -379,9 +379,13 @@
|
|||
},
|
||||
targets: [
|
||||
{
|
||||
expr: 'sum by (status) (label_replace(label_replace(rate(' + selector + '[$__interval]),'
|
||||
+ ' "status", "${1}xx", "status_code", "([0-9]).."),'
|
||||
+ ' "status", "${1}", "status_code", "([a-z]+)"))',
|
||||
expr:
|
||||
|||
|
||||
sum by (status) (
|
||||
label_replace(label_replace(rate(%s[$__interval]),
|
||||
"status", "${1}xx", "%s", "([0-9]).."),
|
||||
"status", "${1}", "%s", "([a-z]+)"))
|
||||
||| % [selector, statusLabelName, statusLabelName],
|
||||
format: 'time_series',
|
||||
intervalFactor: 2,
|
||||
legendFormat: '{{status}}',
|
||||
|
|
|
@ -14,7 +14,8 @@ A set of Grafana dashboards and Prometheus alerts for Kubernetes.
|
|||
| release-0.3 | v1.17 and before | v2.11.0+ |
|
||||
| release-0.4 | v1.18 | v2.11.0+ |
|
||||
| release-0.5 | v1.19 | v2.11.0+ |
|
||||
| master | v1.19 | v2.11.0+ |
|
||||
| release-0.6 | v1.19+ | v2.11.0+ |
|
||||
| master | v1.19+ | v2.11.0+ |
|
||||
|
||||
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
|
||||
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis.
|
||||
|
@ -23,6 +24,8 @@ Some alerts now use Prometheus filters made available in Prometheus 2.11.0, whic
|
|||
|
||||
Warning: This compatibility matrix was initially created based on experience; we do not guarantee compatibility, and it may be updated based on new learnings.
|
||||
|
||||
Warning: By default the expressions will generate *Grafana 7.2+* compatible rules using the *$__rate_interval* variable for rate functions. If you need backward-compatible rules, please set *grafana72: false* in your *_config*.
|
||||
|
||||
## How to use
|
||||
|
||||
This mixin is designed to be vendored into the repo with your infrastructure config.
|
||||
|
|
|
@ -268,6 +268,14 @@
|
|||
!=
|
||||
kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
>
|
||||
kube_hpa_spec_min_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
|
||||
<
|
||||
kube_hpa_spec_max_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas[15m]) == 0
|
||||
||| % $._config,
|
||||
labels: {
|
||||
|
|
|
@ -82,7 +82,7 @@
|
|||
expr: |||
|
||||
sum(kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource="memory"})
|
||||
/
|
||||
sum(kube_node_status_allocatable_memory_bytes{%(nodeExporterSelector)s})
|
||||
sum(kube_node_status_allocatable_memory_bytes{%(kubeStateMetricsSelector)s})
|
||||
> %(namespaceOvercommitFactor)s
|
||||
||| % $._config,
|
||||
labels: {
|
||||
|
|
6
monitoring/vendor/github.com/kubernetes-monitoring/kubernetes-mixin/config.libsonnet
generated
vendored
6
monitoring/vendor/github.com/kubernetes-monitoring/kubernetes-mixin/config.libsonnet
generated
vendored
|
@ -58,6 +58,10 @@
|
|||
'kubelet.json': 'B1azll2ETo7DTiM8CysrH6g4s5NCgkOz6ZdU8Q0j',
|
||||
},
|
||||
|
||||
// Support for Grafana 7.2+ `$__rate_interval` instead of `$__interval`
|
||||
grafana72: true,
|
||||
grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
|
||||
|
||||
// Config for the Grafana dashboards in the Kubernetes Mixin
|
||||
grafanaK8s: {
|
||||
dashboardNamePrefix: 'Kubernetes / ',
|
||||
|
@ -83,7 +87,7 @@
|
|||
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
|
||||
|
||||
// This list of disk device names is referenced in various expressions.
|
||||
diskDevices: ['nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
|
||||
diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
|
||||
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ local singlestat = grafana.singlestat;
|
|||
format='percentunit',
|
||||
decimals=3,
|
||||
fill=10,
|
||||
description='How much error budget is left looking at our %.3f%% availability gurantees?' % $._config.SLOs.apiserver.target,
|
||||
description='How much error budget is left looking at our %.3f%% availability guarantees?' % $._config.SLOs.apiserver.target,
|
||||
)
|
||||
.addTarget(prometheus.target('100 * (apiserver_request:availability%dd{verb="all", %(clusterLabel)s="$cluster"} - %f)' % [$._config.SLOs.apiserver.days, $._config.clusterLabel, $._config.SLOs.apiserver.target], legendFormat='errorbudget'));
|
||||
|
||||
|
|
|
@ -26,7 +26,8 @@ local singlestat = grafana.singlestat;
|
|||
span=2,
|
||||
valueName='min',
|
||||
)
|
||||
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
|
||||
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
|
||||
.addTarget(prometheus.target('sum(kubelet_running_pods{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_pod_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
|
||||
|
||||
local runningContainerCount =
|
||||
singlestat.new(
|
||||
|
@ -35,7 +36,8 @@ local singlestat = grafana.singlestat;
|
|||
span=2,
|
||||
valueName='min',
|
||||
)
|
||||
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
|
||||
// TODO: The second query selected by the OR operator is for backward compatibility with kubernetes < 1.19, so this can be restored to a single query once 1.23 is out
|
||||
.addTarget(prometheus.target('sum(kubelet_running_containers{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"}) OR sum(kubelet_running_container_count{%(clusterLabel)s="$cluster", %(kubeletSelector)s, instance=~"$instance"})' % $._config, legendFormat='{{instance}}'));
|
||||
|
||||
local actualVolumeCount =
|
||||
singlestat.new(
|
||||
|
|
|
@ -334,6 +334,14 @@ local singlestat = grafana.singlestat;
|
|||
title='Errors',
|
||||
collapse=true,
|
||||
);
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1
|
||||
);
|
||||
|
||||
dashboard.new(
|
||||
title='%(dashboardNamePrefix)sNetworking / Cluster' % $._config.grafanaK8s,
|
||||
|
@ -366,17 +374,18 @@ local singlestat = grafana.singlestat;
|
|||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(clusterTemplate)
|
||||
.addPanel(
|
||||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Received',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 1 }
|
||||
)
|
||||
.addPanel(
|
||||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Transmitted',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 1 }
|
||||
)
|
||||
|
@ -384,14 +393,14 @@ local singlestat = grafana.singlestat;
|
|||
newTablePanel(
|
||||
tableTitle='Current Status',
|
||||
colQueries=[
|
||||
'sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
'sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
'sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
]
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 10 }
|
||||
|
@ -401,14 +410,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Received',
|
||||
graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 11 }
|
||||
)
|
||||
.addPanel(
|
||||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Transmitted',
|
||||
graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 11 }
|
||||
),
|
||||
|
@ -420,14 +429,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Receive Bandwidth',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 12 }
|
||||
)
|
||||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Transmit Bandwidth',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 21 }
|
||||
)
|
||||
|
@ -436,7 +445,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 31 }
|
||||
|
@ -444,7 +453,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 40 }
|
||||
|
@ -456,7 +465,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets Dropped',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 50 }
|
||||
|
@ -464,7 +473,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets Dropped',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~".+"}[$interval:$resolution])) by (namespace))',
|
||||
graphQuery='sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~".+"}[$interval:$resolution])) by (namespace))' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 59 }
|
||||
|
@ -472,7 +481,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of TCP Retransmits out of all sent segments',
|
||||
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))',
|
||||
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
|
||||
graphFormat='percentunit',
|
||||
legendFormat='{{instance}}'
|
||||
) + { links: [
|
||||
|
@ -486,7 +495,7 @@ local singlestat = grafana.singlestat;
|
|||
).addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of TCP SYN Retransmits out of all retransmits',
|
||||
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))',
|
||||
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{%(clusterLabel)s="$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{%(clusterLabel)s="$cluster"}[$interval:$resolution])) by (instance))' % $._config,
|
||||
graphFormat='percentunit',
|
||||
legendFormat='{{instance}}'
|
||||
) + { links: [
|
||||
|
|
|
@ -227,12 +227,20 @@ local singlestat = grafana.singlestat;
|
|||
targets: targets,
|
||||
};
|
||||
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1
|
||||
);
|
||||
|
||||
local namespaceTemplate =
|
||||
template.new(
|
||||
name='namespace',
|
||||
datasource='$datasource',
|
||||
query='label_values(container_network_receive_packets_total, namespace)',
|
||||
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
allValues='.+',
|
||||
current='kube-system',
|
||||
hide='',
|
||||
|
@ -243,7 +251,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(container_network_receive_packets_total, namespace)',
|
||||
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -362,6 +370,7 @@ local singlestat = grafana.singlestat;
|
|||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(clusterTemplate)
|
||||
.addTemplate(namespaceTemplate)
|
||||
.addTemplate(resolutionTemplate)
|
||||
.addTemplate(intervalTemplate)
|
||||
|
@ -370,14 +379,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGaugePanel(
|
||||
gaugeTitle='Current Rate of Bytes Received',
|
||||
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))'
|
||||
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 1 }
|
||||
)
|
||||
.addPanel(
|
||||
newGaugePanel(
|
||||
gaugeTitle='Current Rate of Bytes Transmitted',
|
||||
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution]))'
|
||||
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution]))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 1 }
|
||||
)
|
||||
|
@ -385,12 +394,12 @@ local singlestat = grafana.singlestat;
|
|||
newTablePanel(
|
||||
tableTitle='Current Status',
|
||||
colQueries=[
|
||||
'sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
]
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 10 }
|
||||
|
@ -399,14 +408,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Receive Bandwidth',
|
||||
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)'
|
||||
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 20 }
|
||||
)
|
||||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Transmit Bandwidth',
|
||||
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)'
|
||||
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 20 }
|
||||
)
|
||||
|
@ -415,7 +424,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets',
|
||||
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 0, y: 30 }
|
||||
|
@ -423,7 +432,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 12, y: 30 }
|
||||
|
@ -435,7 +444,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets Dropped',
|
||||
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 0, y: 40 }
|
||||
|
@ -443,7 +452,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets Dropped',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 12, y: 40 }
|
||||
|
|
|
@ -231,11 +231,20 @@ local singlestat = grafana.singlestat;
|
|||
targets: targets,
|
||||
};
|
||||
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1
|
||||
);
|
||||
|
||||
local namespaceTemplate =
|
||||
template.new(
|
||||
name='namespace',
|
||||
datasource='$datasource',
|
||||
query='label_values(container_network_receive_packets_total, namespace)',
|
||||
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
current='kube-system',
|
||||
hide='',
|
||||
refresh=1,
|
||||
|
@ -245,7 +254,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(container_network_receive_packets_total, namespace)',
|
||||
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -253,7 +262,7 @@ local singlestat = grafana.singlestat;
|
|||
template.new(
|
||||
name='type',
|
||||
datasource='$datasource',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
|
||||
current='deployment',
|
||||
hide='',
|
||||
refresh=1,
|
||||
|
@ -263,7 +272,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+"}, workload_type)',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+"}, workload_type)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -390,6 +399,7 @@ local singlestat = grafana.singlestat;
|
|||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(clusterTemplate)
|
||||
.addTemplate(namespaceTemplate)
|
||||
.addTemplate(typeTemplate)
|
||||
.addTemplate(resolutionTemplate)
|
||||
|
@ -400,10 +410,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Received',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
legendFormat='{{ workload }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 1 }
|
||||
|
@ -412,10 +422,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Transmitted',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
legendFormat='{{ workload }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 1 }
|
||||
|
@ -425,45 +435,45 @@ local singlestat = grafana.singlestat;
|
|||
tableTitle='Current Status',
|
||||
colQueries=[
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
]
|
||||
),
|
||||
gridPos={ h: 9, w: 24, x: 0, y: 10 }
|
||||
|
@ -474,10 +484,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Received',
|
||||
graphQuery=|||
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
legendFormat='{{ workload }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 20 }
|
||||
|
@ -486,10 +496,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Transmitted',
|
||||
graphQuery=|||
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
legendFormat='{{ workload }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 20 }
|
||||
|
@ -503,10 +513,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Receive Bandwidth',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 38 }
|
||||
)
|
||||
|
@ -514,10 +524,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Transmit Bandwidth',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 38 }
|
||||
)
|
||||
|
@ -527,10 +537,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 40 }
|
||||
|
@ -539,10 +549,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 40 }
|
||||
|
@ -555,10 +565,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets Dropped',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 41 }
|
||||
|
@ -567,10 +577,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets Dropped',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 41 }
|
||||
|
|
|
@ -106,11 +106,21 @@ local singlestat = grafana.singlestat;
|
|||
},
|
||||
};
|
||||
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1
|
||||
);
|
||||
|
||||
|
||||
local namespaceTemplate =
|
||||
template.new(
|
||||
name='namespace',
|
||||
datasource='$datasource',
|
||||
query='label_values(container_network_receive_packets_total, namespace)',
|
||||
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
allValues='.+',
|
||||
current='kube-system',
|
||||
hide='',
|
||||
|
@ -121,7 +131,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(container_network_receive_packets_total, namespace)',
|
||||
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -129,7 +139,7 @@ local singlestat = grafana.singlestat;
|
|||
template.new(
|
||||
name='pod',
|
||||
datasource='$datasource',
|
||||
query='label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)',
|
||||
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
|
||||
allValues='.+',
|
||||
current='',
|
||||
hide='',
|
||||
|
@ -140,7 +150,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(container_network_receive_packets_total{namespace=~"$namespace"}, pod)',
|
||||
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, pod)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -259,6 +269,7 @@ local singlestat = grafana.singlestat;
|
|||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(clusterTemplate)
|
||||
.addTemplate(namespaceTemplate)
|
||||
.addTemplate(podTemplate)
|
||||
.addTemplate(resolutionTemplate)
|
||||
|
@ -268,14 +279,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGaugePanel(
|
||||
gaugeTitle='Current Rate of Bytes Received',
|
||||
gaugeQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))'
|
||||
gaugeQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 1 }
|
||||
)
|
||||
.addPanel(
|
||||
newGaugePanel(
|
||||
gaugeTitle='Current Rate of Bytes Transmitted',
|
||||
gaugeQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))'
|
||||
gaugeQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution]))' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 1 }
|
||||
)
|
||||
|
@ -283,14 +294,14 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Receive Bandwidth',
|
||||
graphQuery='sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)'
|
||||
graphQuery='sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 11 }
|
||||
)
|
||||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Transmit Bandwidth',
|
||||
graphQuery='sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)'
|
||||
graphQuery='sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 11 }
|
||||
)
|
||||
|
@ -299,7 +310,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets',
|
||||
graphQuery='sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 0, y: 21 }
|
||||
|
@ -307,7 +318,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 12, y: 21 }
|
||||
|
@ -319,7 +330,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets Dropped',
|
||||
graphQuery='sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 0, y: 32 }
|
||||
|
@ -327,7 +338,7 @@ local singlestat = grafana.singlestat;
|
|||
.addPanel(
|
||||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets Dropped',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)',
|
||||
graphQuery='sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace", pod=~"$pod"}[$interval:$resolution])) by (pod)' % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 10, w: 12, x: 12, y: 32 }
|
||||
|
|
|
@ -96,11 +96,20 @@ local singlestat = grafana.singlestat;
|
|||
},
|
||||
};
|
||||
|
||||
local clusterTemplate =
|
||||
template.new(
|
||||
name='cluster',
|
||||
datasource='$datasource',
|
||||
query='label_values(kube_pod_info, %s)' % $._config.clusterLabel,
|
||||
hide=if $._config.showMultiCluster then '' else '2',
|
||||
refresh=1
|
||||
);
|
||||
|
||||
local namespaceTemplate =
|
||||
template.new(
|
||||
name='namespace',
|
||||
datasource='$datasource',
|
||||
query='label_values(container_network_receive_packets_total, namespace)',
|
||||
query='label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
allValues='.+',
|
||||
current='kube-system',
|
||||
hide='',
|
||||
|
@ -111,7 +120,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(container_network_receive_packets_total, namespace)',
|
||||
definition: 'label_values(container_network_receive_packets_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -119,7 +128,7 @@ local singlestat = grafana.singlestat;
|
|||
template.new(
|
||||
name='workload',
|
||||
datasource='$datasource',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
|
||||
current='',
|
||||
hide='',
|
||||
refresh=1,
|
||||
|
@ -129,7 +138,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace"}, workload)',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace"}, workload)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -137,7 +146,7 @@ local singlestat = grafana.singlestat;
|
|||
template.new(
|
||||
name='type',
|
||||
datasource='$datasource',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)',
|
||||
query='label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
|
||||
current='deployment',
|
||||
hide='',
|
||||
refresh=1,
|
||||
|
@ -147,7 +156,7 @@ local singlestat = grafana.singlestat;
|
|||
auto: false,
|
||||
auto_count: 30,
|
||||
auto_min: '10s',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload"}, workload_type)',
|
||||
definition: 'label_values(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload"}, workload_type)' % $._config,
|
||||
skipUrlSync: false,
|
||||
};
|
||||
|
||||
|
@ -274,6 +283,7 @@ local singlestat = grafana.singlestat;
|
|||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplate(clusterTemplate)
|
||||
.addTemplate(namespaceTemplate)
|
||||
.addTemplate(workloadTemplate)
|
||||
.addTemplate(typeTemplate)
|
||||
|
@ -285,10 +295,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Received',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
legendFormat='{{ pod }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 1 }
|
||||
|
@ -297,10 +307,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Current Rate of Bytes Transmitted',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
legendFormat='{{ pod }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 1 }
|
||||
|
@ -311,10 +321,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Received',
|
||||
graphQuery=|||
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
legendFormat='{{ pod }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 11 }
|
||||
|
@ -323,10 +333,10 @@ local singlestat = grafana.singlestat;
|
|||
newBarplotPanel(
|
||||
graphTitle='Average Rate of Bytes Transmitted',
|
||||
graphQuery=|||
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
legendFormat='{{ pod }}',
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 11 }
|
||||
|
@ -340,10 +350,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Receive Bandwidth',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 12 }
|
||||
)
|
||||
|
@ -351,10 +361,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Transmit Bandwidth',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 12 }
|
||||
)
|
||||
|
@ -364,10 +374,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 22 }
|
||||
|
@ -376,10 +386,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 22 }
|
||||
|
@ -392,10 +402,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Received Packets Dropped',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 0, y: 23 }
|
||||
|
@ -404,10 +414,10 @@ local singlestat = grafana.singlestat;
|
|||
newGraphPanel(
|
||||
graphTitle='Rate of Transmitted Packets Dropped',
|
||||
graphQuery=|||
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace"}[$interval:$resolution])
|
||||
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster",namespace=~"$namespace"}[$interval:$resolution])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
|||,
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
graphFormat='pps'
|
||||
),
|
||||
gridPos={ h: 9, w: 12, x: 12, y: 23 }
|
||||
|
|
|
@ -44,12 +44,12 @@ local template = grafana.template;
|
|||
];
|
||||
|
||||
local networkColumns = [
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config,
|
||||
];
|
||||
|
||||
local networkTableStyles = {
|
||||
|
@ -96,7 +96,7 @@ local template = grafana.template;
|
|||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s]))' % $._config) +
|
||||
{ interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
.addPanel(
|
||||
|
@ -192,7 +192,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -201,7 +201,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -210,7 +210,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Namespace: Received') +
|
||||
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -219,7 +219,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Namespace: Transmitted') +
|
||||
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -228,7 +228,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -237,7 +237,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -246,7 +246,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -255,7 +255,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[$__interval])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config, '{{namespace}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
|
|
@ -24,7 +24,7 @@ local template = grafana.template;
|
|||
})
|
||||
.addPanel(
|
||||
g.panel('CPU Utilisation') +
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[$__interval]))' % $._config)
|
||||
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle"}[%(grafanaIntervalVar)s]))' % $._config)
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('CPU Requests Commitment') +
|
||||
|
|
|
@ -36,12 +36,12 @@ local template = grafana.template;
|
|||
};
|
||||
|
||||
local networkColumns = [
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config,
|
||||
];
|
||||
|
||||
local networkTableStyles = {
|
||||
|
@ -244,7 +244,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -253,7 +253,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -262,7 +262,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -271,7 +271,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -280,7 +280,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
@ -289,7 +289,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps') },
|
||||
)
|
||||
|
|
|
@ -213,7 +213,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
@ -222,7 +222,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
@ -231,7 +231,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
@ -240,7 +240,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
@ -249,7 +249,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
@ -258,7 +258,7 @@ local template = grafana.template;
|
|||
g.row('Network')
|
||||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
|
||||
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
|
||||
)
|
||||
|
|
|
@ -59,32 +59,32 @@ local template = grafana.template;
|
|||
|
||||
local networkColumns = [
|
||||
|||
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload_type="$type"}) by (workload))
|
||||
||| % $._config,
|
||||
|
@ -285,7 +285,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -298,7 +298,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -311,7 +311,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Workload: Received') +
|
||||
g.queryPanel(|||
|
||||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -324,7 +324,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Workload: Transmitted') +
|
||||
g.queryPanel(|||
|
||||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -337,7 +337,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -350,7 +350,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -363,7 +363,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
@ -376,7 +376,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
|
||||
||| % $._config, '{{workload}}') +
|
||||
|
|
|
@ -61,32 +61,32 @@ local template = grafana.template;
|
|||
|
||||
local networkColumns = [
|
||||
|||
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|||
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config,
|
||||
|
@ -227,7 +227,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Receive Bandwidth') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -240,7 +240,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Transmit Bandwidth') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -253,7 +253,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Pod: Received') +
|
||||
g.queryPanel(|||
|
||||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(avg(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -266,7 +266,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Average Container Bandwidth by Pod: Transmitted') +
|
||||
g.queryPanel(|||
|
||||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(avg(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -279,7 +279,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Received Packets') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -292,7 +292,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -305,7 +305,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Received Packets Dropped') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
@ -318,7 +318,7 @@ local template = grafana.template;
|
|||
.addPanel(
|
||||
g.panel('Rate of Transmitted Packets Dropped') +
|
||||
g.queryPanel(|||
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
|
||||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[%(grafanaIntervalVar)s])
|
||||
* on (namespace,pod)
|
||||
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
|
||||
||| % $._config, '{{pod}}') +
|
||||
|
|
|
@ -14,7 +14,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
|
|||
local cpuStat =
|
||||
numbersinglestat.new(
|
||||
'CPU',
|
||||
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
|
||||
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
|
||||
)
|
||||
.withSpanSize(4)
|
||||
.withPostfix('cores')
|
||||
|
@ -23,7 +23,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
|
|||
local memoryStat =
|
||||
numbersinglestat.new(
|
||||
'Memory',
|
||||
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
|
||||
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!="", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
|
||||
)
|
||||
.withSpanSize(4)
|
||||
.withPostfix('GB')
|
||||
|
@ -32,7 +32,7 @@ local numbersinglestat = promgrafonnet.numbersinglestat;
|
|||
local networkStat =
|
||||
numbersinglestat.new(
|
||||
'Network',
|
||||
'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
|
||||
'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
|
||||
)
|
||||
.withSpanSize(4)
|
||||
.withPostfix('Bps')
|
||||
|
|
12
monitoring/vendor/github.com/kubernetes-monitoring/kubernetes-mixin/rules/apps.libsonnet
generated
vendored
12
monitoring/vendor/github.com/kubernetes-monitoring/kubernetes-mixin/rules/apps.libsonnet
generated
vendored
|
@ -9,12 +9,6 @@
|
|||
{
|
||||
name: 'k8s.rules',
|
||||
rules: [
|
||||
{
|
||||
record: 'namespace:container_cpu_usage_seconds_total:sum_rate',
|
||||
expr: |||
|
||||
sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!="", container!="POD"}[5m])) by (namespace)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// Reduces cardinality of this timeseries by #cores, which makes it
|
||||
// more useable in dashboards. Also, allows us to do things like
|
||||
|
@ -64,12 +58,6 @@
|
|||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'namespace:container_memory_usage_bytes:sum',
|
||||
expr: |||
|
||||
sum(container_memory_usage_bytes{%(cadvisorSelector)s, image!="", container!="POD"}) by (namespace)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'namespace:kube_pod_container_resource_requests_memory_bytes:sum',
|
||||
expr: |||
|
||||
|
|
|
@ -116,15 +116,6 @@
|
|||
},
|
||||
}
|
||||
for verb in verbs
|
||||
] + [
|
||||
{
|
||||
record: 'cluster:apiserver_request_duration_seconds:mean5m',
|
||||
expr: |||
|
||||
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
|
||||
/
|
||||
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, %(podLabel)s)
|
||||
||| % ($._config),
|
||||
},
|
||||
] + [
|
||||
{
|
||||
record: 'cluster_quantile:apiserver_request_duration_seconds:histogram_quantile',
|
||||
|
|
|
@ -10,14 +10,6 @@
|
|||
{
|
||||
name: 'node.rules',
|
||||
rules: [
|
||||
{
|
||||
// Number of nodes in the cluster
|
||||
// SINCE 2018-02-08
|
||||
record: ':kube_pod_info_node_count:',
|
||||
expr: |||
|
||||
sum(min(kube_pod_info{node!=""}) by (%(clusterLabel)s, node))
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// This rule results in the tuples (node, namespace, instance) => 1.
|
||||
// It is used to calculate per-node metrics, given namespace & instance.
|
||||
|
|
|
@ -9,7 +9,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
commonLabels:: {
|
||||
'app.kubernetes.io/name': 'kube-state-metrics',
|
||||
'app.kubernetes.io/version': ksm.version,
|
||||
'app.kubernetes.io/version': 'v' + ksm.version,
|
||||
},
|
||||
|
||||
podLabels:: {
|
||||
|
@ -58,6 +58,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
'daemonsets',
|
||||
'deployments',
|
||||
'replicasets',
|
||||
'ingresses',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
|
||||
|
@ -134,14 +135,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
rulesType.withApiGroups(['networking.k8s.io']) +
|
||||
rulesType.withResources([
|
||||
'networkpolicies',
|
||||
'ingresses',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
|
||||
rulesType.new() +
|
||||
rulesType.withApiGroups(['coordination.k8s.io']) +
|
||||
rulesType.withResources([
|
||||
'leases',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
];
|
||||
|
@ -171,8 +164,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
container.mixin.readinessProbe.httpGet.withPath('/') +
|
||||
container.mixin.readinessProbe.httpGet.withPort(8081) +
|
||||
container.mixin.readinessProbe.withInitialDelaySeconds(5) +
|
||||
container.mixin.readinessProbe.withTimeoutSeconds(5) +
|
||||
container.mixin.securityContext.withRunAsUser(65534);
|
||||
container.mixin.readinessProbe.withTimeoutSeconds(5);
|
||||
|
||||
deployment.new(ksm.name, 1, c, ksm.commonLabels) +
|
||||
deployment.mixin.metadata.withNamespace(ksm.namespace) +
|
||||
|
@ -228,7 +220,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName(ksm.name) +
|
||||
roleBinding.mixin.metadata.withNamespace(ksm.namespace) +
|
||||
roleBinding.mixin.metadata.withLabels(ksm.commonLabels) +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName(ksm.name) +
|
||||
|
@ -245,7 +236,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
'--pod=$(POD_NAME)',
|
||||
'--pod-namespace=$(POD_NAMESPACE)',
|
||||
]) +
|
||||
container.mixin.securityContext.withRunAsUser(65534) +
|
||||
container.withEnv([
|
||||
containerEnv.new('POD_NAME') +
|
||||
containerEnv.mixin.valueFrom.fieldRef.withFieldPath('metadata.name'),
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
@ -42,30 +40,14 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
repeat_interval: '12h',
|
||||
receiver: 'Default',
|
||||
routes: [
|
||||
{
|
||||
receiver: 'Watchdog',
|
||||
match: {
|
||||
alertname: 'Watchdog',
|
||||
},
|
||||
},
|
||||
{
|
||||
receiver: 'Critical',
|
||||
match: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
{ receiver: 'Watchdog', match: { alertname: 'Watchdog' } },
|
||||
{ receiver: 'Critical', match: { severity: 'critical' } },
|
||||
],
|
||||
},
|
||||
receivers: [
|
||||
{
|
||||
name: 'Default',
|
||||
},
|
||||
{
|
||||
name: 'Watchdog',
|
||||
},
|
||||
{
|
||||
name: 'Critical',
|
||||
},
|
||||
{ name: 'Default' },
|
||||
{ name: 'Watchdog' },
|
||||
{ name: 'Critical' },
|
||||
],
|
||||
},
|
||||
replicas: 3,
|
||||
|
@ -73,84 +55,93 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
|
||||
alertmanager+:: {
|
||||
secret:
|
||||
local secret = k.core.v1.secret;
|
||||
secret: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Secret',
|
||||
type: 'Opaque',
|
||||
metadata: {
|
||||
name: 'alertmanager-' + $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
stringData: {
|
||||
'alertmanager.yaml': if std.type($._config.alertmanager.config) == 'object'
|
||||
then
|
||||
std.manifestYamlDoc($._config.alertmanager.config)
|
||||
else
|
||||
$._config.alertmanager.config,
|
||||
},
|
||||
},
|
||||
|
||||
if std.type($._config.alertmanager.config) == 'object' then
|
||||
secret.new('alertmanager-' + $._config.alertmanager.name, {})
|
||||
.withStringData({ 'alertmanager.yaml': std.manifestYamlDoc($._config.alertmanager.config) }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace)
|
||||
else
|
||||
secret.new('alertmanager-' + $._config.alertmanager.name, {})
|
||||
.withStringData({ 'alertmanager.yaml': $._config.alertmanager.config }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
serviceAccount: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: 'alertmanager-' + $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
},
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
service: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'alertmanager-' + $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
labels: { alertmanager: $._config.alertmanager.name },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'web', targetPort: 'web', port: 9093 },
|
||||
],
|
||||
selector: { app: 'alertmanager', alertmanager: $._config.alertmanager.name },
|
||||
sessionAffinity: 'ClientIP',
|
||||
},
|
||||
},
|
||||
|
||||
serviceAccount.new('alertmanager-' + $._config.alertmanager.name) +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local alertmanagerPort = servicePort.newNamed('web', 9093, 'web');
|
||||
|
||||
service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) +
|
||||
service.mixin.spec.withSessionAffinity('ClientIP') +
|
||||
service.mixin.metadata.withNamespace($._config.namespace) +
|
||||
service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }),
|
||||
|
||||
serviceMonitor:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'alertmanager',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'alertmanager',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: {
|
||||
alertmanager: $._config.alertmanager.name,
|
||||
},
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'web',
|
||||
interval: '30s',
|
||||
},
|
||||
],
|
||||
serviceMonitor: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'alertmanager',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'alertmanager',
|
||||
},
|
||||
},
|
||||
|
||||
alertmanager:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'Alertmanager',
|
||||
metadata: {
|
||||
name: $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: {
|
||||
alertmanager: $._config.alertmanager.name,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
replicas: $._config.alertmanager.replicas,
|
||||
version: $._config.versions.alertmanager,
|
||||
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager,
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
|
||||
securityContext: {
|
||||
runAsUser: 1000,
|
||||
runAsNonRoot: true,
|
||||
fsGroup: 2000,
|
||||
},
|
||||
endpoints: [
|
||||
{ port: 'web', interval: '30s' },
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
alertmanager: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'Alertmanager',
|
||||
metadata: {
|
||||
name: $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
alertmanager: $._config.alertmanager.name,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
replicas: $._config.alertmanager.replicas,
|
||||
version: $._config.versions.alertmanager,
|
||||
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager,
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
|
||||
securityContext: {
|
||||
runAsUser: 1000,
|
||||
runAsNonRoot: true,
|
||||
fsGroup: 2000,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'alertmanager.rules',
|
||||
rules: [
|
||||
{
|
||||
alert: 'AlertmanagerConfigInconsistent',
|
||||
annotations: {
|
||||
message: |||
|
||||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
|
||||
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
|
||||
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
|
||||
{{ end }}
|
||||
|||,
|
||||
},
|
||||
expr: |||
|
||||
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
|
||||
||| % $._config,
|
||||
'for': '5m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'AlertmanagerFailedReload',
|
||||
annotations: {
|
||||
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
|
||||
},
|
||||
expr: |||
|
||||
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'AlertmanagerMembersInconsistent',
|
||||
annotations: {
|
||||
message: 'Alertmanager has not found all other members of the cluster.',
|
||||
},
|
||||
expr: |||
|
||||
alertmanager_cluster_members{%(alertmanagerSelector)s}
|
||||
!= on (service) GROUP_LEFT()
|
||||
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
|
||||
||| % $._config,
|
||||
'for': '5m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
|
@ -1,3 +1,2 @@
|
|||
(import 'alertmanager.libsonnet') +
|
||||
(import 'general.libsonnet') +
|
||||
(import 'node.libsonnet')
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "release-0.42"
|
||||
"version": "release-0.44"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -37,16 +37,6 @@
|
|||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "ksonnet"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
|
@ -63,7 +53,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
"version": "release-1.9"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -90,8 +80,27 @@
|
|||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "release-2.20",
|
||||
"version": "release-2.23",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/alertmanager",
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/thanos-io/thanos",
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "release-0.17"
|
||||
}
|
||||
],
|
||||
"legacyImports": true
|
||||
|
|
|
@ -1,14 +1,7 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
versions+:: {
|
||||
clusterVerticalAutoscaler: "v0.8.1"
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
|
||||
},
|
||||
versions+:: { clusterVerticalAutoscaler: '0.8.1' },
|
||||
imageRepos+:: { clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64' },
|
||||
|
||||
kubeStateMetrics+:: {
|
||||
stepCPU: '1m',
|
||||
|
@ -16,103 +9,120 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
},
|
||||
ksmAutoscaler+:: {
|
||||
clusterRole:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local rulesType = clusterRole.rulesType;
|
||||
clusterRole: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: { name: 'ksm-autoscaler' },
|
||||
rules: [{
|
||||
apiGroups: [''],
|
||||
resources: ['nodes'],
|
||||
verbs: ['list', 'watch'],
|
||||
}],
|
||||
},
|
||||
|
||||
local rules = [
|
||||
rulesType.new() +
|
||||
rulesType.withApiGroups(['']) +
|
||||
rulesType.withResources([
|
||||
'nodes',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
];
|
||||
clusterRoleBinding: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: { name: 'ksm-autoscaler' },
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'ksm-autoscaler',
|
||||
},
|
||||
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }],
|
||||
},
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('ksm-autoscaler') +
|
||||
clusterRole.withRules(rules),
|
||||
roleBinding: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleBinding',
|
||||
metadata: {
|
||||
name: 'ksm-autoscaler',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'Role',
|
||||
name: 'ksm-autoscaler',
|
||||
},
|
||||
subjects: [{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }],
|
||||
},
|
||||
|
||||
clusterRoleBinding:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
role: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'Role',
|
||||
metadata: {
|
||||
name: 'ksm-autoscaler',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: ['extensions'],
|
||||
resources: ['deployments'],
|
||||
verbs: ['patch'],
|
||||
resourceNames: ['kube-state-metrics'],
|
||||
},
|
||||
{
|
||||
apiGroups: ['apps'],
|
||||
resources: ['deployments'],
|
||||
verbs: ['patch'],
|
||||
resourceNames: ['kube-state-metrics'],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]),
|
||||
serviceAccount: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: 'ksm-autoscaler',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
},
|
||||
|
||||
roleBinding:
|
||||
local roleBinding = k.rbac.v1.roleBinding;
|
||||
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
|
||||
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
|
||||
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
|
||||
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
|
||||
|
||||
role:
|
||||
local role = k.rbac.v1.role;
|
||||
local rulesType = role.rulesType;
|
||||
|
||||
local extensionsRule = rulesType.new() +
|
||||
rulesType.withApiGroups(['extensions']) +
|
||||
rulesType.withResources([
|
||||
'deployments',
|
||||
]) +
|
||||
rulesType.withVerbs(['patch']) +
|
||||
rulesType.withResourceNames(['kube-state-metrics']);
|
||||
|
||||
local appsRule = rulesType.new() +
|
||||
rulesType.withApiGroups(['apps']) +
|
||||
rulesType.withResources([
|
||||
'deployments',
|
||||
]) +
|
||||
rulesType.withVerbs(['patch']) +
|
||||
rulesType.withResourceNames(['kube-state-metrics']);
|
||||
|
||||
local rules = [extensionsRule, appsRule];
|
||||
|
||||
role.new() +
|
||||
role.mixin.metadata.withName('ksm-autoscaler') +
|
||||
role.mixin.metadata.withNamespace($._config.namespace) +
|
||||
role.withRules(rules),
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
|
||||
serviceAccount.new('ksm-autoscaler') +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
deployment:
|
||||
local deployment = k.apps.v1.deployment;
|
||||
local container = deployment.mixin.spec.template.spec.containersType;
|
||||
local podSelector = deployment.mixin.spec.template.spec.selectorType;
|
||||
local podLabels = { app: 'ksm-autoscaler' };
|
||||
|
||||
local kubeStateMetricsAutoscaler =
|
||||
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) +
|
||||
container.withArgs([
|
||||
local c = {
|
||||
name: 'ksm-autoscaler',
|
||||
image: $._config.imageRepos.clusterVerticalAutoscaler + ':v' + $._config.versions.clusterVerticalAutoscaler,
|
||||
args: [
|
||||
'/cpvpa',
|
||||
'--target=deployment/kube-state-metrics',
|
||||
'--namespace=' + $._config.namespace,
|
||||
'--logtostderr=true',
|
||||
'--poll-period-seconds=10',
|
||||
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}'
|
||||
]) +
|
||||
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'});
|
||||
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}',
|
||||
],
|
||||
resources: {
|
||||
requests: { cpu: '20m', memory: '10Mi' },
|
||||
},
|
||||
};
|
||||
|
||||
local c = [kubeStateMetricsAutoscaler];
|
||||
|
||||
deployment.new('ksm-autoscaler', 1, c, podLabels) +
|
||||
deployment.mixin.metadata.withNamespace($._config.namespace) +
|
||||
deployment.mixin.metadata.withLabels(podLabels) +
|
||||
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
|
||||
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
|
||||
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
|
||||
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
|
||||
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'),
|
||||
{
|
||||
apiVersion: 'apps/v1',
|
||||
kind: 'Deployment',
|
||||
metadata: {
|
||||
name: 'ksm-autoscaler',
|
||||
namespace: $._config.namespace,
|
||||
labels: podLabels,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
selector: { matchLabels: podLabels },
|
||||
template: {
|
||||
metadata: {
|
||||
labels: podLabels,
|
||||
},
|
||||
spec: {
|
||||
containers: [c],
|
||||
serviceAccount: 'ksm-autoscaler',
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
securityContext: {
|
||||
runAsNonRoot: true,
|
||||
runAsUser: 65534,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,20 +1,11 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
clusterRole+: {
|
||||
rules+:
|
||||
local role = k.rbac.v1.role;
|
||||
local policyRule = role.rulesType;
|
||||
local rule = policyRule.new() +
|
||||
policyRule.withApiGroups(['']) +
|
||||
policyRule.withResources([
|
||||
'services',
|
||||
'endpoints',
|
||||
'pods',
|
||||
]) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
[rule]
|
||||
},
|
||||
}
|
||||
prometheus+:: {
|
||||
clusterRole+: {
|
||||
rules+: [{
|
||||
apiGroups: [''],
|
||||
resources: ['services', 'endpoints', 'pods'],
|
||||
verbs: ['get', 'list', 'watch'],
|
||||
}],
|
||||
},
|
||||
},
|
||||
}
|
|
@ -1,23 +1,22 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local statefulSet = k.apps.v1.statefulSet;
|
||||
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
|
||||
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
|
||||
|
||||
{
|
||||
local antiaffinity(key, values, namespace) = {
|
||||
affinity: {
|
||||
podAntiAffinity: {
|
||||
preferredDuringSchedulingIgnoredDuringExecution: [
|
||||
affinity.new() +
|
||||
affinity.withWeight(100) +
|
||||
affinity.mixin.podAffinityTerm.withNamespaces(namespace) +
|
||||
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
|
||||
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
|
||||
matchExpression.new() +
|
||||
matchExpression.withKey(key) +
|
||||
matchExpression.withOperator('In') +
|
||||
matchExpression.withValues(values),
|
||||
]),
|
||||
{
|
||||
weight: 100,
|
||||
podAffinityTerm: {
|
||||
namespaces: [namespace],
|
||||
topologyKey: 'kubernetes.io/hostname',
|
||||
labelSelector: {
|
||||
matchExpressions: [{
|
||||
key: key,
|
||||
operator: 'In',
|
||||
values: values,
|
||||
}],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -30,12 +29,12 @@ local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpres
|
|||
},
|
||||
},
|
||||
|
||||
prometheus+: {
|
||||
prometheus+:: {
|
||||
local p = self,
|
||||
|
||||
prometheus+: {
|
||||
spec+:
|
||||
antiaffinity('prometheus', [p.name], p.namespace),
|
||||
antiaffinity('prometheus', [$._config.prometheus.name], $._config.namespace),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,23 +1,42 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local service(name, namespace, labels, selector, ports) = {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
labels: labels,
|
||||
},
|
||||
spec: {
|
||||
ports+: ports,
|
||||
selector: selector,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
|
||||
),
|
||||
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
|
||||
),
|
||||
|
||||
kubeDnsPrometheusDiscoveryService: service(
|
||||
'kube-dns-prometheus-discovery',
|
||||
'kube-system',  // fixed typo ('kube-syste'); matches the sibling discovery services
|
||||
{ 'k8s-app': 'kube-dns' },
|
||||
{ 'k8s-app': 'kube-dns' },
|
||||
[{ name: 'http-metrics-skydns', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
|
||||
),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -9,9 +9,9 @@ local withImageRepository(repository) = {
|
|||
if repository == null then image else repository + '/' + l.imageName(image),
|
||||
_config+:: {
|
||||
imageRepos:: {
|
||||
[field]: substituteRepository(oldRepos[field], repository),
|
||||
[field]: substituteRepository(oldRepos[field], repository)
|
||||
for field in std.objectFields(oldRepos)
|
||||
}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
// Custom metrics API allows the HPA v2 to scale based on arbitrary metrics.
|
||||
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
prometheusAdapter+:: {
|
||||
namespace: $._config.namespace,
|
||||
// Rules for custom-metrics
|
||||
config+:: {
|
||||
rules+: [
|
||||
|
@ -14,19 +13,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
seriesFilters: [],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
namespace: { resource: 'namespace' },
|
||||
pod: { resource: 'pod' },
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)_seconds_total$',
|
||||
as: ""
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
|
||||
name: { matches: '^container_(.*)_seconds_total$', as: '' },
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
|
||||
},
|
||||
{
|
||||
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
|
||||
|
@ -35,19 +27,12 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
namespace: { resource: 'namespace' },
|
||||
pod: { resource: 'pod' },
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
|
||||
name: { matches: '^container_(.*)_total$', as: '' },
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)',
|
||||
},
|
||||
{
|
||||
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
|
||||
|
@ -56,60 +41,38 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
namespace: { resource: 'namespace' },
|
||||
pod: { resource: 'pod' },
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)'
|
||||
name: { matches: '^container_(.*)$', as: '' },
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)',
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [
|
||||
{ isNot: '.*_total$' },
|
||||
],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
|
||||
resources: { template: '<<.Resource>>' },
|
||||
name: { matches: '', as: '' },
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [
|
||||
{ isNot: '.*_seconds_total' },
|
||||
],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '^(.*)_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
|
||||
resources: { template: '<<.Resource>>' },
|
||||
name: { matches: '^(.*)_total$', as: '' },
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '^(.*)_seconds_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
|
||||
}
|
||||
resources: { template: '<<.Resource>>' },
|
||||
name: { matches: '^(.*)_seconds_total$', as: '' },
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -125,7 +88,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
group: 'custom.metrics.k8s.io',
|
||||
version: 'v1beta1',
|
||||
|
@ -143,7 +106,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
group: 'custom.metrics.k8s.io',
|
||||
version: 'v1beta2',
|
||||
|
@ -152,46 +115,51 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
versionPriority: 200,
|
||||
},
|
||||
},
|
||||
customMetricsClusterRoleServerResources:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local rules =
|
||||
policyRule.new() +
|
||||
policyRule.withApiGroups(['custom.metrics.k8s.io']) +
|
||||
policyRule.withResources(['*']) +
|
||||
policyRule.withVerbs(['*']);
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('custom-metrics-server-resources') +
|
||||
clusterRole.withRules(rules),
|
||||
|
||||
customMetricsClusterRoleBindingServerResources:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
customMetricsClusterRoleServerResources: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: 'custom-metrics-server-resources',
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: ['custom.metrics.k8s.io'],
|
||||
resources: ['*'],
|
||||
verbs: ['*'],
|
||||
}],
|
||||
},
|
||||
customMetricsClusterRoleBindingServerResources: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'custom-metrics-server-resources',
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'custom-metrics-server-resources',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
}]),
|
||||
|
||||
customMetricsClusterRoleBindingHPA:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('hpa-controller-custom-metrics') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
}],
|
||||
},
|
||||
customMetricsClusterRoleBindingHPA: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'hpa-controller-custom-metrics',
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'custom-metrics-server-resources',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'horizontal-pod-autoscaler',
|
||||
namespace: 'kube-system',
|
||||
}]),
|
||||
}
|
||||
}],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,63 +1,70 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
eks: {
|
||||
minimumAvailableIPs: 10,
|
||||
minimumAvailableIPsTime: '10m'
|
||||
}
|
||||
minimumAvailableIPsTime: '10m',
|
||||
},
|
||||
},
|
||||
prometheus+: {
|
||||
serviceMonitorCoreDNS+: {
|
||||
spec+: {
|
||||
endpoints: [
|
||||
{
|
||||
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
|
||||
interval: "15s",
|
||||
targetPort: 9153
|
||||
}
|
||||
]
|
||||
spec+: {
|
||||
endpoints: [
|
||||
{
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
interval: '15s',
|
||||
targetPort: 9153,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
AwsEksCniMetricService: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'aws-node',
|
||||
namespace: 'kube-system',
|
||||
labels: { 'k8s-app': 'aws-node' },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'cni-metrics-port', port: 61678, targetPort: 61678 },
|
||||
],
|
||||
selector: { 'k8s-app': 'aws-node' },
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
|
||||
serviceMonitorAwsEksCNI: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'awsekscni',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'eks-cni',
|
||||
},
|
||||
},
|
||||
AwsEksCniMetricService:
|
||||
service.new('aws-node', { 'k8s-app' : 'aws-node' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
serviceMonitorAwsEksCNI:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'awsekscni',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'eks-cni',
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'aws-node',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'aws-node',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'cni-metrics-port',
|
||||
interval: '30s',
|
||||
path: '/metrics',
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'cni-metrics-port',
|
||||
interval: '30s',
|
||||
path: '/metrics',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheusRules+: {
|
||||
groups+: [
|
||||
|
@ -65,15 +72,15 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
name: 'kube-prometheus-eks.rules',
|
||||
rules: [
|
||||
{
|
||||
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
|
||||
expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
|
||||
// Report the configured threshold instead of a hard-coded 10 so the text matches `expr`.
message: 'Instance {{ $labels.instance }} has less than %s IPs available.' % $._config.eks.minimumAvailableIPs,
|
||||
},
|
||||
'for': $._config.eks.minimumAvailableIPsTime,
|
||||
alert: 'EksAvailableIPs'
|
||||
alert: 'EksAvailableIPs',
|
||||
},
|
||||
],
|
||||
},
|
||||
|
|
95
monitoring/vendor/github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-external-metrics.libsonnet
generated
vendored
Normal file
95
monitoring/vendor/github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-external-metrics.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
// External metrics API allows the HPA v2 to scale based on metrics coming from outside of Kubernetes cluster
|
||||
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
prometheusAdapter+:: {
|
||||
namespace: $._config.namespace,
|
||||
// Rules for external-metrics
|
||||
config+:: {
|
||||
externalRules+: [
|
||||
// {
|
||||
// seriesQuery: '{__name__=~"^.*_queue$",namespace!=""}',
|
||||
// seriesFilters: [],
|
||||
// resources: {
|
||||
// overrides: {
|
||||
// namespace: { resource: 'namespace' }
|
||||
// },
|
||||
// },
|
||||
// name: { matches: '^.*_queue$', as: '$0' },
|
||||
// metricsQuery: 'max(<<.Series>>{<<.LabelMatchers>>})',
|
||||
// },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
prometheusAdapter+:: {
|
||||
externalMetricsApiService: {
|
||||
apiVersion: 'apiregistration.k8s.io/v1',
|
||||
kind: 'APIService',
|
||||
metadata: {
|
||||
name: 'v1beta1.external.metrics.k8s.io',
|
||||
},
|
||||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
group: 'external.metrics.k8s.io',
|
||||
version: 'v1beta1',
|
||||
insecureSkipTLSVerify: true,
|
||||
groupPriorityMinimum: 100,
|
||||
versionPriority: 100,
|
||||
},
|
||||
},
|
||||
externalMetricsClusterRoleServerResources: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: 'external-metrics-server-resources',
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: ['external.metrics.k8s.io'],
|
||||
resources: ['*'],
|
||||
verbs: ['*'],
|
||||
}],
|
||||
},
|
||||
externalMetricsClusterRoleBindingServerResources: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'external-metrics-server-resources',
|
||||
},
|
||||
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'external-metrics-server-resources',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
}],
|
||||
},
|
||||
externalMetricsClusterRoleBindingHPA: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'hpa-controller-external-metrics',
|
||||
},
|
||||
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'external-metrics-server-resources',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'horizontal-pod-autoscaler',
|
||||
namespace: 'kube-system',
|
||||
}],
|
||||
},
|
||||
},
|
||||
}
|
13
monitoring/vendor/github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-gke.libsonnet
generated
vendored
Normal file
13
monitoring/vendor/github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-gke.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
(import './kube-prometheus-managed-cluster.libsonnet') + {
|
||||
_config+:: {
|
||||
prometheusAdapter+:: {
|
||||
config+: {
|
||||
resourceRules:: null,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
prometheusAdapter+:: {
|
||||
apiService:: null,
|
||||
},
|
||||
}
|
|
@ -10,10 +10,7 @@
|
|||
interval: '30s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path'
|
||||
},
|
||||
{ sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -24,10 +21,7 @@
|
|||
honorLabels: true,
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path'
|
||||
},
|
||||
{ sourceLabels: ['__metrics_path__'], targetLabel: 'metrics_path' },
|
||||
],
|
||||
metricRelabelings: [
|
||||
// Drop a bunch of metrics which are disabled but still sent, see
|
||||
|
|
|
@ -1,13 +1,20 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeDnsPrometheusDiscoveryService: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'kube-dns-prometheus-discovery',
|
||||
namespace: 'kube-system',
|
||||
labels: { 'k8s-app': 'kube-dns' },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'metrics', port: 9153, targetPort: 9153 },
|
||||
],
|
||||
selector: { 'k8s-app': 'kube-dns' },
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,23 +1,40 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local service(name, namespace, labels, selector, ports) = {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
labels: labels,
|
||||
},
|
||||
spec: {
|
||||
ports+: ports,
|
||||
selector: selector,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
|
||||
),
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',  // was a copy-paste of the controller-manager name, which collides in kube-system
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
|
||||
),
|
||||
kubeDnsPrometheusDiscoveryService: service(
|
||||
'kube-dns-prometheus-discovery',  // was a copy-paste of the controller-manager name, which collides in kube-system
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-dns' },
|
||||
{ 'k8s-app': 'kube-dns' },
|
||||
[{ name: 'metrics', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
|
||||
),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,18 +1,33 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local service(name, namespace, labels, selector, ports) = {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
labels: labels,
|
||||
},
|
||||
spec: {
|
||||
ports+: ports,
|
||||
selector: selector,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }],
|
||||
),
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
|
||||
),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,18 +1,33 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local service(name, namespace, labels, selector, ports) = {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
labels: labels,
|
||||
},
|
||||
spec: {
|
||||
ports+: ports,
|
||||
selector: selector,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
{ component: 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
|
||||
),
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
{ component: 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
|
||||
),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,20 +1,36 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local service(name, namespace, labels, selector, ports) = {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
labels: labels,
|
||||
},
|
||||
spec: {
|
||||
ports+: ports,
|
||||
selector: selector,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'component': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'component': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
|
||||
),
|
||||
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }],
|
||||
),
|
||||
|
||||
serviceMonitorKubeScheduler+: {
|
||||
spec+: {
|
||||
|
|
|
@ -1,21 +1,18 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
// Returns an object meant to be mixed into an existing Service (`service+: patch(...)`):
// it merges into the inherited spec (`spec+:`), replaces spec.ports with `ports`
// and sets spec.type to 'NodePort'.
local patch(ports) = {
|
||||
spec+: {
|
||||
ports: ports,
|
||||
type: 'NodePort',
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
service+: patch([{ name: 'web', port: 9090, targetPort: 'web', nodePort: 30900 }]),
|
||||
},
|
||||
alertmanager+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
service+: patch([{ name: 'web', port: 9093, targetPort: 'web', nodePort: 30903 }]),
|
||||
},
|
||||
grafana+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
service+: patch([{ name: 'http', port: 3000, targetPort: 'http', nodePort: 30902 }]),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
(import 'github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet') + {
|
||||
_config+:: {
|
||||
etcd: {
|
||||
|
@ -12,88 +10,93 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
serviceEtcd:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379);
|
||||
|
||||
service.new('etcd', null, etcdServicePort) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
endpointsEtcd:
|
||||
local endpoints = k.core.v1.endpoints;
|
||||
local endpointSubset = endpoints.subsetsType;
|
||||
local endpointPort = endpointSubset.portsType;
|
||||
|
||||
local etcdPort = endpointPort.new() +
|
||||
endpointPort.withName('metrics') +
|
||||
endpointPort.withPort(2379) +
|
||||
endpointPort.withProtocol('TCP');
|
||||
|
||||
local subset = endpointSubset.new() +
|
||||
endpointSubset.withAddresses([
|
||||
{ ip: etcdIP }
|
||||
for etcdIP in $._config.etcd.ips
|
||||
]) +
|
||||
endpointSubset.withPorts(etcdPort);
|
||||
|
||||
endpoints.new() +
|
||||
endpoints.mixin.metadata.withName('etcd') +
|
||||
endpoints.mixin.metadata.withNamespace('kube-system') +
|
||||
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
|
||||
endpoints.withSubsets(subset),
|
||||
serviceMonitorEtcd:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'etcd',
|
||||
namespace: 'kube-system',
|
||||
labels: {
|
||||
serviceEtcd: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'etcd',
|
||||
namespace: 'kube-system',
|
||||
labels: { 'k8s-app': 'etcd' },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'metrics', targetPort: 2379, port: 2379 },
|
||||
],
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
endpointsEtcd: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Endpoints',
|
||||
metadata: {
|
||||
name: 'etcd',
|
||||
namespace: 'kube-system',
|
||||
labels: { 'k8s-app': 'etcd' },
|
||||
},
|
||||
subsets: [{
|
||||
addresses: [
|
||||
{ ip: etcdIP }
|
||||
for etcdIP in $._config.etcd.ips
|
||||
],
|
||||
ports: [
|
||||
{ name: 'metrics', port: 2379, protocol: 'TCP' },
|
||||
],
|
||||
}],
|
||||
},
|
||||
serviceMonitorEtcd: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'etcd',
|
||||
namespace: 'kube-system',
|
||||
labels: {
|
||||
'k8s-app': 'etcd',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'metrics',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
// Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required because your etcd metrics endpoints are most likely secured with TLS.
|
||||
tlsConfig: {
|
||||
caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt',
|
||||
keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key',
|
||||
certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt',
|
||||
[if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName,
|
||||
[if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify,
|
||||
},
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'etcd',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'metrics',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
// Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required because your etcd metrics endpoints are most likely secured with TLS.
|
||||
tlsConfig: {
|
||||
caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt',
|
||||
keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key',
|
||||
certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt',
|
||||
[if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName,
|
||||
[if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify,
|
||||
},
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'etcd',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
secretEtcdCerts:
|
||||
},
|
||||
secretEtcdCerts: {
|
||||
// Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod.
|
||||
local secret = k.core.v1.secret;
|
||||
secret.new('kube-etcd-client-certs', {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Secret',
|
||||
type: 'Opaque',
|
||||
metadata: {
|
||||
name: 'kube-etcd-client-certs',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
data: {
|
||||
'etcd-client-ca.crt': std.base64($._config.etcd.clientCA),
|
||||
'etcd-client.key': std.base64($._config.etcd.clientKey),
|
||||
'etcd-client.crt': std.base64($._config.etcd.clientCert),
|
||||
}) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
prometheus+:
|
||||
{
|
||||
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
|
||||
spec+: {
|
||||
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheus+: {
|
||||
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
|
||||
spec+: {
|
||||
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -23,9 +23,9 @@
|
|||
template+: {
|
||||
spec+: {
|
||||
local addArgs(c) =
|
||||
if c.name == 'prometheus-operator'
|
||||
then c + {args+: ['--config-reloader-cpu=0']}
|
||||
else c,
|
||||
if c.name == 'prometheus-operator'
|
||||
then c { args+: ['--config-reloader-cpu=0'] }
|
||||
else c,
|
||||
containers: std.map(addArgs, super.containers),
|
||||
},
|
||||
},
|
||||
|
|
|
@ -1,15 +1,8 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
(import 'github.com/thanos-io/thanos/mixin/alerts/sidecar.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
versions+:: {
|
||||
thanos: 'v0.14.0',
|
||||
},
|
||||
imageRepos+:: {
|
||||
thanos: 'quay.io/thanos/thanos',
|
||||
},
|
||||
versions+:: { thanos: 'v0.14.0' },
|
||||
imageRepos+:: { thanos: 'quay.io/thanos/thanos' },
|
||||
thanos+:: {
|
||||
objectStorageConfig: {
|
||||
key: 'thanos.yaml', // How the file inside the secret is called
|
||||
|
@ -18,23 +11,34 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
local p = self,
|
||||
|
||||
// Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier
|
||||
service+: {
|
||||
spec+: {
|
||||
ports+: [
|
||||
servicePort.newNamed('grpc', 10901, 10901),
|
||||
{ name: 'grpc', port: 10901, targetPort: 10901 },
|
||||
],
|
||||
},
|
||||
},
|
||||
// Create a new service that exposes both the sidecar's HTTP metrics port and its gRPC StoreAPI port
|
||||
serviceThanosSidecar:
|
||||
local thanosGrpcSidecarPort = servicePort.newNamed('grpc', 10901, 10901);
|
||||
local thanosHttpSidecarPort = servicePort.newNamed('http', 10902, 10902);
|
||||
service.new('prometheus-' + $._config.prometheus.name + '-thanos-sidecar', { app: 'prometheus', prometheus: $._config.prometheus.name }) +
|
||||
service.mixin.spec.withPorts([thanosGrpcSidecarPort, thanosHttpSidecarPort]) +
|
||||
service.mixin.spec.withClusterIp('None') +
|
||||
service.mixin.metadata.withLabels({'prometheus': $._config.prometheus.name, 'app': 'thanos-sidecar'}) +
|
||||
service.mixin.metadata.withNamespace($._config.namespace),
|
||||
serviceThanosSidecar: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name + '-thanos-sidecar',
|
||||
namespace: p.namespace,
|
||||
labels: { prometheus: p.name, app: 'thanos-sidecar' },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'grpc', port: 10901, targetPort: 10901 },
|
||||
{ name: 'http', port: 10902, targetPort: 10902 },
|
||||
],
|
||||
selector: { app: 'prometheus', prometheus: p.name },
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
prometheus+: {
|
||||
spec+: {
|
||||
thanos+: {
|
||||
|
@ -50,7 +54,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'thanos-sidecar',
|
||||
namespace: $._config.namespace,
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'prometheus',
|
||||
},
|
||||
|
@ -60,7 +64,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
jobLabel: 'app',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
prometheus: $._config.prometheus.name,
|
||||
prometheus: p.name,
|
||||
app: 'thanos-sidecar',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -1,14 +1,21 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
serviceWeaveNet:
|
||||
service.new('weave-net', { 'name': 'weave-net' }, servicePort.newNamed('weave-net-metrics', 6782, 6782)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'weave-net' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
serviceWeaveNet: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'weave-net',
|
||||
namespace: 'kube-system',
|
||||
labels: { 'k8s-app': 'weave-net' },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'weave-net-metrics', targetPort: 6782, port: 6782 },
|
||||
],
|
||||
selector: { name: 'weave-net' },
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
serviceMonitorWeaveNet: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
|
||||
local configMapList = k3.core.v1.configMapList;
|
||||
local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
||||
|
||||
(import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') +
|
||||
|
@ -9,6 +6,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
(import './node-exporter/node-exporter.libsonnet') +
|
||||
(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') +
|
||||
(import './alertmanager/alertmanager.libsonnet') +
|
||||
(import 'github.com/prometheus/alertmanager/doc/alertmanager-mixin/mixin.libsonnet') +
|
||||
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
|
||||
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') +
|
||||
(import './prometheus/prometheus.libsonnet') +
|
||||
|
@ -16,69 +14,83 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +
|
||||
(import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') +
|
||||
(import './alerts/alerts.libsonnet') +
|
||||
(import './rules/rules.libsonnet') + {
|
||||
(import './rules/rules.libsonnet') +
|
||||
{
|
||||
kubePrometheus+:: {
|
||||
namespace: k.core.v1.namespace.new($._config.namespace),
|
||||
namespace: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Namespace',
|
||||
metadata: {
|
||||
name: $._config.namespace,
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheusOperator+:: {
|
||||
service+: {
|
||||
spec+: {
|
||||
ports: [
|
||||
prometheusOperator+::
|
||||
{
|
||||
service+: {
|
||||
spec+: {
|
||||
ports: [
|
||||
{
|
||||
name: 'https',
|
||||
port: 8443,
|
||||
targetPort: 'https',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
serviceMonitor+: {
|
||||
spec+: {
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
scheme: 'https',
|
||||
honorLabels: true,
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
clusterRole+: {
|
||||
rules+: [
|
||||
{
|
||||
name: 'https',
|
||||
port: 8443,
|
||||
targetPort: 'https',
|
||||
apiGroups: ['authentication.k8s.io'],
|
||||
resources: ['tokenreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
{
|
||||
apiGroups: ['authorization.k8s.io'],
|
||||
resources: ['subjectaccessreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
serviceMonitor+: {
|
||||
spec+: {
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
scheme: 'https',
|
||||
honorLabels: true,
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
clusterRole+: {
|
||||
rules+: [
|
||||
{
|
||||
apiGroups: ['authentication.k8s.io'],
|
||||
resources: ['tokenreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
{
|
||||
apiGroups: ['authorization.k8s.io'],
|
||||
resources: ['subjectaccessreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
],
|
||||
},
|
||||
} +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy',
|
||||
securePortName: 'https',
|
||||
securePort: 8443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8080/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin,
|
||||
} +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy',
|
||||
securePortName: 'https',
|
||||
securePort: 8443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8080/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin,
|
||||
|
||||
|
||||
grafana+:: {
|
||||
dashboardDefinitions: configMapList.new(super.dashboardDefinitions),
|
||||
local dashboardDefinitions = super.dashboardDefinitions,
|
||||
dashboardDefinitions: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ConfigMapList',
|
||||
items: dashboardDefinitions,
|
||||
},
|
||||
serviceMonitor: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
|
@ -92,12 +104,10 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
app: 'grafana',
|
||||
},
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http',
|
||||
interval: '15s',
|
||||
},
|
||||
],
|
||||
endpoints: [{
|
||||
port: 'http',
|
||||
interval: '15s',
|
||||
}],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -105,14 +115,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
grafana: '7.1.0',
|
||||
kubeRbacProxy: 'v0.6.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy',
|
||||
},
|
||||
versions+:: { grafana: '7.3.5', kubeRbacProxy: 'v0.8.0' },
|
||||
imageRepos+:: { kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy' },
|
||||
|
||||
tlsCipherSuites: [
|
||||
'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721
|
||||
|
@ -143,6 +147,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305',
|
||||
],
|
||||
|
||||
runbookURLPattern: 'https://github.com/prometheus-operator/kube-prometheus/wiki/%s',
|
||||
|
||||
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
|
||||
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
|
||||
kubeStateMetricsSelector: 'job="kube-state-metrics"',
|
||||
|
@ -155,6 +161,8 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
coreDNSSelector: 'job="kube-dns"',
|
||||
podLabel: 'pod',
|
||||
|
||||
alertmanagerName: '{{ $labels.namespace }}/{{ $labels.pod}}',
|
||||
alertmanagerClusterLabels: 'namespace,service',
|
||||
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
|
||||
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
|
||||
prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
|
||||
|
@ -191,13 +199,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
|||
limits: { cpu: '250m', memory: '180Mi' },
|
||||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
rules: $.prometheusRules + $.prometheusAlerts,
|
||||
},
|
||||
|
||||
grafana+:: {
|
||||
dashboards: $.grafanaDashboards,
|
||||
},
|
||||
|
||||
prometheus+:: { rules: $.prometheusRules + $.prometheusAlerts },
|
||||
grafana+:: { dashboards: $.grafanaDashboards },
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,8 +1,3 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local deployment = k.apps.v1.deployment;
|
||||
local container = deployment.mixin.spec.template.spec.containersType;
|
||||
local containerPort = container.portsType;
|
||||
|
||||
{
|
||||
local krp = self,
|
||||
config+:: {
|
||||
|
@ -33,17 +28,24 @@ local containerPort = container.portsType;
|
|||
spec+: {
|
||||
template+: {
|
||||
spec+: {
|
||||
containers+: [
|
||||
container.new(krp.config.kubeRbacProxy.name, krp.config.kubeRbacProxy.image) +
|
||||
container.mixin.securityContext.withRunAsUser(65534) +
|
||||
container.withArgs([
|
||||
containers+: [{
|
||||
name: krp.config.kubeRbacProxy.name,
|
||||
image: krp.config.kubeRbacProxy.image,
|
||||
args: [
|
||||
'--logtostderr',
|
||||
'--secure-listen-address=' + krp.config.kubeRbacProxy.secureListenAddress,
|
||||
'--tls-cipher-suites=' + std.join(',', krp.config.kubeRbacProxy.tlsCipherSuites),
|
||||
'--upstream=' + krp.config.kubeRbacProxy.upstream,
|
||||
]) +
|
||||
container.withPorts(containerPort.newNamed(krp.config.kubeRbacProxy.securePort, krp.config.kubeRbacProxy.securePortName)),
|
||||
],
|
||||
],
|
||||
ports: [
|
||||
{ name: krp.config.kubeRbacProxy.securePortName, containerPort: krp.config.kubeRbacProxy.securePort },
|
||||
],
|
||||
securityContext: {
|
||||
runAsUser: 65532,
|
||||
runAsGroup: 65532,
|
||||
runAsNonRoot: true,
|
||||
},
|
||||
}],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
@ -15,7 +15,7 @@ local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-
|
|||
},
|
||||
},
|
||||
kubeStateMetrics+::
|
||||
ksm + {
|
||||
ksm {
|
||||
local version = self.version,
|
||||
name:: 'kube-state-metrics',
|
||||
namespace:: $._config.namespace,
|
||||
|
@ -100,33 +100,33 @@ local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-
|
|||
],
|
||||
},
|
||||
},
|
||||
} +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy-main',
|
||||
securePortName: 'https-main',
|
||||
securePort: 8443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8081/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy-self',
|
||||
securePortName: 'https-self',
|
||||
securePort: 9443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8082/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin,
|
||||
} +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy-main',
|
||||
securePortName: 'https-main',
|
||||
securePort: 8443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8081/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin +
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
name: 'kube-rbac-proxy-self',
|
||||
securePortName: 'https-self',
|
||||
securePort: 9443,
|
||||
secureListenAddress: ':%d' % self.securePort,
|
||||
upstream: 'http://127.0.0.1:8082/',
|
||||
tlsCipherSuites: $._config.tlsCipherSuites,
|
||||
},
|
||||
},
|
||||
}).deploymentMixin,
|
||||
}
|
||||
|
|
|
@ -5,16 +5,16 @@ local imageName(image) =
|
|||
local parts = std.split(image, '/');
|
||||
local len = std.length(parts);
|
||||
if len == 3 then
|
||||
# registry.com/org/image
|
||||
// registry.com/org/image
|
||||
parts[2]
|
||||
else if len == 2 then
|
||||
# org/image
|
||||
// org/image
|
||||
parts[1]
|
||||
else if len == 1 then
|
||||
# image, ie. busybox
|
||||
// image, e.g. busybox
|
||||
parts[0]
|
||||
else
|
||||
error 'unknown image format: ' + image;
|
||||
error 'unknown image format: ' + image;
|
||||
|
||||
{
|
||||
imageName:: imageName,
|
||||
|
|
|
@ -1,16 +1,8 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
nodeExporter: 'v1.0.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
nodeExporter: 'quay.io/prometheus/node-exporter',
|
||||
},
|
||||
versions+:: { nodeExporter: 'v1.0.1' },
|
||||
imageRepos+:: { nodeExporter: 'quay.io/prometheus/node-exporter' },
|
||||
|
||||
nodeExporter+:: {
|
||||
listenAddress: '127.0.0.1',
|
||||
|
@ -28,76 +20,49 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
|
||||
nodeExporter+:: {
|
||||
clusterRoleBinding:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
clusterRoleBinding: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'node-exporter',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'node-exporter',
|
||||
namespace: $._config.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('node-exporter') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('node-exporter') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'node-exporter', namespace: $._config.namespace }]),
|
||||
|
||||
clusterRole:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local authenticationRole = policyRule.new() +
|
||||
policyRule.withApiGroups(['authentication.k8s.io']) +
|
||||
policyRule.withResources([
|
||||
'tokenreviews',
|
||||
]) +
|
||||
policyRule.withVerbs(['create']);
|
||||
|
||||
local authorizationRole = policyRule.new() +
|
||||
policyRule.withApiGroups(['authorization.k8s.io']) +
|
||||
policyRule.withResources([
|
||||
'subjectaccessreviews',
|
||||
]) +
|
||||
policyRule.withVerbs(['create']);
|
||||
|
||||
local rules = [authenticationRole, authorizationRole];
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('node-exporter') +
|
||||
clusterRole.withRules(rules),
|
||||
clusterRole: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: ['authentication.k8s.io'],
|
||||
resources: ['tokenreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
{
|
||||
apiGroups: ['authorization.k8s.io'],
|
||||
resources: ['subjectaccessreviews'],
|
||||
verbs: ['create'],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
daemonset:
|
||||
local daemonset = k.apps.v1.daemonSet;
|
||||
local container = daemonset.mixin.spec.template.spec.containersType;
|
||||
local volume = daemonset.mixin.spec.template.spec.volumesType;
|
||||
local containerPort = container.portsType;
|
||||
local containerVolumeMount = container.volumeMountsType;
|
||||
local podSelector = daemonset.mixin.spec.template.spec.selectorType;
|
||||
local toleration = daemonset.mixin.spec.template.spec.tolerationsType;
|
||||
local containerEnv = container.envType;
|
||||
|
||||
local podLabels = $._config.nodeExporter.labels;
|
||||
local selectorLabels = $._config.nodeExporter.selectorLabels;
|
||||
|
||||
local existsToleration = toleration.new() +
|
||||
toleration.withOperator('Exists');
|
||||
local procVolumeName = 'proc';
|
||||
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
|
||||
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc').
|
||||
withMountPropagation('HostToContainer').
|
||||
withReadOnly(true);
|
||||
|
||||
local sysVolumeName = 'sys';
|
||||
local sysVolume = volume.fromHostPath(sysVolumeName, '/sys');
|
||||
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys').
|
||||
withMountPropagation('HostToContainer').
|
||||
withReadOnly(true);
|
||||
|
||||
local rootVolumeName = 'root';
|
||||
local rootVolume = volume.fromHostPath(rootVolumeName, '/');
|
||||
local rootVolumeMount = containerVolumeMount.new(rootVolumeName, '/host/root').
|
||||
withMountPropagation('HostToContainer').
|
||||
withReadOnly(true);
|
||||
|
||||
local nodeExporter =
|
||||
container.new('node-exporter', $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter) +
|
||||
container.withArgs([
|
||||
local nodeExporter = {
|
||||
name: 'node-exporter',
|
||||
image: $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter,
|
||||
args: [
|
||||
'--web.listen-address=' + std.join(':', [$._config.nodeExporter.listenAddress, std.toString($._config.nodeExporter.port)]),
|
||||
'--path.procfs=/host/proc',
|
||||
'--path.sysfs=/host/sys',
|
||||
|
@ -105,20 +70,27 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
'--no-collector.wifi',
|
||||
'--no-collector.hwmon',
|
||||
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
|
||||
]) +
|
||||
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
|
||||
container.mixin.resources.withRequests($._config.resources['node-exporter'].requests) +
|
||||
container.mixin.resources.withLimits($._config.resources['node-exporter'].limits);
|
||||
],
|
||||
volumeMounts: [
|
||||
{ name: 'proc', mountPath: '/host/proc', mountPropagation: 'HostToContainer', readOnly: true },
|
||||
{ name: 'sys', mountPath: '/host/sys', mountPropagation: 'HostToContainer', readOnly: true },
|
||||
{ name: 'root', mountPath: '/host/root', mountPropagation: 'HostToContainer', readOnly: true },
|
||||
],
|
||||
resources: $._config.resources['node-exporter'],
|
||||
};
|
||||
|
||||
local ip = containerEnv.fromFieldPath('IP', 'status.podIP');
|
||||
local proxy =
|
||||
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
|
||||
container.withArgs([
|
||||
local proxy = {
|
||||
name: 'kube-rbac-proxy',
|
||||
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
|
||||
args: [
|
||||
'--logtostderr',
|
||||
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
|
||||
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
|
||||
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
|
||||
]) +
|
||||
],
|
||||
env: [
|
||||
{ name: 'IP', valueFrom: { fieldRef: { fieldPath: 'status.podIP' } } },
|
||||
],
|
||||
// Keep `hostPort` here, rather than in the node-exporter container
|
||||
// because Kubernetes mandates that if you define a `hostPort` then
|
||||
// `containerPort` must match. In our case, we are splitting the
|
||||
|
@ -127,82 +99,114 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
// used by the service is tied to the proxy container. We *could*
|
||||
// forgo declaring the host port, however it is important to declare
|
||||
// it so that the scheduler can decide if the pod is schedulable.
|
||||
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) +
|
||||
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
|
||||
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) +
|
||||
container.withEnv([ip]);
|
||||
ports: [
|
||||
{ name: 'https', containerPort: $._config.nodeExporter.port, hostPort: $._config.nodeExporter.port },
|
||||
],
|
||||
resources: $._config.resources['kube-rbac-proxy'],
|
||||
securityContext: {
|
||||
runAsUser: 65532,
|
||||
runAsGroup: 65532,
|
||||
runAsNonRoot: true,
|
||||
},
|
||||
};
|
||||
|
||||
local c = [nodeExporter, proxy];
|
||||
|
||||
daemonset.new() +
|
||||
daemonset.mixin.metadata.withName('node-exporter') +
|
||||
daemonset.mixin.metadata.withNamespace($._config.namespace) +
|
||||
daemonset.mixin.metadata.withLabels(podLabels) +
|
||||
daemonset.mixin.spec.selector.withMatchLabels(selectorLabels) +
|
||||
daemonset.mixin.spec.updateStrategy.rollingUpdate.withMaxUnavailable('10%') +
|
||||
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
|
||||
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
|
||||
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
|
||||
daemonset.mixin.spec.template.spec.withContainers(c) +
|
||||
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +
|
||||
daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
|
||||
daemonset.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
|
||||
daemonset.mixin.spec.template.spec.withServiceAccountName('node-exporter') +
|
||||
daemonset.mixin.spec.template.spec.withHostPid(true) +
|
||||
daemonset.mixin.spec.template.spec.withHostNetwork(true),
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
|
||||
serviceAccount.new('node-exporter') +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
serviceMonitor:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
apiVersion: 'apps/v1',
|
||||
kind: 'DaemonSet',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
namespace: $._config.namespace,
|
||||
labels: $._config.nodeExporter.labels,
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app.kubernetes.io/name',
|
||||
selector: {
|
||||
matchLabels: $._config.nodeExporter.selectorLabels,
|
||||
selector: { matchLabels: $._config.nodeExporter.selectorLabels },
|
||||
updateStrategy: {
|
||||
type: 'RollingUpdate',
|
||||
rollingUpdate: { maxUnavailable: '10%' },
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
scheme: 'https',
|
||||
interval: '15s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
action: 'replace',
|
||||
regex: '(.*)',
|
||||
replacement: '$1',
|
||||
sourceLabels: ['__meta_kubernetes_pod_node_name'],
|
||||
targetLabel: 'instance',
|
||||
},
|
||||
template: {
|
||||
metadata: { labels: $._config.nodeExporter.labels },
|
||||
spec: {
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
tolerations: [{
|
||||
operator: 'Exists',
|
||||
}],
|
||||
containers: [nodeExporter, proxy],
|
||||
volumes: [
|
||||
{ name: 'proc', hostPath: { path: '/proc' } },
|
||||
{ name: 'sys', hostPath: { path: '/sys' } },
|
||||
{ name: 'root', hostPath: { path: '/' } },
|
||||
],
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
serviceAccountName: 'node-exporter',
|
||||
securityContext: {
|
||||
runAsUser: 65534,
|
||||
runAsNonRoot: true,
|
||||
},
|
||||
hostPID: true,
|
||||
hostNetwork: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
serviceAccount: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
},
|
||||
|
||||
local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https');
|
||||
serviceMonitor: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
namespace: $._config.namespace,
|
||||
labels: $._config.nodeExporter.labels,
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app.kubernetes.io/name',
|
||||
selector: {
|
||||
matchLabels: $._config.nodeExporter.selectorLabels,
|
||||
},
|
||||
endpoints: [{
|
||||
port: 'https',
|
||||
scheme: 'https',
|
||||
interval: '15s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
action: 'replace',
|
||||
regex: '(.*)',
|
||||
replacement: '$1',
|
||||
sourceLabels: ['__meta_kubernetes_pod_node_name'],
|
||||
targetLabel: 'instance',
|
||||
},
|
||||
],
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
}],
|
||||
},
|
||||
},
|
||||
|
||||
service.new('node-exporter', $._config.nodeExporter.selectorLabels, nodeExporterPort) +
|
||||
service.mixin.metadata.withNamespace($._config.namespace) +
|
||||
service.mixin.metadata.withLabels($._config.nodeExporter.labels) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
service: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'node-exporter',
|
||||
namespace: $._config.namespace,
|
||||
labels: $._config.nodeExporter.labels,
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'https', targetPort: 'https', port: $._config.nodeExporter.port },
|
||||
],
|
||||
selector: $._config.nodeExporter.selectorLabels,
|
||||
clusterIP: 'None',
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,19 +1,13 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
prometheusAdapter: 'v0.7.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
prometheusAdapter: 'directxman12/k8s-prometheus-adapter',
|
||||
},
|
||||
versions+:: { prometheusAdapter: 'v0.8.2' },
|
||||
imageRepos+:: { prometheusAdapter: 'directxman12/k8s-prometheus-adapter' },
|
||||
|
||||
prometheusAdapter+:: {
|
||||
name: 'prometheus-adapter',
|
||||
namespace: $._config.namespace,
|
||||
labels: { name: $._config.prometheusAdapter.name },
|
||||
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
|
||||
config: {
|
||||
|
@ -23,239 +17,261 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
|
||||
resources: {
|
||||
overrides: {
|
||||
node: {
|
||||
resource: 'node'
|
||||
},
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
},
|
||||
node: { resource: 'node' },
|
||||
namespace: { resource: 'namespace' },
|
||||
pod: { resource: 'pod' },
|
||||
},
|
||||
},
|
||||
containerLabel: 'container'
|
||||
containerLabel: 'container',
|
||||
},
|
||||
memory: {
|
||||
containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)',
|
||||
nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)',
|
||||
resources: {
|
||||
overrides: {
|
||||
instance: {
|
||||
resource: 'node'
|
||||
},
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
},
|
||||
instance: { resource: 'node' },
|
||||
namespace: { resource: 'namespace' },
|
||||
pod: { resource: 'pod' },
|
||||
},
|
||||
},
|
||||
containerLabel: 'container'
|
||||
containerLabel: 'container',
|
||||
},
|
||||
window: '5m',
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
prometheusAdapter+:: {
|
||||
apiService:
|
||||
{
|
||||
apiVersion: 'apiregistration.k8s.io/v1',
|
||||
kind: 'APIService',
|
||||
metadata: {
|
||||
name: 'v1beta1.metrics.k8s.io',
|
||||
},
|
||||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
group: 'metrics.k8s.io',
|
||||
version: 'v1beta1',
|
||||
insecureSkipTLSVerify: true,
|
||||
groupPriorityMinimum: 100,
|
||||
versionPriority: 100,
|
||||
},
|
||||
apiService: {
|
||||
apiVersion: 'apiregistration.k8s.io/v1',
|
||||
kind: 'APIService',
|
||||
metadata: {
|
||||
name: 'v1beta1.metrics.k8s.io',
|
||||
},
|
||||
|
||||
configMap:
|
||||
local configmap = k.core.v1.configMap;
|
||||
configmap.new('adapter-config', { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) }) +
|
||||
|
||||
configmap.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
serviceMonitor:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
namespace: $._config.namespace,
|
||||
labels: $._config.prometheusAdapter.labels,
|
||||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: $._config.prometheusAdapter.labels,
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
group: 'metrics.k8s.io',
|
||||
version: 'v1beta1',
|
||||
insecureSkipTLSVerify: true,
|
||||
groupPriorityMinimum: 100,
|
||||
versionPriority: 100,
|
||||
},
|
||||
},
|
||||
|
||||
configMap: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ConfigMap',
|
||||
metadata: {
|
||||
name: 'adapter-config',
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
data: { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) },
|
||||
},
|
||||
|
||||
serviceMonitor: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
labels: $._config.prometheusAdapter.labels,
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: $._config.prometheusAdapter.labels,
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
service.new(
|
||||
$._config.prometheusAdapter.name,
|
||||
$._config.prometheusAdapter.labels,
|
||||
servicePort.newNamed('https', 443, 6443),
|
||||
) +
|
||||
service.mixin.metadata.withNamespace($._config.namespace) +
|
||||
service.mixin.metadata.withLabels($._config.prometheusAdapter.labels),
|
||||
service: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
labels: $._config.prometheusAdapter.labels,
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'https', targetPort: 6443, port: 443 },
|
||||
],
|
||||
selector: $._config.prometheusAdapter.labels,
|
||||
},
|
||||
},
|
||||
|
||||
deployment:
|
||||
local deployment = k.apps.v1.deployment;
|
||||
local volume = deployment.mixin.spec.template.spec.volumesType;
|
||||
local container = deployment.mixin.spec.template.spec.containersType;
|
||||
local containerVolumeMount = container.volumeMountsType;
|
||||
|
||||
local c =
|
||||
container.new($._config.prometheusAdapter.name, $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter) +
|
||||
container.withArgs([
|
||||
local c = {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
image: $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter,
|
||||
args: [
|
||||
'--cert-dir=/var/run/serving-cert',
|
||||
'--config=/etc/adapter/config.yaml',
|
||||
'--logtostderr=true',
|
||||
'--metrics-relist-interval=1m',
|
||||
'--prometheus-url=' + $._config.prometheusAdapter.prometheusURL,
|
||||
'--secure-port=6443',
|
||||
]) +
|
||||
container.withPorts([{ containerPort: 6443 }]) +
|
||||
container.withVolumeMounts([
|
||||
containerVolumeMount.new('tmpfs', '/tmp'),
|
||||
containerVolumeMount.new('volume-serving-cert', '/var/run/serving-cert'),
|
||||
containerVolumeMount.new('config', '/etc/adapter'),
|
||||
],);
|
||||
],
|
||||
ports: [{ containerPort: 6443 }],
|
||||
volumeMounts: [
|
||||
{ name: 'tmpfs', mountPath: '/tmp', readOnly: false },
|
||||
{ name: 'volume-serving-cert', mountPath: '/var/run/serving-cert', readOnly: false },
|
||||
{ name: 'config', mountPath: '/etc/adapter', readOnly: false },
|
||||
],
|
||||
};
|
||||
|
||||
deployment.new($._config.prometheusAdapter.name, 1, c, $._config.prometheusAdapter.labels) +
|
||||
deployment.mixin.metadata.withNamespace($._config.namespace) +
|
||||
deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) +
|
||||
deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) +
|
||||
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
|
||||
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) +
|
||||
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) +
|
||||
deployment.mixin.spec.template.spec.withVolumes([
|
||||
volume.fromEmptyDir(name='tmpfs'),
|
||||
volume.fromEmptyDir(name='volume-serving-cert'),
|
||||
{ name: 'config', configMap: { name: 'adapter-config' } },
|
||||
]),
|
||||
{
|
||||
apiVersion: 'apps/v1',
|
||||
kind: 'Deployment',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
selector: { matchLabels: $._config.prometheusAdapter.labels },
|
||||
strategy: {
|
||||
rollingUpdate: {
|
||||
maxSurge: 1,
|
||||
maxUnavailable: 0,
|
||||
},
|
||||
},
|
||||
template: {
|
||||
metadata: { labels: $._config.prometheusAdapter.labels },
|
||||
spec: {
|
||||
containers: [c],
|
||||
serviceAccountName: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
volumes: [
|
||||
{ name: 'tmpfs', emptyDir: {} },
|
||||
{ name: 'volume-serving-cert', emptyDir: {} },
|
||||
{ name: 'config', configMap: { name: 'adapter-config' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
serviceAccount: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
},
|
||||
},
|
||||
|
||||
serviceAccount.new($._config.prometheusAdapter.name) +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
clusterRole: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: [''],
|
||||
resources: ['nodes', 'namespaces', 'pods', 'services'],
|
||||
verbs: ['get', 'list', 'watch'],
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRole:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local rules =
|
||||
policyRule.new() +
|
||||
policyRule.withApiGroups(['']) +
|
||||
policyRule.withResources(['nodes', 'namespaces', 'pods', 'services']) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName($._config.prometheusAdapter.name) +
|
||||
clusterRole.withRules(rules),
|
||||
|
||||
clusterRoleBinding:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName($._config.prometheusAdapter.name) +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName($.prometheusAdapter.clusterRole.metadata.name) +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
clusterRoleBinding: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: $._config.prometheusAdapter.name,
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: $.prometheusAdapter.clusterRole.metadata.name,
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
}]),
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRoleBindingDelegator:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('resource-metrics:system:auth-delegator') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('system:auth-delegator') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
clusterRoleBindingDelegator: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: {
|
||||
name: 'resource-metrics:system:auth-delegator',
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'system:auth-delegator',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
}]),
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRoleServerResources:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
clusterRoleServerResources: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: 'resource-metrics-server-resources',
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: ['metrics.k8s.io'],
|
||||
resources: ['*'],
|
||||
verbs: ['*'],
|
||||
}],
|
||||
},
|
||||
|
||||
local rules =
|
||||
policyRule.new() +
|
||||
policyRule.withApiGroups(['metrics.k8s.io']) +
|
||||
policyRule.withResources(['*']) +
|
||||
policyRule.withVerbs(['*']);
|
||||
clusterRoleAggregatedMetricsReader: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: {
|
||||
name: 'system:aggregated-metrics-reader',
|
||||
labels: {
|
||||
'rbac.authorization.k8s.io/aggregate-to-admin': 'true',
|
||||
'rbac.authorization.k8s.io/aggregate-to-edit': 'true',
|
||||
'rbac.authorization.k8s.io/aggregate-to-view': 'true',
|
||||
},
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: ['metrics.k8s.io'],
|
||||
resources: ['pods', 'nodes'],
|
||||
verbs: ['get', 'list', 'watch'],
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('resource-metrics-server-resources') +
|
||||
clusterRole.withRules(rules),
|
||||
|
||||
clusterRoleAggregatedMetricsReader:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local rules =
|
||||
policyRule.new() +
|
||||
policyRule.withApiGroups(['metrics.k8s.io']) +
|
||||
policyRule.withResources(['pods', 'nodes']) +
|
||||
policyRule.withVerbs(['get','list','watch']);
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('system:aggregated-metrics-reader') +
|
||||
clusterRole.mixin.metadata.withLabels({
|
||||
"rbac.authorization.k8s.io/aggregate-to-admin": "true",
|
||||
"rbac.authorization.k8s.io/aggregate-to-edit": "true",
|
||||
"rbac.authorization.k8s.io/aggregate-to-view": "true",
|
||||
}) +
|
||||
clusterRole.withRules(rules),
|
||||
|
||||
roleBindingAuthReader:
|
||||
local roleBinding = k.rbac.v1.roleBinding;
|
||||
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName('resource-metrics-auth-reader') +
|
||||
roleBinding.mixin.metadata.withNamespace('kube-system') +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName('extension-apiserver-authentication-reader') +
|
||||
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
|
||||
roleBinding.withSubjects([{
|
||||
roleBindingAuthReader: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleBinding',
|
||||
metadata: {
|
||||
name: 'resource-metrics-auth-reader',
|
||||
namespace: 'kube-system',
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'Role',
|
||||
name: 'extension-apiserver-authentication-reader',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
}]),
|
||||
namespace: $._config.prometheusAdapter.namespace,
|
||||
}],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,21 +1,12 @@
|
|||
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local relabelings = import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
prometheus: 'v2.20.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
prometheus: 'quay.io/prometheus/prometheus',
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
name: 'main',
|
||||
},
|
||||
versions+:: { prometheus: 'v2.22.1' },
|
||||
imageRepos+:: { prometheus: 'quay.io/prometheus/prometheus' },
|
||||
alertmanager+:: { name: 'main' },
|
||||
|
||||
prometheus+:: {
|
||||
name: 'k8s',
|
||||
|
@ -35,468 +26,438 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
prometheusRules:: $._config.prometheus.rules,
|
||||
alertmanagerName:: $.alertmanager.service.metadata.name,
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
|
||||
serviceAccount.new('prometheus-' + p.name) +
|
||||
serviceAccount.mixin.metadata.withNamespace(p.namespace),
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local prometheusPort = servicePort.newNamed('web', 9090, 'web');
|
||||
|
||||
service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
|
||||
service.mixin.spec.withSessionAffinity('ClientIP') +
|
||||
service.mixin.metadata.withNamespace(p.namespace) +
|
||||
service.mixin.metadata.withLabels({ prometheus: p.name }),
|
||||
|
||||
rules:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'PrometheusRule',
|
||||
metadata: {
|
||||
labels: {
|
||||
prometheus: p.name,
|
||||
role: 'alert-rules',
|
||||
},
|
||||
name: 'prometheus-' + p.name + '-rules',
|
||||
namespace: p.namespace,
|
||||
},
|
||||
spec: {
|
||||
groups: p.prometheusRules.groups,
|
||||
},
|
||||
serviceAccount: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'ServiceAccount',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: p.namespace,
|
||||
},
|
||||
},
|
||||
|
||||
service: {
|
||||
apiVersion: 'v1',
|
||||
kind: 'Service',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: p.namespace,
|
||||
labels: { prometheus: p.name },
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{ name: 'web', targetPort: 'web', port: 9090 },
|
||||
],
|
||||
selector: { app: 'prometheus', prometheus: p.name },
|
||||
sessionAffinity: 'ClientIP',
|
||||
},
|
||||
},
|
||||
|
||||
rules: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'PrometheusRule',
|
||||
metadata: {
|
||||
labels: {
|
||||
prometheus: p.name,
|
||||
role: 'alert-rules',
|
||||
},
|
||||
name: 'prometheus-' + p.name + '-rules',
|
||||
namespace: p.namespace,
|
||||
},
|
||||
spec: {
|
||||
groups: p.prometheusRules.groups,
|
||||
},
|
||||
},
|
||||
|
||||
roleBindingSpecificNamespaces:
|
||||
local roleBinding = k.rbac.v1.roleBinding;
|
||||
|
||||
local newSpecificRoleBinding(namespace) =
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
|
||||
roleBinding.mixin.metadata.withNamespace(namespace) +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
|
||||
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
|
||||
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
|
||||
|
||||
local roleBindingList = k3.rbac.v1.roleBindingList;
|
||||
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
|
||||
clusterRole:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local nodeMetricsRule = policyRule.new() +
|
||||
policyRule.withApiGroups(['']) +
|
||||
policyRule.withResources(['nodes/metrics']) +
|
||||
policyRule.withVerbs(['get']);
|
||||
|
||||
local metricsRule = policyRule.new() +
|
||||
policyRule.withNonResourceUrls('/metrics') +
|
||||
policyRule.withVerbs(['get']);
|
||||
|
||||
local rules = [nodeMetricsRule, metricsRule];
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
|
||||
clusterRole.withRules(rules),
|
||||
roleConfig:
|
||||
local role = k.rbac.v1.role;
|
||||
local policyRule = role.rulesType;
|
||||
|
||||
local configmapRule = policyRule.new() +
|
||||
policyRule.withApiGroups(['']) +
|
||||
policyRule.withResources([
|
||||
'configmaps',
|
||||
]) +
|
||||
policyRule.withVerbs(['get']);
|
||||
|
||||
role.new() +
|
||||
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
|
||||
role.mixin.metadata.withNamespace(p.namespace) +
|
||||
role.withRules(configmapRule),
|
||||
roleBindingConfig:
|
||||
local roleBinding = k.rbac.v1.roleBinding;
|
||||
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
|
||||
roleBinding.mixin.metadata.withNamespace(p.namespace) +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
|
||||
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
|
||||
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
|
||||
clusterRoleBinding:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
|
||||
roleSpecificNamespaces:
|
||||
local role = k.rbac.v1.role;
|
||||
local policyRule = role.rulesType;
|
||||
local coreRule = policyRule.new() +
|
||||
policyRule.withApiGroups(['']) +
|
||||
policyRule.withResources([
|
||||
'services',
|
||||
'endpoints',
|
||||
'pods',
|
||||
]) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
local ingressRule = policyRule.new() +
|
||||
policyRule.withApiGroups(['extensions']) +
|
||||
policyRule.withResources([
|
||||
'ingresses',
|
||||
]) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
|
||||
local newSpecificRole(namespace) =
|
||||
role.new() +
|
||||
role.mixin.metadata.withName('prometheus-' + p.name) +
|
||||
role.mixin.metadata.withNamespace(namespace) +
|
||||
role.withRules([coreRule, ingressRule]);
|
||||
|
||||
local roleList = k3.rbac.v1.roleList;
|
||||
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
|
||||
prometheus:
|
||||
local statefulSet = k.apps.v1.statefulSet;
|
||||
local container = statefulSet.mixin.spec.template.spec.containersType;
|
||||
local resourceRequirements = container.mixin.resourcesType;
|
||||
local selector = statefulSet.mixin.spec.selectorType;
|
||||
|
||||
|
||||
local resources =
|
||||
resourceRequirements.new() +
|
||||
resourceRequirements.withRequests({ memory: '400Mi' });
|
||||
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'Prometheus',
|
||||
local newSpecificRoleBinding(namespace) = {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleBinding',
|
||||
metadata: {
|
||||
name: p.name,
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
prometheus: p.name,
|
||||
},
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: p.replicas,
|
||||
version: $._config.versions.prometheus,
|
||||
image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
|
||||
serviceAccountName: 'prometheus-' + p.name,
|
||||
serviceMonitorSelector: {},
|
||||
podMonitorSelector: {},
|
||||
probeSelector: {},
|
||||
serviceMonitorNamespaceSelector: {},
|
||||
podMonitorNamespaceSelector: {},
|
||||
probeNamespaceSelector: {},
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
ruleSelector: selector.withMatchLabels({
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'Role',
|
||||
name: 'prometheus-' + p.name,
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: p.namespace,
|
||||
}],
|
||||
};
|
||||
{
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleBindingList',
|
||||
items: [newSpecificRoleBinding(x) for x in p.roleBindingNamespaces],
|
||||
},
|
||||
|
||||
clusterRole: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRole',
|
||||
metadata: { name: 'prometheus-' + p.name },
|
||||
rules: [
|
||||
{
|
||||
apiGroups: [''],
|
||||
resources: ['nodes/metrics'],
|
||||
verbs: ['get'],
|
||||
},
|
||||
{
|
||||
nonResourceURLs: ['/metrics'],
|
||||
verbs: ['get'],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
roleConfig: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'Role',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name + '-config',
|
||||
namespace: p.namespace,
|
||||
},
|
||||
rules: [{
|
||||
apiGroups: [''],
|
||||
resources: ['configmaps'],
|
||||
verbs: ['get'],
|
||||
}],
|
||||
},
|
||||
|
||||
roleBindingConfig: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleBinding',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name + '-config',
|
||||
namespace: p.namespace,
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'Role',
|
||||
name: 'prometheus-' + p.name + '-config',
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: p.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
clusterRoleBinding: {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'ClusterRoleBinding',
|
||||
metadata: { name: 'prometheus-' + p.name },
|
||||
roleRef: {
|
||||
apiGroup: 'rbac.authorization.k8s.io',
|
||||
kind: 'ClusterRole',
|
||||
name: 'prometheus-' + p.name,
|
||||
},
|
||||
subjects: [{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: p.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
roleSpecificNamespaces:
|
||||
local newSpecificRole(namespace) = {
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'Role',
|
||||
metadata: {
|
||||
name: 'prometheus-' + p.name,
|
||||
namespace: namespace,
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: [''],
|
||||
resources: ['services', 'endpoints', 'pods'],
|
||||
verbs: ['get', 'list', 'watch'],
|
||||
},
|
||||
{
|
||||
apiGroups: ['extensions'],
|
||||
resources: ['ingresses'],
|
||||
verbs: ['get', 'list', 'watch'],
|
||||
},
|
||||
],
|
||||
};
|
||||
{
|
||||
apiVersion: 'rbac.authorization.k8s.io/v1',
|
||||
kind: 'RoleList',
|
||||
items: [newSpecificRole(x) for x in p.roleBindingNamespaces],
|
||||
},
|
||||
|
||||
prometheus: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'Prometheus',
|
||||
metadata: {
|
||||
name: p.name,
|
||||
namespace: p.namespace,
|
||||
labels: { prometheus: p.name },
|
||||
},
|
||||
spec: {
|
||||
replicas: p.replicas,
|
||||
version: $._config.versions.prometheus,
|
||||
image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
|
||||
serviceAccountName: 'prometheus-' + p.name,
|
||||
serviceMonitorSelector: {},
|
||||
podMonitorSelector: {},
|
||||
probeSelector: {},
|
||||
serviceMonitorNamespaceSelector: {},
|
||||
podMonitorNamespaceSelector: {},
|
||||
probeNamespaceSelector: {},
|
||||
nodeSelector: { 'kubernetes.io/os': 'linux' },
|
||||
ruleSelector: {
|
||||
matchLabels: {
|
||||
role: 'alert-rules',
|
||||
prometheus: p.name,
|
||||
}),
|
||||
resources: resources,
|
||||
alerting: {
|
||||
alertmanagers: [
|
||||
},
|
||||
},
|
||||
resources: {
|
||||
requests: { memory: '400Mi' },
|
||||
},
|
||||
alerting: {
|
||||
alertmanagers: [{
|
||||
namespace: p.namespace,
|
||||
name: p.alertmanagerName,
|
||||
port: 'web',
|
||||
}],
|
||||
},
|
||||
securityContext: {
|
||||
runAsUser: 1000,
|
||||
runAsNonRoot: true,
|
||||
fsGroup: 2000,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
serviceMonitor: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'prometheus',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'prometheus' },
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: { prometheus: p.name },
|
||||
},
|
||||
endpoints: [{
|
||||
port: 'web',
|
||||
interval: '30s',
|
||||
}],
|
||||
},
|
||||
},
|
||||
|
||||
serviceMonitorKubeScheduler: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-scheduler',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'kube-scheduler' },
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [{
|
||||
port: 'https-metrics',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
tlsConfig: { insecureSkipVerify: true },
|
||||
}],
|
||||
selector: {
|
||||
matchLabels: { 'k8s-app': 'kube-scheduler' },
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: ['kube-system'],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
serviceMonitorKubelet: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kubelet',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'kubelet' },
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
tlsConfig: { insecureSkipVerify: true },
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
metricRelabelings: relabelings,
|
||||
relabelings: [{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
}],
|
||||
},
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
path: '/metrics/cadvisor',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
honorTimestamps: false,
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
}],
|
||||
metricRelabelings: [
|
||||
// Drop a bunch of metrics which are disabled but still sent, see
|
||||
// https://github.com/google/cadvisor/issues/1925.
|
||||
{
|
||||
namespace: p.namespace,
|
||||
name: p.alertmanagerName,
|
||||
port: 'web',
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
},
|
||||
securityContext: {
|
||||
runAsUser: 1000,
|
||||
runAsNonRoot: true,
|
||||
fsGroup: 2000,
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
path: '/metrics/probes',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
tlsConfig: { insecureSkipVerify: true },
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
}],
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: { 'k8s-app': 'kubelet' },
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: ['kube-system'],
|
||||
},
|
||||
},
|
||||
serviceMonitor:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'prometheus',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'prometheus',
|
||||
},
|
||||
|
||||
serviceMonitorKubeControllerManager: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-controller-manager',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'kube-controller-manager' },
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [{
|
||||
port: 'https-metrics',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: {
|
||||
prometheus: p.name,
|
||||
},
|
||||
},
|
||||
endpoints: [
|
||||
metricRelabelings: relabelings + [
|
||||
{
|
||||
port: 'web',
|
||||
interval: '30s',
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'etcd_(debugging|disk|request|server).*',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
}],
|
||||
selector: {
|
||||
matchLabels: { 'k8s-app': 'kube-controller-manager' },
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: ['kube-system'],
|
||||
},
|
||||
},
|
||||
serviceMonitorKubeScheduler:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-scheduler',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'kube-scheduler',
|
||||
},
|
||||
|
||||
serviceMonitorApiserver: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-apiserver',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'apiserver' },
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'component',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
component: 'apiserver',
|
||||
provider: 'kubernetes',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
namespaceSelector: {
|
||||
matchNames: ['default'],
|
||||
},
|
||||
endpoints: [{
|
||||
port: 'https',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
tlsConfig: {
|
||||
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
|
||||
serverName: 'kubernetes',
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
metricRelabelings: relabelings + [
|
||||
{
|
||||
port: 'https-metrics',
|
||||
interval: '30s',
|
||||
scheme: "https",
|
||||
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true
|
||||
}
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'etcd_(debugging|disk|server).*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__', 'le'],
|
||||
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'kube-scheduler',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
},
|
||||
}],
|
||||
},
|
||||
serviceMonitorKubelet:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kubelet',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'kubelet',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
|
||||
relabelings: [
|
||||
{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
path: '/metrics/cadvisor',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
honorTimestamps: false,
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
},
|
||||
],
|
||||
metricRelabelings: [
|
||||
// Drop a bunch of metrics which are disabled but still sent, see
|
||||
// https://github.com/google/cadvisor/issues/1925.
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
port: 'https-metrics',
|
||||
scheme: 'https',
|
||||
path: '/metrics/probes',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
relabelings: [
|
||||
{
|
||||
sourceLabels: ['__metrics_path__'],
|
||||
targetLabel: 'metrics_path',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'kubelet',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
serviceMonitorCoreDNS: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'coredns',
|
||||
namespace: p.namespace,
|
||||
labels: { 'k8s-app': 'coredns' },
|
||||
},
|
||||
serviceMonitorKubeControllerManager:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-controller-manager',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'kube-controller-manager',
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
selector: {
|
||||
matchLabels: { 'k8s-app': 'kube-dns' },
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https-metrics',
|
||||
interval: '30s',
|
||||
scheme: "https",
|
||||
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true
|
||||
},
|
||||
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'etcd_(debugging|disk|request|server).*',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'kube-controller-manager',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
serviceMonitorApiserver:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'kube-apiserver',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'apiserver',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'component',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
component: 'apiserver',
|
||||
provider: 'kubernetes',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'default',
|
||||
],
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'https',
|
||||
interval: '30s',
|
||||
scheme: 'https',
|
||||
tlsConfig: {
|
||||
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
|
||||
serverName: 'kubernetes',
|
||||
},
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'etcd_(debugging|disk|server).*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
|
||||
action: 'drop',
|
||||
},
|
||||
{
|
||||
sourceLabels: ['__name__', 'le'],
|
||||
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
|
||||
action: 'drop',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
serviceMonitorCoreDNS:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'coredns',
|
||||
namespace: p.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'coredns',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'k8s-app',
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'k8s-app': 'kube-dns',
|
||||
},
|
||||
},
|
||||
namespaceSelector: {
|
||||
matchNames: [
|
||||
'kube-system',
|
||||
],
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'metrics',
|
||||
interval: '15s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
},
|
||||
],
|
||||
namespaceSelector: {
|
||||
matchNames: ['kube-system'],
|
||||
},
|
||||
endpoints: [{
|
||||
port: 'metrics',
|
||||
interval: '15s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
}],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
name: 'kube-prometheus-node-recording.rules',
|
||||
rules: [
|
||||
{
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)',
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)',
|
||||
record: 'instance:node_cpu:rate:sum',
|
||||
},
|
||||
{
|
||||
|
@ -17,11 +17,11 @@
|
|||
record: 'instance:node_network_transmit_bytes:rate:sum',
|
||||
},
|
||||
{
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
|
||||
record: 'instance:node_cpu:ratio',
|
||||
},
|
||||
{
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))',
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))',
|
||||
record: 'cluster:node_cpu:sum_rate5m',
|
||||
},
|
||||
{
|
||||
|
|
|
@ -88,6 +88,20 @@
|
|||
},
|
||||
'for': '5m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorRejectedResources',
|
||||
expr: |||
|
||||
min_over_time(prometheus_operator_managed_resources{state="rejected",%(prometheusOperatorSelector)s}[5m]) > 0
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.',
|
||||
summary: 'Resources rejected by Prometheus operator',
|
||||
},
|
||||
'for': '5m',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
|
|
File diff suppressed because one or more lines are too long
1
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/alertmanagerconfig-crd.libsonnet
generated
vendored
Normal file
1
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/alertmanagerconfig-crd.libsonnet
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -15,14 +15,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
|
||||
versions+:: {
|
||||
prometheusOperator: 'v0.42.1',
|
||||
prometheusOperator: 'v0.44.1',
|
||||
prometheusConfigReloader: self.prometheusOperator,
|
||||
configmapReloader: 'v0.4.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator',
|
||||
configmapReloader: 'jimmidyson/configmap-reload',
|
||||
prometheusConfigReloader: 'quay.io/prometheus-operator/prometheus-config-reloader',
|
||||
},
|
||||
},
|
||||
|
@ -36,13 +34,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
image:: $._config.imageRepos.prometheusOperator,
|
||||
version:: $._config.versions.prometheusOperator,
|
||||
configReloaderImage:: $._config.imageRepos.configmapReloader,
|
||||
configReloaderVersion:: $._config.versions.configmapReloader,
|
||||
prometheusConfigReloaderImage:: $._config.imageRepos.prometheusConfigReloader,
|
||||
prometheusConfigReloaderVersion:: $._config.versions.prometheusConfigReloader,
|
||||
|
||||
// Prefixing with 0 to ensure these manifests are listed and therefore created first.
|
||||
'0alertmanagerCustomResourceDefinition': import 'alertmanager-crd.libsonnet',
|
||||
'0alertmanagerConfigCustomResourceDefinition': import 'alertmanagerconfig-crd.libsonnet',
|
||||
'0prometheusCustomResourceDefinition': import 'prometheus-crd.libsonnet',
|
||||
'0servicemonitorCustomResourceDefinition': import 'servicemonitor-crd.libsonnet',
|
||||
'0podmonitorCustomResourceDefinition': import 'podmonitor-crd.libsonnet',
|
||||
|
@ -70,6 +67,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
policyRule.withResources([
|
||||
'alertmanagers',
|
||||
'alertmanagers/finalizers',
|
||||
'alertmanagerconfigs',
|
||||
'prometheuses',
|
||||
'prometheuses/finalizers',
|
||||
'thanosrulers',
|
||||
|
@ -126,7 +124,15 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
]) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
|
||||
local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule];
|
||||
local ingressRule = policyRule.new() +
|
||||
policyRule.withApiGroups(['networking.k8s.io']) +
|
||||
policyRule.withResources([
|
||||
'ingresses',
|
||||
]) +
|
||||
policyRule.withVerbs(['get', 'list', 'watch']);
|
||||
|
||||
|
||||
local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule, ingressRule];
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withLabels(po.commonLabels) +
|
||||
|
@ -145,10 +151,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
container.withPorts(containerPort.newNamed(targetPort, 'http')) +
|
||||
container.withArgs([
|
||||
'--kubelet-service=kube-system/kubelet',
|
||||
// Prometheus Operator is run with a read-only root file system. By
|
||||
// default glog saves logfiles to /tmp. Make it log to stderr instead.
|
||||
'--logtostderr=true',
|
||||
'--config-reloader-image=' + po.configReloaderImage + ':' + po.configReloaderVersion,
|
||||
'--prometheus-config-reloader=' + po.prometheusConfigReloaderImage + ':' + po.prometheusConfigReloaderVersion,
|
||||
]) +
|
||||
container.mixin.securityContext.withAllowPrivilegeEscalation(false) +
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.2.4"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. 
More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}
|
||||
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.4.1"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines recording and alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. 
More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue