update monitoring

This commit is contained in:
Tobias Brunner 2020-03-07 20:44:56 +01:00
parent 3954488b8e
commit 43f69e9bc0
44 changed files with 8549 additions and 226 deletions

View File

@ -8,7 +8,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "cb633418a2a67a41cd2f30d556f19e995ed8f274",
"version": "221f0cc107cb3497eeb20fb241e1bcafca2e9115",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
@ -19,8 +19,8 @@
"subdir": "grafana"
}
},
"version": "1b07a802b663f77e36fe1e518cef552ef9fbdb82",
"sum": "GliiVmOLUPmBNjvsx332UOvZj0o9VVxLFLp9u4QmmNk="
"version": "57b4365eacda291b82e0d55ba7eec573a8198dda",
"sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34="
},
{
"name": "grafana-builder",
@ -30,7 +30,7 @@
"subdir": "grafana-builder"
}
},
"version": "66eb3af2bd87c4ee18b97d5b2d366b234eef89cc",
"version": "c19a92e586a6752f11745b47f309b13f02ef7147",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
},
{
@ -63,8 +63,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "953c5464f72594b7fde2e534b207b211f7454ec7",
"sum": "+9Clkrsv9C637n1P7pPoKXTMJTbJGgt2bhv1/1ySTuc="
"version": "66c625d0bfbc080f40ee453ea093c66faa8e1cc0",
"sum": "lBFnEmkRi9RiJrHnWWxVPB0pOGuYans1FdDSiRD/z68="
},
{
"name": "kube-state-metrics",
@ -74,7 +74,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
"version": "e048e7058295e218066068d8de70772da5bbbc23",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
@ -85,7 +85,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "89ede10b19d7ef0145777717351cabe14b113c01",
"version": "e048e7058295e218066068d8de70772da5bbbc23",
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
},
{
@ -96,8 +96,8 @@
"subdir": ""
}
},
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
"sum": "h+ZL4TFVFbSdlsY25mi5x1nRts3PY3JmKz3QXUgnXJk="
"version": "12773f7181057163a3761913bb629d7ce9e13bca",
"sum": "HvDNaf17rnzrdiRCjofCRrFm+tXfC5M4kmARK7bNBpw="
},
{
"name": "node-mixin",
@ -107,8 +107,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "a7c31ff7ed0990545ed4cc62690fc53563ee8860",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
"version": "0107bc794204f50d887898da60032da890637471",
"sum": "VKdF0zPMSCiuIuXWblSz2VOeBaXzQ7fp40vz9sxj+Bo="
},
{
"name": "prometheus",
@ -118,7 +118,7 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "babadf13e852654cfc87c06fc8ff0b843586a00e",
"version": "84b00564f4df8477d59ac275e7c62972664c2926",
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
},
{
@ -129,8 +129,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "89f35ef22db0dc24c523bf8be473bcbcf9ac81f6",
"sum": "KCO153lAOWmWfoj3rQGhLB+8UmyvQ2Bghu/ewDqVum4="
"version": "59bdf55453ba08b4ed7c271cb3c6627058945ed5",
"sum": "qwMbUQkdPhAn9Sl4OVLgzmNOuOTnRLUmvv14I0unsa8="
},
{
"name": "prometheus-pushgateway",
@ -151,7 +151,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "02b62082e3feb271b8fd476603dceaa1fd2054c0",
"version": "12773f7181057163a3761913bb629d7ce9e13bca",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
@ -162,8 +162,8 @@
"subdir": "slo-libsonnet"
}
},
"version": "437c402c5f3ad86c3c16db8471f1649284fef0ee",
"sum": "2Zcyku1f558VrUpMaJnI78fahDksPLcS1idmxxwcQ7Q="
"version": "5ddd7ffc39e7a54c9aca997c2c389a8046fab0ff",
"sum": "S7/+tnAkzVh8Li7sg7Hu4aeIQAWHCtxhRQ+k1OKjoQk="
}
]
}

View File

@ -6,7 +6,7 @@ metadata:
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgBvC5CTR2K6WX9Gqku+RK5L/0u2qHvZkWwWs46yEZQZwK6q1vTtiDR3lK2mM1EXFkMLHRa+Hsu3Zu2fQf0aeAp8v4BCzCoB8EVGcb4PVu0cCzXy+G2+ufUm+sBatshWsbE1o+Fqm+sFgA2pclS8WzAtqgsmdYkoBF2e99HVmLk4PjA8aO6ujgCu1LsPvNA7WGb/zGIFVLKeByElt0zmCvxIM7msOOGW1fN6wizuhbcqB8f7LVqIN5BRWp2SmVaHUldLg/TSQOqFJNpwjdhQ5huz45ZxFzHqGbSlERDO5WqSey+iorEKXQEhYOAbf8Am+xV0/+Dxb14j2nxRyfRvfkVEzRAXpZZU921zZ/u4eCIAEgD5zNI/kN+Jz8rNbyWDiOdZOyajuQ/gx9yXIORASY07C5YgvsrtCut5clYX4yI/4P9Sgq7Jm8Z1JfIWbjI0gvVzkbmKi/Qj/rbu9uM/HyjHSp1+dGt/DoLdCcnl5WYeW47tIZH/gUhkgS7zLhAxEiMIRoZAmUzdwN/DEFc/L0r5sPdYyzO0YwompT+9T2fkT3BEdQORushJSE4RceEvtVi6V+MlHqRo0zEoe7WxO590TPZFifThVUa5mc0RL1KDWttWbq+ygeVPbwDoVWouAnujjSieQHiY0xe7doTu3TJodoYxb2pofPZIeZCeHE4TnLhV79O0j4064exwzcz4J/CxBstimIJL8GY8+LEhNQLeqddue6LjlnmfoSlCWTGKUeD/sX+o1KubmcHE01/6q237ICJBpZFo3rjdnwnW3d90vW+BVyCpm72KCvzZjAHkmc2rqfXUBE82VEfq8YIrCEwaN/HYPJkpbZZZt5nh4i1NHiUVU9zx5MOimofoZbEA6BIU7zNO7Hbo6Hta0qTYEKmTvGWC1EfQSNAs5SatwXdBIW99qlOuEeAc/UmCsf49SRqQp9tIqCjpyFxYjuXzXfch33TvKf9ayh3lQ4RjafhKG86wC94pB59o038+D1++vaIswNMO7YWpJx/bfKsp9LtvtiS6mizyPuWl6+vE4FWUxsa72hpgN1G8RsfYr03I0WcshtBKaJaeTYxPGC9t0y6TV0kxJcLbeeCKbNSxjJMf0YCuSHoB86e74KhwoWb72Ga9qfZqGvSugODY4nP7RaMxG3CpiQ2rlpkHgeEm1Jd/0D7IkkvDK87ZI9COHwRwIJhWDSLB7g9qVEBNPirgKihngbLm/Thw3i0kzqRmAcMbE9fnkeqeiPGiJB1TSJj5h+vbCwJAyaTtu3rs3avc+7zX1KhlxSA5sDBEasXaX2fagSFbOSzxM6VkbYKziBgTVTlt/BwlwMi/PjbDcW/GLrH1ImSBMdrvU5MhyGYnDd44u5iAs6oGBKykeLqTsor7vNjpWl5zViMYdByqNMSkOQvQZC8JAcxn/w5eM+Cg9BJuxmy3XfprNbBUkvDkAVFyH8rlCUCdAiXKU2E10hz3XkeinTJ2FH3Cfa1YQANNC7F+BvfOm22Z1GogOJJdeXKCRpP6HX5xYGrebnFu+m6+UAqKvyKULU/Rk9vRBZj7M5f/QGmTALWy6WHZvqw2d9MtXMAaL2GvD6H76oDAgv312xN8CAg5PJC9K6aV+Qyh3B6BuISqnYS/WR08nPfcTkk6sJ1TL0754tWPRRrF2o38Tjnt1yd0Lz6wex1QamnxMksJklg+nWDKsDkdLwgtaiZCVZ+xWw==
alertmanager.yaml: AgCB9ENTl3ptItT5IFDHKxONg+hJ20nAClt94b8ShwMWlaNNpUlxa92HIS8nKJ8sfciyvxPgMTDx/0euv8+RYZiuU5tYSjo6H6KZJUavsFykgxLDiNGN4qORkZyHGD/ZvXjJ4Ns+pDkOcxQ2xZD59Djbnu6CLk010NFENmwAn/b2jTy55fUwW9qPuOxGEmz2RpSaIiPDz5l6LsI2lvZV54hbuefBgpKDgGUS4EnbaU0db8w6APM/rWFrGIgBXDIfj/tM4BthrOCNbbK/clDuKsUkRoEjbApqvbXf36D41uUZIHhLlF2CTT2mi6nT6mTAxsmro1YdKO828wCmBZV879E+jJldsh5RUl7EWW7X7bb+XrVlmCxagRkKQjR/AwUEgi6Zd4XEOTcdC53f9R1e0xm3/0MQpqu24rZR4kIXkJbAmgJnOshKsscW6IlfeRGIiIHWdVXGp7mePcF+hvA8/0nQkxRC8JtNNR8buDYWmXRBihfcrr2zxn/zdhBxYcE9vcc9GEmaTnwqI3f/W0nBgOy6gkMnQ2z2RqoyzkgxNX1l1CoOCIzbyGsxnAhWna47xNvACqb/PvidNI+Ivc14/cUF/uGOaktHnLKzi0r71ebMFKtXSbI/a93qs7d5cghNbnnRrrxHBmtni+lVgAnbfrR7e4FgcjLeUvKhYjTUiGLz8gfSIF4dgDed9GIG+PxklE2na9SJTTWSv99C0JhAe7x+hCxnFnQgxOiB9ThBmwHI/3MBvn6qFvWAFSystOCcjbZ3wad9J/ndSrbwy8LojHIbHMzXf0feuTkmYNsdx3B+77sa/oiZV7ewHWlURMsBEslyNX0OuFNyaSVwL/sUjOxMmUqJe9q0uxfgdSr1W8S+/7v2s/ky7k3FZ9AqsAH5NGP8apZQe1SJzvxt0hFiw2FOFqwKQMNUvacHzXUtQyDeivvu3mPFCQZbwwCFrxnZviOdmrfGQr5dhCAYmxxnUVIetSbvj77sq0c/QXKXjJOiqyqTd7WjAFwKnZDZ6Yn4isSbsaDrnDrZpQ+O4MVwKSXY5q1S0NMqdn4cfjLZYRdsufwWsULeq7Gt+SaSCaI0BHn5grsKpNh89HgNMvgYUwkjJXXRd2/I2lcQDEaTYXEs7RcRnJk1a+8DljdVd51b2VN5FkT69aSL1olaDXrfEztShbJlz+nQUkYZeSYE9dhURfvtAILg1J7tTD0eDwNggJGc7hPI62Ir+Kk0t045WobbTESGElU3hbJ5/WMA7HC6b7Mbiw8ikjglIBK1ur9BBV/8lNrdqSBh6MqWGLpRf/qaaqlfG65jIM9O8Qe70E6hWFAbWFRPcuCNA878NMMiTGJ1PQZ/owm5lrEPn833FtJCKHkAVLIrezhalCIuDvc8OOFMD08vFmvsFwHp3YtPefgZXTNqAalQYuPKsPSDMtUJCCCYRzWI0vyj27dcHwtrOsP9ovRo1U6a20tsDnhBRaAlmUQEolC/fmOsmJrBcK7QBfj9awhtBovXacZKdud8cu0Mbxo1+1fJaF5cosR+s3qSvsWWCM+sCCr0bxqUOS1kqkHe+9AQNOUkwlLKPPNfZ+z8dvjJrtJeZ6dLnJuu3+G4aoB91x4sgAcr6LugEe6MW4fYMO+Td5pMqn51N/dwMUkFQy2pCvvF41xeJuxpu9m8eAZ/EyS2pNfPkq6EeiNwvVp33BtIKA5oAZ7JzJZ18ZrOWg==
template:
metadata:
creationTimestamp: null

View File

@ -2832,7 +2832,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
"title": "Rate of TCP Retransimts out of all sent segments",
"title": "Rate of TCP Retransmits out of all sent segments",
"tooltip": {
"shared": true,
"sort": 2,
@ -2935,7 +2935,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
"title": "Rate of TCP SYN Retransimts out of all retransmits",
"title": "Rate of TCP SYN Retransmits out of all retransmits",
"tooltip": {
"shared": true,
"sort": 2,
@ -31354,6 +31354,916 @@ items:
metadata:
name: grafana-dashboard-scheduler
namespace: monitoring
- apiVersion: v1
data:
statefulset.json: |-
{
"__inputs": [
],
"__requires": [
],
"annotations": {
"list": [
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
],
"refresh": "",
"rows": [
{
"collapse": false,
"collapsed": false,
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 2,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "cores",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "CPU",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 3,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "GB",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Memory",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 4,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "Bps",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Network",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "100px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 5,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Desired Replicas",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 6,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Replicas of current version",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 7,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Observed Generation",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 8,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Metadata Generation",
"tooltip": {
"shared": false
},
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "0",
"value": "null"
}
],
"valueName": "current"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas specified",
"refId": "A"
},
{
"expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas created",
"refId": "B"
},
{
"expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "ready",
"refId": "C"
},
{
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas of current version",
"refId": "D"
},
{
"expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "updated",
"refId": "E"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Replicas",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"kubernetes-mixin"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 2,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
],
"query": "label_values(kube_statefulset_metadata_generation, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [
],
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Name",
"multi": false,
"name": "statefulset",
"options": [
],
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Kubernetes / StatefulSets",
"uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
}
kind: ConfigMap
metadata:
name: grafana-dashboard-statefulset
namespace: monitoring
- apiVersion: v1
data:
workload-total.json: |-

View File

@ -5,7 +5,7 @@ data:
"apiVersion": 1,
"providers": [
{
"folder": "",
"folder": "Default",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"

View File

@ -106,6 +106,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
@ -187,6 +190,9 @@ spec:
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total

View File

@ -3,7 +3,7 @@ kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
rules:
- apiGroups:

View File

@ -3,7 +3,7 @@ kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -3,7 +3,7 @@ kind: Deployment
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
namespace: monitoring
spec:
@ -15,30 +15,38 @@ spec:
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
spec:
containers:
- image: quay.io/coreos/kube-state-metrics:v1.9.4
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
- args:
- --host=127.0.0.1
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.9.5
name: kube-state-metrics
ports:
- containerPort: 8080
name: http-metrics
- containerPort: 8081
name: telemetry
readinessProbe:
httpGet:
path: /
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
securityContext:
runAsUser: 65534
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:8081/
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
- args:
- --logtostderr
- --secure-listen-address=:9443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:8082/
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics

View File

@ -3,17 +3,17 @@ kind: Service
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
namespace: monitoring
spec:
clusterIP: None
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
- name: telemetry
port: 8081
targetPort: telemetry
- name: https-main
port: 8443
targetPort: https-main
- name: https-self
port: 9443
targetPort: https-self
selector:
app.kubernetes.io/name: kube-state-metrics

View File

@ -3,6 +3,6 @@ kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
namespace: monitoring

View File

@ -3,22 +3,29 @@ kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
app.kubernetes.io/version: 1.9.5
name: kube-state-metrics
namespace: monitoring
spec:
endpoints:
- honorLabels: true
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
port: http-metrics
port: https-main
relabelings:
- action: labeldrop
regex: (pod|service|endpoint|namespace)
scheme: https
scrapeTimeout: 30s
- interval: 30s
port: telemetry
tlsConfig:
insecureSkipVerify: true
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https-self
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
namespace: monitoring
spec:
@ -15,4 +15,4 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0

View File

@ -478,7 +478,7 @@ spec:
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 20
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
@ -963,6 +963,8 @@ spec:
rules:
- alert: ErrorBudgetBurn
annotations:
message: 'High requests error budget burn for job=apiserver (current value:
{{ $value }})'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
(
@ -981,6 +983,8 @@ spec:
severity: critical
- alert: ErrorBudgetBurn
annotations:
message: 'High requests error budget burn for job=apiserver (current value:
{{ $value }})'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
(
@ -1033,30 +1037,6 @@ spec:
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.03
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.01
for: 10m
labels:
severity: warning
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
@ -1187,7 +1167,7 @@ spec:
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60
for: 15m
labels:
severity: warning

View File

@ -719,7 +719,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -1440,9 +1440,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -1661,7 +1660,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -1787,7 +1786,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -2508,9 +2507,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -2729,7 +2727,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -2766,9 +2764,25 @@ spec:
are not going to be performed, except for delete actions.
type: boolean
podMetadata:
description: 'Standard objects metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
Metadata Labels and Annotations gets propagated to the prometheus
pods.'
description: PodMetadata configures Labels and Annotations which are
propagated to the alertmanager pods.
properties:
annotations:
additionalProperties:
type: string
description: 'Annotations is an unstructured key value map stored
with a resource that may be set by external tools to store and
retrieve arbitrary metadata. They are not queryable and should
be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations'
type: object
labels:
additionalProperties:
type: string
description: 'Map of string keys and values that can be used to
organize and categorize (scope and select) objects. May match
selectors of replication controllers and services. More info:
http://kubernetes.io/docs/user-guide/labels'
type: object
type: object
portName:
description: Port name used for the pods and governing service. This
@ -2933,8 +2947,8 @@ spec:
metadata if unspecified. May also be set in PodSecurityContext.
If set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence. This
field is alpha-level and it is only honored by servers that
enable the WindowsRunAsUserName feature flag.
field is beta-level and may be disabled with the WindowsRunAsUserName
feature flag.
type: string
type: object
type: object
@ -3250,7 +3264,7 @@ spec:
volume should be mounted. Behaves similarly to SubPath but environment
variable references $(VAR_NAME) are expanded using the container's
environment. Defaults to "" (volume's root). SubPathExpr and
SubPath are mutually exclusive. This field is beta in 1.15.
SubPath are mutually exclusive.
type: string
required:
- mountPath

View File

@ -1163,7 +1163,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -1884,9 +1884,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -2105,7 +2104,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -2262,7 +2261,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -2983,9 +2982,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -3204,7 +3202,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -3249,9 +3247,25 @@ spec:
for deletion will be performed on the underlying objects.
type: boolean
podMetadata:
description: 'Standard objects metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
Metadata Labels and Annotations gets propagated to the prometheus
pods.'
description: PodMetadata configures Labels and Annotations which are
propagated to the prometheus pods.
properties:
annotations:
additionalProperties:
type: string
description: 'Annotations is an unstructured key value map stored
with a resource that may be set by external tools to store and
retrieve arbitrary metadata. They are not queryable and should
be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations'
type: object
labels:
additionalProperties:
type: string
description: 'Map of string keys and values that can be used to
organize and categorize (scope and select) objects. May match
selectors of replication controllers and services. More info:
http://kubernetes.io/docs/user-guide/labels'
type: object
type: object
podMonitorNamespaceSelector:
description: Namespaces to be selected for PodMonitor discovery. If
@ -4133,8 +4147,8 @@ spec:
metadata if unspecified. May also be set in PodSecurityContext.
If set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence. This
field is alpha-level and it is only honored by servers that
enable the WindowsRunAsUserName feature flag.
field is beta-level and may be disabled with the WindowsRunAsUserName
feature flag.
type: string
type: object
type: object

View File

@ -33,6 +33,578 @@ spec:
description: 'Specification of the desired behavior of the ThanosRuler cluster.
More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status'
properties:
affinity:
description: If specified, the pod's scheduling constraints.
properties:
nodeAffinity:
description: Describes node affinity scheduling rules for the pod.
properties:
preferredDuringSchedulingIgnoredDuringExecution:
description: The scheduler will prefer to schedule pods to nodes
that satisfy the affinity expressions specified by this field,
but it may choose a node that violates one or more of the
expressions. The node that is most preferred is the one with
the greatest sum of weights, i.e. for each node that meets
all of the scheduling requirements (resource request, requiredDuringScheduling
affinity expressions, etc.), compute a sum by iterating through
the elements of this field and adding "weight" to the sum
if the node matches the corresponding matchExpressions; the
node(s) with the highest sum are the most preferred.
items:
description: An empty preferred scheduling term matches all
objects with implicit weight 0 (i.e. it's a no-op). A null
preferred scheduling term matches no objects (i.e. is also
a no-op).
properties:
preference:
description: A node selector term, associated with the
corresponding weight.
properties:
matchExpressions:
description: A list of node selector requirements
by node's labels.
items:
description: A node selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: An array of string values. If the
operator is In or NotIn, the values array
must be non-empty. If the operator is Exists
or DoesNotExist, the values array must be
empty. If the operator is Gt or Lt, the values
array must have a single element, which will
be interpreted as an integer. This array is
replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
by node's fields.
items:
description: A node selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: An array of string values. If the
operator is In or NotIn, the values array
must be non-empty. If the operator is Exists
or DoesNotExist, the values array must be
empty. If the operator is Gt or Lt, the values
array must have a single element, which will
be interpreted as an integer. This array is
replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
type: object
weight:
description: Weight associated with matching the corresponding
nodeSelectorTerm, in the range 1-100.
format: int32
type: integer
required:
- preference
- weight
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: If the affinity requirements specified by this
field are not met at scheduling time, the pod will not be
scheduled onto the node. If the affinity requirements specified
by this field cease to be met at some point during pod execution
(e.g. due to an update), the system may or may not try to
eventually evict the pod from its node.
properties:
nodeSelectorTerms:
description: Required. A list of node selector terms. The
terms are ORed.
items:
description: A null or empty node selector term matches
no objects. The requirements of them are ANDed. The
TopologySelectorTerm type implements a subset of the
NodeSelectorTerm.
properties:
matchExpressions:
description: A list of node selector requirements
by node's labels.
items:
description: A node selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: An array of string values. If the
operator is In or NotIn, the values array
must be non-empty. If the operator is Exists
or DoesNotExist, the values array must be
empty. If the operator is Gt or Lt, the values
array must have a single element, which will
be interpreted as an integer. This array is
replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
by node's fields.
items:
description: A node selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists, DoesNotExist, Gt, and Lt.
type: string
values:
description: An array of string values. If the
operator is In or NotIn, the values array
must be non-empty. If the operator is Exists
or DoesNotExist, the values array must be
empty. If the operator is Gt or Lt, the values
array must have a single element, which will
be interpreted as an integer. This array is
replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
type: object
type: array
required:
- nodeSelectorTerms
type: object
type: object
podAffinity:
description: Describes pod affinity scheduling rules (e.g. co-locate
this pod in the same node, zone, etc. as some other pod(s)).
properties:
preferredDuringSchedulingIgnoredDuringExecution:
description: The scheduler will prefer to schedule pods to nodes
that satisfy the affinity expressions specified by this field,
but it may choose a node that violates one or more of the
expressions. The node that is most preferred is the one with
the greatest sum of weights, i.e. for each node that meets
all of the scheduling requirements (resource request, requiredDuringScheduling
affinity expressions, etc.), compute a sum by iterating through
the elements of this field and adding "weight" to the sum
if the node has pods which match the corresponding podAffinityTerm;
the node(s) with the highest sum are the most preferred.
items:
description: The weights of all of the matched WeightedPodAffinityTerm
fields are added per-node to find the most preferred node(s)
properties:
podAffinityTerm:
description: Required. A pod affinity term, associated
with the corresponding weight.
properties:
labelSelector:
description: A label query over a set of resources,
in this case pods.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: A label selector requirement is
a selector that contains values, a key, and
an operator that relates the key and values.
properties:
key:
description: key is the label key that the
selector applies to.
type: string
operator:
description: operator represents a key's
relationship to a set of values. Valid
operators are In, NotIn, Exists and DoesNotExist.
type: string
values:
description: values is an array of string
values. If the operator is In or NotIn,
the values array must be non-empty. If
the operator is Exists or DoesNotExist,
the values array must be empty. This array
is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value}
pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions,
whose key field is "key", the operator is "In",
and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces
the labelSelector applies to (matches against);
null or empty list means "this pod's namespace"
items:
type: string
type: array
topologyKey:
description: This pod should be co-located (affinity)
or not co-located (anti-affinity) with the pods
matching the labelSelector in the specified namespaces,
where co-located is defined as running on a node
whose value of the label with key topologyKey matches
that of any node on which any of the selected pods
is running. Empty topologyKey is not allowed.
type: string
required:
- topologyKey
type: object
weight:
description: weight associated with matching the corresponding
podAffinityTerm, in the range 1-100.
format: int32
type: integer
required:
- podAffinityTerm
- weight
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: If the affinity requirements specified by this
field are not met at scheduling time, the pod will not be
scheduled onto the node. If the affinity requirements specified
by this field cease to be met at some point during pod execution
(e.g. due to a pod label update), the system may or may not
try to eventually evict the pod from its node. When there
are multiple elements, the lists of nodes corresponding to
each podAffinityTerm are intersected, i.e. all terms must
be satisfied.
items:
description: Defines a set of pods (namely those matching
the labelSelector relative to the given namespace(s)) that
this pod should be co-located (affinity) or not co-located
(anti-affinity) with, where co-located is defined as running
on a node whose value of the label with key <topologyKey>
matches that of any node on which a pod of the set of pods
is running
properties:
labelSelector:
description: A label query over a set of resources, in
this case pods.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values
array must be non-empty. If the operator is
Exists or DoesNotExist, the values array must
be empty. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field
is "key", the operator is "In", and the values array
contains only "value". The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces the
labelSelector applies to (matches against); null or
empty list means "this pod's namespace"
items:
type: string
type: array
topologyKey:
description: This pod should be co-located (affinity)
or not co-located (anti-affinity) with the pods matching
the labelSelector in the specified namespaces, where
co-located is defined as running on a node whose value
of the label with key topologyKey matches that of any
node on which any of the selected pods is running. Empty
topologyKey is not allowed.
type: string
required:
- topologyKey
type: object
type: array
type: object
podAntiAffinity:
description: Describes pod anti-affinity scheduling rules (e.g.
avoid putting this pod in the same node, zone, etc. as some other
pod(s)).
properties:
preferredDuringSchedulingIgnoredDuringExecution:
description: The scheduler will prefer to schedule pods to nodes
that satisfy the anti-affinity expressions specified by this
field, but it may choose a node that violates one or more
of the expressions. The node that is most preferred is the
one with the greatest sum of weights, i.e. for each node that
meets all of the scheduling requirements (resource request,
requiredDuringScheduling anti-affinity expressions, etc.),
compute a sum by iterating through the elements of this field
and adding "weight" to the sum if the node has pods which
match the corresponding podAffinityTerm; the node(s) with
the highest sum are the most preferred.
items:
description: The weights of all of the matched WeightedPodAffinityTerm
fields are added per-node to find the most preferred node(s)
properties:
podAffinityTerm:
description: Required. A pod affinity term, associated
with the corresponding weight.
properties:
labelSelector:
description: A label query over a set of resources,
in this case pods.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: A label selector requirement is
a selector that contains values, a key, and
an operator that relates the key and values.
properties:
key:
description: key is the label key that the
selector applies to.
type: string
operator:
description: operator represents a key's
relationship to a set of values. Valid
operators are In, NotIn, Exists and DoesNotExist.
type: string
values:
description: values is an array of string
values. If the operator is In or NotIn,
the values array must be non-empty. If
the operator is Exists or DoesNotExist,
the values array must be empty. This array
is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value}
pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions,
whose key field is "key", the operator is "In",
and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces
the labelSelector applies to (matches against);
null or empty list means "this pod's namespace"
items:
type: string
type: array
topologyKey:
description: This pod should be co-located (affinity)
or not co-located (anti-affinity) with the pods
matching the labelSelector in the specified namespaces,
where co-located is defined as running on a node
whose value of the label with key topologyKey matches
that of any node on which any of the selected pods
is running. Empty topologyKey is not allowed.
type: string
required:
- topologyKey
type: object
weight:
description: weight associated with matching the corresponding
podAffinityTerm, in the range 1-100.
format: int32
type: integer
required:
- podAffinityTerm
- weight
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: If the anti-affinity requirements specified by
this field are not met at scheduling time, the pod will not
be scheduled onto the node. If the anti-affinity requirements
specified by this field cease to be met at some point during
pod execution (e.g. due to a pod label update), the system
may or may not try to eventually evict the pod from its node.
When there are multiple elements, the lists of nodes corresponding
to each podAffinityTerm are intersected, i.e. all terms must
be satisfied.
items:
description: Defines a set of pods (namely those matching
the labelSelector relative to the given namespace(s)) that
this pod should be co-located (affinity) or not co-located
(anti-affinity) with, where co-located is defined as running
on a node whose value of the label with key <topologyKey>
matches that of any node on which a pod of the set of pods
is running
properties:
labelSelector:
description: A label query over a set of resources, in
this case pods.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In,
NotIn, Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values
array must be non-empty. If the operator is
Exists or DoesNotExist, the values array must
be empty. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field
is "key", the operator is "In", and the values array
contains only "value". The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces the
labelSelector applies to (matches against); null or
empty list means "this pod's namespace"
items:
type: string
type: array
topologyKey:
description: This pod should be co-located (affinity)
or not co-located (anti-affinity) with the pods matching
the labelSelector in the specified namespaces, where
co-located is defined as running on a node whose value
of the label with key topologyKey matches that of any
node on which any of the selected pods is running. Empty
topologyKey is not allowed.
type: string
required:
- topologyKey
type: object
type: array
type: object
type: object
alertDropLabels:
description: AlertDropLabels configure the label names which should
be dropped in ThanosRuler alerts. If `labels` field is not provided,
@ -150,7 +722,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -871,9 +1443,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -1092,7 +1663,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -1116,6 +1687,11 @@ spec:
evaluationInterval:
description: Interval between consecutive evaluations.
type: string
externalPrefix:
description: The external URL the Thanos Ruler instances will be available
under. This is necessary to generate correct URLs, in particular
if Thanos Ruler is not served from the root of a DNS name.
type: string
image:
description: Thanos container image URL.
type: string
@ -1217,7 +1793,7 @@ spec:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, metadata.labels, metadata.annotations,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP.'
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath
@ -1938,9 +2514,8 @@ spec:
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
takes precedence. This field is beta-level and may be
disabled with the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
@ -2159,7 +2734,7 @@ spec:
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is beta in 1.15.
exclusive.
type: string
required:
- mountPath
@ -2192,6 +2767,11 @@ spec:
logLevel:
description: Log level for ThanosRuler to be configured with.
type: string
nodeSelector:
additionalProperties:
type: string
description: Define which Nodes the Pods are scheduled on.
type: object
objectStorageConfig:
description: ObjectStorageConfig configures object storage in Thanos.
properties:
@ -2214,14 +2794,33 @@ spec:
for deletion will be performed on the underlying objects.
type: boolean
podMetadata:
description: 'Standard objects metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
Metadata Labels and Annotations gets propagated to the prometheus
pods.'
description: PodMetadata configures Labels and Annotations which are
propagated to the thanos ruler pods.
properties:
annotations:
additionalProperties:
type: string
description: 'Annotations is an unstructured key value map stored
with a resource that may be set by external tools to store and
retrieve arbitrary metadata. They are not queryable and should
be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations'
type: object
labels:
additionalProperties:
type: string
description: 'Map of string keys and values that can be used to
organize and categorize (scope and select) objects. May match
selectors of replication controllers and services. More info:
http://kubernetes.io/docs/user-guide/labels'
type: object
type: object
portName:
description: Port name used for the pods and governing service. This
defaults to web
type: string
priorityClassName:
description: Priority class assigned to the Pods
type: string
queryEndpoints:
description: QueryEndpoints defines Thanos querier endpoints from which
to query metrics. Maps to the --query flag of thanos ruler.
@ -2233,8 +2832,8 @@ spec:
format: int32
type: integer
resources:
description: Resources defines the resource requirements for the Thanos
sidecar. If not provided, no requests/limits will be set
description: Resources defines the resource requirements for single
Pods. If not provided, no requests/limits will be set
properties:
limits:
additionalProperties:
@ -2256,6 +2855,10 @@ spec:
is '24h', and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)`
(milliseconds seconds minutes hours days weeks years).
type: string
routePrefix:
description: The route prefix ThanosRuler registers HTTP handlers for.
This allows thanos UI to be served on a sub-path.
type: string
ruleNamespaceSelector:
description: Namespaces to be selected for Rules discovery. If unspecified,
only the same namespace as the ThanosRuler object is in is used.
@ -2344,6 +2947,128 @@ spec:
are ANDed.
type: object
type: object
securityContext:
description: SecurityContext holds pod-level security attributes and
common container settings. This defaults to the default PodSecurityContext.
properties:
fsGroup:
description: "A special supplemental group that applies to all containers
in a pod. Some volume types allow the Kubelet to change the ownership
of that volume to be owned by the pod: \n 1. The owning GID will
be the FSGroup 2. The setgid bit is set (new files created in
the volume will be owned by FSGroup) 3. The permission bits are
OR'd with rw-rw---- \n If unset, the Kubelet will not modify the
ownership and permissions of any volume."
format: int64
type: integer
runAsGroup:
description: The GID to run the entrypoint of the container process.
Uses runtime default if unset. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
format: int64
type: integer
runAsNonRoot:
description: Indicates that the container must run as a non-root
user. If true, the Kubelet will validate the image at runtime
to ensure that it does not run as UID 0 (root) and fail to start
the container if it does. If unset or false, no such validation
will be performed. May also be set in SecurityContext. If set
in both SecurityContext and PodSecurityContext, the value specified
in SecurityContext takes precedence.
type: boolean
runAsUser:
description: The UID to run the entrypoint of the container process.
Defaults to user specified in image metadata if unspecified. May
also be set in SecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence for that container.
format: int64
type: integer
seLinuxOptions:
description: The SELinux context to be applied to all containers.
If unspecified, the container runtime will allocate a random SELinux
context for each container. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
properties:
level:
description: Level is SELinux level label that applies to the
container.
type: string
role:
description: Role is a SELinux role label that applies to the
container.
type: string
type:
description: Type is a SELinux type label that applies to the
container.
type: string
user:
description: User is a SELinux user label that applies to the
container.
type: string
type: object
supplementalGroups:
description: A list of groups applied to the first process run in
each container, in addition to the container's primary GID. If
unspecified, no groups will be added to any container.
items:
format: int64
type: integer
type: array
sysctls:
description: Sysctls hold a list of namespaced sysctls used for
the pod. Pods with unsupported sysctls (by the container runtime)
might fail to launch.
items:
description: Sysctl defines a kernel parameter to be set
properties:
name:
description: Name of a property to set
type: string
value:
description: Value of a property to set
type: string
required:
- name
- value
type: object
type: array
windowsOptions:
description: The Windows specific settings applied to all containers.
If unspecified, the options within a container's SecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
webhook (https://github.com/kubernetes-sigs/windows-gmsa)
inlines the contents of the GMSA credential spec named by
the GMSACredentialSpecName field. This field is alpha-level
and is only honored by servers that enable the WindowsGMSA
feature flag.
type: string
gmsaCredentialSpecName:
description: GMSACredentialSpecName is the name of the GMSA
credential spec to use. This field is alpha-level and is only
honored by servers that enable the WindowsGMSA feature flag.
type: string
runAsUserName:
description: The UserName in Windows to run the entrypoint of
the container process. Defaults to the user specified in image
metadata if unspecified. May also be set in PodSecurityContext.
If set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence. This
field is beta-level and may be disabled with the WindowsRunAsUserName
feature flag.
type: string
type: object
type: object
serviceAccountName:
description: ServiceAccountName is the name of the ServiceAccount to
use to run the Thanos Ruler Pods.
type: string
storage:
description: Storage spec to specify how storage shall be used.
properties:
@ -2564,6 +3289,46 @@ spec:
type: object
type: object
type: object
tolerations:
description: If specified, the pod's tolerations.
items:
description: The pod this Toleration is attached to tolerates any
taint that matches the triple <key,value,effect> using the matching
operator <operator>.
properties:
effect:
description: Effect indicates the taint effect to match. Empty
means match all taint effects. When specified, allowed values
are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: Key is the taint key that the toleration applies
to. Empty means match all taint keys. If the key is empty, operator
must be Exists; this combination means to match all values and
all keys.
type: string
operator:
description: Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal. Exists
is equivalent to wildcard for value, so that a pod can tolerate
all taints of a particular category.
type: string
tolerationSeconds:
description: TolerationSeconds represents the period of time the
toleration (which must be of effect NoExecute, otherwise this
field is ignored) tolerates the taint. By default, it is not
set, which means tolerate the taint forever (do not evict).
Zero and negative values will be treated as 0 (evict immediately)
by the system.
format: int64
type: integer
value:
description: Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise
just a regular string.
type: string
type: object
type: array
tracingConfig:
description: TracingConfig configures tracing in Thanos. This is an
experimental feature, it may change in any upcoming release in a breaking

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
rules:
- apiGroups:

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
namespace: monitoring
spec:
@ -18,15 +18,15 @@ spec:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.36.0
image: quay.io/coreos/prometheus-operator:v0.36.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.37.0
image: quay.io/coreos/prometheus-operator:v0.37.0
name: prometheus-operator
ports:
- containerPort: 8080

View File

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
namespace: monitoring
spec:

View File

@ -4,6 +4,6 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
namespace: monitoring

View File

@ -1,14 +0,0 @@
{
apiVersion: 1,
providers: [
{
name: '0',
orgId: 1,
folder: '',
type: 'file',
options: {
path: '/grafana-dashboard-definitions/0',
},
},
],
}

View File

@ -20,6 +20,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
grafana+:: {
dashboards: {},
rawDashboards: {},
folderDashboards: {},
datasources: [{
name: 'prometheus',
type: 'prometheus',
@ -57,6 +58,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
configMap.mixin.metadata.withNamespace($._config.namespace)
for name in std.objectFields($._config.grafana.dashboards)
] + [
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
configMap.new(dashboardName, { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') }) +
configMap.mixin.metadata.withNamespace($._config.namespace)
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + if std.length($._config.grafana.rawDashboards) > 0 then
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
@ -67,7 +75,31 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
] else [],
dashboardSources:
local configMap = k.core.v1.configMap;
local dashboardSources = import 'configs/dashboard-sources/dashboards.libsonnet';
local dashboardSources = {
apiVersion: 1,
providers: [
{
name: '0',
orgId: 1,
folder: 'Default',
type: 'file',
options: {
path: '/grafana-dashboard-definitions/0',
},
},
] + [
{
name: folder,
orgId: 1,
folder: folder,
type: 'file',
options: {
path: '/grafana-dashboard-definitions/' + folder,
},
}
for folder in std.objectFields($._config.grafana.folderDashboards)
],
};
configMap.new('grafana-dashboards', { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') }) +
configMap.mixin.metadata.withNamespace($._config.namespace),
@ -134,6 +166,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
for name in std.objectFields($._config.grafana.dashboards)
] +
[
local dashboardName = std.strReplace(name, '.json', '');
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/' + folder + '/' + dashboardName)
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
local dashboardName = std.strReplace(name, '.json', '');
containerVolumeMount.new('grafana-dashboard-' + dashboardName, '/grafana-dashboard-definitions/0/' + dashboardName)
@ -154,6 +192,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
volume.mixin.configMap.withName(dashboardName)
for name in std.objectFields($._config.grafana.dashboards)
] +
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
volume.withName(dashboardName) +
volume.mixin.configMap.withName(dashboardName)
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', '');
volume.withName(dashboardName) +

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -38,7 +38,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.36"
"version": "release-0.37"
},
{
"name": "etcd-mixin",

View File

@ -0,0 +1,189 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
// Service named 'weave-net' in the kube-system namespace, labelled
// k8s-app=weave-net, with clusterIP set to 'None'. It declares a single port
// named 'weave-net-metrics' on 6782.
// NOTE(review): the second argument to service.new is presumably the pod
// selector — confirm against the ksonnet API.
// Anything that refers to this port by name (e.g. a ServiceMonitor endpoint)
// has to use exactly 'weave-net-metrics'.
serviceWeaveNet:
service.new('weave-net', { 'k8s-app': 'weave-net' }, servicePort.newNamed('weave-net-metrics', 6782, 6782)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'weave-net' }) +
service.mixin.spec.withClusterIp('None'),
// ServiceMonitor (created in the 'monitoring' namespace) that selects Services
// labelled k8s-app=weave-net in kube-system and scrapes /metrics every 15s.
serviceMonitorWeaveNet: {
  apiVersion: 'monitoring.coreos.com/v1',
  kind: 'ServiceMonitor',
  metadata: {
    name: 'weave-net',
    labels: {
      'k8s-app': 'weave-net',
    },
    namespace: 'monitoring',
  },
  spec: {
    jobLabel: 'k8s-app',
    endpoints: [
      {
        // Must equal the *name* of the Service port declared by
        // serviceWeaveNet ('weave-net-metrics'). The previous value
        // 'weave-metrics' matched no port, so nothing was scraped.
        port: 'weave-net-metrics',
        path: '/metrics',
        interval: '15s',
      },
    ],
    namespaceSelector: {
      matchNames: [
        'kube-system',
      ],
    },
    selector: {
      matchLabels: {
        'k8s-app': 'weave-net',
      },
    },
  },
},
},
prometheusRules+: {
groups+: [
{
name: 'weave-net',
rules: [
{
alert: 'WeaveNetIPAMSplitBrain',
expr: 'max(weave_ipam_unreachable_percentage) - min(weave_ipam_unreachable_percentage) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Percentage of all IP addresses owned by unreachable peers is not same for every node.',
description: 'actionable: Weave Net network has a split brain problem. Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMUnreachable',
expr: 'weave_ipam_unreachable_percentage > 25',
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Percentage of all IP addresses owned by unreachable peers is above threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMPendingAllocates',
expr: 'sum(weave_ipam_pending_allocates) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of pending allocates is above the threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMPendingClaims',
expr: 'sum(weave_ipam_pending_claims) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of pending claims is above the threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetFastDPFlowsLow',
expr: 'sum(weave_flows) < 15000',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of FastDP flows is below the threshold.',
description: 'actionable: Please find the reason for FastDP flows to go below the threshold and fix it.',
},
},
{
alert: 'WeaveNetFastDPFlowsOff',
expr: 'sum(weave_flows == bool 0) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'FastDP flows is zero.',
description: 'actionable: Please find the reason for FastDP flows to be off and fix it.',
},
},
{
alert: 'WeaveNetHighConnectionTerminationRate',
expr: 'rate(weave_connection_terminations_total[5m]) > 0.1',
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are getting terminated.',
description: 'actionable: Please find the reason for the high connection termination rate and fix it.',
},
},
{
alert: 'WeaveNetConnectionsConnecting',
expr: 'sum(weave_connections{state="connecting"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in connecting state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
// Fires when the summed weave_connections series in state "retrying" has been
// above zero for 3 minutes.
// NOTE(review): the alert name reads 'Retying' (likely meant 'Retrying').
// Renaming it would change the alert's identity for any Alertmanager routes,
// silences or dashboards keyed on the name — confirm before fixing.
{
alert: 'WeaveNetConnectionsRetying',
expr: 'sum(weave_connections{state="retrying"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in retrying state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
{
alert: 'WeaveNetConnectionsPending',
expr: 'sum(weave_connections{state="pending"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in pending state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
{
alert: 'WeaveNetConnectionsFailed',
expr: 'sum(weave_connections{state="failed"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in failed state.',
description: 'actionable: Please find the reason and fix it.',
},
},
],
},
],
},
grafanaDashboards+:: {
'weave-net.json': (import 'grafana-weave-net.json'),
'weave-net-cluster.json': (import 'grafana-weave-net-cluster.json'),
},
}

View File

@ -116,6 +116,7 @@ local configMapList = k3.core.v1.configMapList;
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
fsSpaceFillingUpCriticalThreshold: 15,
notKubeDnsSelector: 'job!="kube-dns"',
kubeSchedulerSelector: 'job="kube-scheduler"',
kubeControllerManagerSelector: 'job="kube-controller-manager"',

View File

@ -0,0 +1,90 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
{
local krp = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
// Patch for a workload-shaped object: appends one kube-rbac-proxy container to
// spec.template.spec.containers. All container settings are read from the
// enclosing object's config (krp.config.kubeRbacProxy), not from the `config`
// field declared on this mixin.
specMixin:: {
  // Shorthand for the proxy settings on the enclosing object; evaluated lazily,
  // so the `error` placeholders only trigger when a field is actually used.
  local proxy = krp.config.kubeRbacProxy,

  config+:: {
    kubeRbacProxy: {
      image: error 'must provide image',
      name: error 'must provide name',
      securePortName: error 'must provide securePortName',
      securePort: error 'must provide securePort',
      secureListenAddress: error 'must provide secureListenAddress',
      upstream: error 'must provide upstream',
      tlsCipherSuites: error 'must provide tlsCipherSuites',
    },
  },

  spec+: {
    template+: {
      spec+: {
        containers+: [
          container.new(proxy.name, proxy.image) +
          container.withArgs([
            '--logtostderr',
            '--secure-listen-address=' + proxy.secureListenAddress,
            '--tls-cipher-suites=' + std.join(',', proxy.tlsCipherSuites),
            '--upstream=' + proxy.upstream,
          ]) +
          container.withPorts(containerPort.newNamed(proxy.securePort, proxy.securePortName)),
        ],
      },
    },
  },
},
// Mixin that extends a `deployment` field with krp.specMixin, i.e. appends the
// kube-rbac-proxy container to the deployment's pod template.
// Every kubeRbacProxy setting defaults to an `error`, so it must be provided.
// NOTE(review): the `config` handed to specMixin below comes from this mixin's
// own config (dm.config), but specMixin builds the container from krp.config.
// The override here therefore does not appear to be read — confirm intent.
deploymentMixin:: {
local dm = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
deployment+: krp.specMixin {
config+:: {
kubeRbacProxy+: dm.config.kubeRbacProxy,
},
},
},
// Mixin that extends a `statefulSet` field with krp.specMixin, i.e. appends the
// kube-rbac-proxy container to the stateful set's pod template.
// Every kubeRbacProxy setting defaults to an `error`, so it must be provided.
// NOTE(review): the `config` handed to specMixin below comes from this mixin's
// own config (sm.config), but specMixin builds the container from krp.config.
// The override here therefore does not appear to be read — confirm intent.
statefulSetMixin:: {
local sm = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
statefulSet+: krp.specMixin {
config+:: {
kubeRbacProxy+: sm.config.kubeRbacProxy,
},
},
},
}

View File

@ -1,43 +1,123 @@
{
_config+:: {
kubeStateMetrics+:: {
scrapeInterval: '30s',
scrapeTimeout: '30s',
},
},
kubeStateMetrics+:: (import 'kube-state-metrics/kube-state-metrics.libsonnet') +
{
local ksm = self,
name:: 'kube-state-metrics',
namespace:: 'monitoring',
version:: '1.9.4', //$._config.versions.kubeStateMetrics,
version:: '1.9.5', //$._config.versions.kubeStateMetrics,
image:: 'quay.io/coreos/kube-state-metrics:v' + ksm.version,
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: ksm.name,
namespace: ksm.namespace,
labels: ksm.commonLabels,
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: ksm.commonLabels,
},
endpoints: [
service+: {
spec+: {
ports: [
{
port: 'http-metrics',
interval: '30s',
scrapeTimeout: '30s',
honorLabels: true,
relabelings: [
{
regex: '(pod|service|endpoint|namespace)',
action: 'labeldrop',
},
],
name: 'https-main',
port: 8443,
targetPort: 'https-main',
},
{
port: 'telemetry',
interval: '30s',
name: 'https-self',
port: 9443,
targetPort: 'https-self',
},
],
},
},
},
deployment+: {
spec+: {
template+: {
spec+: {
containers: std.map(function(c) c {
ports:: null,
livenessProbe:: null,
readinessProbe:: null,
args: ['--host=127.0.0.1', '--port=8081', '--telemetry-host=127.0.0.1', '--telemetry-port=8082'],
}, super.containers),
},
},
},
},
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-state-metrics',
namespace: $._config.namespace,
labels: {
'app.kubernetes.io/name': 'kube-state-metrics',
'app.kubernetes.io/version': ksm.version,
},
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: {
'app.kubernetes.io/name': 'kube-state-metrics',
},
},
endpoints: [
{
port: 'https-main',
scheme: 'https',
interval: $._config.kubeStateMetrics.scrapeInterval,
scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
regex: '(pod|service|endpoint|namespace)',
action: 'labeldrop',
},
],
tlsConfig: {
insecureSkipVerify: true,
},
},
{
port: 'https-self',
scheme: 'https',
interval: $._config.kubeStateMetrics.scrapeInterval,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
},
},
],
},
},
} +
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-main',
securePortName: 'https-main',
securePort: 8443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8081/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin +
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-self',
securePortName: 'https-self',
securePort: 9443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8082/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin,
}

View File

@ -61,36 +61,6 @@ local utils = import 'utils.libsonnet';
message: 'The API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.',
},
},
{
alert: 'KubeAPIErrorsHigh',
expr: |||
sum(rate(apiserver_request_total{%(kubeApiserverSelector)s,code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[5m])) > 0.03
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
message: 'API server is returning errors for {{ $value | humanizePercentage }} of requests.',
},
},
{
alert: 'KubeAPIErrorsHigh',
expr: |||
sum(rate(apiserver_request_total{%(kubeApiserverSelector)s,code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[5m])) > 0.01
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
message: 'API server is returning errors for {{ $value | humanizePercentage }} of requests.',
},
},
{
alert: 'KubeAPIErrorsHigh',
expr: |||

View File

@ -77,7 +77,7 @@
{
alert: 'KubeletPodStartUpLatencyHigh',
expr: |||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{%(kubeletSelector)s}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60
||| % $._config,
'for': '15m',
labels: {

View File

@ -6,4 +6,5 @@
(import 'scheduler.libsonnet') +
(import 'proxy.libsonnet') +
(import 'kubelet.libsonnet') +
(import 'statefulset.libsonnet') +
(import 'defaults.libsonnet')

View File

@ -474,7 +474,7 @@ local gauge = promgrafonnet.gauge;
)
.addPanel(
newGraphPanel(
graphTitle='Rate of TCP Retransimts out of all sent segments',
graphTitle='Rate of TCP Retransmits out of all sent segments',
graphQuery='sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))',
graphFormat='percentunit',
legendFormat='{{instance}}'
@ -488,7 +488,7 @@ local gauge = promgrafonnet.gauge;
gridPos={ h: 9, w: 24, x: 0, y: 59 }
).addPanel(
newGraphPanel(
graphTitle='Rate of TCP SYN Retransimts out of all retransmits',
graphTitle='Rate of TCP SYN Retransmits out of all retransmits',
graphQuery='sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))',
graphFormat='percentunit',
legendFormat='{{instance}}'

View File

@ -0,0 +1,160 @@
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local graphPanel = grafana.graphPanel;
local prometheus = grafana.prometheus;
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
local row = grafana.row;
local singlestat = grafana.singlestat;
local template = grafana.template;
local numbersinglestat = promgrafonnet.numbersinglestat;
{
grafanaDashboards+:: {
'statefulset.json':
local cpuStat =
numbersinglestat.new(
'CPU',
'sum(rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m]))' % $._config,
)
.withSpanSize(4)
.withPostfix('cores')
.withSparkline();
local memoryStat =
numbersinglestat.new(
'Memory',
'sum(container_memory_usage_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}) / 1024^3' % $._config,
)
.withSpanSize(4)
.withPostfix('GB')
.withSparkline();
// Single-stat panel: combined network throughput (transmit + receive, 3m rate)
// of all pods whose name starts with the selected StatefulSet name.
// Both halves of the query carry the cAdvisor job selector; previously only the
// transmit half did, so receive bytes were summed across every job exposing
// container_network_receive_bytes_total.
local networkStat =
  numbersinglestat.new(
    'Network',
    'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$statefulset.*"}[3m])) + sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",pod=~"$statefulset.*"}[3m]))' % $._config,
  )
  .withSpanSize(4)
  .withPostfix('Bps')
  .withSparkline();
local overviewRow =
row.new()
.addPanel(cpuStat)
.addPanel(memoryStat)
.addPanel(networkStat);
local desiredReplicasStat = numbersinglestat.new(
'Desired Replicas',
'max(kube_statefulset_replicas{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config,
);
local availableReplicasStat = numbersinglestat.new(
'Replicas of current version',
'min(kube_statefulset_status_replicas_current{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config,
);
local observedGenerationStat = numbersinglestat.new(
'Observed Generation',
'max(kube_statefulset_status_observed_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", statefulset="$statefulset"}) without (instance, pod)' % $._config,
);
local metadataGenerationStat = numbersinglestat.new(
'Metadata Generation',
'max(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
);
local statsRow =
row.new(height='100px')
.addPanel(desiredReplicasStat)
.addPanel(availableReplicasStat)
.addPanel(observedGenerationStat)
.addPanel(metadataGenerationStat);
local replicasGraph =
graphPanel.new(
'Replicas',
datasource='$datasource',
)
.addTarget(prometheus.target(
'max(kube_statefulset_replicas{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
legendFormat='replicas specified',
))
.addTarget(prometheus.target(
'max(kube_statefulset_status_replicas{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
legendFormat='replicas created',
))
.addTarget(prometheus.target(
'min(kube_statefulset_status_replicas_ready{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
legendFormat='ready',
))
.addTarget(prometheus.target(
'min(kube_statefulset_status_replicas_current{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
legendFormat='replicas of current version',
))
.addTarget(prometheus.target(
'min(kube_statefulset_status_replicas_updated{%(kubeStateMetricsSelector)s, statefulset="$statefulset", %(clusterLabel)s="$cluster", namespace="$namespace"}) without (instance, pod)' % $._config,
legendFormat='updated',
));
local replicasRow =
row.new()
.addPanel(replicasGraph);
dashboard.new(
'%(dashboardNamePrefix)sStatefulSets' % $._config.grafanaK8s,
time_from='now-1h',
uid=($._config.grafanaDashboardIDs['statefulset.json']),
tags=($._config.grafanaK8s.dashboardTags),
).addTemplate(
{
current: {
text: 'default',
value: 'default',
},
hide: 0,
label: null,
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
)
.addTemplate(
template.new(
'cluster',
'$datasource',
'label_values(kube_statefulset_metadata_generation, %s)' % $._config.clusterLabel,
label='cluster',
refresh='time',
hide=if $._config.showMultiCluster then '' else 'variable',
sort=1,
)
)
.addTemplate(
template.new(
'namespace',
'$datasource',
'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}, namespace)' % $._config,
label='Namespace',
refresh='time',
sort=1,
)
)
.addTemplate(
template.new(
'statefulset',
'$datasource',
'label_values(kube_statefulset_metadata_generation{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}, statefulset)' % $._config,
label='Name',
refresh='time',
sort=1,
)
)
.addRow(overviewRow)
.addRow(statsRow)
.addRow(replicasRow),
},
}

View File

@ -8,7 +8,7 @@
alert: 'NodeFilesystemSpaceFillingUp',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
and
@ -28,7 +28,7 @@
alert: 'NodeFilesystemSpaceFillingUp',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
and

View File

@ -35,6 +35,18 @@
// just a warning for K8s nodes.
nodeCriticalSeverity: 'critical',
// Available disk space (%) thresholds on which to trigger the
// 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk
// usage grows in a way that it is predicted to run out in 4h or 1d
// and if the provided thresholds have been reached right now.
// In some cases you'll want to adjust these, e.g. by default Kubernetes
// runs the image garbage collection when the disk usage reaches 85%
// of its available space. In that case, you'll want to reduce the
// critical threshold below to something like 14 or 15, otherwise
// the alert could fire under normal node usage.
fsSpaceFillingUpWarningThreshold: 40,
fsSpaceFillingUpCriticalThreshold: 20,
grafana_prefix: '',
},
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
},
versions+:: {
prometheusOperator: 'v0.36.0',
prometheusOperator: 'v0.37.0',
prometheusConfigReloader: self.prometheusOperator,
configmapReloader: 'v0.3.0',
},

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
local errors = import 'errors.libsonnet';
local util = import '_util.libsonnet';
local errors = import 'errors.libsonnet';
{
errorburn(param):: {
local slo = {
@ -95,6 +95,9 @@ local util = import '_util.libsonnet';
labels: labels {
severity: 'critical',
},
annotations: {
message: 'High requests error budget burn for %s (current value: {{ $value }})' % [std.strReplace(std.join(',', slo.selectors), '"', '')],
},
},
{
alert: 'ErrorBudgetBurn',
@ -127,6 +130,9 @@ local util = import '_util.libsonnet';
labels: labels {
severity: 'warning',
},
annotations: {
message: 'High requests error budget burn for %s (current value: {{ $value }})' % [std.strReplace(std.join(',', slo.selectors), '"', '')],
},
},
],

View File

@ -0,0 +1,126 @@
local util = import '_util.libsonnet';

{
  // latencyburn builds recording rules and burn-rate alerts for a latency SLO.
  //
  // Fields read from `param` (merged over the defaults below):
  //   metric        - base metric name; '_bucket' and '_count' are appended to it.
  //   selectors     - array of label matchers (e.g. ['job="api"']); joined with ','
  //                   into every query and also converted to rule labels.
  //   latencyTarget - value matched against the `le` label of the '_bucket' series.
  //   latencyBudget - allowed ratio of requests missing the target; formatted with
  //                   %f and multiplied by the burn-rate factors in the alerts.
  //   codeSelector  - label name whose values matching "5.." are excluded from the
  //                   "within target" numerator (default: 'code').
  //   labels        - accepted with default [], but not read anywhere in this block.
  //
  // Visible result fields:
  //   recordingrules - one 'latencytarget:<metric>:rate<window>' rule per window.
  //   alerts         - two 'LatencyBudgetBurn' alerts (critical and warning).
  latencyburn(param):: {
    local slo = {
      metric: error 'must set metric for latency burn',
      selectors: error 'must set selectors for latency burn',
      latencyTarget: error 'must set latencyTarget for latency burn',
      latencyBudget: error 'must set latencyBudget for latency burn',
      labels: [],
      codeSelector: 'code',
    } + param,

    // Rate windows, in the order the alerts below index into latencyRules:
    //   [0]=5m  [1]=30m  [2]=1h  [3]=2h  [4]=6h  [5]=1d  [6]=3d
    local rates = ['5m', '30m', '1h', '2h', '6h', '1d', '3d'],

    local labels =
      util.selectorsToLabels(slo.selectors),

    // The comma-joined matcher list is used in every expression; build it once.
    local selectorList = std.join(',', slo.selectors),

    // Shared alert annotation; double quotes are stripped from the matcher list.
    local burnMessage =
      'High requests latency budget burn for %s (current value: {{ $value }})' % [std.strReplace(selectorList, '"', '')],

    local latencyRules = [
      {
        // Ratio of requests that miss the SLO latency target.
        // First calculate the ratio of non-5xx requests that are within the
        // target and subtract it from 1.
        // This gives the share of all requests that fail the SLO.
        expr: |||
          1 - (
            sum(rate(%s{%s,le="%s",%s!~"5.."}[%s]))
            /
            sum(rate(%s{%s}[%s]))
          )
        ||| % [
          slo.metric + '_bucket',
          selectorList,
          slo.latencyTarget,
          slo.codeSelector,
          rate,
          slo.metric + '_count',
          selectorList,
          rate,
        ],
        record: 'latencytarget:%s:rate%s' % [slo.metric, rate],
        labels: labels,
      }
      for rate in rates
    ],

    recordingrules: latencyRules,

    // NOTE(review): the name suggests the factors assume a 30-day SLO window;
    // nothing in this block states it explicitly - confirm.
    local multiBurnRate30d = [
      {
        alert: 'LatencyBudgetBurn',
        // Check what ratio of requests is violating the SLO.
        // Send an alert only when this ratio is above the burn rate.
        // Critical: (1h and 5m above 14.4x budget) or (6h and 30m above 6x budget).
        expr: |||
          (
            %s{%s} > (14.4*%f)
            and
            %s{%s} > (14.4*%f)
          )
          or
          (
            %s{%s} > (6*%f)
            and
            %s{%s} > (6*%f)
          )
        ||| % [
          latencyRules[2].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[0].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[4].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[1].record,
          selectorList,
          slo.latencyBudget,
        ],
        labels: labels {
          severity: 'critical',
        },
        annotations: {
          message: burnMessage,
        },
      },
      {
        alert: 'LatencyBudgetBurn',
        // Warning: (1d and 2h above 3x budget) or (3d and 6h above 1x budget).
        expr: |||
          (
            %s{%s} > (3*%f)
            and
            %s{%s} > (3*%f)
          )
          or
          (
            %s{%s} > (%f)
            and
            %s{%s} > (%f)
          )
        ||| % [
          latencyRules[5].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[3].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[6].record,
          selectorList,
          slo.latencyBudget,
          latencyRules[4].record,
          selectorList,
          slo.latencyBudget,
        ],
        labels: labels {
          severity: 'warning',
        },
        annotations: {
          message: burnMessage,
        },
      },
    ],

    alerts: multiBurnRate30d,
  },
}

View File

@ -1,3 +1,4 @@
(import 'latency.libsonnet') +
(import 'error-burn.libsonnet') +
(import 'errors.libsonnet') +
(import 'errorburn.libsonnet')
(import 'latency-burn.libsonnet') +
(import 'latency.libsonnet')