update monitoring
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
14d548bc23
commit
dd18c911bc
|
@ -18,8 +18,8 @@
|
|||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "528b01c327ee4abfd4afea29de9066c7f4b247fa",
|
||||
"sum": "NhOkJWkO7ZO2DSE8Fvipcs7Hh2/GOCS0WjPPZU8OiaQ="
|
||||
"version": "e42127658c910d91e7902be958f12d41ac33d54f",
|
||||
"sum": "L+PGlPK9mykGCJ9TIoEWdhMBjz+9lKuQ4YZ8fOeP9sk="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -28,8 +28,8 @@
|
|||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "cc1626a1b4dee45c99b78ddd9714dfd5f5d7816e",
|
||||
"sum": "nkgrtMYPCq/YB4r3mKyToepaLhicwWnxDdGIodPpzz0="
|
||||
"version": "8d382c732dbdc839ff07549a3f42d25828f1b268",
|
||||
"sum": "DRSRw4luAXlBXblo19/T1Jrv+9hyV8ivlS0KEtNANec="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -38,7 +38,7 @@
|
|||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "2cc8d1dcb943eb3ff1dcb85bc9a3933afb36b730",
|
||||
"version": "b5e45051995755ea373ea67642f8e5f54fcb8dd7",
|
||||
"sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc="
|
||||
},
|
||||
{
|
||||
|
@ -59,8 +59,8 @@
|
|||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "0bbe890539df0c1477000322c73977af71ef71e9",
|
||||
"sum": "h48bpWnNFX9iN9Uqc9y0NTlKQu8sA1izvNyAHzsMIX8="
|
||||
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
|
||||
"sum": "ttkPUnv/5bqlOFcZ8fvp2wi/S7ZLKiqAZ4ZdTolX77M="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -69,7 +69,7 @@
|
|||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "0bbe890539df0c1477000322c73977af71ef71e9",
|
||||
"version": "aa2adbcf39884fd9c85d7c3e0ff338b1d61ea1ba",
|
||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||
},
|
||||
{
|
||||
|
@ -79,8 +79,8 @@
|
|||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "daf555f1e11ad6aa37852653e63baede5f99367e",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
|
||||
"sum": "ySP+bI2ZMLPt/sguSh9WrwI5H5dasaNFRE8Uo9PcZrI="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -89,7 +89,7 @@
|
|||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "daf555f1e11ad6aa37852653e63baede5f99367e",
|
||||
"version": "35ef70bb74520a78cc8dc7cf364e1ff4e0c45063",
|
||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
||||
},
|
||||
{
|
||||
|
@ -99,8 +99,18 @@
|
|||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "5fe45c57b60f17568001fd04a7dc2bb754fdf152",
|
||||
"sum": "6Qrn74pNRqJNKYdsmcBu8ergYbMEH48qG1VDVm9FKak="
|
||||
"version": "980e95de011319b88a3b9c0787a81dcdf338a898",
|
||||
"sum": "BxOXyWCSc9KkgWJXDau2Xtsy3aOYZDHz2VqOSLga7VU="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator",
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "55baf034c431ed2c78d950b187f7d8b34dd06860",
|
||||
"sum": "+Q45oBC7O8g7KQOaiKhGglwndAMWRlLTR94KUI8Q1Ko="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -109,8 +119,8 @@
|
|||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "96094ad1ab039950537df448b95bbcc04c57bfc4",
|
||||
"sum": "ReamRYoS2C39Of7KtXGqkSWdfHw5Fy/Ix6ujOmBLFAg="
|
||||
"version": "cd331ce9bb58bb926e391c6ae807621cb12cc29e",
|
||||
"sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
@ -119,8 +129,8 @@
|
|||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "d8a1585f59ef1169837d08979ecc92dcea8aa58a",
|
||||
"sum": "EE+C+Krf518EGLjA/x3ZvKfenCI0J7YuwFJVBscypRw="
|
||||
"version": "f81747e608ea85ae44e76454eb63f9cb6484fb9e",
|
||||
"sum": "VyMzZPxQIjiKQYGjZjXeKNWfLJ9vOl3emp84PWfsrUc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
apiVersion: bitnami.com/v1alpha1
|
||||
kind: SealedSecret
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: alertmanager-tbrnt-config
|
||||
namespace: monitoring
|
||||
spec:
|
||||
encryptedData:
|
||||
alertmanager.yaml: AgATnPnwlvlfGVJeWhjcFHyp/am3nmguqi9PsWgEhxtVLuyDA6OB1G+BdJZ7dGdCViZGeDJD5mHxESSLDMTfxg5DxMDIG18XCzojMtRFTPJMZPLjbETNuSZqtrkbscp/qQom4z+igVuLlkaihdYRcCNV+B0vm+1h6BUPV8Utv1RN1dy9XUvdrvhPRNFvqhCpVcpcLwNP5cli5SNYgVc/ty6a45Fl5h+KLv7rFBJexLhUXoR0jamQpQWoH7oNHcS4ONHxLDMKXqE9jFpKzlQJBNgiRQEEotwCYTodoALmkcIs37Ai+trQxEMZZYtD5vFzbehfTtNLT1bPhLiX91rv6Q9n9wuIw951Qk11L6cF93zDl2mZ9dAQHSAglVHEriXKXBZ3Df4DSyh5qkr+/7lFBdFTQVMS5+YTgM1eCmG1yfsvU33IWKh5wrNhpkUqGLiq9f+4k3xPQVysVY3jJjVhINM/A9OsTPfFzm7aAAklBxuROXiZgZ/6L4Oc/c0Tv3EN/02rhinSGr1hIMmcaSxdOVQxXPU+pbx4JcSmyQFXIY37n/2ya/UbJW/o901MtigCukUvgMedkxPSGhedvHOygKKXPKNSl5U1Emhza7c6vP9cSiiaHpRm7EyUQvjWpJRUP7tSRgDlZyBM9Ud0PRBRdYWLG5YlZB4STOX6cDyYOcFJvnAyiZpDuwOPKMOrhWQSbCgcMcuS/RCgCnYJ4YBfm1cSxcqxsA65PRhXbRmiY9b/Mqs7s1xpJo3RySO27JiffbY+vYRIrFv4G4ak0ug9AQJvrvEA/ZgSs9xpASXSsr42pB63exhlZP+D9JEDGFLgzGQVnVFRFDrlYLFQieqWDgBc0pkcxHHwGBTp3H6PP5RRPLKzNoypTbNrdLlaNAsAlb8VuPopPBHqLcpU+DPuxxBCQ/P8ezYXE8RmpH7x4A2rPLbV902zYVwfnWMrUdPZif7oPnn/xM+VDQMLIlKA/CQySudzAYf621N450V2zE0akOFQNATGEUZT+8HtjNKMcAxjojP/pJpo486t02KXHDw+i04R1kdGPVkLnZDz+UShh4eLwovBL3zopxHxSFnonI1Ez+IetemM+aCJhadU4YNC4zY7x+blNa51ZAGEGoXuSJB3fszLd7wSz4owIhQnb+StxVNcwNgircvFjhauLVLwEynO4WTm+YKzosf2GxJAaNriveyLj1L+DwBUOeWnvmL4QsHX3nriZgVR89KlIQI1d3+lcf2jw8VqfHj3tbpMxb98qZFWw2pczrDeE2t4UQTPTj+4VK4htKGhnIHlNmdeSN39GJCWfilnzAsznrkxZsr1wIqrP+ayRO+NxljBzUN7xspByJeJrBpzW1pukvNg74MK1K7g0/fh/zmqyduQYmJnCDDqfN0PB1YoXFPZn3o4kzGnLXetgJHyJG4tsinediVXrZJb+6KOL31hEbZVArP/gWYHQv1MltEZj2yunuLeEy+Oo35oS/IkW17qF3gkF0sLavZFhJe6XqVvO2BFF1V8S15cBApXXPvFJQWyFLOTVqhYicYfJqQEgz7sKH1uYK0zLeyBkRIfUXk3vs7X9X/CzQo9J9oH5FCgNEI6GdROhitGUihCxwAVa6lsteVxZZ4USkMGZtJPG3Pi5RQuWpSMFX9nWm/LHv8wpgFItx12ZkuaIDB30wYfp4SqcBr1ZvuptKtwJpTUjVx5HSUKnEye2g==
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: alertmanager-tbrnt-config
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
status: {}
|
||||
|
|
@ -144,6 +144,7 @@ items:
|
|||
"decimals": 3,
|
||||
"description": "How much error budget is left looking at our 0.990% availability gurantees?",
|
||||
"fill": 10,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -338,6 +339,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
|
||||
"fill": 10,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -446,6 +448,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -539,6 +542,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -731,6 +735,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
|
||||
"fill": 10,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -839,6 +844,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -932,6 +938,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1037,6 +1044,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1129,6 +1137,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1221,6 +1230,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1326,6 +1336,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1418,6 +1429,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1510,6 +1522,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -1780,6 +1793,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -1882,6 +1896,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -2325,6 +2340,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -2427,6 +2443,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -2559,6 +2576,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -2659,6 +2677,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -2770,6 +2789,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -2870,6 +2890,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -2990,6 +3011,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -3090,6 +3112,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -3190,6 +3213,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -3294,6 +3318,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
|
@ -3668,6 +3693,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -3773,6 +3799,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -3878,6 +3905,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -3983,6 +4011,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -4096,6 +4125,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -4201,6 +4231,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -4306,6 +4337,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -4398,6 +4430,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -4490,6 +4523,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -16978,6 +17012,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17070,6 +17105,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17175,6 +17211,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17280,6 +17317,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17379,6 +17417,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17491,6 +17530,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17585,6 +17625,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17692,6 +17733,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17799,6 +17841,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17891,6 +17934,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -17997,6 +18041,7 @@ items:
|
|||
"datasource": "$datasource",
|
||||
"description": "Pod lifecycle event generator",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18089,6 +18134,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18194,6 +18240,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18299,6 +18346,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18425,6 +18473,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18530,6 +18579,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18622,6 +18672,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -18714,6 +18765,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -19527,6 +19579,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -19627,6 +19680,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -19738,6 +19792,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -19838,6 +19893,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -19958,6 +20014,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -20058,6 +20115,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -20400,6 +20458,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -20502,6 +20561,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -20945,6 +21005,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21047,6 +21108,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21179,6 +21241,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21279,6 +21342,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21390,6 +21454,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21490,6 +21555,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21610,6 +21676,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -21710,6 +21777,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -23983,6 +24051,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24076,6 +24145,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24202,6 +24272,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24367,7 +24438,7 @@ items:
|
|||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n",
|
||||
"expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
|
@ -24412,6 +24483,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24528,6 +24600,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24647,6 +24720,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24740,6 +24814,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -24961,6 +25036,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -25157,6 +25233,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -25819,6 +25896,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -25919,6 +25997,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -26030,6 +26109,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -26130,6 +26210,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -26250,6 +26331,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -26350,6 +26432,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
|
@ -26700,6 +26783,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -26792,6 +26876,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -26897,6 +26982,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27002,6 +27088,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27095,6 +27182,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27187,6 +27275,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27279,6 +27368,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27384,6 +27474,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27476,6 +27567,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27581,6 +27673,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27673,6 +27766,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27778,6 +27872,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27870,6 +27965,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -27962,6 +28058,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -28054,6 +28151,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -29634,6 +29732,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -29726,6 +29825,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -29831,6 +29931,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -29923,6 +30024,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30028,6 +30130,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30141,6 +30244,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30246,6 +30350,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30351,6 +30456,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30443,6 +30549,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30535,6 +30642,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30839,6 +30947,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -30952,6 +31061,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31078,6 +31188,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31191,6 +31302,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31296,6 +31408,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31401,6 +31514,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31493,6 +31607,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -31585,6 +31700,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -32417,6 +32533,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
|
@ -33529,6 +33646,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -33631,6 +33749,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -33744,6 +33863,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -33846,6 +33966,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -33978,6 +34099,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -34078,6 +34200,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -34189,6 +34312,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -34289,6 +34413,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -34409,6 +34534,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
@ -34509,6 +34635,7 @@ items:
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 2,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
apiVersion: batch/v1beta1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: healthchecks-io
|
||||
namespace: monitoring
|
||||
spec:
|
||||
schedule: "*/1 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
successfulJobsHistoryLimit: 1
|
||||
failedJobsHistoryLimit: 1
|
||||
startingDeadlineSeconds: 200
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: pinghc
|
||||
env:
|
||||
- name: HCURL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: healthchecks-io
|
||||
key: HCURL
|
||||
image: busybox
|
||||
args:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- "date && echo $HCURL && /bin/wget -q -O - --no-check-certificate $HCURL"
|
||||
restartPolicy: OnFailure
|
||||
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
apiVersion: bitnami.com/v1alpha1
|
||||
kind: SealedSecret
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: healthchecks-io
|
||||
namespace: monitoring
|
||||
spec:
|
||||
encryptedData:
|
||||
HCURL: AgBEpwET1Qa1hQqAmwrNGBv4sL0ml8pGYPwgq9Aps3tYhBVqsXjV7U5RQa/txldg1umw2Zqx8MfvZTN2kmFk6bJTROCWqTxmxd4rHgnJYqRR0+Opn/BtDhVx4WTnehyM/il9ymddhMD+WRQDr/Wfxq/0UQdsy+IEYyVMQuOKEihZabxmXRyNeAl5ZBeQ0W1T29biJPx3rifS37RbGlJtCIYuNPh82d0KAMu1dszDnkln8k5CBv6mPD8BVHg+Z/y1v1jFhTIE3YOlGzCIjb8RrJj6MVm7zlauj8zrl30JvF2OAWDGGZDOL3b0G3IKd0Qp/eagT33Sx7vbppY/l1Vci6UQcVpde3u2+ATMbysRej04Mvcodq5OgkBFqbgCzx0UFTIq0wER/GuCoYbt+k8b3TouK5ChQet8EP0W/c7rLHcMY3c0UR00N7m5UeKZAzAkXSGV+u3M9K6PMp8pl0VuDo+IVgEIY7ku9rtzL7SPIfXS4u5w7fte13fOtKB/2sa11dNqAbHmidF+IO6ycjm8SZibC7NKyCxgIKWPfsFXhNUT2Nx7eBRrzR1QlqThIGRsDpX1RVplTwe/OLsBz0K99AyGDUkSBJdOZLaRT/b3T0nS8DE5x/e8MvFsbbDdGE2U/YhVrbfn072u/X979/RIm0oCjipvByZXhFmobRj9SP9RcK2UfjBSY7xyKnd2rjj1mnIs2S0CmwGFdJqoywHckJJOu3YP2oN2Q1U7+Fe4yciupAshgdszY2okHMtd4aDDJJKeKKFHpjpsuA==
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: healthchecks-io
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
status: {}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
# Alerting rules for the K8up backup operator. The prometheus/role labels are
# what a Prometheus rule selector would match on (presumably the kube-prometheus
# default selector - confirm against the Prometheus resource).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8up
  labels:
    prometheus: k8s
    role: alert-rules
spec:
  groups:
    - name: k8up.rules
      rules:
        # The last restic backup reported at least one error.
        - alert: baas_last_errors
          expr: baas_backup_restic_last_errors > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: Amount of errors of last restic backup
            description: This alert is fired when error number is > 0
        # The failed-jobs counter increased at some point in the last day.
        - alert: K8upBackupFailed
          expr: rate(k8up_jobs_failed_counter[1d]) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
        # No job ran in a namespace that has schedules configured.
        # The aggregation must keep the namespace label: a bare sum() yields a
        # label-less series, so "and on(namespace)" could never match the
        # schedules gauge and {{ $labels.namespace }} would be empty.
        - alert: K8upBackupNotRunning
          expr: sum by (namespace) (rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
        # Backup jobs have been queued for a full day in a scheduled namespace.
        - alert: K8upJobStuck
          expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
          for: 24h
          labels:
            severity: critical
          annotations:
            summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."
|
|
@ -30,7 +30,6 @@ rules:
|
|||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
@ -105,6 +104,7 @@ rules:
|
|||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: DaemonSet
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: v1.0.1
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -14,7 +14,7 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: v1.0.1
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
|
@ -25,7 +25,7 @@ spec:
|
|||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
image: quay.io/prometheus/node-exporter:v0.18.1
|
||||
image: quay.io/prometheus/node-exporter:v1.0.1
|
||||
name: node-exporter
|
||||
resources:
|
||||
limits:
|
||||
|
@ -36,11 +36,13 @@ spec:
|
|||
memory: 180Mi
|
||||
volumeMounts:
|
||||
- mountPath: /host/proc
|
||||
mountPropagation: HostToContainer
|
||||
name: proc
|
||||
readOnly: false
|
||||
readOnly: true
|
||||
- mountPath: /host/sys
|
||||
mountPropagation: HostToContainer
|
||||
name: sys
|
||||
readOnly: false
|
||||
readOnly: true
|
||||
- mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
name: root
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: Service
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: v1.0.1
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -3,7 +3,7 @@ kind: ServiceMonitor
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/version: v1.0.1
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -19,4 +19,4 @@ spec:
|
|||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
|
|
|
@ -1019,6 +1019,8 @@ spec:
|
|||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
min_over_time(node_timex_sync_status[5m]) == 0
|
||||
and
|
||||
node_timex_maxerror_seconds >= 16
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -1044,6 +1046,75 @@ spec:
|
|||
node_md_disks{state="fail"} > 0
|
||||
labels:
|
||||
severity: warning
|
||||
- name: prometheus-operator
|
||||
rules:
|
||||
- alert: PrometheusOperatorListErrors
|
||||
annotations:
|
||||
description: Errors while performing List operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors
|
||||
summary: Errors while performing list operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorWatchErrors
|
||||
annotations:
|
||||
description: Errors while performing watch operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors
|
||||
summary: Errors while performing watch operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorSyncFailed
|
||||
annotations:
|
||||
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
|
||||
namespace fails to reconcile {{ $value }} objects.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorsyncfailed
|
||||
summary: Last controller reconciliation failed
|
||||
expr: |
|
||||
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorReconcileErrors
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of reconciling operations
|
||||
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
|
||||
namespace.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors
|
||||
summary: Errors while reconciling controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorNodeLookupErrors
|
||||
annotations:
|
||||
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
|
||||
Namespace.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors
|
||||
summary: Errors while reconciling Prometheus.
|
||||
expr: |
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorNotReady
|
||||
annotations:
|
||||
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
|
||||
ready to reconcile {{ $labels.controller }} resources.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornotready
|
||||
summary: Prometheus operator not ready
|
||||
expr: |
|
||||
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kubernetes-apps
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
|
@ -1249,7 +1320,7 @@ spec:
|
|||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to
|
||||
complete.
|
||||
complete. Removing failed job after investigation should clear this alert.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: |
|
||||
|
@ -2031,40 +2102,3 @@ spec:
|
|||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: prometheus-operator
|
||||
rules:
|
||||
- alert: PrometheusOperatorListErrors
|
||||
annotations:
|
||||
message: Errors while performing List operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorWatchErrors
|
||||
annotations:
|
||||
message: Errors while performing Watch operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorReconcileErrors
|
||||
annotations:
|
||||
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
|
||||
}} Namespace.
|
||||
expr: |
|
||||
rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorNodeLookupErrors
|
||||
annotations:
|
||||
message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
|
||||
expr: |
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
|
@ -53,6 +53,7 @@ spec:
|
|||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
honorTimestamps: false
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -18,15 +18,15 @@ spec:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --kubelet-service=kube-system/kubelet
|
||||
- --logtostderr=true
|
||||
- --config-reloader-image=jimmidyson/configmap-reload:v0.4.0
|
||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.0
|
||||
image: quay.io/prometheus-operator/prometheus-operator:v0.42.0
|
||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.1
|
||||
image: quay.io/prometheus-operator/prometheus-operator:v0.42.1
|
||||
name: prometheus-operator
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -4,6 +4,6 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.42.0
|
||||
app.kubernetes.io/version: v0.42.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
---
# ServiceMonitor for Traefik: matches objects labelled app=traefik in the
# kube-system namespace and scrapes their "metrics" port at /metrics every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  namespace: monitoring
  name: traefik
spec:
  selector:
    matchLabels: {app: traefik}
  namespaceSelector:
    matchNames: [kube-system]
  endpoints:
    - {port: metrics, path: /metrics, interval: 30s}
|
80
monitoring/vendor/github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet
generated
vendored
80
monitoring/vendor/github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet
generated
vendored
|
@ -34,7 +34,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).',
|
||||
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).',
|
||||
summary: 'etcd cluster members are down.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -47,7 +48,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
|
||||
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
|
||||
summary: 'etcd cluster has insufficient number of members.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -60,7 +62,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
|
||||
summary: 'etcd cluster has no leader.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -73,7 +76,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.',
|
||||
summary: 'etcd cluster has high number of leader changes.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -89,7 +93,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster has high number of failed grpc requests.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -105,7 +110,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster has high number of failed grpc requests.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -119,7 +125,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd grpc requests are slow',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -133,7 +140,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster member communication is slow.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -146,7 +154,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster has high number of proposal failures.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -159,6 +168,21 @@
|
|||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster 99th percentile fsync durations are too high.',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'etcdHighFsyncDurations',
|
||||
expr: |||
|
||||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
|
||||
> 1
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
},
|
||||
|
@ -174,7 +198,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd cluster 99th percentile commit durations are too high.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -188,7 +213,8 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}',
|
||||
description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}',
|
||||
summary: 'etcd has high number of failed HTTP requests.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -202,7 +228,8 @@
|
|||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
|
||||
summary: 'etcd has high number of failed HTTP requests.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -216,7 +243,34 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.',
|
||||
description: 'etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.',
|
||||
summary: 'etcd instance HTTP requests are slow.',
|
||||
},
|
||||
},
|
||||
// Critical alert: the etcd database size has been above 95% of the backend
// quota for 10 minutes.
{
  alert: 'etcdBackendQuotaLowSpace',
  // The expression has no %(...)s placeholders, so formatting it with
  // $._config leaves the text unchanged.
  expr: |||
    (etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100 > 95
  ||| % $._config,
  'for': '10m',
  labels: {
    severity: 'critical',
  },
  annotations: {
    // NOTE(review): uses `message`, while the other alerts in this mixin
    // carry `description` + `summary` - confirm whether that is intended.
    message: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.',
  },
},
|
||||
// Warning alert: the database-size-to-quota percentage rose by more than 50
// over a 240-minute subquery window (1-minute resolution), sustained for
// 10 minutes.
{
  alert: 'etcdExcessiveDatabaseGrowth',
  // The expression has no %(...)s placeholders, so formatting it with
  // $._config leaves the text unchanged.
  expr: |||
    increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50
  ||| % $._config,
  'for': '10m',
  labels: {
    severity: 'warning',
  },
  annotations: {
    // NOTE(review): the threshold is measured in percentage points of the
    // quota, not relative to the previous database size - confirm that the
    // "50% increase in database size" wording matches the intent.
    message: 'etcd cluster "{{ $labels.job }}": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.',
  },
},
|
||||
],
|
||||
|
|
|
@ -26,7 +26,8 @@ tests:
|
|||
job: etcd
|
||||
severity: critical
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": members are down (3).'
|
||||
description: 'etcd cluster "etcd": members are down (3).'
|
||||
summary: 'etcd cluster members are down.'
|
||||
- eval_time: 7m
|
||||
alertname: etcdInsufficientMembers
|
||||
- eval_time: 11m
|
||||
|
@ -36,7 +37,8 @@ tests:
|
|||
job: etcd
|
||||
severity: critical
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": insufficient members (1).'
|
||||
description: 'etcd cluster "etcd": insufficient members (1).'
|
||||
summary: 'etcd cluster has insufficient number of members.'
|
||||
- eval_time: 15m
|
||||
alertname: etcdInsufficientMembers
|
||||
exp_alerts:
|
||||
|
@ -44,7 +46,8 @@ tests:
|
|||
job: etcd
|
||||
severity: critical
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": insufficient members (0).'
|
||||
description: 'etcd cluster "etcd": insufficient members (0).'
|
||||
summary: 'etcd cluster has insufficient number of members.'
|
||||
|
||||
- interval: 1m
|
||||
input_series:
|
||||
|
@ -62,7 +65,8 @@ tests:
|
|||
job: etcd
|
||||
severity: critical
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": members are down (3).'
|
||||
description: 'etcd cluster "etcd": members are down (3).'
|
||||
summary: 'etcd cluster members are down.'
|
||||
|
||||
- interval: 1m
|
||||
input_series:
|
||||
|
@ -80,7 +84,8 @@ tests:
|
|||
job: etcd
|
||||
severity: critical
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": members are down (1).'
|
||||
description: 'etcd cluster "etcd": members are down (1).'
|
||||
summary: 'etcd cluster members are down.'
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
|
||||
|
@ -97,7 +102,8 @@ tests:
|
|||
job: etcd
|
||||
severity: warning
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": 4 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
|
||||
description: 'etcd cluster "etcd": 4 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
|
||||
summary: 'etcd cluster has high number of leader changes.'
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
|
||||
|
@ -110,4 +116,20 @@ tests:
|
|||
- eval_time: 10m
|
||||
alertname: etcdHighNumberOfLeaderChanges
|
||||
exp_alerts:
|
||||
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.0"}'
|
||||
values: '0 10 20 0 0 10 0 0 30 0 0 0 0 0 0 0'
|
||||
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.1"}'
|
||||
values: '0 0 10 0 20 0 0 0 0 0 0 0 0 0 0 0'
|
||||
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.2"}'
|
||||
values: '0 0 0 0 0 0 0 0'
|
||||
alert_rule_test:
|
||||
- eval_time: 10m
|
||||
alertname: etcdExcessiveDatabaseGrowth
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
job: etcd
|
||||
severity: warning
|
||||
exp_annotations:
|
||||
message: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours, please check as it might be disruptive.'
|
||||
|
|
5
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
generated
vendored
5
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
generated
vendored
|
@ -10,6 +10,7 @@
|
|||
* @param span (optional) Width of the panel
|
||||
* @param datasource (optional) Datasource
|
||||
* @param fill (default `1`) , integer from 0 to 10
|
||||
* @param fillGradient (default `0`) , integer from 0 to 10
|
||||
* @param linewidth (default `1`) Line Width, integer from 0 to 10
|
||||
* @param decimals (optional) Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
|
||||
* @param decimalsY1 (optional) Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
|
||||
|
@ -63,11 +64,13 @@
|
|||
* @method addYaxis(format,min,max,label,show,logBase,decimals) Adds a Y axis to the graph
|
||||
* @method addAlert(alert) Adds an alert
|
||||
* @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/)
|
||||
* @method addLinks(links) Adds an array of links.
|
||||
*/
|
||||
new(
|
||||
title,
|
||||
span=null,
|
||||
fill=1,
|
||||
fillGradient=0,
|
||||
linewidth=1,
|
||||
decimals=null,
|
||||
decimalsY1=null,
|
||||
|
@ -166,6 +169,7 @@
|
|||
},
|
||||
lines: lines,
|
||||
fill: fill,
|
||||
fillGradient: fillGradient,
|
||||
linewidth: linewidth,
|
||||
dashes: dashes,
|
||||
dashLength: 10,
|
||||
|
@ -283,5 +287,6 @@
|
|||
addLink(link):: self {
|
||||
links+: [link],
|
||||
},
|
||||
addLinks(links):: std.foldl(function(p, t) p.addLink(t), links, self),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -6,12 +6,15 @@
|
|||
*
|
||||
* @param expr
|
||||
* @param hide (optional) Disable query on graph.
|
||||
* @param legendFormat (optional) Defines the legend. Defaults to ''.
|
||||
*/
|
||||
target(
  expr,
  hide=null,
  legendFormat='',
)::
  // `hide` is emitted only when the caller supplied a value; with the
  // default null the key is absent from the resulting object.
  local visibility = if hide == null then {} else { hide: hide };
  visibility {
    expr: expr,
    legendFormat: legendFormat,
  },
|
||||
}
|
||||
|
|
|
@ -258,7 +258,7 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.',
|
||||
description: 'Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.',
|
||||
summary: 'Job failed to complete.',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -337,11 +337,11 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
|
|||
legend_avg=true,
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
'sort_desc(sum by (container) (rate(windows_container_network_receive_bytes_total{namespace="$namespace", pod="$pod"}[1m])))' % $._config,
|
||||
'sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{namespace="$namespace", pod="$pod"}[1m])))' % $._config,
|
||||
legendFormat='Received : {{ container }}',
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
'sort_desc(sum by (container) (rate(windows_container_network_transmit_bytes_total{namespace="$namespace", pod="$pod"}[1m])))' % $._config,
|
||||
'sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{namespace="$namespace", pod="$pod"}[1m])))' % $._config,
|
||||
legendFormat='Transmitted : {{ container }}',
|
||||
))
|
||||
)
|
||||
|
|
|
@ -202,13 +202,13 @@
|
|||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'windows_container_network_receive_bytes_total',
|
||||
record: 'windows_container_network_received_bytes_total',
|
||||
expr: |||
|
||||
windows_container_network_receive_bytes_total{%(wmiExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'windows_container_network_transmit_bytes_total',
|
||||
record: 'windows_container_network_transmitted_bytes_total',
|
||||
expr: |||
|
||||
windows_container_network_transmit_bytes_total{%(wmiExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
|
||||
||| % $._config,
|
||||
|
|
|
@ -58,7 +58,6 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
'daemonsets',
|
||||
'deployments',
|
||||
'replicasets',
|
||||
'ingresses',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
|
||||
|
@ -135,6 +134,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
rulesType.withApiGroups(['networking.k8s.io']) +
|
||||
rulesType.withResources([
|
||||
'networkpolicies',
|
||||
'ingresses',
|
||||
]) +
|
||||
rulesType.withVerbs(['list', 'watch']),
|
||||
|
||||
|
@ -228,6 +228,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
roleBinding.new() +
|
||||
roleBinding.mixin.metadata.withName(ksm.name) +
|
||||
roleBinding.mixin.metadata.withNamespace(ksm.namespace) +
|
||||
roleBinding.mixin.metadata.withLabels(ksm.commonLabels) +
|
||||
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
roleBinding.mixin.roleRef.withName(ksm.name) +
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
(import 'alertmanager.libsonnet') +
|
||||
(import 'general.libsonnet') +
|
||||
(import 'node.libsonnet') +
|
||||
(import 'prometheus-operator.libsonnet')
|
||||
(import 'node.libsonnet')
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'prometheus-operator',
|
||||
rules: [
|
||||
{
|
||||
alert: 'PrometheusOperatorListErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
},
|
||||
'for': '15m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorWatchErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
},
|
||||
'for': '15m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorReconcileErrors',
|
||||
expr: |||
|
||||
rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorNodeLookupErrors',
|
||||
expr: |||
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
|
@ -28,6 +28,15 @@
|
|||
},
|
||||
"version": "release-0.42"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator",
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
|
||||
local configMapList = k3.core.v1.configMapList;
|
||||
local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
|
||||
|
||||
(import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') +
|
||||
(import './kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
|
@ -9,6 +10,7 @@ local configMapList = k3.core.v1.configMapList;
|
|||
(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') +
|
||||
(import './alertmanager/alertmanager.libsonnet') +
|
||||
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
|
||||
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') +
|
||||
(import './prometheus/prometheus.libsonnet') +
|
||||
(import './prometheus-adapter/prometheus-adapter.libsonnet') +
|
||||
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +
|
||||
|
@ -60,7 +62,7 @@ local configMapList = k3.core.v1.configMapList;
|
|||
],
|
||||
},
|
||||
} +
|
||||
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
local kubeRbacProxyContainer = import '../kube-rbac-proxy/container.libsonnet';
|
||||
local ksm = import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
versions+:: {
|
||||
|
@ -11,9 +14,9 @@
|
|||
scrapeTimeout: '30s',
|
||||
},
|
||||
},
|
||||
kubeStateMetrics+:: (import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
{
|
||||
local ksm = self,
|
||||
kubeStateMetrics+::
|
||||
ksm + {
|
||||
local version = self.version,
|
||||
name:: 'kube-state-metrics',
|
||||
namespace:: $._config.namespace,
|
||||
version:: $._config.versions.kubeStateMetrics,
|
||||
|
@ -57,7 +60,7 @@
|
|||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'app.kubernetes.io/name': 'kube-state-metrics',
|
||||
'app.kubernetes.io/version': ksm.version,
|
||||
'app.kubernetes.io/version': version,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
|
@ -98,7 +101,7 @@
|
|||
},
|
||||
},
|
||||
} +
|
||||
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
|
@ -112,7 +115,7 @@
|
|||
},
|
||||
},
|
||||
}).deploymentMixin +
|
||||
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
|
||||
(kubeRbacProxyContainer {
|
||||
config+:: {
|
||||
kubeRbacProxy: {
|
||||
local cfg = self,
|
||||
|
|
|
@ -5,7 +5,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
nodeExporter: 'v0.18.1',
|
||||
nodeExporter: 'v1.0.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
@ -79,11 +79,15 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
toleration.withOperator('Exists');
|
||||
local procVolumeName = 'proc';
|
||||
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
|
||||
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc');
|
||||
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc').
|
||||
withMountPropagation('HostToContainer').
|
||||
withReadOnly(true);
|
||||
|
||||
local sysVolumeName = 'sys';
|
||||
local sysVolume = volume.fromHostPath(sysVolumeName, '/sys');
|
||||
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys');
|
||||
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys').
|
||||
withMountPropagation('HostToContainer').
|
||||
withReadOnly(true);
|
||||
|
||||
local rootVolumeName = 'root';
|
||||
local rootVolume = volume.fromHostPath(rootVolumeName, '/');
|
||||
|
|
|
@ -312,6 +312,7 @@ local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
|||
path: '/metrics/cadvisor',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
honorTimestamps: false,
|
||||
tlsConfig: {
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
|
|
3
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/alerts.jsonnet
generated
vendored
Normal file
3
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/alerts.jsonnet
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
// Entry point that evaluates to only the `prometheusAlerts` field of the
// object produced by importing 'mixin.libsonnet'.
(
|
||||
import 'mixin.libsonnet'
|
||||
).prometheusAlerts
|
95
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/alerts/alerts.libsonnet
generated
vendored
Normal file
95
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/alerts/alerts.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Appends one rule group named 'prometheus-operator' to the hidden
// `prometheusAlerts.groups` list. Every `expr` below is a text block
// formatted with `$._config`, from which it reads `prometheusOperatorSelector`.
// All rules in this group carry the label severity 'warning'.
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'prometheus-operator',
|
||||
rules: [
|
||||
{
|
||||
// Ratio of failed to total list operations (10m rates, summed by
// controller and namespace) above 0.4, sustained for 15m.
alert: 'PrometheusOperatorListErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
summary: 'Errors while performing list operations in controller.',
|
||||
},
|
||||
'for': '15m',
|
||||
},
|
||||
{
|
||||
// Same shape as the list-errors rule, but for watch operations:
// failed/total ratio over 10m above 0.4, sustained for 15m.
alert: 'PrometheusOperatorWatchErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
summary: 'Errors while performing watch operations in controller.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
// Fires when the minimum of prometheus_operator_syncs{status="failed"}
// over the last 5m stays above 0 for 10m; the description reports
// the value as a number of objects.
alert: 'PrometheusOperatorSyncFailed',
|
||||
expr: |||
|
||||
min_over_time(prometheus_operator_syncs{status="failed",%(prometheusOperatorSelector)s}[5m]) > 0
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.',
|
||||
summary: 'Last controller reconciliation failed',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
// Ratio of reconcile errors to reconcile operations (5m rates, summed
// by controller and namespace) above 0.1, sustained for 10m.
alert: 'PrometheusOperatorReconcileErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{%(prometheusOperatorSelector)s}[5m]))) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.',
|
||||
summary: 'Errors while reconciling controller.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
// 5m rate of prometheus_operator_node_address_lookup_errors_total
// above 0.1, sustained for 10m.
// NOTE(review): the description/summary below talk about "reconciling
// Prometheus" while the expression counts node address lookup errors —
// confirm the wording is intended.
alert: 'PrometheusOperatorNodeLookupErrors',
|
||||
expr: |||
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
|
||||
summary: 'Errors while reconciling Prometheus.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
// Fires when prometheus_operator_ready never rose above 0 in the last
// 5m (max_over_time == 0), grouped by namespace and controller, for 5m.
alert: 'PrometheusOperatorNotReady',
|
||||
expr: |||
|
||||
min by(namespace, controller) (max_over_time(prometheus_operator_ready{%(prometheusOperatorSelector)s}[5m]) == 0)
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
description: "Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.",
|
||||
summary: 'Prometheus operator not ready',
|
||||
},
|
||||
'for': '5m',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
5
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/config.libsonnet
generated
vendored
Normal file
5
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/config.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
// Default configuration merged into the hidden `_config` field.
{
|
||||
_config+:: {
|
||||
// Label matcher string; consumers interpolate it with `%(prometheusOperatorSelector)s`.
prometheusOperatorSelector: 'job="prometheus-operator"',
|
||||
},
|
||||
}
|
2
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet
generated
vendored
Normal file
2
monitoring/vendor/github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
// Mixin entry point: object-adds the contents of 'config.libsonnet' and
// 'alerts/alerts.libsonnet' into a single object.
(import 'config.libsonnet') +
|
||||
(import 'alerts/alerts.libsonnet')
|
|
@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
|
||||
versions+:: {
|
||||
prometheusOperator: 'v0.42.0',
|
||||
prometheusOperator: 'v0.42.1',
|
||||
prometheusConfigReloader: self.prometheusOperator,
|
||||
configmapReloader: 'v0.4.0',
|
||||
},
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
alert: 'NodeFilesystemAlmostOutOfSpace',
|
||||
expr: |||
|
||||
(
|
||||
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 5
|
||||
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
|
||||
and
|
||||
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
|
||||
)
|
||||
|
@ -58,7 +58,7 @@
|
|||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
summary: 'Filesystem has less than 5% space left.',
|
||||
summary: 'Filesystem has less than %(fsSpaceAvailableCriticalThreshold)d%% space left.' % $._config,
|
||||
description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
|
||||
},
|
||||
},
|
||||
|
@ -66,7 +66,7 @@
|
|||
alert: 'NodeFilesystemAlmostOutOfSpace',
|
||||
expr: |||
|
||||
(
|
||||
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 3
|
||||
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
|
||||
and
|
||||
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
|
||||
)
|
||||
|
@ -76,7 +76,7 @@
|
|||
severity: '%(nodeCriticalSeverity)s' % $._config,
|
||||
},
|
||||
annotations: {
|
||||
summary: 'Filesystem has less than 3% space left.',
|
||||
summary: 'Filesystem has less than %(fsSpaceAvailableWarningThreshold)d%% space left.' % $._config,
|
||||
description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
|
||||
},
|
||||
},
|
||||
|
@ -238,6 +238,8 @@
|
|||
alert: 'NodeClockNotSynchronising',
|
||||
expr: |||
|
||||
min_over_time(node_timex_sync_status[5m]) == 0
|
||||
and
|
||||
node_timex_maxerror_seconds >= 16
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
|
|
|
@ -47,6 +47,11 @@
|
|||
fsSpaceFillingUpWarningThreshold: 40,
|
||||
fsSpaceFillingUpCriticalThreshold: 20,
|
||||
|
||||
// Available disk space (%) thresholds on which to trigger the
|
||||
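// 'NodeFilesystemAlmostOutOfSpace' alerts.
// NOTE(review): in the alert rules, the "Critical" threshold is used by the rule labelled severity 'warning' and the "Warning" threshold by the rule labelled with nodeCriticalSeverity; the two names look swapped relative to the severities — confirm before changing any values.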
|
||||
fsSpaceAvailableCriticalThreshold: 5,
|
||||
fsSpaceAvailableWarningThreshold: 3,
|
||||
|
||||
grafana_prefix: '',
|
||||
},
|
||||
}
|
||||
|
|
|
@ -75,14 +75,15 @@ local gauge = promgrafonnet.gauge;
|
|||
|
||||
// TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%.
|
||||
// This needs to be added upstream in the promgrafonnet library and then changed here.
|
||||
// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
|
||||
local memoryGauge = gauge.new(
|
||||
'Memory Usage',
|
||||
|||
|
||||
100 -
|
||||
(
|
||||
node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
||||
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"})
|
||||
/
|
||||
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
||||
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
|
||||
* 100
|
||||
)
|
||||
||| % $._config,
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
github.com/prometheus-operator/prometheus-operator/jsonnet/mixin
|
Reference in New Issue