upgrade monitoring stack to latest and greatest
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
a96d01ba65
commit
d14fbc6e17
|
@ -1,24 +1,26 @@
|
|||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"name": "kube-prometheus",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/kube-prometheus",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
"version": "master",
|
||||
"name": "kube-prometheus"
|
||||
},
|
||||
{
|
||||
"name": "prometheus-pushgateway",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
|
||||
"subdir": "prometheus-pushgateway"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
"version": "master",
|
||||
"name": "prometheus-pushgateway"
|
||||
}
|
||||
]
|
||||
],
|
||||
"legacyImports": true
|
||||
}
|
||||
|
|
|
@ -1,18 +1,7 @@
|
|||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"name": "etcd-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "dd816f0735f8a5ee7a1bbd134002961b2884197c",
|
||||
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
||||
},
|
||||
{
|
||||
"name": "grafana",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/brancz/kubernetes-grafana",
|
||||
|
@ -23,29 +12,56 @@
|
|||
"sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34="
|
||||
},
|
||||
{
|
||||
"name": "grafana-builder",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/jsonnet-libs",
|
||||
"subdir": "grafana-builder"
|
||||
"remote": "https://github.com/coreos/etcd",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "e347979bbe1e89fa0f48ea61fb486fdbaec42b74",
|
||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
||||
"version": "4a64198a9f424549c64d6c2eeb92eed8ff3a7bd8",
|
||||
"sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/kube-prometheus",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "d07466766d46710e54a627f913eea5661382331a",
|
||||
"sum": "vFv2xVX1SALKIOlQmcIvQ2pDKXqAbdBJ6zdGIrZUcis="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/prometheus-operator",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "00cbd4911f931380cf9e19d771d7ebae1ec0a807",
|
||||
"sum": "PfB8G2nfy3e/BrXS1ayymsRRFJvQLWT+oY5aqLS0tE8="
|
||||
},
|
||||
{
|
||||
"name": "grafonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib",
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "906768d46973e022594d3f03d82c5a51d86de2cc",
|
||||
"sum": "J3Vp0EVbxTObr6KydLXsi4Rc0ssNVAEuwLc0NQ+4wqU="
|
||||
"version": "f1ceb29c053445881cf47c8761225d463273f7e2",
|
||||
"sum": "PQ1u4FqaLkqmYF1tE/BNx6Db8roTei0uqo+SKCwRlbo="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/jsonnet-libs",
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "d851699bb178b62deabb5532cdb9e48c1a83b1b9",
|
||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
||||
},
|
||||
{
|
||||
"name": "ksonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
|
@ -53,87 +69,71 @@
|
|||
}
|
||||
},
|
||||
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
|
||||
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
|
||||
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
|
||||
"name": "ksonnet"
|
||||
},
|
||||
{
|
||||
"name": "kube-prometheus",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/kube-prometheus",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "cf7bb8706c840ae5ff80ffbc255ed0d49d3fff33",
|
||||
"sum": "jBsN3YSsYzSmaV2zNU4Um4K+28qUtw9uga7HwF/tqI4="
|
||||
},
|
||||
{
|
||||
"name": "kube-state-metrics",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "e99fbbc6b2b6539f44b4e769e432016eaabee650",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
},
|
||||
{
|
||||
"name": "kube-state-metrics-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "e99fbbc6b2b6539f44b4e769e432016eaabee650",
|
||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||
},
|
||||
{
|
||||
"name": "kubernetes-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "544148ee3ea4da1ffa1e97ebb66599a9548fde8d",
|
||||
"sum": "6uEUlF5xrvjk8FBLpGaIyfH+3PNAPnzedbTsbqEaB3U="
|
||||
"version": "90a6d132e3c9558c0690153c9b404b16b4754139",
|
||||
"sum": "RZn1B0sbrPEKenxKIrXGdDN3NGb9OesQ+IA5whAP4ik="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "90a6d132e3c9558c0690153c9b404b16b4754139",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "765afc98be4b2a53b3b12c48df2cf93379932a29",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "765afc98be4b2a53b3b12c48df2cf93379932a29",
|
||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||
},
|
||||
{
|
||||
"name": "node-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/node_exporter",
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "da5972b5398cd67a8854e6b1ee6b861bfda5ef83",
|
||||
"sum": "ZwrC0+4o1xD6+oPBu1p+rBXLlf6pMBD9rT8ygyl2aW0="
|
||||
"version": "0c532984b7a6a00d67de07a9794aacfeec2c11d3",
|
||||
"sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
|
||||
},
|
||||
{
|
||||
"name": "prometheus",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/prometheus",
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "fe47c9c86ee01cb089b76002cbc047e0f22b5501",
|
||||
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc="
|
||||
"version": "0e2004f6fbe9eeddcd0c0084e401b5acb61760a4",
|
||||
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
"name": "prometheus-operator",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/prometheus-operator",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "378d36df448366414de53a66a64020cd053002b7",
|
||||
"sum": "vegTm8VSDazwYflBQGLkjs3ystWahwUv0fUyuMbpNRg="
|
||||
},
|
||||
{
|
||||
"name": "prometheus-pushgateway",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
|
||||
|
@ -142,28 +142,7 @@
|
|||
},
|
||||
"version": "7bb93ca3ddf3b83f1fdfe95d9bad415b57d0fe4b",
|
||||
"sum": "6nOJeHJExjYyTSovZvU6xbGjWS88oUfGnF1DAo+Q6tg="
|
||||
},
|
||||
{
|
||||
"name": "promgrafonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "544148ee3ea4da1ffa1e97ebb66599a9548fde8d",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
},
|
||||
{
|
||||
"name": "slo-libsonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/metalmatze/slo-libsonnet",
|
||||
"subdir": "slo-libsonnet"
|
||||
}
|
||||
},
|
||||
"version": "2615ffbafc121fc0a507b75509764e53c58409e2",
|
||||
"sum": "D9MryEvtOBLzIBC1nU4IvVeJPP+l3P/vpZsC0KG8vnk="
|
||||
}
|
||||
]
|
||||
],
|
||||
"legacyImports": false
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ metadata:
|
|||
namespace: monitoring
|
||||
spec:
|
||||
encryptedData:
|
||||
alertmanager.yaml: AgBlhYgX4pIgWPP1u5dNb9lPMoS0BR7Ca3M5kUVRKrkeT6Jn+kLZcju4g7A1WlkQ+jR2ptlaq+MkgYs1GS27wrw/iBsOZIbvfLoyFr4pL5i58WzzxPvk1wFO86oCpLJoK3pDh/eKR4e16BR70Q3DOTMRXEQ8SuOMKFonBPALOllAAXPYQFioF9MN+0MBifvnGe6UrfbXsQCdoL9H0G/OiJv/ZJxJcmx8eJRpJEgeyAQsdi0NNzhg2RiiNDC0qVCIblSxOaGhlNNQxiqLaQW5Uh7DXV2u+Rz1u85dm6lzSazjoXozPdPS4vMEMy8ZmFvzW2S7DhcfBJ0RE99DzGofIT67QIFh4edxCBRtlpYaYDoLKNkNRpp9inTHnQEGLga2qrElOUU5O2OwgC8qKl4k54Zxu4NUlGKr1IDsnbUiBNge+uvrsu/RahCYQ13H5eeJgkIuFSYE9h1qNG+s/lrjNQY7JsL9ZYj1ddn/5ZOYjgkCWs6yjTU+20uboU5vLOK2zawrJ6cvO6FHPjEMrnpcVSUlqIc588Ligg1coU5J0+UkLd1OPKY/5YBuMdNUBsmLfqutHbWSCiz9PgcI8wZsvCkPiyeUP3QlgvnLaQfcNUxv/tlU+Yg2nBYceKDPr3i2CN6IGrN5CxcCKMClEdTZ5xgmREN6sj0gIKPm1TpOdL1BjBcokZSXgYGnmr5XxDv9ybyainqcitSewznxHKbtvszJihHU6XcOr0MKo0L+abfwjlWvcHM+cZAT35xf1xtZ18A2JauwJ94+Flhzro+a1UclnwDQJf8Hp5PhMQRzu5q8WCLOF08N9Xq469K/FB4jtxnrYPJb6hQhZ+/LmLbLL9c3p2g1X+YQWeoUDvAZ6ujjPoBvNxPoLIsXO2Tu77se94e3WGxaHtcEqtFLQujH3jgAHLRp/x9jHoAniBLVo69/Xd6rbLZQ4DLhMlkdkipaM2iQZxoWQ0qHsISVseEu9741IZfveq0MY3acTcwPm7B0JXAn8dLmdv1BNrRo0/Xxgysdn3DybFmJ7AlFCi3iKacsIvnjEdZgCD3ogFyySSu/b52IyrhOl6WKLZaLURLHe8mxx8kXbRVVOQQ162Xgtx4viqg15yF15Qt0NmMXzU+hQc/XmVRuRVYPYhFuRAQVCD40ENwt/X3/ae/Mkfc2jzWvN3O+VfbepKfc4D+jVM1mv+E2dS4ZcWPmvvIXiLKJMcscZBr75bO6pt8LaMgz+n+ZPJRJzKa+yESbOYefNuP8DuwImPD8ridLBWR49EZD/P3I/7ZwJjHXH+VpYxMErcKI5R2znnLrnCBgMAV8M78vzbzuJ7Ljb6RkjCSU4NWnIAZmxgldbSLoHwXK57yoW+HDC9twDcvR40XL2NGSlChtd+x9M/mgHuS/NpAalJtF0zlo/hbaCSwE56wUYpjO9yx/rhj1wrYIbdAZ/aJaPcpi8cbhDltFMZT6OrWoCCiKR69wB3/HFSHd+SnLTBb1mdfqPwngekfe2TRQ7kXMfbPiBYJC6H5rQHHojmSGwl4AHL6qJ3InXGSG1FFnnc/roGGffjkpbIdmkqPfwn4Ob60xJwdLEYFEs1TOrOnKhTJfgc723EZr8AdVNhXbUlEiVyNhbyp4hHvzeybqMryEGuhKZIMD+GnfWC3/TSKFJC8wwFSOdmnjD3FhTF3ZEX4ehuhZLtE7sJjftjDXyrjVOF3eXlp9bQ==
|
||||
alertmanager.yaml: AgB6MMnL2RsryYbKhQIyJvqXopB4wtUdotRShqk32jFScXUaJKMxPFF8t1Uhwpt1UPcjdrzOElEdHVX9nYj2LK+KGa23L5IV6ZACfjFLbHsn2PHA/kWDNCS7hLaAahSO2ByfoAnoGQp8OejsYDCSsp4NtkM+ooNgw1mGbO/yVNZZRDzccVLZFwJchsR0+26KEqzL3eHtsG40e7lf71uxBgYhvggukEeDfuuPDkKG4YeFJSQ7clURFhckbe+/MmnP+MHw7K9xkdlANl4Tu+e2+njm32PuZ34PKUrLQJBFSHG3DpGjPJBxBZbV7GP+T+VK5GQHrUZOewjgp5e4fDrqZvOVoASPJqY1C961bLjhwsTC8uzdJVHE78LR29hWnjlDBEbQdNsGwDoAHG0yBiC5N0rWfm+XNLh1+e96IF4OKp0/BVqNJaxrs6jlmTDMQEp4Q8JwVTqNSKfFr2jMMW0uI3ggqPAXnWDsLWLd9pD6gpbqGHELmG7dmPi2FQWBlnLXf4VllFbYzRpLZ12hKILBzWuhY6OkCSXfb/bbLyfxyjO8rWvgKxcm9RpjdwtiU4+wT6KKc1/x9nSvqqgaXq81ecHZr8EbJz6nAE/+0Qm1WhQ5wB89RtbX0K4D3i0e7P7JEB3mrSwdSxUd1JLyVKesWKwDiIiwpA7ljD6cTgmLS/00be1eHreqSJ0SLSODBkIadIH4Kc0tyO3/dbdRTkQC3pTQp5q7gVfJzPEGcmZFG0OUHHDXgigS4VUDalpTnaXXDj0Gjuq0+rf1Gu3ZuVC1QKwZ3W26eUrUz4NLtRDtJNYTgsrtJs+T3T3iQHjoO53vTp1TFDX6zGTaJd7yzis5CZWVIn4K8t1rcR/MexzR/Dk46HBLVJXuL0uN17RvFuKsCIFy0rR0BH8qbmWUwiqEfjDG4csBPgH5xAHWGVJHfPStILSZbDoZ/twms391umr+eVeZrSyjgb/W9MSsikqTJaMRH6gHByqljm5/fdmnF3G+Pe6wT8UTM4E1gSOwfmVaow2sqLCT0wdL/ZVOaJBPIG5MIBS7nSQ9nMsDVCmQp7TUD/XNp0M9hYB96AbJOElONQq+0aPzfMgyZElsAMO6Odkr8FBSitMe8lSz2WJvjF7XWPvOQrj1eaXOwNKawArvBjwMqEX5lnMcjHKFgSAOzeWjcEdnTFpifg6kmQn9hO2HKwNggXO8evT9undrnm8qj6vPcc+oXEgs2RlYk5pHUbf4V+O1KxVSBWJCIC2sXyacHI3Emkb5qkTw+Wv4BXla2JJSbLWn9o1hgOI88xpyZpnFG4A1Y/XVQa2002TFccF78UjxcFSA2c1XL0uQxJFkERNxwinrCPjXqA1i7CnCYH53ZpAGj/TqwAzZSZ1FN3L1s+Ro0ASiN/92ppeHhXGPhhTLhj1i4rr2qQ1XhixDSMQLgctAN0ebe2lnjG07yZodRaZbHHl4GfYfcxLCqemwHg5xVzbS4XP+70/hoWes9UVygK9rkzTUUN9IbbtZpgpGplnT3MymbqbYbrlAU59U/vSpxS6K3RUiTMz5IK6p5u7baMYeVuthUy3LU9r07P+vicwAlEQ6jEWKXQD1Q/Vhc5H0wUgZrTBcgo2w9JwT4QUhO1qzIKiUmMEWn1bDh1GaurRZtMRgjP5sfWGMGtUqMCVmAZvUbYsr8k6+1nn93xmn78QNOabtth8Ayfp6vHYeF4h3xA==
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -10,7 +10,6 @@ spec:
|
|||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
|
|
|
@ -14,4 +14,3 @@ spec:
|
|||
targetPort: https
|
||||
selector:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
|
|
|
@ -25,4 +25,3 @@ spec:
|
|||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
|
|
|
@ -1,32 +1,32 @@
|
|||
apiVersion: v1
|
||||
data:
|
||||
config.yaml: |
|
||||
resourceRules:
|
||||
cpu:
|
||||
containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
node:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod:
|
||||
resource: pod
|
||||
containerLabel: container
|
||||
memory:
|
||||
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
instance:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod:
|
||||
resource: pod
|
||||
containerLabel: container
|
||||
window: 5m
|
||||
config.yaml: |-
|
||||
"resourceRules":
|
||||
"cpu":
|
||||
"containerLabel": "container"
|
||||
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
|
||||
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
||||
"resources":
|
||||
"overrides":
|
||||
"namespace":
|
||||
"resource": "namespace"
|
||||
"node":
|
||||
"resource": "node"
|
||||
"pod":
|
||||
"resource": "pod"
|
||||
"memory":
|
||||
"containerLabel": "container"
|
||||
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
|
||||
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
||||
"resources":
|
||||
"overrides":
|
||||
"instance":
|
||||
"resource": "node"
|
||||
"namespace":
|
||||
"resource": "namespace"
|
||||
"pod":
|
||||
"resource": "pod"
|
||||
"window": "5m"
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: adapter-config
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -19,4 +19,4 @@ spec:
|
|||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
|
|
|
@ -65,122 +65,289 @@ spec:
|
|||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
- name: kube-apiserver-error
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[5m]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate5m
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[30m]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate30m
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[1h]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate1h
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[2h]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate2h
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[6h]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate6h
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[1d]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate1d
|
||||
- expr: |
|
||||
sum by (status_class) (
|
||||
label_replace(
|
||||
rate(apiserver_request_total{job="apiserver"}[3d]
|
||||
), "status_class", "${1}xx", "code", "([0-9])..")
|
||||
)
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class:apiserver_request_total:rate3d
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate5m{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate5m{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate5m
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate30m{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate30m{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate30m
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate1h{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate1h{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate1h
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate2h{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate2h{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate2h
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate6h{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate6h{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate6h
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate1d{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate1d{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate1d
|
||||
- expr: |
|
||||
sum(status_class:apiserver_request_total:rate3d{job="apiserver",status_class="5xx"})
|
||||
/
|
||||
sum(status_class:apiserver_request_total:rate3d{job="apiserver"})
|
||||
labels:
|
||||
job: apiserver
|
||||
record: status_class_5xx:apiserver_request_total:ratio_rate3d
|
||||
- name: kube-apiserver.rules
|
||||
rules:
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||
-
|
||||
(
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
||||
)
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate6h
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: |
|
||||
(
|
||||
(
|
||||
# too slow
|
||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||
-
|
||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
|
||||
)
|
||||
+
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
|
||||
)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate6h
|
||||
- expr: |
|
||||
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||
labels:
|
||||
verb: read
|
||||
record: code_resource:apiserver_request_total:rate5m
|
||||
- expr: |
|
||||
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||
labels:
|
||||
verb: write
|
||||
record: code_resource:apiserver_request_total:rate5m
|
||||
- expr: |
|
||||
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
verb: read
|
||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
- expr: |
|
||||
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
verb: write
|
||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
- expr: |
|
||||
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
|
||||
/
|
||||
|
@ -201,6 +368,153 @@ spec:
|
|||
labels:
|
||||
quantile: "0.5"
|
||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
- interval: 3m
|
||||
name: kube-apiserver-availability.rules
|
||||
rules:
|
||||
- expr: |
|
||||
1 - (
|
||||
(
|
||||
# write too slow
|
||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||
-
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||
) +
|
||||
(
|
||||
# read too slow
|
||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
||||
-
|
||||
(
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||
)
|
||||
) +
|
||||
# errors
|
||||
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
|
||||
)
|
||||
/
|
||||
sum(code:apiserver_request_total:increase30d)
|
||||
labels:
|
||||
verb: all
|
||||
record: apiserver_request:availability30d
|
||||
- expr: |
|
||||
1 - (
|
||||
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
||||
-
|
||||
(
|
||||
# too slow
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
|
||||
)
|
||||
/
|
||||
sum(code:apiserver_request_total:increase30d{verb="read"})
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:availability30d
|
||||
- expr: |
|
||||
1 - (
|
||||
(
|
||||
# too slow
|
||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||
-
|
||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||
)
|
||||
+
|
||||
# errors
|
||||
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
|
||||
)
|
||||
/
|
||||
sum(code:apiserver_request_total:increase30d{verb="write"})
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:availability30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||
labels:
|
||||
verb: read
|
||||
record: code:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
||||
labels:
|
||||
verb: write
|
||||
record: code:apiserver_request_total:increase30d
|
||||
- name: k8s.rules
|
||||
rules:
|
||||
- expr: |
|
||||
|
@ -607,13 +921,22 @@ spec:
|
|||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used'
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: |
|
||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description: Node Exporter text file collector failed to scrape.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: |
|
||||
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||
|
@ -657,7 +980,7 @@ spec:
|
|||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
|
@ -667,7 +990,7 @@ spec:
|
|||
sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
|
@ -680,7 +1003,7 @@ spec:
|
|||
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
||||
|
@ -698,7 +1021,7 @@ spec:
|
|||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||
|
@ -716,7 +1039,7 @@ spec:
|
|||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
|
@ -729,7 +1052,7 @@ spec:
|
|||
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||
|
@ -749,7 +1072,7 @@ spec:
|
|||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||
|
@ -761,7 +1084,7 @@ spec:
|
|||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
|
@ -791,7 +1114,7 @@ spec:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
for: 10m
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeCronJobRunning
|
||||
|
@ -934,12 +1257,12 @@ spec:
|
|||
severity: warning
|
||||
- name: kubernetes-storage
|
||||
rules:
|
||||
- alert: KubePersistentVolumeUsageCritical
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
|
||||
}} free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
/
|
||||
|
@ -948,12 +1271,12 @@ spec:
|
|||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeFullInFourDays
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
|
||||
days. Currently {{ $value | humanizePercentage }} is available.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
|
@ -964,7 +1287,7 @@ spec:
|
|||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeErrors
|
||||
annotations:
|
||||
message: The persistent volume {{ $labels.persistentvolume }} has status {{
|
||||
|
@ -1000,47 +1323,51 @@ spec:
|
|||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kube-apiserver-error-alerts
|
||||
- name: kube-apiserver-slos
|
||||
rules:
|
||||
- alert: ErrorBudgetBurn
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
message: 'High requests error budget burn for job=apiserver (current value:
|
||||
{{ $value }})'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
|
||||
message: The API server is burning too much error budget
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
expr: |
|
||||
(
|
||||
status_class_5xx:apiserver_request_total:ratio_rate1h{job="apiserver"} > (14.4*0.010000)
|
||||
and
|
||||
status_class_5xx:apiserver_request_total:ratio_rate5m{job="apiserver"} > (14.4*0.010000)
|
||||
)
|
||||
or
|
||||
(
|
||||
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (6*0.010000)
|
||||
and
|
||||
status_class_5xx:apiserver_request_total:ratio_rate30m{job="apiserver"} > (6*0.010000)
|
||||
)
|
||||
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
|
||||
and
|
||||
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
|
||||
for: 2m
|
||||
labels:
|
||||
job: apiserver
|
||||
severity: critical
|
||||
- alert: ErrorBudgetBurn
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
message: 'High requests error budget burn for job=apiserver (current value:
|
||||
{{ $value }})'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
|
||||
message: The API server is burning too much error budget
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
expr: |
|
||||
(
|
||||
status_class_5xx:apiserver_request_total:ratio_rate1d{job="apiserver"} > (3*0.010000)
|
||||
and
|
||||
status_class_5xx:apiserver_request_total:ratio_rate2h{job="apiserver"} > (3*0.010000)
|
||||
)
|
||||
or
|
||||
(
|
||||
status_class_5xx:apiserver_request_total:ratio_rate3d{job="apiserver"} > (0.010000)
|
||||
and
|
||||
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (0.010000)
|
||||
)
|
||||
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
|
||||
and
|
||||
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
message: The API server is burning too much error budget
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
|
||||
and
|
||||
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
message: The API server is burning too much error budget
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
|
||||
expr: |
|
||||
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
|
||||
and
|
||||
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
|
||||
for: 3h
|
||||
labels:
|
||||
job: apiserver
|
||||
severity: warning
|
||||
- name: kubernetes-system-apiserver
|
||||
rules:
|
||||
|
@ -1068,29 +1395,6 @@ spec:
|
|||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAPILatencyHigh
|
||||
annotations:
|
||||
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
||||
for {{ $labels.verb }} {{ $labels.resource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||
expr: |
|
||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
||||
}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
|
@ -1208,7 +1512,7 @@ spec:
|
|||
on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -1373,8 +1677,8 @@ spec:
|
|||
- alert: PrometheusRemoteStorageFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
|
||||
{{ printf "%.1f" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue
|
||||
}}{{ else }}{{ $labels.url }}{{ end }}.
|
||||
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
|
||||
$labels.url }}
|
||||
summary: Prometheus fails to send samples to remote storage.
|
||||
expr: |
|
||||
(
|
||||
|
@ -1394,8 +1698,8 @@ spec:
|
|||
- alert: PrometheusRemoteWriteBehind
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
is {{ printf "%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue
|
||||
}}{{ else }}{{ $labels.url }}{{ end }}.
|
||||
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
|
||||
}}.
|
||||
summary: Prometheus remote write is behind.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
|
@ -1412,8 +1716,9 @@ spec:
|
|||
- alert: PrometheusRemoteWriteDesiredShards
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
desired shards calculation wants to run {{ $value }} shards, which is more
|
||||
than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
|
||||
desired shards calculation wants to run {{ $value }} shards for queue {{
|
||||
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
|
||||
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
|
||||
$labels.instance | query | first | value }}.
|
||||
summary: Prometheus remote write desired shards calculation wants to run more
|
||||
than configured max shards.
|
||||
|
|
|
@ -612,6 +612,11 @@ spec:
|
|||
items:
|
||||
type: string
|
||||
type: array
|
||||
alertQueryUrl:
|
||||
description: The external Query URL the Thanos Ruler will set in the
|
||||
'Source' field of all alerts. Maps to the '--alert.query-url' CLI
|
||||
arg.
|
||||
type: string
|
||||
alertmanagersConfig:
|
||||
description: Define configuration for connecting to alertmanager. Only
|
||||
available with thanos v0.10.0 and higher. Maps to the `alertmanagers.config`
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
@ -18,15 +18,15 @@ spec:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --kubelet-service=kube-system/kubelet
|
||||
- --logtostderr=true
|
||||
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
|
||||
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.0
|
||||
image: quay.io/coreos/prometheus-operator:v0.38.0
|
||||
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.1
|
||||
image: quay.io/coreos/prometheus-operator:v0.38.1
|
||||
name: prometheus-operator
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
|
|
|
@ -4,7 +4,7 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
|
|
|
@ -4,6 +4,6 @@ metadata:
|
|||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.38.0
|
||||
app.kubernetes.io/version: v0.38.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
github.com/coreos/etcd/Documentation/etcd-mixin
|
|
@ -217,7 +217,7 @@
|
|||
|
||||
grafanaDashboards+:: {
|
||||
'etcd.json': {
|
||||
id: 6,
|
||||
uid: std.md5('etcd.json'),
|
||||
title: 'etcd',
|
||||
description: 'etcd sample Grafana dashboard with Prometheus',
|
||||
tags: [],
|
||||
|
@ -516,7 +516,7 @@
|
|||
stack: false,
|
||||
steppedLine: false,
|
||||
targets: [{
|
||||
expr: 'etcd_debugging_mvcc_db_total_size_in_bytes{job="$cluster"}',
|
||||
expr: 'etcd_mvcc_db_total_size_in_bytes{job="$cluster"}',
|
||||
hide: false,
|
||||
interval: '',
|
||||
intervalFactor: 2,
|
89
monitoring/vendor/github.com/coreos/kube-prometheus/jsonnet/kube-prometheus/jsonnetfile.json
generated
vendored
Normal file
89
monitoring/vendor/github.com/coreos/kube-prometheus/jsonnet/kube-prometheus/jsonnetfile.json
generated
vendored
Normal file
|
@ -0,0 +1,89 @@
|
|||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/brancz/kubernetes-grafana",
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/prometheus-operator",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "release-0.38"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "ksonnet"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/node_exporter",
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/prometheus",
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master",
|
||||
"name": "prometheus"
|
||||
}
|
||||
],
|
||||
"legacyImports": true
|
||||
}
|
179
monitoring/vendor/github.com/coreos/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-custom-metrics.libsonnet
generated
vendored
Normal file
179
monitoring/vendor/github.com/coreos/kube-prometheus/jsonnet/kube-prometheus/kube-prometheus-custom-metrics.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,179 @@
|
|||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
// Custom metrics API allows the HPA v2 to scale based on arbirary metrics.
|
||||
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
prometheusAdapter+:: {
|
||||
// Rules for custom-metrics
|
||||
config+:: {
|
||||
rules+: [
|
||||
{
|
||||
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
|
||||
seriesFilters: [],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)_seconds_total$',
|
||||
as: ""
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
|
||||
},
|
||||
{
|
||||
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
|
||||
seriesFilters: [
|
||||
{ isNot: '^container_.*_seconds_total$' },
|
||||
],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
|
||||
},
|
||||
{
|
||||
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
|
||||
seriesFilters: [
|
||||
{ isNot: '^container_.*_total$' },
|
||||
],
|
||||
resources: {
|
||||
overrides: {
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
}
|
||||
},
|
||||
},
|
||||
name: {
|
||||
matches: '^container_(.*)$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)'
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [
|
||||
{ isNot: '.*_total$' },
|
||||
],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [
|
||||
{ isNot: '.*_seconds_total' },
|
||||
],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '^(.*)_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
|
||||
},
|
||||
{
|
||||
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
|
||||
seriesFilters: [],
|
||||
resources: {
|
||||
template: '<<.Resource>>'
|
||||
},
|
||||
name: {
|
||||
matches: '^(.*)_seconds_total$',
|
||||
as: ''
|
||||
},
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
|
||||
}
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
prometheusAdapter+:: {
|
||||
customMetricsApiService: {
|
||||
apiVersion: 'apiregistration.k8s.io/v1',
|
||||
kind: 'APIService',
|
||||
metadata: {
|
||||
name: 'v1beta1.custom.metrics.k8s.io',
|
||||
},
|
||||
spec: {
|
||||
service: {
|
||||
name: $.prometheusAdapter.service.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
},
|
||||
group: 'custom.metrics.k8s.io',
|
||||
version: 'v1beta1',
|
||||
insecureSkipTLSVerify: true,
|
||||
groupPriorityMinimum: 100,
|
||||
versionPriority: 100,
|
||||
},
|
||||
},
|
||||
customMetricsClusterRoleServerResources:
|
||||
local clusterRole = k.rbac.v1.clusterRole;
|
||||
local policyRule = clusterRole.rulesType;
|
||||
|
||||
local rules =
|
||||
policyRule.new() +
|
||||
policyRule.withApiGroups(['custom.metrics.k8s.io']) +
|
||||
policyRule.withResources(['*']) +
|
||||
policyRule.withVerbs(['*']);
|
||||
|
||||
clusterRole.new() +
|
||||
clusterRole.mixin.metadata.withName('custom-metrics-server-resources') +
|
||||
clusterRole.withRules(rules),
|
||||
|
||||
customMetricsClusterRoleBindingServerResources:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
kind: 'ServiceAccount',
|
||||
name: $.prometheusAdapter.serviceAccount.metadata.name,
|
||||
namespace: $._config.namespace,
|
||||
}]),
|
||||
|
||||
customMetricsClusterRoleBindingHPA:
|
||||
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
|
||||
|
||||
clusterRoleBinding.new() +
|
||||
clusterRoleBinding.mixin.metadata.withName('hpa-controller-custom-metrics') +
|
||||
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
|
||||
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
|
||||
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
|
||||
clusterRoleBinding.withSubjects([{
|
||||
kind: 'ServiceAccount',
|
||||
name: 'horizontal-pod-autoscaler',
|
||||
namespace: 'kube-system',
|
||||
}]),
|
||||
}
|
||||
}
|
|
@ -3,6 +3,12 @@ local service = k.core.v1.service;
|
|||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
eks: {
|
||||
minimumAvailableIPs: 10,
|
||||
minimumAvailableIPsTime: '10m'
|
||||
}
|
||||
},
|
||||
prometheus+: {
|
||||
serviceMonitorCoreDNS+: {
|
||||
spec+: {
|
||||
|
@ -59,14 +65,14 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|||
name: 'kube-prometheus-eks.rules',
|
||||
rules: [
|
||||
{
|
||||
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
|
||||
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
|
||||
},
|
||||
'for': '10m',
|
||||
'for': $._config.eks.minimumAvailableIPsTime,
|
||||
alert: 'EksAvailableIPs'
|
||||
},
|
||||
],
|
|
@ -20,6 +20,11 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
'app.kubernetes.io/name': 'node-exporter',
|
||||
'app.kubernetes.io/version': $._config.versions.nodeExporter,
|
||||
},
|
||||
selectorLabels: {
|
||||
[labelName]: $._config.nodeExporter.labels[labelName]
|
||||
for labelName in std.objectFields($._config.nodeExporter.labels)
|
||||
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
@ -69,6 +74,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
local containerEnv = container.envType;
|
||||
|
||||
local podLabels = $._config.nodeExporter.labels;
|
||||
local selectorLabels = $._config.nodeExporter.selectorLabels;
|
||||
|
||||
local existsToleration = toleration.new() +
|
||||
toleration.withOperator('Exists');
|
||||
|
@ -133,7 +139,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
daemonset.mixin.metadata.withName('node-exporter') +
|
||||
daemonset.mixin.metadata.withNamespace($._config.namespace) +
|
||||
daemonset.mixin.metadata.withLabels(podLabels) +
|
||||
daemonset.mixin.spec.selector.withMatchLabels(podLabels) +
|
||||
daemonset.mixin.spec.selector.withMatchLabels(selectorLabels) +
|
||||
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
|
||||
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
|
||||
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
|
||||
|
@ -163,7 +169,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
spec: {
|
||||
jobLabel: 'app.kubernetes.io/name',
|
||||
selector: {
|
||||
matchLabels: $._config.nodeExporter.labels,
|
||||
matchLabels: $._config.nodeExporter.selectorLabels,
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
|
@ -194,7 +200,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https');
|
||||
|
||||
service.new('node-exporter', $.nodeExporter.daemonset.spec.selector.matchLabels, nodeExporterPort) +
|
||||
service.new('node-exporter', $._config.nodeExporter.selectorLabels, nodeExporterPort) +
|
||||
service.mixin.metadata.withNamespace($._config.namespace) +
|
||||
service.mixin.metadata.withLabels($._config.nodeExporter.labels) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
|
@ -16,34 +16,47 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
name: 'prometheus-adapter',
|
||||
labels: { name: $._config.prometheusAdapter.name },
|
||||
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc:9090/',
|
||||
config: |||
|
||||
resourceRules:
|
||||
cpu:
|
||||
containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
node:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod:
|
||||
resource: pod
|
||||
containerLabel: container
|
||||
memory:
|
||||
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
instance:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod:
|
||||
resource: pod
|
||||
containerLabel: container
|
||||
window: 5m
|
||||
|||,
|
||||
config: {
|
||||
resourceRules: {
|
||||
cpu: {
|
||||
containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)',
|
||||
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
|
||||
resources: {
|
||||
overrides: {
|
||||
node: {
|
||||
resource: 'node'
|
||||
},
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
},
|
||||
},
|
||||
},
|
||||
containerLabel: 'container'
|
||||
},
|
||||
memory: {
|
||||
containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)',
|
||||
nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)',
|
||||
resources: {
|
||||
overrides: {
|
||||
instance: {
|
||||
resource: 'node'
|
||||
},
|
||||
namespace: {
|
||||
resource: 'namespace'
|
||||
},
|
||||
pod: {
|
||||
resource: 'pod'
|
||||
},
|
||||
},
|
||||
},
|
||||
containerLabel: 'container'
|
||||
},
|
||||
window: '5m',
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
|
@ -70,8 +83,8 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
|
||||
configMap:
|
||||
local configmap = k.core.v1.configMap;
|
||||
configmap.new('adapter-config', { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) }) +
|
||||
|
||||
configmap.new('adapter-config', { 'config.yaml': $._config.prometheusAdapter.config }) +
|
||||
configmap.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
service:
|
|
@ -6,7 +6,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
prometheus: 'v2.15.2',
|
||||
prometheus: 'v2.17.2',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
|
@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|||
},
|
||||
|
||||
versions+:: {
|
||||
prometheusOperator: 'v0.38.0',
|
||||
prometheusOperator: 'v0.38.1',
|
||||
prometheusConfigReloader: self.prometheusOperator,
|
||||
configmapReloader: 'v0.3.0',
|
||||
},
|
1
monitoring/vendor/github.com/coreos/prometheus-operator/jsonnet/prometheus-operator/thanosruler-crd.libsonnet
generated
vendored
Normal file
1
monitoring/vendor/github.com/coreos/prometheus-operator/jsonnet/prometheus-operator/thanosruler-crd.libsonnet
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
27
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/alertlist.libsonnet
generated
vendored
Normal file
27
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/alertlist.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
new(
|
||||
title='',
|
||||
span=null,
|
||||
show='current',
|
||||
limit=10,
|
||||
sortOrder=1,
|
||||
stateFilter=[],
|
||||
onlyAlertsOnDashboard=true,
|
||||
transparent=null,
|
||||
description=null,
|
||||
datasource=null,
|
||||
)::
|
||||
{
|
||||
[if transparent != null then 'transparent']: transparent,
|
||||
title: title,
|
||||
[if span != null then 'span']: span,
|
||||
type: 'alertlist',
|
||||
show: show,
|
||||
limit: limit,
|
||||
sortOrder: sortOrder,
|
||||
[if show != 'changes' then 'stateFilter']: stateFilter,
|
||||
onlyAlertsOnDashboard: onlyAlertsOnDashboard,
|
||||
[if description != null then 'description']: description,
|
||||
datasource: datasource,
|
||||
},
|
||||
}
|
|
@ -3,6 +3,8 @@
|
|||
title,
|
||||
datasource=null,
|
||||
calc='mean',
|
||||
time_from=null,
|
||||
span=null,
|
||||
description='',
|
||||
height=null,
|
||||
transparent=null,
|
||||
|
@ -11,6 +13,8 @@
|
|||
[if description != '' then 'description']: description,
|
||||
[if height != null then 'height']: height,
|
||||
[if transparent != null then 'transparent']: transparent,
|
||||
[if time_from != null then 'timeFrom']: time_from,
|
||||
[if span != null then 'span']: span,
|
||||
title: title,
|
||||
type: 'gauge',
|
||||
datasource: datasource,
|
|
@ -1,4 +1,5 @@
|
|||
{
|
||||
alertlist:: import 'alertlist.libsonnet',
|
||||
dashboard:: import 'dashboard.libsonnet',
|
||||
template:: import 'template.libsonnet',
|
||||
text:: import 'text.libsonnet',
|
|
@ -128,7 +128,7 @@
|
|||
mode: x_axis_mode,
|
||||
name: null,
|
||||
values: if x_axis_mode == 'series' then [x_axis_values] else [],
|
||||
buckets: if x_axis_mode == 'histogram' then [x_axis_buckets] else null,
|
||||
buckets: if x_axis_mode == 'histogram' then x_axis_buckets else null,
|
||||
[if x_axis_min != null then 'min']: x_axis_min,
|
||||
[if x_axis_max != null then 'max']: x_axis_max,
|
||||
},
|
|
@ -18,6 +18,7 @@
|
|||
* @param color_mode How to display difference in frequency with color, default 'opacity'
|
||||
* @param dataFormat How to format the data, default is 'timeseries'
|
||||
* @param highlightCards TODO: document
|
||||
* @param hideZeroBuckets Whether or not to hide empty buckets, default is false
|
||||
* @param legend_show Show legend
|
||||
* @param minSpan Minimum span of the panel when repeated on a template variable
|
||||
* @param repeat Variable used to repeat the heatmap panel
|
||||
|
@ -54,6 +55,7 @@
|
|||
color_mode='spectrum',
|
||||
dataFormat='timeseries',
|
||||
highlightCards=true,
|
||||
hideZeroBuckets=false,
|
||||
legend_show=false,
|
||||
minSpan=null,
|
||||
span=null,
|
||||
|
@ -96,6 +98,7 @@
|
|||
},
|
||||
[if dataFormat != null then 'dataFormat']: dataFormat,
|
||||
heatmap: {},
|
||||
hideZeroBuckets: hideZeroBuckets,
|
||||
highlightCards: highlightCards,
|
||||
legend: {
|
||||
show: legend_show,
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue