update monitoring
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Tobias Brunner 2020-06-21 12:48:26 +02:00
parent e4f4144c26
commit caa6d8b011
32 changed files with 290 additions and 122 deletions

View file

@ -18,7 +18,7 @@
"subdir": "Documentation/etcd-mixin" "subdir": "Documentation/etcd-mixin"
} }
}, },
"version": "09fcf55ca498ade69f4ceb5b3c6afc2930f3621f", "version": "d8c8f903eee10b8391abaef7758c38b2cd393c55",
"sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398=" "sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
}, },
{ {
@ -28,8 +28,8 @@
"subdir": "jsonnet/kube-prometheus" "subdir": "jsonnet/kube-prometheus"
} }
}, },
"version": "7e0c503b137c25b9e1e68d143f6a298fe125d1b7", "version": "6f488250fd70a0bb438e802854d5b74d7c07b67f",
"sum": "cEMmJvhn8dLnLqUVR0ql/XnwY8Jy3HH0YWIQQRaDD0o=" "sum": "2HRrgRAyMLHO7+nQnt979363oiqyUw/mtDpjwVksgcg="
}, },
{ {
"source": { "source": {
@ -38,8 +38,8 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "d0a871b710de7b764c05ced98dbd1eb32a681790", "version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41",
"sum": "cIOKRTNBUOl3a+QsaA/NjClmZAhyVJHlDFReKlXJBAs=" "sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA="
}, },
{ {
"source": { "source": {
@ -48,8 +48,8 @@
"subdir": "grafonnet" "subdir": "grafonnet"
} }
}, },
"version": "7ab8a79738de007c407b939b35e54e84c213d542", "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd",
"sum": "q2B0w9iyqTD99PJacSpHg9XshQN7kiupxaORQcAlb2E=" "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak="
}, },
{ {
"source": { "source": {
@ -58,7 +58,7 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "b9cc0f3529833096c043084c04bc7b3562a134c4", "version": "881db2241f0c5007c3e831caf34b0c645202b4ab",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE=" "sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
}, },
{ {
@ -79,8 +79,8 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "4626a8d0dd261dbefa91d9d60cf8bc8240bd053f", "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
"sum": "70WRBU/sDZ7i7CPufohrmssWb4nJQcUJDD+83HxbUto=" "sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
}, },
{ {
"source": { "source": {
@ -89,7 +89,7 @@
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
"version": "4626a8d0dd261dbefa91d9d60cf8bc8240bd053f", "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
}, },
{ {
@ -99,7 +99,7 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "cce1e3309ab2f42953933e441cbb20b54d986551", "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA=" "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
}, },
{ {
@ -109,7 +109,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "cce1e3309ab2f42953933e441cbb20b54d986551", "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
"sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8=" "sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
}, },
{ {
@ -119,7 +119,7 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "e92c7b5bbf8b2224bbf5c4eb4839eff3c0bcfb83", "version": "08ce3c6dd430deb51798826701a395e460620d60",
"sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc=" "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
}, },
{ {
@ -129,8 +129,8 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "58c445e6efdf24ceec0e77da9042c85cb500aa87", "version": "27f89ac651ce89307388bec31d73271f62f04f9a",
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=", "sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=",
"name": "prometheus" "name": "prometheus"
}, },
{ {

View file

@ -6,7 +6,7 @@ metadata:
namespace: monitoring namespace: monitoring
spec: spec:
encryptedData: encryptedData:
alertmanager.yaml: AgCSZ9T4zOLVP72H8bnLppeG6ETlLMtUpTzhDmzcVZR5hT+WPWEsZD+4u863b6q1+/1YSE8TYdKRAGUtKjt3NqDHShD65A41cIyuz7hIs8ZYJw2nC2lKVXpcaRn5oIzzrAosM5CGkaIXgp/RHnx6kEBD17hYSHjWaFjc/j/b/yHvafvJ+mKJs73wdYdigQzsV7oehxQ22grrY1Vamthdp4JpjTxZ9g0osKqFyrHSh1DbtFXFfabUO+XvsxXqCscg9CUcBRTH1d67Orgy23am7EXhqce97KzCu/+VYI0fdsE5gMUfwvzg1mW1Ac9bFXErLd6mDJyaAAIarTFG5kub4YhVNVhDvVVS5pViybrjnoHvyTlkNVYakzurYfOhuwwNlcYsCu3kI0ulTLQV+Y35CHijjyEs9Wgmpfwc2W1MPr5101QFMBiLC2Dn4SMMvWE1I4b0NhyBT49J+12B5PDSDH8CTPPTFR7YtQ7xOEH3nK4fa0MPNJwB8wjDjbpxYDTcExqQaoa5nzN+3lW4v0hbVAU+Lzj0tXHY8ZxzdBM3veFnccF8BHwEY9k4WIbW8uk/cH1n+ar2JQAaXavXMaUPZaBU6e6a25rob4XLWeZ0g8SrskJhfMBqblrfmjqjX+/RTRaLnQaCd87QypZ6T1oNyDO3c3r4dIuDmMeWikyyRJZ3FgNFAZKPOa1s3IFOG2GQ2t3otraKsDkZ3corgi+sr/DAJG67KbFWT6AWA4SCYkB+HLMMh06B6z93Uktt+c8tDOKoTcb/Twu9Zm+W03iN08uOQf9YGvphG4wrhEVdfUIZDPnToOY1x4mZQ5LUY6QGbsSuCCm+xKFFoMzd+1nvkafBL4Jep5Fv67DvlLAdEMUn0f1ouIQO+9N9KuDwcrjnbsUixPsJkjiteCo6U5kYYjoJrL5/EbPtkIiYJPC52hr9wTy8AivKHydvTtsvTParYtpAb0thlekOJnaUsyp4DaCwrBbLqufoPCQg4CowIzAGGrXhJyUau3CYVm+o09eEjbJx+YfENQE/pZ59UGAH58b0xeZN2RHr7PsOT5Zw6PkkbhYp5/2+SFcZWVPaG2ykFySUZt/+k0BmJRmxzUIG5+VJ9Hhcaa9GEgjsuJGTK/kvL0IDD11ZcaNvIHu7CoaLIG7yI0oNnUkHGJtypB7nqC5ikn9LOydQSMkOM49Vcqlvfex/h0E+DVpWh0EECc31KCv3l8iXn+iq4XVZkoKwSSAnUe62kTUHXWQ/L1ChyjOkSvFg1abO/c+hU1Fm6h7yydqgDxGiGD4qdB7Cj6VnIoeQUJYiLi3D83ak6qqyNR7qjatpeDWUw5Jff8JmUs6Rfv3MyU3XDosAAZpYWzEw7bChChLf0x/jLPMCznrP2Kh19QgbKR3ikrerCLCZemjz/3bLhioeFpivBJWAa0Yz8gUaHhYBQjc7wh4MzjFlcYbpKJYNPVkdzJy5CdSwLMMlYeZLGDn1vkubp+ByrDSKccLYuK5NDk+F+kQLhQiz1o6Dm4nX8nF+MEk/hyEQ9iEkOw7XrcKv9m79TmCthVTukdkeytn87Zd9REDveuC+fsfZJNZPvU9eOlxWuwR/XpeufLgPW11X+5LC5AUPY53WcE3D4Rc7mnULinCQsJiI2jGbXYH0lwqmwqxTO1N5j4cJtPlc2u/vk9khcgQBVqmCkvjFlK+AT86CiesdWoAIiX+26lM/Ug== alertmanager.yaml: 
AgC3a8LrOvy9kcsxmYnEXNpL3CS8nrZJJ47R0DXTkyaxsapopJUNJ5y29kq39GK2OKjkgtcKOTDpBfV8nZSqgtkHkSGz6po0gqvzuB07hrqxMlPB3gg8re1oUOzDt3cgk9E4oOtrufIvCmFS+L0SEVJYW7JmMWkApl4nMO8zKMvPTxjJpSAJghpBFt2EM9tMe08TF8Q7lUDAryKB8t1cAbOLL2sk8xhsQUssBevyABzSHSO3HBCrxfK9BF0LgAmzdhRttpHp980wStmLCVqCq5fTOqKVSsEn5tKD0Uly5/wctlf7UzBhvIIx/0dRBL1DWqupGhzC/bIxtFbH3w1ODKksyQ2sByTKENkU9fWcub1llq7WU7X0qtKVSanjyfkIldgVvp1ryX9wiNx1XucWPwNHjfOTAK/p/yNLhwleEUQbvT3TVu73b6BmNSGIi7r7g3B1s0jRyqZbmucXzxr8+4d5V+G1mprEMFUYUpFxHxRbz3Lc63Q7ojo0Q51KKioj3ux3SannFLLaVFGPp3T3ojCm7/PE9QFXYvoYsau7mXg+6C6i0nbamFN3GZkaw5d1FScONojy0m4Kwuh9R5rzoq3GtpPCqIU7AtoP4cWwV1eCmzOHx56wP2lAl9GOSlkJ9C54l+F/uJ4UUwVkBIvHzK2wR8ekR290U0JdiMWUbIHEK3w8YCXw5spDbZB/k/g5dBIzUhAIWlg7EvL00ftojS4RFirrb+tRjW1wU0/uILadf7gbHszQWCps9a7EbVFVWFCUN0phXhmgXSguZjCRktFt+UoFk/kXXM3jcJjDav1zyCjweW5LNmH7vpdd2ilVMmGAvPy9KDBSID/5J+wBQ1iPQPRkhhUGrsQe531dg+g+cbuIXtRbnLKSVlOV5WzSioiJ4JSpGoWqpZla/ODBlw2/fHzIBNc8mAiVAxNXfW9BTzBQ8Jv/7iXfj37XatTzxCAfIQNGSlajZSUkP7QZiyzfJUaeE+Oe6euHQiPMDswywrT2hVRTNIlDo51Y0XbNUFVT1erC399pzSiNETBZ7TYHioaF7ah+2uosC/4BssCE66yXME5RYXihb98QACmP5Gc/qdzTEsHc2h9ybgUoKmuyXgcUsF5KfKRC4ocKhw73QSXJdfwObTyrxzYC/3/Ga6kkOtzoHq/WdS8TJgcIgJ+6y/i0WxhrjukHrKHCqsNr1Hy8TKqgZoJSh8ps6phKNiLPnS27tTd+KvhfbC22fsMk4uTausAq8X917yTxTYqpBUsqm8ACFMIZr+7xj8lzZ0gQLLTwMqLc1h9Bank71++V6ROJVJVeil95eEPXiu/xQhEQ8dEd+D/TvT/i9HP286LggTR/c/PzfbbAz0xugehwMzzrioR/TjNhkuG5BFYMBHGtWcuhvaVn6mUpQjtZtWozBUae2e2MI/O1f6gXFyTosxlX5c1VWZpWkG2ASzuoVtAom4oVJLWraWvJ3Gb8iuWbDxsiV194vRw85H5KswyMJSnRSAF2AfDqmzDHlmxDhYbmJKxouBBEi72+Qdi5Xq/loKlLQmYz5zH41MgDqRNCVhdSnS/hF7t+JtrtMb3/G5YfXe/P3PLaM3R7Ct0B9Ph0GQjVDJ9EWuN/FT13rwlLEIOTcUxaAxhdDQ/Dy+vxKcQE8Yv4O5OUP/ZUWbzbt06mNlLJ5uSyc4jXgpf5XyHo8NQlt4ISdAK6Jh7qF8iyLmga7g==
template: template:
metadata: metadata:
creationTimestamp: null creationTimestamp: null

View file

@ -22,6 +22,24 @@ items:
"id": null, "id": null,
"links": [ "links": [
],
"panels": [
{
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
"datasource": null,
"description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"mode": "markdown",
"span": 12,
"title": "Notice",
"type": "text"
}
], ],
"refresh": "10s", "refresh": "10s",
"rows": [ "rows": [
@ -52,7 +70,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 2, "id": 3,
"interval": null, "interval": null,
"links": [ "links": [
@ -129,7 +147,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 3, "id": 4,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -247,7 +265,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 4, "id": 5,
"interval": null, "interval": null,
"links": [ "links": [
@ -323,7 +341,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 5, "id": 6,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -431,7 +449,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 6, "id": 7,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -524,7 +542,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 7, "id": 8,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -640,7 +658,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 8, "id": 9,
"interval": null, "interval": null,
"links": [ "links": [
@ -716,7 +734,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 9, "id": 10,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -824,7 +842,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 10, "id": 11,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -917,7 +935,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 11, "id": 12,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1022,7 +1040,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 12, "id": 13,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1114,7 +1132,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 13, "id": 14,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1206,7 +1224,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 14, "id": 15,
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -1311,7 +1329,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 15, "id": 16,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1403,7 +1421,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 16, "id": 17,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1502,7 +1520,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 17, "id": 18,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1614,7 +1632,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 18, "id": 19,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1706,7 +1724,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 19, "id": 20,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1798,7 +1816,7 @@ items:
"gridPos": { "gridPos": {
}, },
"id": 20, "id": 21,
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": false,
"avg": false, "avg": false,
@ -1995,7 +2013,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / API server", "title": "Kubernetes / API server",
"uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b",
"version": 0 "version": 0
@ -3820,7 +3838,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Networking / Cluster", "title": "Kubernetes / Networking / Cluster",
"uid": "ff635a025bcfea7bc3dd4f508990a3e9", "uid": "ff635a025bcfea7bc3dd4f508990a3e9",
"version": 0 "version": 0
@ -4946,7 +4964,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Controller Manager", "title": "Kubernetes / Controller Manager",
"uid": "72e0e05bef5099e5f049b05fdc429ed4", "uid": "72e0e05bef5099e5f049b05fdc429ed4",
"version": 0 "version": 0
@ -4988,6 +5006,7 @@ items:
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -6341,6 +6360,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -7487,7 +7507,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Cluster", "title": "Kubernetes / Compute Resources / Cluster",
"uid": "efa86fd1d0c121a26444b636a3f509a8", "uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0 "version": 0
@ -8757,6 +8777,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -9734,7 +9755,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Pods)", "title": "Kubernetes / Compute Resources / Namespace (Pods)",
"uid": "85a562078cdf77779eaa1add43ccec1e", "uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0 "version": 0
@ -10681,7 +10702,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Node (Pods)", "title": "Kubernetes / Compute Resources / Node (Pods)",
"uid": "200ac8fdbfbb74b39aff88118e4d1c2c", "uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
"version": 0 "version": 0
@ -11706,6 +11727,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -11804,6 +11826,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -11902,6 +11925,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -12000,6 +12024,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -12098,6 +12123,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -12196,6 +12222,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -12416,7 +12443,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Pod", "title": "Kubernetes / Compute Resources / Pod",
"uid": "6581e46e4e5c7ba40a07646395ef7b23", "uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0 "version": 0
@ -13187,6 +13214,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -14414,7 +14442,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Workload", "title": "Kubernetes / Compute Resources / Workload",
"uid": "a164a7f0339f99e89cea5cb47e9be617", "uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0 "version": 0
@ -15345,6 +15373,7 @@ items:
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"avg": false, "avg": false,
"current": false, "current": false,
@ -16371,7 +16400,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -16568,7 +16597,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Workloads)", "title": "Kubernetes / Compute Resources / Namespace (Workloads)",
"uid": "a87fb0d919ec0ea5f6543124e16c42a5", "uid": "a87fb0d919ec0ea5f6543124e16c42a5",
"version": 0 "version": 0
@ -19067,7 +19096,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Kubelet", "title": "Kubernetes / Kubelet",
"uid": "3138fa155d5915769fbded898ac09fd9", "uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0 "version": 0
@ -20480,7 +20509,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Pods)", "title": "Kubernetes / Networking / Namespace (Pods)",
"uid": "8b7a8b326d7a6f1f04244066368c67af", "uid": "8b7a8b326d7a6f1f04244066368c67af",
"version": 0 "version": 0
@ -22161,7 +22190,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Workload)", "title": "Kubernetes / Networking / Namespace (Workload)",
"uid": "bbb2a765a623ae38130206c7d94a160f", "uid": "bbb2a765a623ae38130206c7d94a160f",
"version": 0 "version": 0
@ -23109,7 +23138,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "USE Method / Cluster", "title": "USE Method / Cluster",
"uid": "3e97d1d02672cdd0861f4c97c64f89b2", "uid": "3e97d1d02672cdd0861f4c97c64f89b2",
"version": 0 "version": 0
@ -24084,7 +24113,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "USE Method / Node", "title": "USE Method / Node",
"uid": "fac67cfbe174d3ef53eb473d73d9212f", "uid": "fac67cfbe174d3ef53eb473d73d9212f",
"version": 0 "version": 0
@ -25062,7 +25091,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Nodes", "title": "Nodes",
"uid": "fa49a4706d07a042595b664c87fb33ea", "uid": "fa49a4706d07a042595b664c87fb33ea",
"version": 0 "version": 0
@ -25621,7 +25650,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Persistent Volumes", "title": "Kubernetes / Persistent Volumes",
"uid": "919b92a8e8041bd567af9edab12c840c", "uid": "919b92a8e8041bd567af9edab12c840c",
"version": 0 "version": 0
@ -26801,7 +26830,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Networking / Pod", "title": "Kubernetes / Networking / Pod",
"uid": "7a18067ce943a40ae25454675c19ff5c", "uid": "7a18067ce943a40ae25454675c19ff5c",
"version": 0 "version": 0
@ -30851,7 +30880,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Proxy", "title": "Kubernetes / Proxy",
"uid": "632e265de029684c40b21cb76bca4f94", "uid": "632e265de029684c40b21cb76bca4f94",
"version": 0 "version": 0
@ -31901,7 +31930,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Scheduler", "title": "Kubernetes / Scheduler",
"uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0 "version": 0
@ -32812,7 +32841,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / StatefulSets", "title": "Kubernetes / StatefulSets",
"uid": "a31c1f46e6f727cb37c0d731a7245005", "uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0 "version": 0
@ -34987,7 +35016,7 @@ items:
"30d" "30d"
] ]
}, },
"timezone": "", "timezone": "UTC",
"title": "Kubernetes / Networking / Workload", "title": "Kubernetes / Networking / Workload",
"uid": "728bf77cc1166d2f3133bf25846876cc", "uid": "728bf77cc1166d2f3133bf25846876cc",
"version": 0 "version": 0

View file

@ -23,7 +23,7 @@ spec:
- --config=/etc/adapter/config.yaml - --config=/etc/adapter/config.yaml
- --logtostderr=true - --logtostderr=true
- --metrics-relist-interval=1m - --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ - --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
- --secure-port=6443 - --secure-port=6443
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.7.0 image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.7.0
name: prometheus-adapter name: prometheus-adapter

View file

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:
@ -19,4 +19,4 @@ spec:
matchLabels: matchLabels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0

View file

@ -1065,16 +1065,22 @@ spec:
has not been rolled out. has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: | expr: |
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics"}
)
*
( (
kube_statefulset_replicas{job="kube-state-metrics"} max without (revision) (
!= kube_statefulset_status_current_revision{job="kube-state-metrics"}
kube_statefulset_status_replicas_updated{job="kube-state-metrics"} unless
kube_statefulset_status_update_revision{job="kube-state-metrics"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
)
) and (
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
==
0
) )
for: 15m for: 15m
labels: labels:
@ -1391,6 +1397,10 @@ spec:
{{ $labels.verb }} {{ $labels.resource }}. {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: | expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
and on (verb,resource)
( (
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
> >
@ -1402,10 +1412,6 @@ spec:
) )
) > on (verb) group_left() ) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
and on (verb,resource)
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
@ -1770,10 +1776,13 @@ spec:
rules: rules:
- alert: AlertmanagerConfigInconsistent - alert: AlertmanagerConfigInconsistent
annotations: annotations:
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` message: |
are out of sync. The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
expr: | expr: |
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
for: 5m for: 5m
labels: labels:
severity: critical severity: critical

View file

@ -2177,6 +2177,15 @@ spec:
of origin for each alert and metric that is user created. The label of origin for each alert and metric that is user created. The label
value will always be the namespace of the object that is being created. value will always be the namespace of the object that is being created.
type: string type: string
enforcedSampleLimit:
description: EnforcedSampleLimit defines global limit on number of
scraped samples that will be accepted. This overrides any SampleLimit
set per ServiceMonitor or/and PodMonitor. It is meant to be used
by admins to enforce the SampleLimit to keep overall number of samples/series
under the desired limit. Note that if SampleLimit is lower, that
value will be taken instead.
format: int64
type: integer
evaluationInterval: evaluationInterval:
description: Interval between consecutive evaluations. description: Interval between consecutive evaluations.
type: string type: string
@ -3428,6 +3437,27 @@ spec:
instance name. Defaults to the value of `prometheus`. External label instance name. Defaults to the value of `prometheus`. External label
will _not_ be added when value is set to empty string (`""`). will _not_ be added when value is set to empty string (`""`).
type: string type: string
prometheusRulesExcludedFromEnforce:
description: PrometheusRulesExcludedFromEnforce - list of prometheus
rules to be excluded from enforcing of adding namespace labels.
Works only if enforcedNamespaceLabel is set. Make sure both
ruleNamespace and ruleName are set for each pair.
items:
description: PrometheusRuleExcludeConfig enables users to configure
excluded PrometheusRule names and their namespaces to be ignored
while enforcing namespace label for alerts and metrics.
properties:
ruleName:
description: RuleName - name of excluded rule
type: string
ruleNamespace:
description: RuleNamespace - namespace of excluded rule
type: string
required:
- ruleName
- ruleNamespace
type: object
type: array
query: query:
description: QuerySpec defines the query command line flags when starting description: QuerySpec defines the query command line flags when starting
Prometheus. Prometheus.
@ -4114,6 +4144,10 @@ spec:
scrapeInterval: scrapeInterval:
description: Interval between consecutive scrapes. description: Interval between consecutive scrapes.
type: string type: string
scrapeTimeout:
description: Number of seconds to wait for target to respond before
erroring.
type: string
secrets: secrets:
description: Secrets is a list of Secrets in the same namespace as description: Secrets is a list of Secrets in the same namespace as
the Prometheus object, which shall be mounted into the Prometheus the Prometheus object, which shall be mounted into the Prometheus
@ -4762,6 +4796,12 @@ spec:
logLevel: logLevel:
description: LogLevel for Thanos sidecar to be configured with. description: LogLevel for Thanos sidecar to be configured with.
type: string type: string
minTime:
description: MinTime for Thanos sidecar to be configured with.
Option can be a constant time in RFC3339 format or time duration
relative to current time, such as -1d or 2h45m. Valid duration
units are ms, s, m, h, d, w, y.
type: string
objectStorageConfig: objectStorageConfig:
description: ObjectStorageConfig configures object storage in description: ObjectStorageConfig configures object storage in
Thanos. Thanos.

View file

@ -2998,6 +2998,27 @@ spec:
priorityClassName: priorityClassName:
description: Priority class assigned to the Pods description: Priority class assigned to the Pods
type: string type: string
prometheusRulesExcludedFromEnforce:
description: PrometheusRulesExcludedFromEnforce - list of Prometheus
rules to be excluded from enforcing of adding namespace labels.
Works only if enforcedNamespaceLabel is set. Make sure both
ruleNamespace and ruleName are set for each pair.
items:
description: PrometheusRuleExcludeConfig enables users to configure
excluded PrometheusRule names and their namespaces to be ignored
while enforcing namespace label for alerts and metrics.
properties:
ruleName:
description: RuleName - name of excluded rule
type: string
ruleNamespace:
description: RuleNamespace - namespace of excluded rule
type: string
required:
- ruleName
- ruleNamespace
type: object
type: array
queryConfig: queryConfig:
description: Define configuration for connecting to thanos query instances. description: Define configuration for connecting to thanos query instances.
If this is defined, the QueryEndpoints field will be ignored. Maps If this is defined, the QueryEndpoints field will be ignored. Maps

View file

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
rules: rules:
- apiGroups: - apiGroups:

View file

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
roleRef: roleRef:
apiGroup: rbac.authorization.k8s.io apiGroup: rbac.authorization.k8s.io

View file

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:
@ -18,15 +18,15 @@ spec:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
spec: spec:
containers: containers:
- args: - args:
- --kubelet-service=kube-system/kubelet - --kubelet-service=kube-system/kubelet
- --logtostderr=true - --logtostderr=true
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0 - --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.39.0 - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.40.0
image: quay.io/coreos/prometheus-operator:v0.39.0 image: quay.io/coreos/prometheus-operator:v0.40.0
name: prometheus-operator name: prometheus-operator
ports: ports:
- containerPort: 8080 - containerPort: 8080

View file

@ -4,7 +4,7 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring
spec: spec:

View file

@ -4,6 +4,6 @@ metadata:
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0 app.kubernetes.io/version: v0.40.0
name: prometheus-operator name: prometheus-operator
namespace: monitoring namespace: monitoring

View file

@ -7,10 +7,15 @@
{ {
alert: 'AlertmanagerConfigInconsistent', alert: 'AlertmanagerConfigInconsistent',
annotations: { annotations: {
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
}, },
expr: ||| expr: |||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config, ||| % $._config,
'for': '5m', 'for': '5m',
labels: { labels: {

View file

@ -26,7 +26,7 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "release-0.39" "version": "release-0.40"
}, },
{ {
"source": { "source": {

View file

@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
prometheusAdapter+:: { prometheusAdapter+:: {
name: 'prometheus-adapter', name: 'prometheus-adapter',
labels: { name: $._config.prometheusAdapter.name }, labels: { name: $._config.prometheusAdapter.name },
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc:9090/', prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
config: { config: {
resourceRules: { resourceRules: {
cpu: { cpu: {

File diff suppressed because one or more lines are too long

View file

@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
}, },
versions+:: { versions+:: {
prometheusOperator: 'v0.39.0', prometheusOperator: 'v0.40.0',
prometheusConfigReloader: self.prometheusOperator, prometheusConfigReloader: self.prometheusOperator,
configmapReloader: 'v0.3.0', configmapReloader: 'v0.3.0',
}, },

File diff suppressed because one or more lines are too long

View file

@ -11,8 +11,8 @@
* @param fill Fill, integer from 0 to 10 * @param fill Fill, integer from 0 to 10
* @param linewidth Line Width, integer from 0 to 10 * @param linewidth Line Width, integer from 0 to 10
* @param decimals Override automatic decimal precision for legend and tooltip. If null, not added to the json output. * @param decimals Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
* @param decimals1Y Override automatic decimal precision for the first Y axis. If null, use decimals parameter. * @param decimalsY1 Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
* @param decimals2Y Override automatic decimal precision for the second Y axis. If null, use decimals parameter. * @param decimalsY2 Override automatic decimal precision for the second Y axis. If null, use decimals parameter.
* @param min_span Min span * @param min_span Min span
* @param format Unit of the Y axes * @param format Unit of the Y axes
* @param formatY1 Unit of the first Y axis * @param formatY1 Unit of the first Y axis

View file

@ -192,6 +192,44 @@ $ jsonnet -J vendor -S -e 'std.manifestYamlDoc((import "mixin.libsonnet").promet
$ jsonnet -J vendor -m files/dashboards -e '(import "mixin.libsonnet").grafanaDashboards' $ jsonnet -J vendor -m files/dashboards -e '(import "mixin.libsonnet").grafanaDashboards'
``` ```
### Customising alert annotations
The steps described below extend the existing mixin library without modifying the original git repository. This is to make consuming updates to your extended alert definitions easier. These definitions can reside outside of this repository and be added to your own custom location, where you can define your alert dependencies in your `jsonnetfile.json` and add customisations to the existing definitions.
In your working directory, create a new file `kubernetes_mixin_override.libsonnet` with the following:
```
local utils = import 'lib/utils.libsonnet';
(import 'mixin.libsonnet') +
(
{
prometheusAlerts+::
// The specialAlerts can be in any other config file
local slack = 'observability';
local specialAlerts = {
KubePodCrashLooping: { slack_channel: slack },
KubePodNotReady: { slack_channel: slack },
};
local addExtraAnnotations(rule) = rule {
[if 'alert' in rule then 'annotations']+: {
dashboard: 'https://foo.bar.co',
[if rule.alert in specialAlerts then 'slack_channel']: specialAlerts[rule.alert].slack_channel,
},
};
utils.mapRuleGroups(addExtraAnnotations),
}
)
```
Create a new file, `lib/kubernetes_customised_alerts.jsonnet`, with the following:
```
std.manifestYamlDoc((import '../kubernetes_mixin_override.libsonnet').prometheusAlerts)
```
Running `jsonnet -S lib/kubernetes_customised_alerts.jsonnet` will build the alerts with your customisations.
The same result can be achieved by modifying the existing `config.libsonnet` with the content of `kubernetes_mixin_override.libsonnet`.
## Background ## Background
* For more motivation, see * For more motivation, see

View file

@ -120,16 +120,22 @@
}, },
{ {
expr: ||| expr: |||
max without (revision) (
kube_statefulset_status_current_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
unless
kube_statefulset_status_update_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
*
( (
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} max without (revision) (
!= kube_statefulset_status_current_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} unless
kube_statefulset_status_update_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
*
(
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
) and (
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
==
0
) )
||| % $._config, ||| % $._config,
labels: { labels: {

View file

@ -48,6 +48,10 @@ local utils = import 'utils.libsonnet';
{ {
alert: 'KubeAPILatencyHigh', alert: 'KubeAPILatencyHigh',
expr: ||| expr: |||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{%(kubeApiserverSelector)s,quantile="0.99"}
>
%(kubeAPILatencyWarningSeconds)s
and on (verb,resource)
( (
cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s} cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s}
> >
@ -59,10 +63,6 @@ local utils = import 'utils.libsonnet';
) )
) > on (verb) group_left() ) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s} >= 0) 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s} >= 0)
and on (verb,resource)
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{%(kubeApiserverSelector)s,quantile="0.99"}
>
%(kubeAPILatencyWarningSeconds)s
||| % $._config, ||| % $._config,
'for': '5m', 'for': '5m',
labels: { labels: {

View file

@ -69,6 +69,7 @@
// The default refresh time for all dashboards, default to 10s // The default refresh time for all dashboards, default to 10s
refresh: '10s', refresh: '10s',
minimumTimeInterval: '1m',
}, },
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel. // Opt-in to multiCluster dashboards by overriding this and the clusterLabel.

View file

@ -272,6 +272,20 @@ local singlestat = grafana.singlestat;
sort=1, sort=1,
) )
) )
.addPanel(
grafana.text.new(
title='Notice',
content='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
description='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
span=12,
),
gridPos={
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
)
.addRow( .addRow(
row.new() row.new()
.addPanel(availability1d) .addPanel(availability1d)

View file

@ -6,7 +6,7 @@
grafanaDashboards:: { grafanaDashboards:: {
[filename]: grafanaDashboards[filename] { [filename]: grafanaDashboards[filename] {
uid: std.md5(filename), uid: std.md5(filename),
timezone: '', timezone: 'UTC',
// Modify tooltip to only show a single value // Modify tooltip to only show a single value
rows: [ rows: [

View file

@ -96,7 +96,8 @@ local template = grafana.template;
}) })
.addPanel( .addPanel(
g.panel('CPU Utilisation') + g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
) )
.addPanel( .addPanel(
g.panel('CPU Requests Commitment') + g.panel('CPU Requests Commitment') +
@ -183,7 +184,8 @@ local template = grafana.template;
g.tablePanel( g.tablePanel(
networkColumns, networkColumns,
networkTableStyles networkTableStyles
), ) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(

View file

@ -236,7 +236,8 @@ local template = grafana.template;
g.tablePanel( g.tablePanel(
networkColumns, networkColumns,
networkTableStyles networkTableStyles
), ) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(

View file

@ -215,7 +215,7 @@ local template = grafana.template;
g.panel('Receive Bandwidth') + g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -224,7 +224,7 @@ local template = grafana.template;
g.panel('Transmit Bandwidth') + g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -233,7 +233,7 @@ local template = grafana.template;
g.panel('Rate of Received Packets') + g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -242,7 +242,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets') + g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -251,7 +251,7 @@ local template = grafana.template;
g.panel('Rate of Received Packets Dropped') + g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -260,7 +260,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') + g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack + g.stack +
{ yaxes: g.yaxes('Bps') }, { yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh }, ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
}, },

View file

@ -276,7 +276,8 @@ local template = grafana.template;
g.tablePanel( g.tablePanel(
networkColumns, networkColumns,
networkTableStyles networkTableStyles
), ) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(
@ -376,7 +377,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets Dropped') + g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(||| g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval]) (sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
* on (namespace,pod) * on (namespace,pod)
group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload)) group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') + ||| % $._config, '{{workload}}') +
g.stack + g.stack +

View file

@ -218,7 +218,8 @@ local template = grafana.template;
g.tablePanel( g.tablePanel(
networkColumns, networkColumns,
networkTableStyles networkTableStyles
), ) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
) )
) )
.addRow( .addRow(

View file

@ -10,12 +10,12 @@ To use them, you need to have `jsonnet` (v0.13+) and `jb` installed. If you
have a working Go development environment, it's easiest to run the following: have a working Go development environment, it's easiest to run the following:
```bash ```bash
$ go get github.com/google/go-jsonnet/cmd/jsonnet $ go get github.com/google/go-jsonnet/cmd/jsonnet
$ go get github.com/google/go-jsonnet/cmd/jsonnetfmt
$ go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb $ go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
``` ```
_Note: The make targets `lint` and `fmt` need the `jsonnetfmt` binary, which is _Note: The make targets `lint` and `fmt` need the `jsonnetfmt` binary, which is
currently not included in the Go implementation of `jsonnet`. For the time available from [v.0.16.0](https://github.com/google/jsonnet/releases/tag/v0.16.0) in the Go implementation of `jsonnet`. If your jsonnet version is older than 0.16.0 you have to either upgrade or install the [C++ version of
being, you have to install the [C++ version of
jsonnetfmt](https://github.com/google/jsonnet) if you want to use `make lint` jsonnetfmt](https://github.com/google/jsonnet) if you want to use `make lint`
or `make fmt`._ or `make fmt`._