update monitoring
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Tobias Brunner 2020-06-21 12:48:26 +02:00
parent e4f4144c26
commit caa6d8b011
32 changed files with 290 additions and 122 deletions

View file

@ -18,7 +18,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "09fcf55ca498ade69f4ceb5b3c6afc2930f3621f",
"version": "d8c8f903eee10b8391abaef7758c38b2cd393c55",
"sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
},
{
@ -28,8 +28,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "7e0c503b137c25b9e1e68d143f6a298fe125d1b7",
"sum": "cEMmJvhn8dLnLqUVR0ql/XnwY8Jy3HH0YWIQQRaDD0o="
"version": "6f488250fd70a0bb438e802854d5b74d7c07b67f",
"sum": "2HRrgRAyMLHO7+nQnt979363oiqyUw/mtDpjwVksgcg="
},
{
"source": {
@ -38,8 +38,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "d0a871b710de7b764c05ced98dbd1eb32a681790",
"sum": "cIOKRTNBUOl3a+QsaA/NjClmZAhyVJHlDFReKlXJBAs="
"version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41",
"sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA="
},
{
"source": {
@ -48,8 +48,8 @@
"subdir": "grafonnet"
}
},
"version": "7ab8a79738de007c407b939b35e54e84c213d542",
"sum": "q2B0w9iyqTD99PJacSpHg9XshQN7kiupxaORQcAlb2E="
"version": "8fb95bd89990e493a8534205ee636bfcb8db67bd",
"sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak="
},
{
"source": {
@ -58,7 +58,7 @@
"subdir": "grafana-builder"
}
},
"version": "b9cc0f3529833096c043084c04bc7b3562a134c4",
"version": "881db2241f0c5007c3e831caf34b0c645202b4ab",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
},
{
@ -79,8 +79,8 @@
"subdir": ""
}
},
"version": "4626a8d0dd261dbefa91d9d60cf8bc8240bd053f",
"sum": "70WRBU/sDZ7i7CPufohrmssWb4nJQcUJDD+83HxbUto="
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
"sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
},
{
"source": {
@ -89,7 +89,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "4626a8d0dd261dbefa91d9d60cf8bc8240bd053f",
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
@ -99,7 +99,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "cce1e3309ab2f42953933e441cbb20b54d986551",
"version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
@ -109,7 +109,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "cce1e3309ab2f42953933e441cbb20b54d986551",
"version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
"sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
},
{
@ -119,7 +119,7 @@
"subdir": "docs/node-mixin"
}
},
"version": "e92c7b5bbf8b2224bbf5c4eb4839eff3c0bcfb83",
"version": "08ce3c6dd430deb51798826701a395e460620d60",
"sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
},
{
@ -129,8 +129,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "58c445e6efdf24ceec0e77da9042c85cb500aa87",
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=",
"version": "27f89ac651ce89307388bec31d73271f62f04f9a",
"sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=",
"name": "prometheus"
},
{

View file

@ -6,7 +6,7 @@ metadata:
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgCSZ9T4zOLVP72H8bnLppeG6ETlLMtUpTzhDmzcVZR5hT+WPWEsZD+4u863b6q1+/1YSE8TYdKRAGUtKjt3NqDHShD65A41cIyuz7hIs8ZYJw2nC2lKVXpcaRn5oIzzrAosM5CGkaIXgp/RHnx6kEBD17hYSHjWaFjc/j/b/yHvafvJ+mKJs73wdYdigQzsV7oehxQ22grrY1Vamthdp4JpjTxZ9g0osKqFyrHSh1DbtFXFfabUO+XvsxXqCscg9CUcBRTH1d67Orgy23am7EXhqce97KzCu/+VYI0fdsE5gMUfwvzg1mW1Ac9bFXErLd6mDJyaAAIarTFG5kub4YhVNVhDvVVS5pViybrjnoHvyTlkNVYakzurYfOhuwwNlcYsCu3kI0ulTLQV+Y35CHijjyEs9Wgmpfwc2W1MPr5101QFMBiLC2Dn4SMMvWE1I4b0NhyBT49J+12B5PDSDH8CTPPTFR7YtQ7xOEH3nK4fa0MPNJwB8wjDjbpxYDTcExqQaoa5nzN+3lW4v0hbVAU+Lzj0tXHY8ZxzdBM3veFnccF8BHwEY9k4WIbW8uk/cH1n+ar2JQAaXavXMaUPZaBU6e6a25rob4XLWeZ0g8SrskJhfMBqblrfmjqjX+/RTRaLnQaCd87QypZ6T1oNyDO3c3r4dIuDmMeWikyyRJZ3FgNFAZKPOa1s3IFOG2GQ2t3otraKsDkZ3corgi+sr/DAJG67KbFWT6AWA4SCYkB+HLMMh06B6z93Uktt+c8tDOKoTcb/Twu9Zm+W03iN08uOQf9YGvphG4wrhEVdfUIZDPnToOY1x4mZQ5LUY6QGbsSuCCm+xKFFoMzd+1nvkafBL4Jep5Fv67DvlLAdEMUn0f1ouIQO+9N9KuDwcrjnbsUixPsJkjiteCo6U5kYYjoJrL5/EbPtkIiYJPC52hr9wTy8AivKHydvTtsvTParYtpAb0thlekOJnaUsyp4DaCwrBbLqufoPCQg4CowIzAGGrXhJyUau3CYVm+o09eEjbJx+YfENQE/pZ59UGAH58b0xeZN2RHr7PsOT5Zw6PkkbhYp5/2+SFcZWVPaG2ykFySUZt/+k0BmJRmxzUIG5+VJ9Hhcaa9GEgjsuJGTK/kvL0IDD11ZcaNvIHu7CoaLIG7yI0oNnUkHGJtypB7nqC5ikn9LOydQSMkOM49Vcqlvfex/h0E+DVpWh0EECc31KCv3l8iXn+iq4XVZkoKwSSAnUe62kTUHXWQ/L1ChyjOkSvFg1abO/c+hU1Fm6h7yydqgDxGiGD4qdB7Cj6VnIoeQUJYiLi3D83ak6qqyNR7qjatpeDWUw5Jff8JmUs6Rfv3MyU3XDosAAZpYWzEw7bChChLf0x/jLPMCznrP2Kh19QgbKR3ikrerCLCZemjz/3bLhioeFpivBJWAa0Yz8gUaHhYBQjc7wh4MzjFlcYbpKJYNPVkdzJy5CdSwLMMlYeZLGDn1vkubp+ByrDSKccLYuK5NDk+F+kQLhQiz1o6Dm4nX8nF+MEk/hyEQ9iEkOw7XrcKv9m79TmCthVTukdkeytn87Zd9REDveuC+fsfZJNZPvU9eOlxWuwR/XpeufLgPW11X+5LC5AUPY53WcE3D4Rc7mnULinCQsJiI2jGbXYH0lwqmwqxTO1N5j4cJtPlc2u/vk9khcgQBVqmCkvjFlK+AT86CiesdWoAIiX+26lM/Ug==
alertmanager.yaml: AgC3a8LrOvy9kcsxmYnEXNpL3CS8nrZJJ47R0DXTkyaxsapopJUNJ5y29kq39GK2OKjkgtcKOTDpBfV8nZSqgtkHkSGz6po0gqvzuB07hrqxMlPB3gg8re1oUOzDt3cgk9E4oOtrufIvCmFS+L0SEVJYW7JmMWkApl4nMO8zKMvPTxjJpSAJghpBFt2EM9tMe08TF8Q7lUDAryKB8t1cAbOLL2sk8xhsQUssBevyABzSHSO3HBCrxfK9BF0LgAmzdhRttpHp980wStmLCVqCq5fTOqKVSsEn5tKD0Uly5/wctlf7UzBhvIIx/0dRBL1DWqupGhzC/bIxtFbH3w1ODKksyQ2sByTKENkU9fWcub1llq7WU7X0qtKVSanjyfkIldgVvp1ryX9wiNx1XucWPwNHjfOTAK/p/yNLhwleEUQbvT3TVu73b6BmNSGIi7r7g3B1s0jRyqZbmucXzxr8+4d5V+G1mprEMFUYUpFxHxRbz3Lc63Q7ojo0Q51KKioj3ux3SannFLLaVFGPp3T3ojCm7/PE9QFXYvoYsau7mXg+6C6i0nbamFN3GZkaw5d1FScONojy0m4Kwuh9R5rzoq3GtpPCqIU7AtoP4cWwV1eCmzOHx56wP2lAl9GOSlkJ9C54l+F/uJ4UUwVkBIvHzK2wR8ekR290U0JdiMWUbIHEK3w8YCXw5spDbZB/k/g5dBIzUhAIWlg7EvL00ftojS4RFirrb+tRjW1wU0/uILadf7gbHszQWCps9a7EbVFVWFCUN0phXhmgXSguZjCRktFt+UoFk/kXXM3jcJjDav1zyCjweW5LNmH7vpdd2ilVMmGAvPy9KDBSID/5J+wBQ1iPQPRkhhUGrsQe531dg+g+cbuIXtRbnLKSVlOV5WzSioiJ4JSpGoWqpZla/ODBlw2/fHzIBNc8mAiVAxNXfW9BTzBQ8Jv/7iXfj37XatTzxCAfIQNGSlajZSUkP7QZiyzfJUaeE+Oe6euHQiPMDswywrT2hVRTNIlDo51Y0XbNUFVT1erC399pzSiNETBZ7TYHioaF7ah+2uosC/4BssCE66yXME5RYXihb98QACmP5Gc/qdzTEsHc2h9ybgUoKmuyXgcUsF5KfKRC4ocKhw73QSXJdfwObTyrxzYC/3/Ga6kkOtzoHq/WdS8TJgcIgJ+6y/i0WxhrjukHrKHCqsNr1Hy8TKqgZoJSh8ps6phKNiLPnS27tTd+KvhfbC22fsMk4uTausAq8X917yTxTYqpBUsqm8ACFMIZr+7xj8lzZ0gQLLTwMqLc1h9Bank71++V6ROJVJVeil95eEPXiu/xQhEQ8dEd+D/TvT/i9HP286LggTR/c/PzfbbAz0xugehwMzzrioR/TjNhkuG5BFYMBHGtWcuhvaVn6mUpQjtZtWozBUae2e2MI/O1f6gXFyTosxlX5c1VWZpWkG2ASzuoVtAom4oVJLWraWvJ3Gb8iuWbDxsiV194vRw85H5KswyMJSnRSAF2AfDqmzDHlmxDhYbmJKxouBBEi72+Qdi5Xq/loKlLQmYz5zH41MgDqRNCVhdSnS/hF7t+JtrtMb3/G5YfXe/P3PLaM3R7Ct0B9Ph0GQjVDJ9EWuN/FT13rwlLEIOTcUxaAxhdDQ/Dy+vxKcQE8Yv4O5OUP/ZUWbzbt06mNlLJ5uSyc4jXgpf5XyHo8NQlt4ISdAK6Jh7qF8iyLmga7g==
template:
metadata:
creationTimestamp: null

View file

@ -22,6 +22,24 @@ items:
"id": null,
"links": [
],
"panels": [
{
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
"datasource": null,
"description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"mode": "markdown",
"span": 12,
"title": "Notice",
"type": "text"
}
],
"refresh": "10s",
"rows": [
@ -52,7 +70,7 @@ items:
"gridPos": {
},
"id": 2,
"id": 3,
"interval": null,
"links": [
@ -129,7 +147,7 @@ items:
"gridPos": {
},
"id": 3,
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
@ -247,7 +265,7 @@ items:
"gridPos": {
},
"id": 4,
"id": 5,
"interval": null,
"links": [
@ -323,7 +341,7 @@ items:
"gridPos": {
},
"id": 5,
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
@ -431,7 +449,7 @@ items:
"gridPos": {
},
"id": 6,
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
@ -524,7 +542,7 @@ items:
"gridPos": {
},
"id": 7,
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
@ -640,7 +658,7 @@ items:
"gridPos": {
},
"id": 8,
"id": 9,
"interval": null,
"links": [
@ -716,7 +734,7 @@ items:
"gridPos": {
},
"id": 9,
"id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
@ -824,7 +842,7 @@ items:
"gridPos": {
},
"id": 10,
"id": 11,
"legend": {
"alignAsTable": false,
"avg": false,
@ -917,7 +935,7 @@ items:
"gridPos": {
},
"id": 11,
"id": 12,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1022,7 +1040,7 @@ items:
"gridPos": {
},
"id": 12,
"id": 13,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1114,7 +1132,7 @@ items:
"gridPos": {
},
"id": 13,
"id": 14,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1206,7 +1224,7 @@ items:
"gridPos": {
},
"id": 14,
"id": 15,
"legend": {
"alignAsTable": true,
"avg": false,
@ -1311,7 +1329,7 @@ items:
"gridPos": {
},
"id": 15,
"id": 16,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1403,7 +1421,7 @@ items:
"gridPos": {
},
"id": 16,
"id": 17,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1502,7 +1520,7 @@ items:
"gridPos": {
},
"id": 17,
"id": 18,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1614,7 +1632,7 @@ items:
"gridPos": {
},
"id": 18,
"id": 19,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1706,7 +1724,7 @@ items:
"gridPos": {
},
"id": 19,
"id": 20,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1798,7 +1816,7 @@ items:
"gridPos": {
},
"id": 20,
"id": 21,
"legend": {
"alignAsTable": false,
"avg": false,
@ -1995,7 +2013,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / API server",
"uid": "09ec8aa1e996d6ffcd6817bbaff4db1b",
"version": 0
@ -3820,7 +3838,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Networking / Cluster",
"uid": "ff635a025bcfea7bc3dd4f508990a3e9",
"version": 0
@ -4946,7 +4964,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Controller Manager",
"uid": "72e0e05bef5099e5f049b05fdc429ed4",
"version": 0
@ -4988,6 +5006,7 @@ items:
"fill": 1,
"format": "percentunit",
"id": 1,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -6341,6 +6360,7 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 11,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -7487,7 +7507,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Cluster",
"uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0
@ -8757,6 +8777,7 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 9,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -9734,7 +9755,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Pods)",
"uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
@ -10681,7 +10702,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Node (Pods)",
"uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
"version": 0
@ -11706,6 +11727,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -11804,6 +11826,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -11902,6 +11925,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -12000,6 +12024,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -12098,6 +12123,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -12196,6 +12222,7 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -12416,7 +12443,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Pod",
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
@ -13187,6 +13214,7 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -14414,7 +14442,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Workload",
"uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0
@ -15345,6 +15373,7 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -16371,7 +16400,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -16568,7 +16597,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Workloads)",
"uid": "a87fb0d919ec0ea5f6543124e16c42a5",
"version": 0
@ -19067,7 +19096,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Kubelet",
"uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
@ -20480,7 +20509,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Pods)",
"uid": "8b7a8b326d7a6f1f04244066368c67af",
"version": 0
@ -22161,7 +22190,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Workload)",
"uid": "bbb2a765a623ae38130206c7d94a160f",
"version": 0
@ -23109,7 +23138,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "USE Method / Cluster",
"uid": "3e97d1d02672cdd0861f4c97c64f89b2",
"version": 0
@ -24084,7 +24113,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "USE Method / Node",
"uid": "fac67cfbe174d3ef53eb473d73d9212f",
"version": 0
@ -25062,7 +25091,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Nodes",
"uid": "fa49a4706d07a042595b664c87fb33ea",
"version": 0
@ -25621,7 +25650,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Persistent Volumes",
"uid": "919b92a8e8041bd567af9edab12c840c",
"version": 0
@ -26801,7 +26830,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Networking / Pod",
"uid": "7a18067ce943a40ae25454675c19ff5c",
"version": 0
@ -30851,7 +30880,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Proxy",
"uid": "632e265de029684c40b21cb76bca4f94",
"version": 0
@ -31901,7 +31930,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Scheduler",
"uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0
@ -32812,7 +32841,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / StatefulSets",
"uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
@ -34987,7 +35016,7 @@ items:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Networking / Workload",
"uid": "728bf77cc1166d2f3133bf25846876cc",
"version": 0

View file

@ -23,7 +23,7 @@ spec:
- --config=/etc/adapter/config.yaml
- --logtostderr=true
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
- --secure-port=6443
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.7.0
name: prometheus-adapter

View file

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
namespace: monitoring
spec:
@ -19,4 +19,4 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0

View file

@ -1065,16 +1065,22 @@ spec:
has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: |
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
)
) and (
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
==
0
)
for: 15m
labels:
@ -1391,6 +1397,10 @@ spec:
{{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
and on (verb,resource)
(
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
>
@ -1402,10 +1412,6 @@ spec:
)
) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
and on (verb,resource)
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
for: 5m
labels:
severity: warning
@ -1770,10 +1776,13 @@ spec:
rules:
- alert: AlertmanagerConfigInconsistent
annotations:
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
are out of sync.
message: |
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
expr: |
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
for: 5m
labels:
severity: critical

View file

@ -2177,6 +2177,15 @@ spec:
of origin for each alert and metric that is user created. The label
value will always be the namespace of the object that is being created.
type: string
enforcedSampleLimit:
description: EnforcedSampleLimit defines global limit on number of
scraped samples that will be accepted. This overrides any SampleLimit
set per ServiceMonitor or/and PodMonitor. It is meant to be used
by admins to enforce the SampleLimit to keep overall number of samples/series
under the desired limit. Note that if SampleLimit is lower, that
value will be taken instead.
format: int64
type: integer
evaluationInterval:
description: Interval between consecutive evaluations.
type: string
@ -3428,6 +3437,27 @@ spec:
instance name. Defaults to the value of `prometheus`. External label
will _not_ be added when value is set to empty string (`""`).
type: string
prometheusRulesExcludedFromEnforce:
description: PrometheusRulesExcludedFromEnforce - list of prometheus
rules to be excluded from enforcing of adding namespace labels.
Works only if enforcedNamespaceLabel set to true. Make sure both
ruleNamespace and ruleName are set for each pair
items:
description: PrometheusRuleExcludeConfig enables users to configure
excluded PrometheusRule names and their namespaces to be ignored
while enforcing namespace label for alerts and metrics.
properties:
ruleName:
description: RuleName - name of excluded rule
type: string
ruleNamespace:
description: RuleNamespace - namespace of excluded rule
type: string
required:
- ruleName
- ruleNamespace
type: object
type: array
query:
description: QuerySpec defines the query command line flags when starting
Prometheus.
@ -4114,6 +4144,10 @@ spec:
scrapeInterval:
description: Interval between consecutive scrapes.
type: string
scrapeTimeout:
description: Number of seconds to wait for target to respond before
erroring.
type: string
secrets:
description: Secrets is a list of Secrets in the same namespace as
the Prometheus object, which shall be mounted into the Prometheus
@ -4762,6 +4796,12 @@ spec:
logLevel:
description: LogLevel for Thanos sidecar to be configured with.
type: string
minTime:
description: MinTime for Thanos sidecar to be configured with.
Option can be a constant time in RFC3339 format or time duration
relative to current time, such as -1d or 2h45m. Valid duration
units are ms, s, m, h, d, w, y.
type: string
objectStorageConfig:
description: ObjectStorageConfig configures object storage in
Thanos.

View file

@ -2998,6 +2998,27 @@ spec:
priorityClassName:
description: Priority class assigned to the Pods
type: string
prometheusRulesExcludedFromEnforce:
description: PrometheusRulesExcludedFromEnforce - list of Prometheus
rules to be excluded from enforcing of adding namespace labels.
Works only if enforcedNamespaceLabel set to true. Make sure both
ruleNamespace and ruleName are set for each pair
items:
description: PrometheusRuleExcludeConfig enables users to configure
excluded PrometheusRule names and their namespaces to be ignored
while enforcing namespace label for alerts and metrics.
properties:
ruleName:
description: RuleName - name of excluded rule
type: string
ruleNamespace:
description: RuleNamespace - namespace of excluded rule
type: string
required:
- ruleName
- ruleNamespace
type: object
type: array
queryConfig:
description: Define configuration for connecting to thanos query instances.
If this is defined, the QueryEndpoints field will be ignored. Maps

View file

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
rules:
- apiGroups:

View file

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io

View file

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
namespace: monitoring
spec:
@ -18,15 +18,15 @@ spec:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.39.0
image: quay.io/coreos/prometheus-operator:v0.39.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.40.0
image: quay.io/coreos/prometheus-operator:v0.40.0
name: prometheus-operator
ports:
- containerPort: 8080

View file

@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
namespace: monitoring
spec:

View file

@ -4,6 +4,6 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.39.0
app.kubernetes.io/version: v0.40.0
name: prometheus-operator
namespace: monitoring

View file

@ -7,10 +7,15 @@
{
alert: 'AlertmanagerConfigInconsistent',
annotations: {
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
},
expr: |||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config,
'for': '5m',
labels: {

View file

@ -26,7 +26,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.39"
"version": "release-0.40"
},
{
"source": {

View file

@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
prometheusAdapter+:: {
name: 'prometheus-adapter',
labels: { name: $._config.prometheusAdapter.name },
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc:9090/',
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
config: {
resourceRules: {
cpu: {

File diff suppressed because one or more lines are too long

View file

@ -15,7 +15,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
},
versions+:: {
prometheusOperator: 'v0.39.0',
prometheusOperator: 'v0.40.0',
prometheusConfigReloader: self.prometheusOperator,
configmapReloader: 'v0.3.0',
},

File diff suppressed because one or more lines are too long

View file

@ -11,8 +11,8 @@
* @param fill Fill, integer from 0 to 10
* @param linewidth Line Width, integer from 0 to 10
* @param decimals Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
* @param decimals1Y Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
* @param decimals2Y Override automatic decimal precision for the second Y axis. If null, use decimals parameter.
* @param decimalsY1 Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
* @param decimalsY2 Override automatic decimal precision for the second Y axis. If null, use decimals parameter.
* @param min_span Min span
* @param format Unit of the Y axes
* @param formatY1 Unit of the first Y axis

View file

@ -192,6 +192,44 @@ $ jsonnet -J vendor -S -e 'std.manifestYamlDoc((import "mixin.libsonnet").promet
$ jsonnet -J vendor -m files/dashboards -e '(import "mixin.libsonnet").grafanaDashboards'
```
### Customising alert annotations
The steps described below extend the existing mixin library without modifying the original git repository. This is to make consuming updates to your extended alert definitions easier. These definitions can reside outside of this repository and be added to your own custom location, where you can define your alert dependencies in your `jsonnetfile.json` and add customisations to the existing definitions.
In your working directory, create a new file `kubernetes_mixin_override.libsonnet` with the following:
```
local utils = import 'lib/utils.libsonnet';
(import 'mixin.libsonnet') +
(
{
prometheusAlerts+::
// The specialAlerts can be in any other config file
local slack = 'observability';
local specialAlerts = {
KubePodCrashLooping: { slack_channel: slack },
KubePodNotReady: { slack_channel: slack },
};
local addExtraAnnotations(rule) = rule {
[if 'alert' in rule then 'annotations']+: {
dashboard: 'https://foo.bar.co',
[if rule.alert in specialAlerts then 'slack_channel']: specialAlerts[rule.alert].slack_channel,
},
};
utils.mapRuleGroups(addExtraAnnotations),
}
)
```
Create new file: `lib/kubernetes_customised_alerts.jsonnet` with the following:
```
std.manifestYamlDoc((import '../kubernetes_mixin_override.libsonnet').prometheusAlerts)
```
Running `jsonnet -S lib/kubernetes_customised_alerts.jsonnet` will build the alerts with your customisations.
The same result can be achieved by modifying the existing `config.libsonnet` with the content of `kubernetes_mixin_override.libsonnet`.
## Background
* For more motivation, see

View file

@ -120,16 +120,22 @@
},
{
expr: |||
max without (revision) (
kube_statefulset_status_current_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
unless
kube_statefulset_status_update_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
*
(
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
max without (revision) (
kube_statefulset_status_current_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
unless
kube_statefulset_status_update_revision{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
*
(
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
!=
kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
) and (
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
==
0
)
||| % $._config,
labels: {

View file

@ -48,6 +48,10 @@ local utils = import 'utils.libsonnet';
{
alert: 'KubeAPILatencyHigh',
expr: |||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{%(kubeApiserverSelector)s,quantile="0.99"}
>
%(kubeAPILatencyWarningSeconds)s
and on (verb,resource)
(
cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s}
>
@ -59,10 +63,6 @@ local utils = import 'utils.libsonnet';
)
) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{%(kubeApiserverSelector)s} >= 0)
and on (verb,resource)
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{%(kubeApiserverSelector)s,quantile="0.99"}
>
%(kubeAPILatencyWarningSeconds)s
||| % $._config,
'for': '5m',
labels: {

View file

@ -69,6 +69,7 @@
// The default refresh time for all dashboards, default to 10s
refresh: '10s',
minimumTimeInterval: '1m',
},
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.

View file

@ -272,6 +272,20 @@ local singlestat = grafana.singlestat;
sort=1,
)
)
.addPanel(
grafana.text.new(
title='Notice',
content='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
description='The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.',
span=12,
),
gridPos={
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
)
.addRow(
row.new()
.addPanel(availability1d)

View file

@ -6,7 +6,7 @@
grafanaDashboards:: {
[filename]: grafanaDashboards[filename] {
uid: std.md5(filename),
timezone: '',
timezone: 'UTC',
// Modify tooltip to only show a single value
rows: [

View file

@ -96,7 +96,8 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation') +
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config)
g.statPanel('1 - avg(rate(node_cpu_seconds_total{mode="idle", %(clusterLabel)s="$cluster"}[$__interval]))' % $._config) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
.addPanel(
g.panel('CPU Requests Commitment') +
@ -183,7 +184,8 @@ local template = grafana.template;
g.tablePanel(
networkColumns,
networkTableStyles
),
) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(

View file

@ -236,7 +236,8 @@ local template = grafana.template;
g.tablePanel(
networkColumns,
networkTableStyles
),
) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(

View file

@ -215,7 +215,7 @@ local template = grafana.template;
g.panel('Receive Bandwidth') +
g.queryPanel('sum(irate(container_network_receive_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -224,7 +224,7 @@ local template = grafana.template;
g.panel('Transmit Bandwidth') +
g.queryPanel('sum(irate(container_network_transmit_bytes_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -233,7 +233,7 @@ local template = grafana.template;
g.panel('Rate of Received Packets') +
g.queryPanel('sum(irate(container_network_receive_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -242,7 +242,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets') +
g.queryPanel('sum(irate(container_network_transmit_packets_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -251,7 +251,7 @@ local template = grafana.template;
g.panel('Rate of Received Packets Dropped') +
g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -260,7 +260,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{namespace=~"$namespace", pod=~"$pod"}[$__interval])) by (pod)', '{{pod}}') +
g.stack +
{ yaxes: g.yaxes('Bps') },
{ yaxes: g.yaxes('Bps'), interval: $._config.grafanaK8s.minimumTimeInterval },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
},

View file

@ -276,7 +276,8 @@ local template = grafana.template;
g.tablePanel(
networkColumns,
networkTableStyles
),
) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(
@ -376,7 +377,7 @@ local template = grafana.template;
g.panel('Rate of Transmitted Packets Dropped') +
g.queryPanel(|||
(sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}[$__interval])
* on (namespace,pod)
* on (namespace,pod)
group_left(workload,workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", %(namespaceLabel)s=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
||| % $._config, '{{workload}}') +
g.stack +

View file

@ -218,7 +218,8 @@ local template = grafana.template;
g.tablePanel(
networkColumns,
networkTableStyles
),
) +
{ interval: $._config.grafanaK8s.minimumTimeInterval },
)
)
.addRow(

View file

@ -10,12 +10,12 @@ To use them, you need to have `jsonnet` (v0.13+) and `jb` installed. If you
have a working Go development environment, it's easiest to run the following:
```bash
$ go get github.com/google/go-jsonnet/cmd/jsonnet
$ go get github.com/google/go-jsonnet/cmd/jsonnetfmt
$ go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
```
_Note: The make targets `lint` and `fmt` need the `jsonnetfmt` binary, which is
currently not included in the Go implementation of `jsonnet`. For the time
being, you have to install the [C++ version of
available from [v0.16.0](https://github.com/google/jsonnet/releases/tag/v0.16.0) onwards in the Go implementation of `jsonnet`. If your jsonnet version is older than v0.16.0, you have to either upgrade or install the [C++ version of
jsonnetfmt](https://github.com/google/jsonnet) if you want to use `make lint`
or `make fmt`._