major monitoring upgrade
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
ab98ad2e70
commit
d26b64b384
|
@ -8,7 +8,7 @@
|
||||||
"subdir": "jsonnet/kube-prometheus"
|
"subdir": "jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master",
|
"version": "main",
|
||||||
"name": "kube-prometheus"
|
"name": "kube-prometheus"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,18 +8,18 @@
|
||||||
"subdir": "grafana"
|
"subdir": "grafana"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "8024f4fdaeb3a3a7d72f77e2ed87deb92c79aeda",
|
"version": "8ea4e7bc04b1bf5e9bd99918ca28c6271b42be0e",
|
||||||
"sum": "WXrJQtWuU5lJVc4jXkJGddPMpPP0+4eMcIB5cauZGgM="
|
"sum": "muenICtKXABk6MZZHCZD2wCbmtiE96GwWRMGa1Rg+wA="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
"git": {
|
"git": {
|
||||||
"remote": "https://github.com/etcd-io/etcd",
|
"remote": "https://github.com/etcd-io/etcd",
|
||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "contrib/mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "ca866c02422ff3f3d1f0876898a30c33dd7bcccf",
|
"version": "562d645ac923388ff5b8d270b0536764d34b0e0f",
|
||||||
"sum": "bLqTqEr0jky9zz5MV/7ucn6H5mph2NlXas0TVnGNB1Y="
|
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -28,8 +28,8 @@
|
||||||
"subdir": "grafonnet"
|
"subdir": "grafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "356bd73e4792ffe107725776ca8946895969c191",
|
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98",
|
||||||
"sum": "CSMZ3dJrpJpwvffie8BqcfrIVVwiKNqdPEN+1XWRBGU="
|
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -38,8 +38,8 @@
|
||||||
"subdir": "grafana-builder"
|
"subdir": "grafana-builder"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "216bc806bb512f218e3cf5ed3d4f5699b07f04d6",
|
"version": "dbf1211d003d20c7adcdee942c477e648507a398",
|
||||||
"sum": "9/eJqljTTtJeq9QRjabdKWL6yD8a7VzLmGKBK3ir77k="
|
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -59,8 +59,8 @@
|
||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
"version": "c67c0f19e869f1da34d79b6507c1fa37c23a6e4e",
|
||||||
"sum": "3i0NkntlBluDS1NRF+iSc2e727Alkv3ziuVjAP12/kE="
|
"sum": "F+RxcI26zeoeI81uot39Jv6IpQ6BOz+xlSHlElJYsz8="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -69,7 +69,7 @@
|
||||||
"subdir": "lib/promgrafonnet"
|
"subdir": "lib/promgrafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
"version": "39a9cda705b5201c35105bd1f24c83923fa839ef",
|
||||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -79,8 +79,8 @@
|
||||||
"subdir": "jsonnet/kube-state-metrics"
|
"subdir": "jsonnet/kube-state-metrics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309",
|
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
||||||
"sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI="
|
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -89,7 +89,7 @@
|
||||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "7bdd62593c9273b5179cf3c9d2d819e9d997aaa4",
|
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
||||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -99,8 +99,8 @@
|
||||||
"subdir": "jsonnet/kube-prometheus"
|
"subdir": "jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "7d7d40b4dee70ecd3328dcdee2ed0cc8f806df93",
|
"version": "5b2740d517095a6ae9ad51bcb9c53e5ef28c62a0",
|
||||||
"sum": "6PhhQPWilq4skfe+z/hXKEg1pRqHnwvMR1Au6W136U0="
|
"sum": "+6VkkR44AC3Qnwfr9cWYCKs+uRi5JaIOda/3X1JEzAg="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -109,8 +109,9 @@
|
||||||
"subdir": "jsonnet/mixin"
|
"subdir": "jsonnet/mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "117c9a2cd905479022a66ddd92a41f599cccf10d",
|
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
||||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U="
|
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
|
||||||
|
"name": "prometheus-operator-mixin"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -119,8 +120,8 @@
|
||||||
"subdir": "jsonnet/prometheus-operator"
|
"subdir": "jsonnet/prometheus-operator"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "d8b7d3766225908d0239fd0d78258892cd0fc384",
|
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
||||||
"sum": "Nl+N/h76bzD9tZ8tx7tuNIKHwCIJ9zyOsAWplH8HvAE="
|
"sum": "MRwyChXdKG3anL2OWpbUu3qWc97w9J6YsjUWjLFQyB0="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -129,8 +130,8 @@
|
||||||
"subdir": "doc/alertmanager-mixin"
|
"subdir": "doc/alertmanager-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "193ebba04d1e70d971047e983a0b489112610460",
|
"version": "99f64e944b1043c790784cf5373c8fb349816fc4",
|
||||||
"sum": "QcftU7gjCQyj7B6M4YJeCAeaPd0kwxd4J4rolo7AnLE=",
|
"sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=",
|
||||||
"name": "alertmanager"
|
"name": "alertmanager"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -140,8 +141,8 @@
|
||||||
"subdir": "docs/node-mixin"
|
"subdir": "docs/node-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "8b466360a35581e0301bd22918be7011cf4203c3",
|
"version": "b597c1244d7bef49e6f3359c87a56dd7707f6719",
|
||||||
"sum": "rvyiD/yCB4BeYAWqYF53bP8c+aCUt2ipLHW2Ea8ELO8="
|
"sum": "cZTNXQMUCLB5FGYpMn845dcqGdkcYt58qCqOFIV/BoQ="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
@ -150,8 +151,8 @@
|
||||||
"subdir": "documentation/prometheus-mixin"
|
"subdir": "documentation/prometheus-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "26d89b4b0776fe4cd5a3656dfa520f119a375273",
|
"version": "3cafc58827d1ebd1a67749f88be4218f0bab3d8d",
|
||||||
"sum": "1VRVMuxAEZ9vdGHFlndmG9iQzDD6AoIXrX80CDpGDaU=",
|
"sum": "VK0c3sQ3ksiM6JQsAVfWmL5NbzGv9llMfXFNXfFdJ+A=",
|
||||||
"name": "prometheus"
|
"name": "prometheus"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -161,8 +162,9 @@
|
||||||
"subdir": "mixin"
|
"subdir": "mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "37e6ef61566c7c70793ba6d128f00c4c66cb2402",
|
"version": "ba6c5c4726ff52807c7383c68f2159b1af7980bb",
|
||||||
"sum": "OptiWUMOHFrRGTZhSfxV1RCeXZ90qsefGNTD4lDYVG0="
|
"sum": "XP3uq7xcfKHsnWsz1v992csZhhZR3jQma6hFOfSViTs=",
|
||||||
|
"name": "thanos-mixin"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
|
|
@ -72,7 +72,7 @@ local masterIP = '185.95.218.11';
|
||||||
{
|
{
|
||||||
port: 'http-metrics',
|
port: 'http-metrics',
|
||||||
interval: '30s',
|
interval: '30s',
|
||||||
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
|
metricRelabelings: (import 'kube-prometheus/addons/dropping-deprecated-metrics-relabelings.libsonnet') + [
|
||||||
{
|
{
|
||||||
sourceLabels: ['__name__'],
|
sourceLabels: ['__name__'],
|
||||||
regex: 'etcd_(debugging|disk|request|server).*',
|
regex: 'etcd_(debugging|disk|request|server).*',
|
||||||
|
|
|
@ -3,17 +3,34 @@ kind: Alertmanager
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
alertmanager: main
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
name: main
|
name: main
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
configSecret: alertmanager-tbrnt-config
|
configSecret: alertmanager-tbrnt-config
|
||||||
image: quay.io/prometheus/alertmanager:v0.21.0
|
image: quay.io/prometheus/alertmanager:v0.21.0
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/os: linux
|
kubernetes.io/os: linux
|
||||||
replicas: 1
|
podMetadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
|
replicas: 3
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 100Mi
|
||||||
|
requests:
|
||||||
|
cpu: 4m
|
||||||
|
memory: 100Mi
|
||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 2000
|
fsGroup: 2000
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 1000
|
runAsUser: 1000
|
||||||
serviceAccountName: alertmanager-main
|
serviceAccountName: alertmanager-main
|
||||||
version: v0.21.0
|
version: 0.21.0
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
apiVersion: policy/v1beta1
|
||||||
|
kind: PodDisruptionBudget
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
|
name: alertmanager-main
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
|
@ -0,0 +1,156 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: alertmanager-main-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: alertmanager.rules
|
||||||
|
rules:
|
||||||
|
- alert: AlertmanagerFailedReload
|
||||||
|
annotations:
|
||||||
|
description: Configuration has failed to load for {{ $labels.namespace }}/{{
|
||||||
|
$labels.pod}}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
|
||||||
|
summary: Reloading an Alertmanager configuration has failed.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="default"}[5m]) == 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: AlertmanagerMembersInconsistent
|
||||||
|
annotations:
|
||||||
|
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
|
||||||
|
found {{ $value }} members of the {{$labels.job}} cluster.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
|
||||||
|
summary: A member of an Alertmanager cluster has not found all other cluster
|
||||||
|
members.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m])
|
||||||
|
< on (namespace,service) group_left
|
||||||
|
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m]))
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: AlertmanagerFailedToSendAlerts
|
||||||
|
annotations:
|
||||||
|
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
|
||||||
|
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
|
||||||
|
}}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
|
||||||
|
summary: An Alertmanager instance failed to send notifications.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default"}[5m])
|
||||||
|
/
|
||||||
|
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default"}[5m])
|
||||||
|
)
|
||||||
|
> 0.01
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||||
|
annotations:
|
||||||
|
description: The minimum notification failure rate to {{ $labels.integration
|
||||||
|
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
|
||||||
|
humanizePercentage }}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
|
||||||
|
summary: All Alertmanager instances in a cluster failed to send notifications
|
||||||
|
to a critical integration.
|
||||||
|
expr: |
|
||||||
|
min by (namespace,service, integration) (
|
||||||
|
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
|
||||||
|
/
|
||||||
|
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
|
||||||
|
)
|
||||||
|
> 0.01
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||||
|
annotations:
|
||||||
|
description: The minimum notification failure rate to {{ $labels.integration
|
||||||
|
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
|
||||||
|
humanizePercentage }}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
|
||||||
|
summary: All Alertmanager instances in a cluster failed to send notifications
|
||||||
|
to a non-critical integration.
|
||||||
|
expr: |
|
||||||
|
min by (namespace,service, integration) (
|
||||||
|
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
|
||||||
|
/
|
||||||
|
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
|
||||||
|
)
|
||||||
|
> 0.01
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: AlertmanagerConfigInconsistent
|
||||||
|
annotations:
|
||||||
|
description: Alertmanager instances within the {{$labels.job}} cluster have
|
||||||
|
different configurations.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
|
||||||
|
summary: Alertmanager instances within the same cluster have different configurations.
|
||||||
|
expr: |
|
||||||
|
count by (namespace,service) (
|
||||||
|
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="default"})
|
||||||
|
)
|
||||||
|
!= 1
|
||||||
|
for: 20m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: AlertmanagerClusterDown
|
||||||
|
annotations:
|
||||||
|
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
||||||
|
within the {{$labels.job}} cluster have been up for less than half of the
|
||||||
|
last 5m.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
|
||||||
|
summary: Half or more of the Alertmanager instances within the same cluster
|
||||||
|
are down.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
count by (namespace,service) (
|
||||||
|
avg_over_time(up{job="alertmanager-main",namespace="default"}[5m]) < 0.5
|
||||||
|
)
|
||||||
|
/
|
||||||
|
count by (namespace,service) (
|
||||||
|
up{job="alertmanager-main",namespace="default"}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
>= 0.5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: AlertmanagerClusterCrashlooping
|
||||||
|
annotations:
|
||||||
|
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
||||||
|
within the {{$labels.job}} cluster have restarted at least 5 times in the
|
||||||
|
last 10m.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
|
||||||
|
summary: Half or more of the Alertmanager instances within the same cluster
|
||||||
|
are crashlooping.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
count by (namespace,service) (
|
||||||
|
changes(process_start_time_seconds{job="alertmanager-main",namespace="default"}[10m]) > 4
|
||||||
|
)
|
||||||
|
/
|
||||||
|
count by (namespace,service) (
|
||||||
|
up{job="alertmanager-main",namespace="default"}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
>= 0.5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
|
@ -1,8 +1,14 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
name: alertmanager-main
|
name: alertmanager-main
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
stringData:
|
stringData:
|
||||||
alertmanager.yaml: |-
|
alertmanager.yaml: |-
|
||||||
"global":
|
"global":
|
||||||
|
|
|
@ -3,8 +3,12 @@ kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
alertmanager: main
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
name: alertmanager-main
|
name: alertmanager-main
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
ports:
|
ports:
|
||||||
- name: web
|
- name: web
|
||||||
|
@ -13,4 +17,7 @@ spec:
|
||||||
selector:
|
selector:
|
||||||
alertmanager: main
|
alertmanager: main
|
||||||
app: alertmanager
|
app: alertmanager
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
sessionAffinity: ClientIP
|
sessionAffinity: ClientIP
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
name: alertmanager-main
|
name: alertmanager-main
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,9 +2,12 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
k8s-app: alertmanager
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.21.0
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- interval: 30s
|
- interval: 30s
|
||||||
|
@ -12,3 +15,6 @@ spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
alertmanager: main
|
alertmanager: main
|
||||||
|
app.kubernetes.io/component: alert-router
|
||||||
|
app.kubernetes.io/name: alertmanager
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -6,11 +6,12 @@ metadata:
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
encryptedData:
|
encryptedData:
|
||||||
alertmanager.yaml: AgDHh1Qgrdffl6IFXJuk3ZzNHGARWZLDzbMLKp5Wo/ZYqclCji4T5wx7Fql6FALCvvUBvThxLfiwM2LQHRcWfWnf5AwxipCxpos9oVvlu4aON0WZd/Kjz/ZwDq5pgR/etCrSA2DYNxYq4vnTxUAk29eY5F4dWwRPcCgVZ5/KsTIcNx4x+4einqQbwAhkUtAwQl5fEPmpwNRquJZM29XIEUxZLWne0YmCmEgNGleUU20ByfYMwgtDJGjgr6XNPtTmByAHVrxNuQwAMxgT6GcfDLCNsByaS6CY3wmSTg1GUv/CG8Xx29FwDWyf1Ly2KbmcAAafN5QJGvCCTEt/WB85GtzQisrWFZTykv3Zjuz101p9ShXQZALylaX5h22hHFXuQyiIQZEeM2ixiYQjcPhiPjx1/hkbQ25QRD73/gjalZO8bprDrJxkLlw+hrgJ0LzxWL881U6INLKow+8/GmLleFhMUXRsGqacLreCIAr4uVGEMGMVLhHJKnj597HRnn0UCxVNkDk8QjHyiVgJBrQ3Pz9SFdF7mxvJ9F4rEgGkE4dvfvWxrZFumTLEkVRF9To+rKxsIVkewvoHtN/gMzFMzumP+fz/oB9yAHsxkwVyfqXBg52hNSYIx5Z/67yy3hDRKPBcZgknf9S+F37ET5BABFxazwG8NJjf4td+UsAGuAMzKI/94u7TxuXLPCs/tIGKD7kJnPxAqpalepzABtVCmOrtWwNPb1h4XeuraUS9beJ2zV9oV5nVFJmX94EJ7qpZt0Um7+GGeavQ5SV3XHRolDS5PpZPTAWnc/1rtZ0nsKk8lllEr3aDWveMXma06NKkIXz8+iAonvHsDZuw0W6jUdUUtraIbSua9YkyugqCBGeeXIPLwFxqJTqIX5vedZVMveFiaxtCJjL48SUGxtyugfiYbPa3xpHWWe22BcJyTmAOG9aIq4Tp4nvftLyvWe7c9PotJk/7gdv1IO4RLx//eLtKWw0uADa0ara4hDuI8Yktlti24TlA9XYz00d5WtE+lJsSZN8547BUfFzXSOZSSbfrFLZmEmBTgkbj4szX19bXSctJN3BtOmRfCEPXYQN10HgnhpwqYHbXKUSTZNWLojnFL1/E56wUXGxRg9NGOwSXzTyfoLGxI9NEQpGc0Rj2Wna+JSUhlAUnfYW1eH8yyg5FfkyhQdyZJFvYfF0rk+XG5XNhLumST19uxrAkMWhk+Z9/eWwOaZQMmDcoi2Rs0za+1GGjPW5k56Ip+spwW5cvYmdl1PgkZ4g1mupjiB0FdgZHGR+kGn1lbPtSUd+amh9PXSDWkqfnix62H7374rQ3ZyG7fs9sQNnnRrd/cDCMxAl5Upk8D9dfxRmvuxRd8b89h7EQwUBML7TIriA2Pci5Ftux2R5wyIXjznLC5/kFZg6/Av3uKmKK6dLR2Ooey7/3g14CEjMumdijjySl8Pd2UUxSKVKD7vkq+3xYm0CJZqVvT/iBOccrv0UEiTHBsXrfaugUvqIKTAGYhJy0fUBXKisPdA0HdzrUmx57Du36TGyuEzGtVuDarcWzQYPqKJxOIuofJ+AGTDY53OjdUJ8pwJD6HDz55tu85gaV6ZOvSYqjqeX2FUe7lPhsGUIh/FemfichpypHyFpPYhkwAIO1AinKvsqjUuDXE6n5b7NMbI1gl87fPqT5wUSKXZqwViyFqUA5DFqPTEqvHIGU5Wz0GajEaQ==
|
alertmanager.yaml: AgBj3KqLiF7EAnGK6c4+Thferv3Sur+fhlwE4wpXD1PBtwlJQsMCqjsLRRFNAESH9/8vhI9E9D8wLJiauNS7CGw5jd5KU1cvxo5EyGeFoVyAB4bHSy/pxptSFq+rn00E99/Tqkbdsgduwusfpi0I9F1+zNucyyamJsEzIcsyHMlBbACz+9KQV9SdbgVEmIeqabrAP9VQaQ+i69yurhPdV7VkZzr0WKcGg3x27+slmtjlJz5fwtv1qmbYt/MQnijF2tc6tJeq19Cm0O4zuQ09meW6DwAZ9SOIFU6LxrqJlKbuleaWmfIE3AQYA6Z+qXyBjT1ILW36RwGyg3YK7nm0MNDQxd6LN3zR0eifPqrPsm7O6LE+NAg4FkurV3lJlrBoU3lSSc+sQZZr00ct9Gp57EEvg9T2TaM1B/KHQNmIhpDGntD4+yTcvK3nU7+sxqG/c4Wk5xiUQyLYnigNy5qYCcsM+t9iCoGxP7uU8GrsvIkojTxzhdc6e5LduThKdGE9jI3R6nCP6kmsU6XyUzgKmxYJVVzhSrm9yxFVDPHriNaEM2hgEd3wStwmRjGjwAPUjQZfSJtmxY6+RQ/77TYGjiskDm6gAZuzkGjdptL2t5F+54y3uePaLHNspMgtZsTARCo3kAhgf61Gk2nvnEY/ws5qFjAnsUEXs86wAk2S+w401QKPKTcDr6e/rnve8IrXW0FPvzR3rzdWOcU8v0Z0sSFijIfXdx+A9WGCJuHNo65FbKSgWhlHBfvWB1qWBnDd/VVHIA2wR8gevAPJHSc0f1WdUDc4w2w8tc/qum1SZo2lWkMopvLaiVHU5dCGtG6+4qsC1DmFzIRGZN4AbdVd5k+OY15Fp+ysjuTpA/HuZ/N0kz/5BXzNbY3u7YKi3EV3Up+eZIi2jlG0XBXWdCouuxRW40qHuShivtbrBgey32kFo85dsrDqN7F2kBVnumOB7kvFOaCkL2AtsakVjUzGoh5eXCSHl0ZcqmW2UjInzZIirBChMW/G4yL/TwpVYbBLqWPfdVMFmq7I4srY2+hUP/5UBt/DKZi5zPlLR8H3q4i02zsNpqdhSa9o6ThhFtVX9/te/DMpyN1fJ1Hn2p3cDhoTsiTLPkvflVOx70flap0v2zzPoDm+yXhFllpWp/5avHy9pKf/RzpAodbNr/EydkC+KDKI88MhVUtxS27WbKFsq+vUkmHQj+KtGyRFjg2/CnmM8YbdRsMe8p39PVGLxj1RTnyYzlMltOTbJo3rhDzjmpzGVUpWokwTMGC1WgTenrS4IcCK61ri9bsBIL9n9sMLF1lT8NVKnQfluDTaHNzsQgJ1HTSwQOcAfugqlUrSeTLt3q6U4pSjjlF8P7wYpqzWc+bhOaHed9NxrGXFBC5Wh6+BULuCaCA6TtkLpUfABYHVUa4OS3huNsOeBhZ3aCCQXrc0jOOq2DQzxvdGu4YAQnvMHwJRVyKVcw0pOS5RjIqJW6IOn0MGHzAo7qNv6LUyJ9a7huT2W4ibrHFkMck1zKxbBekPQ9FxpufSXrEqEqNuB3j7Gi7lVDVbPySr1rr2KXLzOLsnZhpTpMq2RejglIAMF7WfIMfvHQ2mnjNuYNNQnXx8hPLm88GSxFYKHpUnAswgYuo4XX2drYMzzq3GWDMIHZ/kpLySU+eJGo6VGeFUV1DgaGksLXE3oCfrA1OCUyZ/qke3tzj8ixjwuprCmFPWsg==
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
creationTimestamp: null
|
creationTimestamp: null
|
||||||
name: alertmanager-tbrnt-config
|
name: alertmanager-tbrnt-config
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
type: Opaque
|
type: Opaque
|
||||||
|
status: {}
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJMb2tpIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAibG9raSIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL2xva2kubG9raTozMTAwIiwKICAgICAgICAgICAgInZlcnNpb24iOiAxCiAgICAgICAgfQogICAgXQp9
|
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLmRlZmF1bHQuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
name: grafana-datasources
|
name: grafana-datasources
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
type: Opaque
|
type: Opaque
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -17,5 +17,10 @@ data:
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
name: grafana-dashboards
|
name: grafana-dashboards
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,26 +2,32 @@ apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: grafana
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
name: grafana
|
name: grafana
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: grafana
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
annotations:
|
annotations:
|
||||||
checksum/grafana-datasources: 7103d054a6e94f976ca59b4ede77cf88
|
checksum/grafana-datasources: b822d7b1a1070f322d0773c043985b4a
|
||||||
labels:
|
labels:
|
||||||
app: grafana
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- env:
|
- env: []
|
||||||
- name: GF_INSTALL_PLUGINS
|
image: grafana/grafana:7.5.4
|
||||||
value: grafana-piechart-panel
|
|
||||||
image: grafana/grafana:7.3.5
|
|
||||||
name: grafana
|
name: grafana
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 3000
|
- containerPort: 3000
|
||||||
|
@ -113,9 +119,6 @@ spec:
|
||||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
||||||
name: grafana-dashboard-statefulset
|
name: grafana-dashboard-statefulset
|
||||||
readOnly: false
|
readOnly: false
|
||||||
- mountPath: /grafana-dashboard-definitions/0/traefik
|
|
||||||
name: grafana-dashboard-traefik
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
||||||
name: grafana-dashboard-workload-total
|
name: grafana-dashboard-workload-total
|
||||||
readOnly: false
|
readOnly: false
|
||||||
|
@ -201,9 +204,6 @@ spec:
|
||||||
- configMap:
|
- configMap:
|
||||||
name: grafana-dashboard-statefulset
|
name: grafana-dashboard-statefulset
|
||||||
name: grafana-dashboard-statefulset
|
name: grafana-dashboard-statefulset
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-traefik
|
|
||||||
name: grafana-dashboard-traefik
|
|
||||||
- configMap:
|
- configMap:
|
||||||
name: grafana-dashboard-workload-total
|
name: grafana-dashboard-workload-total
|
||||||
name: grafana-dashboard-workload-total
|
name: grafana-dashboard-workload-total
|
||||||
|
|
|
@ -2,14 +2,18 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: grafana
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
name: grafana
|
name: grafana
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
port: 3000
|
port: 3000
|
||||||
targetPort: http
|
targetPort: http
|
||||||
selector:
|
selector:
|
||||||
app: grafana
|
app.kubernetes.io/component: grafana
|
||||||
type: NodePort
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -2,4 +2,4 @@ apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
name: grafana
|
name: grafana
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: grafana
|
||||||
|
app.kubernetes.io/name: grafana
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 7.5.4
|
||||||
name: grafana
|
name: grafana
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- interval: 15s
|
- interval: 15s
|
||||||
port: http
|
port: http
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: grafana
|
app.kubernetes.io/name: grafana
|
||||||
|
|
|
@ -13,4 +13,5 @@ spec:
|
||||||
name: healthchecks-io
|
name: healthchecks-io
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
type: Opaque
|
type: Opaque
|
||||||
|
status: {}
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
@ -24,16 +26,6 @@ rules:
|
||||||
verbs:
|
verbs:
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
|
||||||
- daemonsets
|
|
||||||
- deployments
|
|
||||||
- replicasets
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- apps
|
- apps
|
||||||
resources:
|
resources:
|
||||||
|
@ -105,6 +97,14 @@ rules:
|
||||||
- networking.k8s.io
|
- networking.k8s.io
|
||||||
resources:
|
resources:
|
||||||
- networkpolicies
|
- networkpolicies
|
||||||
|
- ingresses
|
||||||
|
verbs:
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- coordination.k8s.io
|
||||||
|
resources:
|
||||||
|
- leases
|
||||||
verbs:
|
verbs:
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
|
|
@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -12,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,20 +2,28 @@ apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
|
annotations:
|
||||||
|
kubectl.kubernetes.io/default-container: kube-state-metrics
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
|
@ -23,8 +31,17 @@ spec:
|
||||||
- --port=8081
|
- --port=8081
|
||||||
- --telemetry-host=127.0.0.1
|
- --telemetry-host=127.0.0.1
|
||||||
- --telemetry-port=8082
|
- --telemetry-port=8082
|
||||||
image: quay.io/coreos/kube-state-metrics:v1.9.7
|
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 250Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 190Mi
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 65534
|
||||||
- args:
|
- args:
|
||||||
- --logtostderr
|
- --logtostderr
|
||||||
- --secure-listen-address=:8443
|
- --secure-listen-address=:8443
|
||||||
|
@ -35,6 +52,13 @@ spec:
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8443
|
- containerPort: 8443
|
||||||
name: https-main
|
name: https-main
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 40m
|
||||||
|
memory: 40Mi
|
||||||
|
requests:
|
||||||
|
cpu: 20m
|
||||||
|
memory: 20Mi
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsGroup: 65532
|
runAsGroup: 65532
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
|
@ -49,6 +73,13 @@ spec:
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 9443
|
- containerPort: 9443
|
||||||
name: https-self
|
name: https-self
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 20m
|
||||||
|
memory: 40Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsGroup: 65532
|
runAsGroup: 65532
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: kube-state-metrics-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: kube-state-metrics
|
||||||
|
rules:
|
||||||
|
- alert: KubeStateMetricsListErrors
|
||||||
|
annotations:
|
||||||
|
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||||
|
in list operations. This is likely causing it to not be able to expose metrics
|
||||||
|
about Kubernetes objects correctly or at all.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
|
||||||
|
summary: kube-state-metrics is experiencing errors in list operations.
|
||||||
|
expr: |
|
||||||
|
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||||
|
/
|
||||||
|
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
|
||||||
|
> 0.01
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: KubeStateMetricsWatchErrors
|
||||||
|
annotations:
|
||||||
|
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||||
|
in watch operations. This is likely causing it to not be able to expose
|
||||||
|
metrics about Kubernetes objects correctly or at all.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
|
||||||
|
summary: kube-state-metrics is experiencing errors in watch operations.
|
||||||
|
expr: |
|
||||||
|
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||||
|
/
|
||||||
|
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
|
||||||
|
> 0.01
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
|
@ -2,10 +2,12 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
clusterIP: None
|
clusterIP: None
|
||||||
ports:
|
ports:
|
||||||
|
@ -16,4 +18,6 @@ spec:
|
||||||
port: 9443
|
port: 9443
|
||||||
targetPort: https-self
|
targetPort: https-self
|
||||||
selector:
|
selector:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -2,7 +2,9 @@ apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: v1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,10 +2,12 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.7
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.0.0
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
@ -28,4 +30,6 @@ spec:
|
||||||
jobLabel: app.kubernetes.io/name
|
jobLabel: app.kubernetes.io/name
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -9,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,30 +2,37 @@ apiVersion: apps/v1
|
||||||
kind: DaemonSet
|
kind: DaemonSet
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
app.kubernetes.io/version: v1.0.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
app.kubernetes.io/version: v1.0.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
- --web.listen-address=127.0.0.1:9100
|
- --web.listen-address=127.0.0.1:9100
|
||||||
- --path.procfs=/host/proc
|
|
||||||
- --path.sysfs=/host/sys
|
- --path.sysfs=/host/sys
|
||||||
- --path.rootfs=/host/root
|
- --path.rootfs=/host/root
|
||||||
- --no-collector.wifi
|
- --no-collector.wifi
|
||||||
- --no-collector.hwmon
|
- --no-collector.hwmon
|
||||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||||
image: quay.io/prometheus/node-exporter:v1.0.1
|
- --collector.netclass.ignored-devices=^(veth.*)$
|
||||||
|
- --collector.netdev.device-exclude=^(veth.*)$
|
||||||
|
image: quay.io/prometheus/node-exporter:v1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
|
@ -35,10 +42,6 @@ spec:
|
||||||
cpu: 102m
|
cpu: 102m
|
||||||
memory: 180Mi
|
memory: 180Mi
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /host/proc
|
|
||||||
mountPropagation: HostToContainer
|
|
||||||
name: proc
|
|
||||||
readOnly: true
|
|
||||||
- mountPath: /host/sys
|
- mountPath: /host/sys
|
||||||
mountPropagation: HostToContainer
|
mountPropagation: HostToContainer
|
||||||
name: sys
|
name: sys
|
||||||
|
@ -85,9 +88,6 @@ spec:
|
||||||
tolerations:
|
tolerations:
|
||||||
- operator: Exists
|
- operator: Exists
|
||||||
volumes:
|
volumes:
|
||||||
- hostPath:
|
|
||||||
path: /proc
|
|
||||||
name: proc
|
|
||||||
- hostPath:
|
- hostPath:
|
||||||
path: /sys
|
path: /sys
|
||||||
name: sys
|
name: sys
|
||||||
|
|
|
@ -0,0 +1,301 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: node-exporter-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: node-exporter
|
||||||
|
rules:
|
||||||
|
- alert: NodeFilesystemSpaceFillingUp
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||||
|
up.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
||||||
|
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
|
||||||
|
and
|
||||||
|
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeFilesystemSpaceFillingUp
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||||
|
up fast.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
||||||
|
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
|
||||||
|
and
|
||||||
|
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: NodeFilesystemAlmostOutOfSpace
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available space left.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
||||||
|
summary: Filesystem has less than 5% space left.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeFilesystemAlmostOutOfSpace
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available space left.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
||||||
|
summary: Filesystem has less than 3% space left.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: NodeFilesystemFilesFillingUp
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||||
|
up.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
||||||
|
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
|
||||||
|
and
|
||||||
|
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeFilesystemFilesFillingUp
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||||
|
up fast.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
||||||
|
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
|
||||||
|
and
|
||||||
|
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: NodeFilesystemAlmostOutOfFiles
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
||||||
|
summary: Filesystem has less than 5% inodes left.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeFilesystemAlmostOutOfFiles
|
||||||
|
annotations:
|
||||||
|
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||||
|
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
||||||
|
summary: Filesystem has less than 3% inodes left.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
|
||||||
|
and
|
||||||
|
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||||
|
)
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: NodeNetworkReceiveErrs
|
||||||
|
annotations:
|
||||||
|
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||||
|
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
|
||||||
|
summary: Network interface is reporting many receive errors.
|
||||||
|
expr: |
|
||||||
|
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeNetworkTransmitErrs
|
||||||
|
annotations:
|
||||||
|
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||||
|
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
|
||||||
|
summary: Network interface is reporting many transmit errors.
|
||||||
|
expr: |
|
||||||
|
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeHighNumberConntrackEntriesUsed
|
||||||
|
annotations:
|
||||||
|
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
|
||||||
|
summary: Number of conntrack are getting close to the limit.
|
||||||
|
expr: |
|
||||||
|
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeTextFileCollectorScrapeError
|
||||||
|
annotations:
|
||||||
|
description: Node Exporter text file collector failed to scrape.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
|
||||||
|
summary: Node Exporter text file collector failed to scrape.
|
||||||
|
expr: |
|
||||||
|
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeClockSkewDetected
|
||||||
|
annotations:
|
||||||
|
description: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||||
|
Ensure NTP is configured correctly on this host.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
|
||||||
|
summary: Clock skew detected.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
node_timex_offset_seconds > 0.05
|
||||||
|
and
|
||||||
|
deriv(node_timex_offset_seconds[5m]) >= 0
|
||||||
|
)
|
||||||
|
or
|
||||||
|
(
|
||||||
|
node_timex_offset_seconds < -0.05
|
||||||
|
and
|
||||||
|
deriv(node_timex_offset_seconds[5m]) <= 0
|
||||||
|
)
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeClockNotSynchronising
|
||||||
|
annotations:
|
||||||
|
description: Clock on {{ $labels.instance }} is not synchronising. Ensure
|
||||||
|
NTP is configured on this host.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
|
||||||
|
summary: Clock not synchronising.
|
||||||
|
expr: |
|
||||||
|
min_over_time(node_timex_sync_status[5m]) == 0
|
||||||
|
and
|
||||||
|
node_timex_maxerror_seconds >= 16
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: NodeRAIDDegraded
|
||||||
|
annotations:
|
||||||
|
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
|
||||||
|
in degraded state due to one or more disks failures. Number of spare drives
|
||||||
|
is insufficient to fix issue automatically.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
|
||||||
|
summary: RAID Array is degraded
|
||||||
|
expr: |
|
||||||
|
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: NodeRAIDDiskFailure
|
||||||
|
annotations:
|
||||||
|
description: At least one device in RAID array on {{ $labels.instance }} failed.
|
||||||
|
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
|
||||||
|
summary: Failed device in RAID array
|
||||||
|
expr: |
|
||||||
|
node_md_disks{state="failed"} > 0
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- name: node-exporter.rules
|
||||||
|
rules:
|
||||||
|
- expr: |
|
||||||
|
count without (cpu) (
|
||||||
|
count without (mode) (
|
||||||
|
node_cpu_seconds_total{job="node-exporter"}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
record: instance:node_num_cpu:sum
|
||||||
|
- expr: |
|
||||||
|
1 - avg without (cpu, mode) (
|
||||||
|
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
|
||||||
|
)
|
||||||
|
record: instance:node_cpu_utilisation:rate1m
|
||||||
|
- expr: |
|
||||||
|
(
|
||||||
|
node_load1{job="node-exporter"}
|
||||||
|
/
|
||||||
|
instance:node_num_cpu:sum{job="node-exporter"}
|
||||||
|
)
|
||||||
|
record: instance:node_load1_per_cpu:ratio
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
node_memory_MemAvailable_bytes{job="node-exporter"}
|
||||||
|
/
|
||||||
|
node_memory_MemTotal_bytes{job="node-exporter"}
|
||||||
|
)
|
||||||
|
record: instance:node_memory_utilisation:ratio
|
||||||
|
- expr: |
|
||||||
|
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
|
||||||
|
record: instance:node_vmstat_pgmajfault:rate1m
|
||||||
|
- expr: |
|
||||||
|
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||||
|
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||||
|
- expr: |
|
||||||
|
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||||
|
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||||
|
- expr: |
|
||||||
|
sum without (device) (
|
||||||
|
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||||
|
)
|
||||||
|
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||||
|
- expr: |
|
||||||
|
sum without (device) (
|
||||||
|
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||||
|
)
|
||||||
|
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||||
|
- expr: |
|
||||||
|
sum without (device) (
|
||||||
|
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||||
|
)
|
||||||
|
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||||
|
- expr: |
|
||||||
|
sum without (device) (
|
||||||
|
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||||
|
)
|
||||||
|
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
|
@ -2,10 +2,12 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
app.kubernetes.io/version: v1.0.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
clusterIP: None
|
clusterIP: None
|
||||||
ports:
|
ports:
|
||||||
|
@ -13,4 +15,6 @@ spec:
|
||||||
port: 9100
|
port: 9100
|
||||||
targetPort: https
|
targetPort: https
|
||||||
selector:
|
selector:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,10 +2,12 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
app.kubernetes.io/version: v1.0.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 1.1.2
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
@ -24,4 +26,6 @@ spec:
|
||||||
jobLabel: app.kubernetes.io/name
|
jobLabel: app.kubernetes.io/name
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
app.kubernetes.io/name: node-exporter
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: apiregistration.k8s.io/v1
|
apiVersion: apiregistration.k8s.io/v1
|
||||||
kind: APIService
|
kind: APIService
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: v1beta1.metrics.k8s.io
|
name: v1beta1.metrics.k8s.io
|
||||||
spec:
|
spec:
|
||||||
group: metrics.k8s.io
|
group: metrics.k8s.io
|
||||||
|
@ -8,6 +13,6 @@ spec:
|
||||||
insecureSkipTLSVerify: true
|
insecureSkipTLSVerify: true
|
||||||
service:
|
service:
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
version: v1beta1
|
version: v1beta1
|
||||||
versionPriority: 100
|
versionPriority: 100
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|
|
@ -2,6 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
rbac.authorization.k8s.io/aggregate-to-admin: "true"
|
rbac.authorization.k8s.io/aggregate-to-admin: "true"
|
||||||
rbac.authorization.k8s.io/aggregate-to-edit: "true"
|
rbac.authorization.k8s.io/aggregate-to-edit: "true"
|
||||||
rbac.authorization.k8s.io/aggregate-to-view: "true"
|
rbac.authorization.k8s.io/aggregate-to-view: "true"
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -9,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: resource-metrics:system:auth-delegator
|
name: resource-metrics:system:auth-delegator
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -9,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: resource-metrics-server-resources
|
name: resource-metrics-server-resources
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|
|
@ -4,8 +4,8 @@ data:
|
||||||
"resourceRules":
|
"resourceRules":
|
||||||
"cpu":
|
"cpu":
|
||||||
"containerLabel": "container"
|
"containerLabel": "container"
|
||||||
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
|
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
|
||||||
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode=\"idle\", job=\"windows-exporter\",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)"
|
||||||
"resources":
|
"resources":
|
||||||
"overrides":
|
"overrides":
|
||||||
"namespace":
|
"namespace":
|
||||||
|
@ -16,8 +16,8 @@ data:
|
||||||
"resource": "pod"
|
"resource": "pod"
|
||||||
"memory":
|
"memory":
|
||||||
"containerLabel": "container"
|
"containerLabel": "container"
|
||||||
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
|
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
|
||||||
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum(windows_cs_physical_memory_bytes{job=\"windows-exporter\",<<.LabelMatchers>>} - windows_memory_available_bytes{job=\"windows-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
||||||
"resources":
|
"resources":
|
||||||
"overrides":
|
"overrides":
|
||||||
"instance":
|
"instance":
|
||||||
|
@ -29,5 +29,10 @@ data:
|
||||||
"window": "5m"
|
"window": "5m"
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: adapter-config
|
name: adapter-config
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -1,21 +1,31 @@
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 2
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
strategy:
|
strategy:
|
||||||
rollingUpdate:
|
rollingUpdate:
|
||||||
maxSurge: 1
|
maxSurge: 1
|
||||||
maxUnavailable: 0
|
maxUnavailable: 1
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
|
@ -23,9 +33,9 @@ spec:
|
||||||
- --config=/etc/adapter/config.yaml
|
- --config=/etc/adapter/config.yaml
|
||||||
- --logtostderr=true
|
- --logtostderr=true
|
||||||
- --metrics-relist-interval=1m
|
- --metrics-relist-interval=1m
|
||||||
- --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
|
- --prometheus-url=http://prometheus-k8s.default.svc.cluster.local:9090/
|
||||||
- --secure-port=6443
|
- --secure-port=6443
|
||||||
image: directxman12/k8s-prometheus-adapter:v0.8.2
|
image: directxman12/k8s-prometheus-adapter:v0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 6443
|
- containerPort: 6443
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: resource-metrics-auth-reader
|
name: resource-metrics-auth-reader
|
||||||
namespace: kube-system
|
namespace: kube-system
|
||||||
roleRef:
|
roleRef:
|
||||||
|
@ -10,4 +15,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,13 +2,18 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
ports:
|
ports:
|
||||||
- name: https
|
- name: https
|
||||||
port: 443
|
port: 443
|
||||||
targetPort: 6443
|
targetPort: 6443
|
||||||
selector:
|
selector:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,9 +2,12 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.8.4
|
||||||
name: prometheus-adapter
|
name: prometheus-adapter
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
@ -15,4 +18,6 @@ spec:
|
||||||
insecureSkipVerify: true
|
insecureSkipVerify: true
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
name: prometheus-adapter
|
app.kubernetes.io/component: metrics-adapter
|
||||||
|
app.kubernetes.io/name: prometheus-adapter
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -9,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -4,9 +4,10 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
@ -19,4 +20,5 @@ spec:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
apiVersion: policy/v1beta1
|
||||||
|
kind: PodDisruptionBudget
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
|
name: prometheus-k8s
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
minAvailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
prometheus: k8s
|
|
@ -2,19 +2,31 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: Prometheus
|
kind: Prometheus
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
prometheus: k8s
|
prometheus: k8s
|
||||||
name: k8s
|
name: k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
alerting:
|
alerting:
|
||||||
alertmanagers:
|
alertmanagers:
|
||||||
- name: alertmanager-main
|
- apiVersion: v2
|
||||||
namespace: monitoring
|
name: alertmanager-main
|
||||||
|
namespace: default
|
||||||
port: web
|
port: web
|
||||||
|
externalLabels: {}
|
||||||
externalUrl: http://prometheus-k8s.monitoring:9090
|
externalUrl: http://prometheus-k8s.monitoring:9090
|
||||||
image: quay.io/prometheus/prometheus:v2.22.1
|
image: quay.io/prometheus/prometheus:v2.26.0
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/os: linux
|
kubernetes.io/os: linux
|
||||||
|
podMetadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
podMonitorNamespaceSelector:
|
podMonitorNamespaceSelector:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
- key: prometheus
|
- key: prometheus
|
||||||
|
@ -25,7 +37,7 @@ spec:
|
||||||
podMonitorSelector: {}
|
podMonitorSelector: {}
|
||||||
probeNamespaceSelector: {}
|
probeNamespaceSelector: {}
|
||||||
probeSelector: {}
|
probeSelector: {}
|
||||||
replicas: 1
|
replicas: 2
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: 400Mi
|
memory: 400Mi
|
||||||
|
@ -58,4 +70,4 @@ spec:
|
||||||
requests:
|
requests:
|
||||||
storage: 10Gi
|
storage: 10Gi
|
||||||
storageClassName: local-path
|
storageClassName: local-path
|
||||||
version: v2.22.1
|
version: 2.26.0
|
||||||
|
|
|
@ -0,0 +1,256 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: prometheus-k8s-prometheus-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: prometheus
|
||||||
|
rules:
|
||||||
|
- alert: PrometheusBadConfig
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||||
|
reload its configuration.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusbadconfig
|
||||||
|
summary: Failed Prometheus configuration reload.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="default"}[5m]) == 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: PrometheusNotificationQueueRunningFull
|
||||||
|
annotations:
|
||||||
|
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
|
||||||
|
is running full.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotificationqueuerunningfull
|
||||||
|
summary: Prometheus alert notification queue predicted to run full in less
|
||||||
|
than 30m.
|
||||||
|
expr: |
|
||||||
|
# Without min_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
(
|
||||||
|
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="default"}[5m], 60 * 30)
|
||||||
|
>
|
||||||
|
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
)
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
|
||||||
|
annotations:
|
||||||
|
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
|
||||||
|
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstosomealertmanagers
|
||||||
|
summary: Prometheus has encountered more than 1% errors sending alerts to
|
||||||
|
a specific Alertmanager.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
/
|
||||||
|
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
)
|
||||||
|
* 100
|
||||||
|
> 1
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusNotConnectedToAlertmanagers
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
|
||||||
|
to any Alertmanagers.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotconnectedtoalertmanagers
|
||||||
|
summary: Prometheus is not connected to any Alertmanagers.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="default"}[5m]) < 1
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusTSDBReloadsFailing
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||||
|
{{$value | humanize}} reload failures over the last 3h.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbreloadsfailing
|
||||||
|
summary: Prometheus has issues reloading blocks from disk.
|
||||||
|
expr: |
|
||||||
|
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
|
||||||
|
for: 4h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusTSDBCompactionsFailing
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||||
|
{{$value | humanize}} compaction failures over the last 3h.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbcompactionsfailing
|
||||||
|
summary: Prometheus has issues compacting blocks.
|
||||||
|
expr: |
|
||||||
|
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
|
||||||
|
for: 4h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusNotIngestingSamples
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
|
||||||
|
samples.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotingestingsamples
|
||||||
|
summary: Prometheus is not ingesting samples.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="default"}[5m]) <= 0
|
||||||
|
and
|
||||||
|
(
|
||||||
|
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="default"}) > 0
|
||||||
|
or
|
||||||
|
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="default"}) > 0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusDuplicateTimestamps
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||||
|
{{ printf "%.4g" $value }} samples/s with different values but duplicated
|
||||||
|
timestamp.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusduplicatetimestamps
|
||||||
|
summary: Prometheus is dropping samples with duplicate timestamps.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOutOfOrderTimestamps
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||||
|
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoutofordertimestamps
|
||||||
|
summary: Prometheus drops samples with out-of-order timestamps.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusRemoteStorageFailures
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
|
||||||
|
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
|
||||||
|
$labels.url }}
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotestoragefailures
|
||||||
|
summary: Prometheus fails to send samples to remote storage.
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
|
||||||
|
/
|
||||||
|
(
|
||||||
|
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
|
||||||
|
+
|
||||||
|
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="default"}[5m]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
* 100
|
||||||
|
> 1
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: PrometheusRemoteWriteBehind
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||||
|
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
|
||||||
|
}}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritebehind
|
||||||
|
summary: Prometheus remote write is behind.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
(
|
||||||
|
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
- ignoring(remote_name, url) group_right
|
||||||
|
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
)
|
||||||
|
> 120
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: PrometheusRemoteWriteDesiredShards
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||||
|
desired shards calculation wants to run {{ $value }} shards for queue {{
|
||||||
|
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
|
||||||
|
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="default"}`
|
||||||
|
$labels.instance | query | first | value }}.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritedesiredshards
|
||||||
|
summary: Prometheus remote write desired shards calculation wants to run more
|
||||||
|
than configured max shards.
|
||||||
|
expr: |
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
(
|
||||||
|
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
>
|
||||||
|
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="default"}[5m])
|
||||||
|
)
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusRuleFailures
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||||
|
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusrulefailures
|
||||||
|
summary: Prometheus is failing rule evaluations.
|
||||||
|
expr: |
|
||||||
|
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
- alert: PrometheusMissingRuleEvaluations
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
|
||||||
|
printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusmissingruleevaluations
|
||||||
|
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
||||||
|
expr: |
|
||||||
|
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusTargetLimitHit
|
||||||
|
annotations:
|
||||||
|
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
|
||||||
|
{{ printf "%.0f" $value }} targets because the number of targets exceeded
|
||||||
|
the configured target_limit.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustargetlimithit
|
||||||
|
summary: Prometheus has dropped targets because some scrape configs have exceeded
|
||||||
|
the targets limit.
|
||||||
|
expr: |
|
||||||
|
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||||
|
annotations:
|
||||||
|
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
||||||
|
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstoanyalertmanager
|
||||||
|
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||||
|
expr: |
|
||||||
|
min without (alertmanager) (
|
||||||
|
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
|
||||||
|
/
|
||||||
|
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
|
||||||
|
)
|
||||||
|
* 100
|
||||||
|
> 3
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
|
@ -1,8 +1,13 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s-config
|
name: prometheus-k8s-config
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
kind: Role
|
kind: Role
|
||||||
|
@ -10,4 +15,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -3,6 +3,11 @@ items:
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: default
|
namespace: default
|
||||||
roleRef:
|
roleRef:
|
||||||
|
@ -12,10 +17,15 @@ items:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: kube-system
|
namespace: kube-system
|
||||||
roleRef:
|
roleRef:
|
||||||
|
@ -25,12 +35,17 @@ items:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
kind: Role
|
kind: Role
|
||||||
|
@ -38,31 +53,5 @@ items:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: k8up
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: monitoring
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: owntracks
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: monitoring
|
|
||||||
kind: RoleBindingList
|
kind: RoleBindingList
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s-config
|
name: prometheus-k8s-config
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- ""
|
- ""
|
||||||
|
|
|
@ -3,6 +3,11 @@ items:
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: default
|
namespace: default
|
||||||
rules:
|
rules:
|
||||||
|
@ -24,9 +29,22 @@ items:
|
||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- networking.k8s.io
|
||||||
|
resources:
|
||||||
|
- ingresses
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: kube-system
|
namespace: kube-system
|
||||||
rules:
|
rules:
|
||||||
|
@ -48,11 +66,24 @@ items:
|
||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- networking.k8s.io
|
||||||
|
resources:
|
||||||
|
- ingresses
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
- apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- ""
|
- ""
|
||||||
|
@ -72,48 +103,8 @@ items:
|
||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: k8up
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- ""
|
- networking.k8s.io
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: owntracks
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
resources:
|
||||||
- ingresses
|
- ingresses
|
||||||
verbs:
|
verbs:
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,9 +2,13 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
prometheus: k8s
|
prometheus: k8s
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
ports:
|
ports:
|
||||||
- name: web
|
- name: web
|
||||||
|
@ -12,5 +16,8 @@ spec:
|
||||||
targetPort: web
|
targetPort: web
|
||||||
selector:
|
selector:
|
||||||
app: prometheus
|
app: prometheus
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
prometheus: k8s
|
prometheus: k8s
|
||||||
sessionAffinity: ClientIP
|
sessionAffinity: ClientIP
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
name: prometheus-k8s
|
name: prometheus-k8s
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -2,13 +2,19 @@ apiVersion: monitoring.coreos.com/v1
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
k8s-app: prometheus
|
app.kubernetes.io/component: prometheus
|
||||||
name: prometheus
|
app.kubernetes.io/name: prometheus
|
||||||
namespace: monitoring
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 2.26.0
|
||||||
|
name: prometheus-k8s
|
||||||
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- interval: 30s
|
- interval: 30s
|
||||||
port: web
|
port: web
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: prometheus
|
||||||
|
app.kubernetes.io/name: prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
prometheus: k8s
|
prometheus: k8s
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: apiserver
|
|
||||||
name: kube-apiserver
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
interval: 30s
|
|
||||||
metricRelabelings:
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: etcd_(debugging|disk|server).*
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_admission_controller_admission_latencies_seconds_.*
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_admission_step_admission_latencies_seconds_.*
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- le
|
|
||||||
port: https
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
serverName: kubernetes
|
|
||||||
jobLabel: component
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- default
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
component: apiserver
|
|
||||||
provider: kubernetes
|
|
|
@ -1,19 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: coredns
|
|
||||||
name: coredns
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
interval: 15s
|
|
||||||
port: metrics
|
|
||||||
jobLabel: k8s-app
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kube-dns
|
|
|
@ -1,10 +1,3 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-controller-manager
|
|
||||||
name: kube-controller-manager
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- interval: 30s
|
- interval: 30s
|
||||||
|
@ -46,10 +39,3 @@ spec:
|
||||||
sourceLabels:
|
sourceLabels:
|
||||||
- __name__
|
- __name__
|
||||||
port: http-metrics
|
port: http-metrics
|
||||||
jobLabel: k8s-app
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kube-controller-manager
|
|
||||||
|
|
|
@ -1,18 +1,4 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-scheduler
|
|
||||||
name: kube-scheduler
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- interval: 30s
|
- interval: 30s
|
||||||
port: http-metrics
|
port: http-metrics
|
||||||
jobLabel: k8s-app
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kube-scheduler
|
|
||||||
|
|
|
@ -1,90 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kubelet
|
|
||||||
name: kubelet
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
honorLabels: true
|
|
||||||
interval: 30s
|
|
||||||
metricRelabelings:
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
port: https-metrics
|
|
||||||
relabelings:
|
|
||||||
- sourceLabels:
|
|
||||||
- __metrics_path__
|
|
||||||
targetLabel: metrics_path
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
honorLabels: true
|
|
||||||
honorTimestamps: false
|
|
||||||
interval: 30s
|
|
||||||
metricRelabelings:
|
|
||||||
- action: drop
|
|
||||||
regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
path: /metrics/cadvisor
|
|
||||||
port: https-metrics
|
|
||||||
relabelings:
|
|
||||||
- sourceLabels:
|
|
||||||
- __metrics_path__
|
|
||||||
targetLabel: metrics_path
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
honorLabels: true
|
|
||||||
interval: 30s
|
|
||||||
path: /metrics/probes
|
|
||||||
port: https-metrics
|
|
||||||
relabelings:
|
|
||||||
- sourceLabels:
|
|
||||||
- __metrics_path__
|
|
||||||
targetLabel: metrics_path
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
jobLabel: k8s-app
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kubelet
|
|
|
@ -1,4 +1,4 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: monitoring
|
name: default
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: exporter
|
||||||
|
app.kubernetes.io/name: kube-prometheus
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: kube-prometheus-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: general.rules
|
||||||
|
rules:
|
||||||
|
- alert: TargetDown
|
||||||
|
annotations:
|
||||||
|
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
||||||
|
}} targets in {{ $labels.namespace }} namespace are down.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/targetdown
|
||||||
|
summary: One or more targets are unreachable.
|
||||||
|
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
|
||||||
|
namespace, service)) > 10
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: Watchdog
|
||||||
|
annotations:
|
||||||
|
description: |
|
||||||
|
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||||
|
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||||
|
and always fire against a receiver. There are integrations with various notification
|
||||||
|
mechanisms that send a notification when this alert is not firing. For example the
|
||||||
|
"DeadMansSnitch" integration in PagerDuty.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/watchdog
|
||||||
|
summary: An alert that should always be firing to certify that Alertmanager
|
||||||
|
is working properly.
|
||||||
|
expr: vector(1)
|
||||||
|
labels:
|
||||||
|
severity: none
|
||||||
|
- name: node-network
|
||||||
|
rules:
|
||||||
|
- alert: NodeNetworkInterfaceFlapping
|
||||||
|
annotations:
|
||||||
|
message: Network interface "{{ $labels.device }}" changing its up status
|
||||||
|
often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkinterfaceflapping
|
||||||
|
expr: |
|
||||||
|
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- name: kube-prometheus-node-recording.rules
|
||||||
|
rules:
|
||||||
|
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
|
||||||
|
BY (instance)
|
||||||
|
record: instance:node_cpu:rate:sum
|
||||||
|
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
||||||
|
record: instance:node_network_receive_bytes:rate:sum
|
||||||
|
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
||||||
|
record: instance:node_network_transmit_bytes:rate:sum
|
||||||
|
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
||||||
|
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
||||||
|
BY (instance, cpu)) BY (instance)
|
||||||
|
record: instance:node_cpu:ratio
|
||||||
|
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
||||||
|
record: cluster:node_cpu:sum_rate5m
|
||||||
|
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
||||||
|
BY (instance, cpu))
|
||||||
|
record: cluster:node_cpu:ratio
|
||||||
|
- name: kube-prometheus-general.rules
|
||||||
|
rules:
|
||||||
|
- expr: count without(instance, pod, node) (up == 1)
|
||||||
|
record: count:up1
|
||||||
|
- expr: count without(instance, pod, node) (up == 0)
|
||||||
|
record: count:up0
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: AlertmanagerConfig
|
kind: AlertmanagerConfig
|
||||||
listKind: AlertmanagerConfigList
|
listKind: AlertmanagerConfigList
|
||||||
plural: alertmanagerconfigs
|
plural: alertmanagerconfigs
|
||||||
|
@ -60,6 +62,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
name:
|
name:
|
||||||
description: Label to match.
|
description: Label to match.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
regex:
|
regex:
|
||||||
description: Whether to match on equality (false) or regular-expression
|
description: Whether to match on equality (false) or regular-expression
|
||||||
|
@ -70,7 +73,6 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- name
|
- name
|
||||||
- value
|
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
targetMatch:
|
targetMatch:
|
||||||
|
@ -82,6 +84,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
name:
|
name:
|
||||||
description: Label to match.
|
description: Label to match.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
regex:
|
regex:
|
||||||
description: Whether to match on equality (false) or regular-expression
|
description: Whether to match on equality (false) or regular-expression
|
||||||
|
@ -92,7 +95,6 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- name
|
- name
|
||||||
- value
|
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
|
@ -108,9 +110,13 @@ spec:
|
||||||
description: EmailConfig configures notifications via Email.
|
description: EmailConfig configures notifications via Email.
|
||||||
properties:
|
properties:
|
||||||
authIdentity:
|
authIdentity:
|
||||||
|
description: The identity to use for authentication.
|
||||||
type: string
|
type: string
|
||||||
authPassword:
|
authPassword:
|
||||||
description: SecretKeySelector selects a key of a Secret.
|
description: The secret's key that contains the password
|
||||||
|
to use for authentication. The secret needs to be in
|
||||||
|
the same namespace as the AlertmanagerConfig object
|
||||||
|
and accessible by the Prometheus Operator.
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: The key of the secret to select from. Must
|
description: The key of the secret to select from. Must
|
||||||
|
@ -129,7 +135,10 @@ spec:
|
||||||
- key
|
- key
|
||||||
type: object
|
type: object
|
||||||
authSecret:
|
authSecret:
|
||||||
description: SecretKeySelector selects a key of a Secret.
|
description: The secret's key that contains the CRAM-MD5
|
||||||
|
secret. The secret needs to be in the same namespace
|
||||||
|
as the AlertmanagerConfig object and accessible by the
|
||||||
|
Prometheus Operator.
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: The key of the secret to select from. Must
|
description: The key of the secret to select from. Must
|
||||||
|
@ -148,7 +157,7 @@ spec:
|
||||||
- key
|
- key
|
||||||
type: object
|
type: object
|
||||||
authUsername:
|
authUsername:
|
||||||
description: SMTP authentication information.
|
description: The username to use for authentication.
|
||||||
type: string
|
type: string
|
||||||
from:
|
from:
|
||||||
description: The sender address.
|
description: The sender address.
|
||||||
|
@ -162,6 +171,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: Key of the tuple.
|
description: Key of the tuple.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
description: Value of the tuple.
|
description: Value of the tuple.
|
||||||
|
@ -321,6 +331,7 @@ spec:
|
||||||
name:
|
name:
|
||||||
description: Name of the receiver. Must be unique across all
|
description: Name of the receiver. Must be unique across all
|
||||||
items from the list.
|
items from the list.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
opsgenieConfigs:
|
opsgenieConfigs:
|
||||||
description: List of OpsGenie configurations.
|
description: List of OpsGenie configurations.
|
||||||
|
@ -364,6 +375,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: Key of the tuple.
|
description: Key of the tuple.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
description: Value of the tuple.
|
description: Value of the tuple.
|
||||||
|
@ -590,8 +602,8 @@ spec:
|
||||||
description: List of responders responsible for notifications.
|
description: List of responders responsible for notifications.
|
||||||
items:
|
items:
|
||||||
description: OpsGenieConfigResponder defines a responder
|
description: OpsGenieConfigResponder defines a responder
|
||||||
to an incident. One of id, name or username has to
|
to an incident. One of `id`, `name` or `username`
|
||||||
be defined.
|
has to be defined.
|
||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
description: ID of the responder.
|
description: ID of the responder.
|
||||||
|
@ -601,10 +613,13 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
type:
|
type:
|
||||||
description: Type of responder.
|
description: Type of responder.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
username:
|
username:
|
||||||
description: Username of the responder.
|
description: Username of the responder.
|
||||||
type: string
|
type: string
|
||||||
|
required:
|
||||||
|
- type
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
sendResolved:
|
sendResolved:
|
||||||
|
@ -649,6 +664,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: Key of the tuple.
|
description: Key of the tuple.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
description: Value of the tuple.
|
description: Value of the tuple.
|
||||||
|
@ -1163,8 +1179,11 @@ spec:
|
||||||
description: Notification title.
|
description: Notification title.
|
||||||
type: string
|
type: string
|
||||||
token:
|
token:
|
||||||
description: Your registered application’s API token,
|
description: The secret's key that contains the registered
|
||||||
see https://pushover.net/apps
|
application’s API token, see https://pushover.net/apps.
|
||||||
|
The secret needs to be in the same namespace as the
|
||||||
|
AlertmanagerConfig object and accessible by the Prometheus
|
||||||
|
Operator.
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: The key of the secret to select from. Must
|
description: The key of the secret to select from. Must
|
||||||
|
@ -1190,7 +1209,10 @@ spec:
|
||||||
just the URL is shown
|
just the URL is shown
|
||||||
type: string
|
type: string
|
||||||
userKey:
|
userKey:
|
||||||
description: The recipient user’s user key.
|
description: The secret's key that contains the recipient
|
||||||
|
user’s user key. The secret needs to be in the same
|
||||||
|
namespace as the AlertmanagerConfig object and accessible
|
||||||
|
by the Prometheus Operator.
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: The key of the secret to select from. Must
|
description: The key of the secret to select from. Must
|
||||||
|
@ -1237,6 +1259,7 @@ spec:
|
||||||
okText:
|
okText:
|
||||||
type: string
|
type: string
|
||||||
text:
|
text:
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
title:
|
title:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1248,8 +1271,10 @@ spec:
|
||||||
style:
|
style:
|
||||||
type: string
|
type: string
|
||||||
text:
|
text:
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
type:
|
type:
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
url:
|
url:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1307,8 +1332,10 @@ spec:
|
||||||
short:
|
short:
|
||||||
type: boolean
|
type: boolean
|
||||||
title:
|
title:
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- title
|
- title
|
||||||
|
@ -1558,8 +1585,10 @@ spec:
|
||||||
VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config
|
VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config
|
||||||
properties:
|
properties:
|
||||||
apiKey:
|
apiKey:
|
||||||
description: The API key to use when talking to the VictorOps
|
description: The secret's key that contains the API key
|
||||||
API.
|
to use when talking to the VictorOps API. The secret
|
||||||
|
needs to be in the same namespace as the AlertmanagerConfig
|
||||||
|
object and accessible by the Prometheus Operator.
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: The key of the secret to select from. Must
|
description: The key of the secret to select from. Must
|
||||||
|
@ -1587,6 +1616,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
description: Key of the tuple.
|
description: Key of the tuple.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
description: Value of the tuple.
|
description: Value of the tuple.
|
||||||
|
@ -1820,8 +1850,6 @@ spec:
|
||||||
description: Contains long explanation of the alerted
|
description: Contains long explanation of the alerted
|
||||||
problem.
|
problem.
|
||||||
type: string
|
type: string
|
||||||
required:
|
|
||||||
- routingKey
|
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
webhookConfigs:
|
webhookConfigs:
|
||||||
|
@ -2035,8 +2063,9 @@ spec:
|
||||||
type: object
|
type: object
|
||||||
maxAlerts:
|
maxAlerts:
|
||||||
description: Maximum number of alerts to be sent per webhook
|
description: Maximum number of alerts to be sent per webhook
|
||||||
message.
|
message. When 0, all alerts are included.
|
||||||
format: int32
|
format: int32
|
||||||
|
minimum: 0
|
||||||
type: integer
|
type: integer
|
||||||
sendResolved:
|
sendResolved:
|
||||||
description: Whether or not to notify about resolved alerts.
|
description: Whether or not to notify about resolved alerts.
|
||||||
|
@ -2334,8 +2363,8 @@ spec:
|
||||||
type: array
|
type: array
|
||||||
route:
|
route:
|
||||||
description: The Alertmanager route definition for alerts matching
|
description: The Alertmanager route definition for alerts matching
|
||||||
the resource’s namespace. It will be added to the generated Alertmanager
|
the resource’s namespace. If present, it will be added to the generated
|
||||||
configuration as a first-level route.
|
Alertmanager configuration as a first-level route.
|
||||||
properties:
|
properties:
|
||||||
continue:
|
continue:
|
||||||
description: Boolean indicating whether an alert should continue
|
description: Boolean indicating whether an alert should continue
|
||||||
|
@ -2367,6 +2396,7 @@ spec:
|
||||||
properties:
|
properties:
|
||||||
name:
|
name:
|
||||||
description: Label to match.
|
description: Label to match.
|
||||||
|
minLength: 1
|
||||||
type: string
|
type: string
|
||||||
regex:
|
regex:
|
||||||
description: Whether to match on equality (false) or regular-expression
|
description: Whether to match on equality (false) or regular-expression
|
||||||
|
@ -2377,13 +2407,11 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- name
|
- name
|
||||||
- value
|
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
receiver:
|
receiver:
|
||||||
description: Name of the receiver for this route. If present,
|
description: Name of the receiver for this route. If not empty,
|
||||||
it should be listed in the `receivers` field. The field can
|
it should be listed in the `receivers` field.
|
||||||
be omitted only for nested routes otherwise it is mandatory.
|
|
||||||
type: string
|
type: string
|
||||||
repeatInterval:
|
repeatInterval:
|
||||||
description: How long to wait before repeating the last notification.
|
description: How long to wait before repeating the last notification.
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: Alertmanager
|
kind: Alertmanager
|
||||||
listKind: AlertmanagerList
|
listKind: AlertmanagerList
|
||||||
plural: alertmanagers
|
plural: alertmanagers
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: PodMonitor
|
kind: PodMonitor
|
||||||
listKind: PodMonitorList
|
listKind: PodMonitorList
|
||||||
plural: podmonitors
|
plural: podmonitors
|
||||||
|
@ -197,8 +199,10 @@ spec:
|
||||||
to proxy through this endpoint.
|
to proxy through this endpoint.
|
||||||
type: string
|
type: string
|
||||||
relabelings:
|
relabelings:
|
||||||
description: 'RelabelConfigs to apply to samples before ingestion.
|
description: 'RelabelConfigs to apply to samples before scraping.
|
||||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
Prometheus Operator automatically adds relabelings for a few
|
||||||
|
standard Kubernetes fields and replaces original scrape job
|
||||||
|
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
items:
|
items:
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
description: 'RelabelConfig allows dynamic rewriting of the
|
||||||
label set, being applied to samples before ingestion. It
|
label set, being applied to samples before ingestion. It
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: Probe
|
kind: Probe
|
||||||
listKind: ProbeList
|
listKind: ProbeList
|
||||||
plural: probes
|
plural: probes
|
||||||
|
@ -35,6 +37,68 @@ spec:
|
||||||
description: Specification of desired Ingress selection for target discovery
|
description: Specification of desired Ingress selection for target discovery
|
||||||
by Prometheus.
|
by Prometheus.
|
||||||
properties:
|
properties:
|
||||||
|
basicAuth:
|
||||||
|
description: 'BasicAuth allow an endpoint to authenticate over basic
|
||||||
|
authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||||
|
properties:
|
||||||
|
password:
|
||||||
|
description: The secret in the service monitor namespace that
|
||||||
|
contains the password for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must be
|
||||||
|
a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must be
|
||||||
|
defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
username:
|
||||||
|
description: The secret in the service monitor namespace that
|
||||||
|
contains the username for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must be
|
||||||
|
a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must be
|
||||||
|
defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
bearerTokenSecret:
|
||||||
|
description: Secret to mount to read bearer token for scraping targets.
|
||||||
|
The secret needs to be in the same namespace as the probe and accessible
|
||||||
|
by the Prometheus Operator.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must be a
|
||||||
|
valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
interval:
|
interval:
|
||||||
description: Interval at which targets are probed using the configured
|
description: Interval at which targets are probed using the configured
|
||||||
prober. If not specified Prometheus' global scrape interval is used.
|
prober. If not specified Prometheus' global scrape interval is used.
|
||||||
|
@ -190,6 +254,52 @@ spec:
|
||||||
description: Labels assigned to all metrics scraped from the
|
description: Labels assigned to all metrics scraped from the
|
||||||
targets.
|
targets.
|
||||||
type: object
|
type: object
|
||||||
|
relabelingConfigs:
|
||||||
|
description: 'RelabelConfigs to apply to samples before ingestion.
|
||||||
|
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
items:
|
||||||
|
description: 'RelabelConfig allows dynamic rewriting of
|
||||||
|
the label set, being applied to samples before ingestion.
|
||||||
|
It defines `<metric_relabel_configs>`-section of Prometheus
|
||||||
|
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
description: Action to perform based on regex matching.
|
||||||
|
Default is 'replace'
|
||||||
|
type: string
|
||||||
|
modulus:
|
||||||
|
description: Modulus to take of the hash of the source
|
||||||
|
label values.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
regex:
|
||||||
|
description: Regular expression against which the extracted
|
||||||
|
value is matched. Default is '(.*)'
|
||||||
|
type: string
|
||||||
|
replacement:
|
||||||
|
description: Replacement value against which a regex
|
||||||
|
replace is performed if the regular expression matches.
|
||||||
|
Regex capture groups are available. Default is '$1'
|
||||||
|
type: string
|
||||||
|
separator:
|
||||||
|
description: Separator placed between concatenated source
|
||||||
|
label values. default is ';'.
|
||||||
|
type: string
|
||||||
|
sourceLabels:
|
||||||
|
description: The source labels select values from existing
|
||||||
|
labels. Their content is concatenated using the configured
|
||||||
|
separator and matched against the configured regular
|
||||||
|
expression for the replace, keep, and drop actions.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
targetLabel:
|
||||||
|
description: Label to which the resulting value is written
|
||||||
|
in a replace action. It is mandatory for replace actions.
|
||||||
|
Regex capture groups are available.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
static:
|
static:
|
||||||
description: Targets is a list of URLs to probe using the
|
description: Targets is a list of URLs to probe using the
|
||||||
configured prober.
|
configured prober.
|
||||||
|
@ -198,6 +308,112 @@ spec:
|
||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
|
tlsConfig:
|
||||||
|
description: TLS configuration to use when scraping the endpoint.
|
||||||
|
properties:
|
||||||
|
ca:
|
||||||
|
description: Struct containing the CA cert to use for the targets.
|
||||||
|
properties:
|
||||||
|
configMap:
|
||||||
|
description: ConfigMap containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key to select.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the ConfigMap or its key
|
||||||
|
must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
secret:
|
||||||
|
description: Secret containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must
|
||||||
|
be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must
|
||||||
|
be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
cert:
|
||||||
|
description: Struct containing the client cert file for the targets.
|
||||||
|
properties:
|
||||||
|
configMap:
|
||||||
|
description: ConfigMap containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key to select.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the ConfigMap or its key
|
||||||
|
must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
secret:
|
||||||
|
description: Secret containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must
|
||||||
|
be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must
|
||||||
|
be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
insecureSkipVerify:
|
||||||
|
description: Disable target certificate validation.
|
||||||
|
type: boolean
|
||||||
|
keySecret:
|
||||||
|
description: Secret containing the client key file for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must be
|
||||||
|
a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must be
|
||||||
|
defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
serverName:
|
||||||
|
description: Used to verify the hostname for the targets.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- spec
|
- spec
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: Prometheus
|
kind: Prometheus
|
||||||
listKind: PrometheusList
|
listKind: PrometheusList
|
||||||
plural: prometheuses
|
plural: prometheuses
|
||||||
|
@ -2199,6 +2201,15 @@ spec:
|
||||||
only clients authorized to perform these actions can do so. For
|
only clients authorized to perform these actions can do so. For
|
||||||
more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis'
|
more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis'
|
||||||
type: boolean
|
type: boolean
|
||||||
|
enableFeatures:
|
||||||
|
description: Enable access to Prometheus disabled features. By default,
|
||||||
|
no features are enabled. Enabling disabled features is entirely
|
||||||
|
outside the scope of what the maintainers will support and by doing
|
||||||
|
so, you accept that this behaviour may break at any time without
|
||||||
|
notice. For more information see https://prometheus.io/docs/prometheus/latest/disabled_features/
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
enforcedNamespaceLabel:
|
enforcedNamespaceLabel:
|
||||||
description: EnforcedNamespaceLabel enforces adding a namespace label
|
description: EnforcedNamespaceLabel enforces adding a namespace label
|
||||||
of origin for each alert and metric that is user created. The label
|
of origin for each alert and metric that is user created. The label
|
||||||
|
@ -3388,8 +3399,8 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
podMonitorNamespaceSelector:
|
podMonitorNamespaceSelector:
|
||||||
description: Namespaces to be selected for PodMonitor discovery. If
|
description: Namespace's labels to match for PodMonitor discovery.
|
||||||
nil, only check own namespace.
|
If nil, only check own namespace.
|
||||||
properties:
|
properties:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
description: matchExpressions is a list of label selector requirements.
|
description: matchExpressions is a list of label selector requirements.
|
||||||
|
@ -3682,7 +3693,7 @@ spec:
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
bearerToken:
|
bearerToken:
|
||||||
description: bearer token for remote read.
|
description: Bearer token for remote read.
|
||||||
type: string
|
type: string
|
||||||
bearerTokenFile:
|
bearerTokenFile:
|
||||||
description: File to read bearer token for remote read.
|
description: File to read bearer token for remote read.
|
||||||
|
@ -3893,11 +3904,32 @@ spec:
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
bearerToken:
|
bearerToken:
|
||||||
description: File to read bearer token for remote write.
|
description: Bearer token for remote write.
|
||||||
type: string
|
type: string
|
||||||
bearerTokenFile:
|
bearerTokenFile:
|
||||||
description: File to read bearer token for remote write.
|
description: File to read bearer token for remote write.
|
||||||
type: string
|
type: string
|
||||||
|
headers:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Custom HTTP headers to be sent along with each
|
||||||
|
remote write request. Be aware that headers that are set by
|
||||||
|
Prometheus itself can't be overwritten. Only valid in Prometheus
|
||||||
|
versions 2.25.0 and newer.
|
||||||
|
type: object
|
||||||
|
metadataConfig:
|
||||||
|
description: MetadataConfig configures the sending of series
|
||||||
|
metadata to remote storage.
|
||||||
|
properties:
|
||||||
|
send:
|
||||||
|
description: Whether metric metadata is sent to remote storage
|
||||||
|
or not.
|
||||||
|
type: boolean
|
||||||
|
sendInterval:
|
||||||
|
description: How frequently metric metadata is sent to remote
|
||||||
|
storage.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
name:
|
name:
|
||||||
description: The name of the remote write queue, must be unique
|
description: The name of the remote write queue, must be unique
|
||||||
if specified. The name is used in metrics and logging in order
|
if specified. The name is used in metrics and logging in order
|
||||||
|
@ -4168,7 +4200,8 @@ spec:
|
||||||
(milliseconds seconds minutes hours days weeks years).
|
(milliseconds seconds minutes hours days weeks years).
|
||||||
type: string
|
type: string
|
||||||
retentionSize:
|
retentionSize:
|
||||||
description: Maximum amount of disk space used by blocks.
|
description: 'Maximum amount of disk space used by blocks. Supported
|
||||||
|
units: B, KB, MB, GB, TB, PB, EB. Ex: `512MB`.'
|
||||||
type: string
|
type: string
|
||||||
routePrefix:
|
routePrefix:
|
||||||
description: The route prefix Prometheus registers HTTP handlers for.
|
description: The route prefix Prometheus registers HTTP handlers for.
|
||||||
|
@ -4435,7 +4468,7 @@ spec:
|
||||||
to use to run the Prometheus Pods.
|
to use to run the Prometheus Pods.
|
||||||
type: string
|
type: string
|
||||||
serviceMonitorNamespaceSelector:
|
serviceMonitorNamespaceSelector:
|
||||||
description: Namespaces to be selected for ServiceMonitor discovery.
|
description: Namespace's labels to match for ServiceMonitor discovery.
|
||||||
If nil, only check own namespace.
|
If nil, only check own namespace.
|
||||||
properties:
|
properties:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
|
@ -5072,6 +5105,11 @@ spec:
|
||||||
required:
|
required:
|
||||||
- key
|
- key
|
||||||
type: object
|
type: object
|
||||||
|
tracingConfigFile:
|
||||||
|
description: TracingConfig specifies the path of the tracing configuration
|
||||||
|
file. When used alongside with TracingConfig, TracingConfigFile
|
||||||
|
takes precedence.
|
||||||
|
type: string
|
||||||
version:
|
version:
|
||||||
description: Version describes the version of Thanos to use.
|
description: Version describes the version of Thanos to use.
|
||||||
type: string
|
type: string
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
listKind: ServiceMonitorList
|
listKind: ServiceMonitorList
|
||||||
plural: servicemonitors
|
plural: servicemonitors
|
||||||
|
@ -184,7 +186,9 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
relabelings:
|
relabelings:
|
||||||
description: 'RelabelConfigs to apply to samples before scraping.
|
description: 'RelabelConfigs to apply to samples before scraping.
|
||||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
Prometheus Operator automatically adds relabelings for a few
|
||||||
|
standard Kubernetes fields and replaces original scrape job
|
||||||
|
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
items:
|
items:
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
description: 'RelabelConfig allows dynamic rewriting of the
|
||||||
label set, being applied to samples before ingestion. It
|
label set, being applied to samples before ingestion. It
|
||||||
|
|
|
@ -8,6 +8,8 @@ metadata:
|
||||||
spec:
|
spec:
|
||||||
group: monitoring.coreos.com
|
group: monitoring.coreos.com
|
||||||
names:
|
names:
|
||||||
|
categories:
|
||||||
|
- prometheus-operator
|
||||||
kind: ThanosRuler
|
kind: ThanosRuler
|
||||||
listKind: ThanosRulerList
|
listKind: ThanosRulerList
|
||||||
plural: thanosrulers
|
plural: thanosrulers
|
||||||
|
|
|
@ -4,7 +4,8 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|
|
@ -4,7 +4,8 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
@ -13,4 +14,4 @@ roleRef:
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -4,27 +4,30 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
- --kubelet-service=kube-system/kubelet
|
- --kubelet-service=kube-system/kubelet
|
||||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.44.1
|
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.47.0
|
||||||
image: quay.io/prometheus-operator/prometheus-operator:v0.44.1
|
image: quay.io/prometheus-operator/prometheus-operator:v0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
@ -48,12 +51,19 @@ spec:
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8443
|
- containerPort: 8443
|
||||||
name: https
|
name: https
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 20m
|
||||||
|
memory: 40Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsGroup: 65532
|
runAsGroup: 65532
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 65532
|
runAsUser: 65532
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
beta.kubernetes.io/os: linux
|
kubernetes.io/os: linux
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 65534
|
runAsUser: 65534
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: controller
|
||||||
|
app.kubernetes.io/name: prometheus-operator
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
|
prometheus: k8s
|
||||||
|
role: alert-rules
|
||||||
|
name: prometheus-operator-rules
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: prometheus-operator
|
||||||
|
rules:
|
||||||
|
- alert: PrometheusOperatorListErrors
|
||||||
|
annotations:
|
||||||
|
description: Errors while performing List operations in controller {{$labels.controller}}
|
||||||
|
in {{$labels.namespace}} namespace.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
|
||||||
|
summary: Errors while performing list operations in controller.
|
||||||
|
expr: |
|
||||||
|
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorWatchErrors
|
||||||
|
annotations:
|
||||||
|
description: Errors while performing watch operations in controller {{$labels.controller}}
|
||||||
|
in {{$labels.namespace}} namespace.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
|
||||||
|
summary: Errors while performing watch operations in controller.
|
||||||
|
expr: |
|
||||||
|
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorSyncFailed
|
||||||
|
annotations:
|
||||||
|
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
|
||||||
|
namespace fails to reconcile {{ $value }} objects.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
|
||||||
|
summary: Last controller reconciliation failed
|
||||||
|
expr: |
|
||||||
|
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="default"}[5m]) > 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorReconcileErrors
|
||||||
|
annotations:
|
||||||
|
description: '{{ $value | humanizePercentage }} of reconciling operations
|
||||||
|
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
|
||||||
|
namespace.'
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
|
||||||
|
summary: Errors while reconciling controller.
|
||||||
|
expr: |
|
||||||
|
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="default"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="default"}[5m]))) > 0.1
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorNodeLookupErrors
|
||||||
|
annotations:
|
||||||
|
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
|
||||||
|
Namespace.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
|
||||||
|
summary: Errors while reconciling Prometheus.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="default"}[5m]) > 0.1
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorNotReady
|
||||||
|
annotations:
|
||||||
|
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
|
||||||
|
ready to reconcile {{ $labels.controller }} resources.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
|
||||||
|
summary: Prometheus operator not ready
|
||||||
|
expr: |
|
||||||
|
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="default"}[5m]) == 0)
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorRejectedResources
|
||||||
|
annotations:
|
||||||
|
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
|
||||||
|
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
|
||||||
|
}} resources.
|
||||||
|
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
|
||||||
|
summary: Resources rejected by Prometheus operator
|
||||||
|
expr: |
|
||||||
|
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="default"}[5m]) > 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
|
@ -4,9 +4,10 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
clusterIP: None
|
clusterIP: None
|
||||||
ports:
|
ports:
|
||||||
|
@ -16,3 +17,4 @@ spec:
|
||||||
selector:
|
selector:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
|
|
@ -4,6 +4,7 @@ metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.44.1
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
|
app.kubernetes.io/version: 0.47.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: default
|
||||||
|
|
|
@ -8,7 +8,7 @@ local statefulSet = k.apps.v1.statefulSet;
|
||||||
local selector = statefulSet.mixin.spec.selectorType;
|
local selector = statefulSet.mixin.spec.selectorType;
|
||||||
|
|
||||||
local kp =
|
local kp =
|
||||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
(import 'kube-prometheus/main.libsonnet') +
|
||||||
(import 'prometheus-pushgateway/pushgateway.libsonnet') +
|
(import 'prometheus-pushgateway/pushgateway.libsonnet') +
|
||||||
(import 'k3s.libsonnet')
|
(import 'k3s.libsonnet')
|
||||||
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
github.com/etcd-io/etcd/Documentation/etcd-mixin
|
|
27
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
27
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
|
@ -7,7 +7,7 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
imageRepos+:: {
|
imageRepos+:: {
|
||||||
grafana: 'grafana/grafana',
|
grafana: 'docker.io/grafana/grafana',
|
||||||
},
|
},
|
||||||
|
|
||||||
prometheus+:: {
|
prometheus+:: {
|
||||||
|
@ -16,6 +16,11 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
grafana+:: {
|
grafana+:: {
|
||||||
|
labels: {
|
||||||
|
'app.kubernetes.io/name': 'grafana',
|
||||||
|
'app.kubernetes.io/version': $._config.versions.grafana,
|
||||||
|
'app.kubernetes.io/component': 'grafana',
|
||||||
|
},
|
||||||
dashboards: {},
|
dashboards: {},
|
||||||
rawDashboards: {},
|
rawDashboards: {},
|
||||||
folderDashboards: {},
|
folderDashboards: {},
|
||||||
|
@ -51,6 +56,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: 'grafana-config',
|
name: 'grafana-config',
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
type: 'Opaque',
|
type: 'Opaque',
|
||||||
data: {
|
data: {
|
||||||
|
@ -67,6 +73,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: dashboardName,
|
name: dashboardName,
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
|
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
|
||||||
}
|
}
|
||||||
|
@ -79,6 +86,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: dashboardName,
|
name: dashboardName,
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
|
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
|
||||||
}
|
}
|
||||||
|
@ -95,6 +103,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: dashboardName,
|
name: dashboardName,
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
data: { [name]: $._config.grafana.rawDashboards[name] },
|
data: { [name]: $._config.grafana.rawDashboards[name] },
|
||||||
}
|
}
|
||||||
|
@ -141,6 +150,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: 'grafana-dashboards',
|
name: 'grafana-dashboards',
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
|
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
|
||||||
},
|
},
|
||||||
|
@ -151,6 +161,7 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: 'grafana-datasources',
|
name: 'grafana-datasources',
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
|
labels: $._config.grafana.labels,
|
||||||
},
|
},
|
||||||
type: 'Opaque',
|
type: 'Opaque',
|
||||||
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
|
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
|
||||||
|
@ -165,13 +176,10 @@
|
||||||
metadata: {
|
metadata: {
|
||||||
name: 'grafana',
|
name: 'grafana',
|
||||||
namespace: $._config.namespace,
|
namespace: $._config.namespace,
|
||||||
labels: {
|
labels: $._config.grafana.labels,
|
||||||
app: 'grafana',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
spec: {
|
spec: {
|
||||||
selector: $.grafana.deployment.spec.selector.matchLabels,
|
selector: $.grafana.deployment.spec.selector.matchLabels,
|
||||||
type: 'NodePort',
|
|
||||||
ports: [
|
ports: [
|
||||||
{ name: 'http', targetPort: 'http', port: 3000 },
|
{ name: 'http', targetPort: 'http', port: 3000 },
|
||||||
],
|
],
|
||||||
|
@ -189,7 +197,12 @@
|
||||||
deployment:
|
deployment:
|
||||||
local targetPort = $._config.grafana.port;
|
local targetPort = $._config.grafana.port;
|
||||||
local portName = 'http';
|
local portName = 'http';
|
||||||
local podLabels = { app: 'grafana' };
|
local podLabels = $._config.grafana.labels;
|
||||||
|
local podSelectorLabels = {
|
||||||
|
[labelName]: podLabels[labelName]
|
||||||
|
for labelName in std.objectFields(podLabels)
|
||||||
|
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
||||||
|
};
|
||||||
|
|
||||||
local configVolumeName = 'grafana-config';
|
local configVolumeName = 'grafana-config';
|
||||||
local configSecretName = 'grafana-config';
|
local configSecretName = 'grafana-config';
|
||||||
|
@ -311,7 +324,7 @@
|
||||||
spec: {
|
spec: {
|
||||||
replicas: 1,
|
replicas: 1,
|
||||||
selector: {
|
selector: {
|
||||||
matchLabels: podLabels,
|
matchLabels: podSelectorLabels,
|
||||||
},
|
},
|
||||||
template: {
|
template: {
|
||||||
metadata: {
|
metadata: {
|
||||||
|
|
|
@ -10,6 +10,11 @@
|
||||||
// scrape_interval_seconds is the global scrape interval which can be
|
// scrape_interval_seconds is the global scrape interval which can be
|
||||||
// used to dynamically adjust rate windows as a function of the interval.
|
// used to dynamically adjust rate windows as a function of the interval.
|
||||||
scrape_interval_seconds: 30,
|
scrape_interval_seconds: 30,
|
||||||
|
// Dashboard variable refresh option on Grafana (https://grafana.com/docs/grafana/latest/datasources/prometheus/).
|
||||||
|
// 0 : Never (Will never refresh the Dashboard variables values)
|
||||||
|
// 1 : On Dashboard Load (Will refresh Dashboards variables when dashboards are loaded)
|
||||||
|
// 2 : On Time Range Change (Will refresh Dashboards variables when time range will be changed)
|
||||||
|
dashboard_var_refresh: 2,
|
||||||
},
|
},
|
||||||
|
|
||||||
prometheusAlerts+:: {
|
prometheusAlerts+:: {
|
||||||
|
@ -202,51 +207,6 @@
|
||||||
summary: 'etcd cluster 99th percentile commit durations are too high.',
|
summary: 'etcd cluster 99th percentile commit durations are too high.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
alert: 'etcdHighNumberOfFailedHTTPRequests',
|
|
||||||
expr: |||
|
|
||||||
sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
|
|
||||||
without (code) > 0.01
|
|
||||||
||| % $._config,
|
|
||||||
'for': '10m',
|
|
||||||
labels: {
|
|
||||||
severity: 'warning',
|
|
||||||
},
|
|
||||||
annotations: {
|
|
||||||
description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}',
|
|
||||||
summary: 'etcd has high number of failed HTTP requests.',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
alert: 'etcdHighNumberOfFailedHTTPRequests',
|
|
||||||
expr: |||
|
|
||||||
sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
|
|
||||||
without (code) > 0.05
|
|
||||||
||| % $._config,
|
|
||||||
'for': '10m',
|
|
||||||
labels: {
|
|
||||||
severity: 'critical',
|
|
||||||
},
|
|
||||||
annotations: {
|
|
||||||
description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.',
|
|
||||||
summary: 'etcd has high number of failed HTTP requests.',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
alert: 'etcdHTTPRequestsSlow',
|
|
||||||
expr: |||
|
|
||||||
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
|
|
||||||
> 0.15
|
|
||||||
||| % $._config,
|
|
||||||
'for': '10m',
|
|
||||||
labels: {
|
|
||||||
severity: 'warning',
|
|
||||||
},
|
|
||||||
annotations: {
|
|
||||||
description: 'etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.',
|
|
||||||
summary: 'etcd instance HTTP requests are slow.',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
alert: 'etcdBackendQuotaLowSpace',
|
alert: 'etcdBackendQuotaLowSpace',
|
||||||
expr: |||
|
expr: |||
|
||||||
|
@ -283,7 +243,7 @@
|
||||||
uid: std.md5('etcd.json'),
|
uid: std.md5('etcd.json'),
|
||||||
title: 'etcd',
|
title: 'etcd',
|
||||||
description: 'etcd sample Grafana dashboard with Prometheus',
|
description: 'etcd sample Grafana dashboard with Prometheus',
|
||||||
tags: [],
|
tags: [ 'etcd-mixin' ],
|
||||||
style: 'dark',
|
style: 'dark',
|
||||||
timezone: 'browser',
|
timezone: 'browser',
|
||||||
editable: true,
|
editable: true,
|
||||||
|
@ -1332,7 +1292,7 @@
|
||||||
name: 'cluster',
|
name: 'cluster',
|
||||||
options: [],
|
options: [],
|
||||||
query: 'label_values(etcd_server_has_leader, job)',
|
query: 'label_values(etcd_server_has_leader, job)',
|
||||||
refresh: 1,
|
refresh: $._config.dashboard_var_refresh,
|
||||||
regex: '',
|
regex: '',
|
||||||
sort: 2,
|
sort: 2,
|
||||||
tagValuesQuery: '',
|
tagValuesQuery: '',
|
57
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudmonitoring.libsonnet
generated
vendored
Normal file
57
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudmonitoring.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Creates a [Google Cloud Monitoring target](https://grafana.com/docs/grafana/latest/datasources/cloudmonitoring/)
|
||||||
|
*
|
||||||
|
* @name cloudmonitoring.target
|
||||||
|
*
|
||||||
|
* @param metric
|
||||||
|
* @param project
|
||||||
|
* @param filters (optional)
|
||||||
|
* @param groupBys (optional)
|
||||||
|
* @param period (default: `'cloud-monitoring-auto'`)
|
||||||
|
* @param crossSeriesReducer (default 'REDUCE_MAX')
|
||||||
|
* @param valueType (default 'INT64')
|
||||||
|
* @param perSeriesAligner (default 'ALIGN_DELTA')
|
||||||
|
* @param metricKind (default 'CUMULATIVE')
|
||||||
|
* @param unit (optional)
|
||||||
|
* @param alias (optional)
|
||||||
|
|
||||||
|
* @return Panel target
|
||||||
|
*/
|
||||||
|
|
||||||
|
target(
|
||||||
|
metric,
|
||||||
|
project,
|
||||||
|
filters=[],
|
||||||
|
groupBys=[],
|
||||||
|
period='cloud-monitoring-auto',
|
||||||
|
crossSeriesReducer='REDUCE_MAX',
|
||||||
|
valueType='INT64',
|
||||||
|
perSeriesAligner='ALIGN_DELTA',
|
||||||
|
metricKind='CUMULATIVE',
|
||||||
|
unit=1,
|
||||||
|
alias=null,
|
||||||
|
):: {
|
||||||
|
metricQuery: {
|
||||||
|
[if alias != null then 'aliasBy']: alias,
|
||||||
|
alignmentPeriod: period,
|
||||||
|
crossSeriesReducer: crossSeriesReducer,
|
||||||
|
[if filters != null then 'filters']: filters,
|
||||||
|
[if groupBys != null then 'groupBys']: groupBys,
|
||||||
|
metricKind: metricKind,
|
||||||
|
metricType: metric,
|
||||||
|
perSeriesAligner: perSeriesAligner,
|
||||||
|
projectName: project,
|
||||||
|
unit: unit,
|
||||||
|
valueType: valueType,
|
||||||
|
},
|
||||||
|
sloQuery: {
|
||||||
|
[if alias != null then 'aliasBy']: alias,
|
||||||
|
alignmentPeriod: period,
|
||||||
|
projectName: project,
|
||||||
|
selectorName: 'select_slo_health',
|
||||||
|
serviceId: '',
|
||||||
|
sloId: '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
12
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudwatch.libsonnet
generated
vendored
12
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudwatch.libsonnet
generated
vendored
|
@ -13,6 +13,9 @@
|
||||||
* @param highResolution (default: `false`)
|
* @param highResolution (default: `false`)
|
||||||
* @param period (default: `'1m'`)
|
* @param period (default: `'1m'`)
|
||||||
* @param dimensions (optional)
|
* @param dimensions (optional)
|
||||||
|
* @param id (optional)
|
||||||
|
* @param expression (optional)
|
||||||
|
* @param hide (optional)
|
||||||
|
|
||||||
* @return Panel target
|
* @return Panel target
|
||||||
*/
|
*/
|
||||||
|
@ -26,7 +29,10 @@
|
||||||
alias=null,
|
alias=null,
|
||||||
highResolution=false,
|
highResolution=false,
|
||||||
period='1m',
|
period='1m',
|
||||||
dimensions={}
|
dimensions={},
|
||||||
|
id=null,
|
||||||
|
expression=null,
|
||||||
|
hide=null
|
||||||
):: {
|
):: {
|
||||||
region: region,
|
region: region,
|
||||||
namespace: namespace,
|
namespace: namespace,
|
||||||
|
@ -37,5 +43,9 @@
|
||||||
highResolution: highResolution,
|
highResolution: highResolution,
|
||||||
period: period,
|
period: period,
|
||||||
dimensions: dimensions,
|
dimensions: dimensions,
|
||||||
|
[if id != null then 'id']: id,
|
||||||
|
[if expression != null then 'expression']: expression,
|
||||||
|
[if hide != null then 'hide']: hide,
|
||||||
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
18
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/gauge_panel.libsonnet
generated
vendored
18
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/gauge_panel.libsonnet
generated
vendored
|
@ -36,6 +36,7 @@
|
||||||
* @method addMappings(mappings) Adds an array of value mappings.
|
* @method addMappings(mappings) Adds an array of value mappings.
|
||||||
* @method addDataLink(link) Adds a data link.
|
* @method addDataLink(link) Adds a data link.
|
||||||
* @method addDataLinks(links) Adds an array of data links.
|
* @method addDataLinks(links) Adds an array of data links.
|
||||||
|
* @param timeFrom (optional)
|
||||||
*/
|
*/
|
||||||
new(
|
new(
|
||||||
title,
|
title,
|
||||||
|
@ -58,6 +59,7 @@
|
||||||
repeat=null,
|
repeat=null,
|
||||||
repeatDirection='h',
|
repeatDirection='h',
|
||||||
repeatMaxPerRow=null,
|
repeatMaxPerRow=null,
|
||||||
|
timeFrom=null,
|
||||||
pluginVersion='7',
|
pluginVersion='7',
|
||||||
):: {
|
):: {
|
||||||
|
|
||||||
|
@ -71,6 +73,7 @@
|
||||||
[if repeat != null then 'repeat']: repeat,
|
[if repeat != null then 'repeat']: repeat,
|
||||||
[if repeat != null then 'repeatDirection']: repeatDirection,
|
[if repeat != null then 'repeatDirection']: repeatDirection,
|
||||||
[if repeat != null then 'repeatMaxPerRow']: repeatMaxPerRow,
|
[if repeat != null then 'repeatMaxPerRow']: repeatMaxPerRow,
|
||||||
|
[if timeFrom != null then 'timeFrom']: timeFrom,
|
||||||
|
|
||||||
// targets
|
// targets
|
||||||
_nextTarget:: 0,
|
_nextTarget:: 0,
|
||||||
|
@ -138,6 +141,21 @@
|
||||||
fieldConfig+: { defaults+: { links+: [link] } },
|
fieldConfig+: { defaults+: { links+: [link] } },
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Overrides
|
||||||
|
addOverride(
|
||||||
|
matcher=null,
|
||||||
|
properties=null,
|
||||||
|
):: self {
|
||||||
|
fieldConfig+: {
|
||||||
|
overrides+: [
|
||||||
|
{
|
||||||
|
[if matcher != null then 'matcher']: matcher,
|
||||||
|
[if properties != null then 'properties']: properties,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
options: {
|
options: {
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
sql:: import 'sql.libsonnet',
|
sql:: import 'sql.libsonnet',
|
||||||
graphite:: import 'graphite.libsonnet',
|
graphite:: import 'graphite.libsonnet',
|
||||||
alertCondition:: import 'alert_condition.libsonnet',
|
alertCondition:: import 'alert_condition.libsonnet',
|
||||||
|
cloudmonitoring:: import 'cloudmonitoring.libsonnet',
|
||||||
cloudwatch:: import 'cloudwatch.libsonnet',
|
cloudwatch:: import 'cloudwatch.libsonnet',
|
||||||
elasticsearch:: import 'elasticsearch.libsonnet',
|
elasticsearch:: import 'elasticsearch.libsonnet',
|
||||||
heatmapPanel:: import 'heatmap_panel.libsonnet',
|
heatmapPanel:: import 'heatmap_panel.libsonnet',
|
||||||
|
@ -27,4 +28,5 @@
|
||||||
gaugePanel:: import 'gauge_panel.libsonnet',
|
gaugePanel:: import 'gauge_panel.libsonnet',
|
||||||
barGaugePanel:: import 'bar_gauge_panel.libsonnet',
|
barGaugePanel:: import 'bar_gauge_panel.libsonnet',
|
||||||
statPanel:: import 'stat_panel.libsonnet',
|
statPanel:: import 'stat_panel.libsonnet',
|
||||||
|
transformation:: import 'transformation.libsonnet',
|
||||||
}
|
}
|
||||||
|
|
21
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
generated
vendored
21
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
generated
vendored
|
@ -21,6 +21,7 @@
|
||||||
* @param formatY2 (optional) Unit of the second Y axis
|
* @param formatY2 (optional) Unit of the second Y axis
|
||||||
* @param min (optional) Min of the Y axes
|
* @param min (optional) Min of the Y axes
|
||||||
* @param max (optional) Max of the Y axes
|
* @param max (optional) Max of the Y axes
|
||||||
|
* @param maxDataPoints (optional) If the data source supports it, sets the maximum number of data points for each series returned.
|
||||||
* @param labelY1 (optional) Label of the first Y axis
|
* @param labelY1 (optional) Label of the first Y axis
|
||||||
* @param labelY2 (optional) Label of the second Y axis
|
* @param labelY2 (optional) Label of the second Y axis
|
||||||
* @param x_axis_mode (default `'time'`) X axis mode, one of [time, series, histogram]
|
* @param x_axis_mode (default `'time'`) X axis mode, one of [time, series, histogram]
|
||||||
|
@ -57,6 +58,8 @@
|
||||||
* @param value_type (default `'individual'`) Type of tooltip value
|
* @param value_type (default `'individual'`) Type of tooltip value
|
||||||
* @param shared_tooltip (default `true`) Allow to group or split tooltips on mouseover within a chart
|
* @param shared_tooltip (default `true`) Allow to group or split tooltips on mouseover within a chart
|
||||||
* @param percentage (default: false) show as percentages
|
* @param percentage (default: false) show as percentages
|
||||||
|
* @param interval (default: null) A lower limit for the interval.
|
||||||
|
|
||||||
*
|
*
|
||||||
* @method addTarget(target) Adds a target object.
|
* @method addTarget(target) Adds a target object.
|
||||||
* @method addTargets(targets) Adds an array of targets.
|
* @method addTargets(targets) Adds an array of targets.
|
||||||
|
@ -126,8 +129,10 @@
|
||||||
value_type='individual',
|
value_type='individual',
|
||||||
shared_tooltip=true,
|
shared_tooltip=true,
|
||||||
percentage=false,
|
percentage=false,
|
||||||
|
maxDataPoints=null,
|
||||||
time_from=null,
|
time_from=null,
|
||||||
time_shift=null,
|
time_shift=null,
|
||||||
|
interval=null
|
||||||
):: {
|
):: {
|
||||||
title: title,
|
title: title,
|
||||||
[if span != null then 'span']: span,
|
[if span != null then 'span']: span,
|
||||||
|
@ -179,6 +184,7 @@
|
||||||
bars: bars,
|
bars: bars,
|
||||||
stack: stack,
|
stack: stack,
|
||||||
percentage: percentage,
|
percentage: percentage,
|
||||||
|
[if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
|
||||||
legend: {
|
legend: {
|
||||||
show: legend_show,
|
show: legend_show,
|
||||||
values: legend_values,
|
values: legend_values,
|
||||||
|
@ -204,6 +210,7 @@
|
||||||
},
|
},
|
||||||
timeFrom: time_from,
|
timeFrom: time_from,
|
||||||
timeShift: time_shift,
|
timeShift: time_shift,
|
||||||
|
[if interval != null then 'interval']: interval,
|
||||||
[if transparent == true then 'transparent']: transparent,
|
[if transparent == true then 'transparent']: transparent,
|
||||||
aliasColors: aliasColors,
|
aliasColors: aliasColors,
|
||||||
repeat: repeat,
|
repeat: repeat,
|
||||||
|
@ -288,5 +295,19 @@
|
||||||
links+: [link],
|
links+: [link],
|
||||||
},
|
},
|
||||||
addLinks(links):: std.foldl(function(p, t) p.addLink(t), links, self),
|
addLinks(links):: std.foldl(function(p, t) p.addLink(t), links, self),
|
||||||
|
addOverride(
|
||||||
|
matcher=null,
|
||||||
|
properties=null,
|
||||||
|
):: self {
|
||||||
|
fieldConfig+: {
|
||||||
|
overrides+: [
|
||||||
|
{
|
||||||
|
[if matcher != null then 'matcher']: matcher,
|
||||||
|
[if properties != null then 'properties']: properties,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/heatmap_panel.libsonnet
generated
vendored
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/heatmap_panel.libsonnet
generated
vendored
|
@ -42,6 +42,7 @@
|
||||||
* @param yBucketBound (default `'auto'`) Which bound ('lower' or 'upper') of the bucket to use
|
* @param yBucketBound (default `'auto'`) Which bound ('lower' or 'upper') of the bucket to use
|
||||||
* @param yBucketNumber (optional) Number of buckets for the Y axis
|
* @param yBucketNumber (optional) Number of buckets for the Y axis
|
||||||
* @param yBucketSize (optional) Size of Y axis buckets. Has priority over yBucketNumber
|
* @param yBucketSize (optional) Size of Y axis buckets. Has priority over yBucketNumber
|
||||||
|
* @param maxDataPoints (optional) The maximum data points per series. Used directly by some data sources and used in calculation of auto interval. With streaming data this value is used for the rolling buffer.
|
||||||
*
|
*
|
||||||
* @method addTarget(target) Adds a target object.
|
* @method addTarget(target) Adds a target object.
|
||||||
* @method addTargets(targets) Adds an array of targets.
|
* @method addTargets(targets) Adds an array of targets.
|
||||||
|
@ -83,7 +84,7 @@
|
||||||
yBucketBound='auto',
|
yBucketBound='auto',
|
||||||
yBucketNumber=null,
|
yBucketNumber=null,
|
||||||
yBucketSize=null,
|
yBucketSize=null,
|
||||||
|
maxDataPoints=null,
|
||||||
):: {
|
):: {
|
||||||
title: title,
|
title: title,
|
||||||
type: 'heatmap',
|
type: 'heatmap',
|
||||||
|
@ -135,6 +136,7 @@
|
||||||
yBucketBound: yBucketBound,
|
yBucketBound: yBucketBound,
|
||||||
[if dataFormat == 'timeseries' then 'yBucketNumber']: yBucketNumber,
|
[if dataFormat == 'timeseries' then 'yBucketNumber']: yBucketNumber,
|
||||||
[if dataFormat == 'timeseries' then 'yBucketSize']: yBucketSize,
|
[if dataFormat == 'timeseries' then 'yBucketSize']: yBucketSize,
|
||||||
|
[if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
|
||||||
|
|
||||||
_nextTarget:: 0,
|
_nextTarget:: 0,
|
||||||
addTarget(target):: self {
|
addTarget(target):: self {
|
||||||
|
|
|
@ -7,14 +7,17 @@
|
||||||
* @param rawSql The SQL query
|
* @param rawSql The SQL query
|
||||||
* @param datasource (optional)
|
* @param datasource (optional)
|
||||||
* @param format (default `'time_series'`)
|
* @param format (default `'time_series'`)
|
||||||
|
* @param alias (optional)
|
||||||
*/
|
*/
|
||||||
target(
|
target(
|
||||||
rawSql,
|
rawSql,
|
||||||
datasource=null,
|
datasource=null,
|
||||||
format='time_series',
|
format='time_series',
|
||||||
|
alias=null,
|
||||||
):: {
|
):: {
|
||||||
[if datasource != null then 'datasource']: datasource,
|
[if datasource != null then 'datasource']: datasource,
|
||||||
format: format,
|
format: format,
|
||||||
|
[if alias != null then 'alias']: alias,
|
||||||
rawSql: rawSql,
|
rawSql: rawSql,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
25
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/stat_panel.libsonnet
generated
vendored
25
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/stat_panel.libsonnet
generated
vendored
|
@ -23,9 +23,10 @@
|
||||||
* @param displayName (optional) Change the field or series name.
|
* @param displayName (optional) Change the field or series name.
|
||||||
* @param noValue (optional) What to show when there is no value.
|
* @param noValue (optional) What to show when there is no value.
|
||||||
* @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
|
* @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
|
||||||
|
* @param timeFrom (optional) Override the relative time range.
|
||||||
* @param repeat (optional) Name of variable that should be used to repeat this panel.
|
* @param repeat (optional) Name of variable that should be used to repeat this panel.
|
||||||
* @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
|
* @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
|
||||||
* @param repeatMaxPerRow (optional) Maximum panels per row in repeat mode.
|
* @param maxPerRow (optional) Maximum panels per row in repeat mode.
|
||||||
* @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for. This has been tested with the default, '7', and '6.7'.
|
* @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for. This has been tested with the default, '7', and '6.7'.
|
||||||
*
|
*
|
||||||
* @method addTarget(target) Adds a target object.
|
* @method addTarget(target) Adds a target object.
|
||||||
|
@ -59,9 +60,10 @@
|
||||||
displayName=null,
|
displayName=null,
|
||||||
noValue=null,
|
noValue=null,
|
||||||
thresholdsMode='absolute',
|
thresholdsMode='absolute',
|
||||||
|
timeFrom=null,
|
||||||
repeat=null,
|
repeat=null,
|
||||||
repeatDirection='h',
|
repeatDirection='h',
|
||||||
repeatMaxPerRow=null,
|
maxPerRow=null,
|
||||||
pluginVersion='7',
|
pluginVersion='7',
|
||||||
):: {
|
):: {
|
||||||
|
|
||||||
|
@ -74,7 +76,8 @@
|
||||||
links: [],
|
links: [],
|
||||||
[if repeat != null then 'repeat']: repeat,
|
[if repeat != null then 'repeat']: repeat,
|
||||||
[if repeat != null then 'repeatDirection']: repeatDirection,
|
[if repeat != null then 'repeatDirection']: repeatDirection,
|
||||||
[if repeat != null then 'repeatMaxPerRow']: repeatMaxPerRow,
|
[if timeFrom != null then 'timeFrom']: timeFrom,
|
||||||
|
[if repeat != null then 'maxPerRow']: maxPerRow,
|
||||||
|
|
||||||
// targets
|
// targets
|
||||||
_nextTarget:: 0,
|
_nextTarget:: 0,
|
||||||
|
@ -143,6 +146,22 @@
|
||||||
addDataLink(link):: self {
|
addDataLink(link):: self {
|
||||||
fieldConfig+: { defaults+: { links+: [link] } },
|
fieldConfig+: { defaults+: { links+: [link] } },
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Overrides
|
||||||
|
addOverride(
|
||||||
|
matcher=null,
|
||||||
|
properties=null,
|
||||||
|
):: self {
|
||||||
|
fieldConfig+: {
|
||||||
|
overrides+: [
|
||||||
|
{
|
||||||
|
[if matcher != null then 'matcher']: matcher,
|
||||||
|
[if properties != null then 'properties']: properties,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
|
||||||
} else {
|
} else {
|
||||||
options: {
|
options: {
|
||||||
fieldOptions: {
|
fieldOptions: {
|
||||||
|
|
6
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/table_panel.libsonnet
generated
vendored
6
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/table_panel.libsonnet
generated
vendored
|
@ -24,6 +24,8 @@
|
||||||
* @method addColumn(field, style) Adds a column
|
* @method addColumn(field, style) Adds a column
|
||||||
* @method hideColumn(field) Hides a column
|
* @method hideColumn(field) Hides a column
|
||||||
* @method addLink(link) Adds a link
|
* @method addLink(link) Adds a link
|
||||||
|
* @method addTransformation(transformation) Adds a transformation object
|
||||||
|
* @method addTransformations(transformations) Adds an array of transformations
|
||||||
*/
|
*/
|
||||||
new(
|
new(
|
||||||
title,
|
title,
|
||||||
|
@ -81,5 +83,9 @@
|
||||||
addLink(link):: self {
|
addLink(link):: self {
|
||||||
links+: [link],
|
links+: [link],
|
||||||
},
|
},
|
||||||
|
addTransformation(transformation):: self {
|
||||||
|
transformations+: [transformation],
|
||||||
|
},
|
||||||
|
addTransformations(transformations):: std.foldl(function(p, t) p.addTransformation(t), transformations, self),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/template.libsonnet
generated
vendored
4
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/template.libsonnet
generated
vendored
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* Creates a [template](https://grafana.com/docs/grafana/latest/variables/templates-and-variables/#templates) that can be added to a dashboard.
|
* Creates a [template](https://grafana.com/docs/grafana/latest/variables/#templates) that can be added to a dashboard.
|
||||||
*
|
*
|
||||||
* @name template.new
|
* @name template.new
|
||||||
*
|
*
|
||||||
|
@ -18,7 +18,7 @@
|
||||||
* @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
|
* @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
|
||||||
* @param sort (default `0`) `0`: Without Sort, `1`: Alphabetical (asc), `2`: Alphabetical (desc), `3`: Numerical (asc), `4`: Numerical (desc).
|
* @param sort (default `0`) `0`: Without Sort, `1`: Alphabetical (asc), `2`: Alphabetical (desc), `3`: Numerical (asc), `4`: Numerical (desc).
|
||||||
*
|
*
|
||||||
* @return A [template](https://grafana.com/docs/grafana/latest/variables/templates-and-variables/#templates)
|
* @return A [template](https://grafana.com/docs/grafana/latest/variables/#templates)
|
||||||
*/
|
*/
|
||||||
new(
|
new(
|
||||||
name,
|
name,
|
||||||
|
|
2
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/timepicker.libsonnet
generated
vendored
2
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/timepicker.libsonnet
generated
vendored
|
@ -31,8 +31,10 @@
|
||||||
'7d',
|
'7d',
|
||||||
'30d',
|
'30d',
|
||||||
],
|
],
|
||||||
|
nowDelay=null,
|
||||||
):: {
|
):: {
|
||||||
refresh_intervals: refresh_intervals,
|
refresh_intervals: refresh_intervals,
|
||||||
time_options: time_options,
|
time_options: time_options,
|
||||||
|
[if nowDelay != null then 'nowDelay']: nowDelay,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
12
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/transformation.libsonnet
generated
vendored
Normal file
12
monitoring/vendor/github.com/grafana/grafonnet-lib/grafonnet/transformation.libsonnet
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @name transformation.new
|
||||||
|
*/
|
||||||
|
new(
|
||||||
|
id='',
|
||||||
|
options={}
|
||||||
|
):: {
|
||||||
|
id: id,
|
||||||
|
options: options,
|
||||||
|
},
|
||||||
|
}
|
12
monitoring/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
generated
vendored
12
monitoring/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
generated
vendored
|
@ -44,7 +44,7 @@
|
||||||
addMultiTemplate(name, metric_name, label_name, hide=0):: self {
|
addMultiTemplate(name, metric_name, label_name, hide=0):: self {
|
||||||
templating+: {
|
templating+: {
|
||||||
list+: [{
|
list+: [{
|
||||||
allValue: null,
|
allValue: '.+',
|
||||||
current: {
|
current: {
|
||||||
selected: true,
|
selected: true,
|
||||||
text: 'All',
|
text: 'All',
|
||||||
|
@ -196,7 +196,7 @@
|
||||||
timeShift: null,
|
timeShift: null,
|
||||||
title: title,
|
title: title,
|
||||||
tooltip: {
|
tooltip: {
|
||||||
shared: true,
|
shared: false,
|
||||||
sort: 0,
|
sort: 0,
|
||||||
value_type: 'individual',
|
value_type: 'individual',
|
||||||
},
|
},
|
||||||
|
@ -382,7 +382,7 @@
|
||||||
expr:
|
expr:
|
||||||
|||
|
|||
|
||||||
sum by (status) (
|
sum by (status) (
|
||||||
label_replace(label_replace(rate(%s[$__interval]),
|
label_replace(label_replace(rate(%s[$__rate_interval]),
|
||||||
"status", "${1}xx", "%s", "([0-9]).."),
|
"status", "${1}xx", "%s", "([0-9]).."),
|
||||||
"status", "${1}", "%s", "([a-z]+)"))
|
"status", "${1}", "%s", "([a-z]+)"))
|
||||||
||| % [selector, statusLabelName, statusLabelName],
|
||| % [selector, statusLabelName, statusLabelName],
|
||||||
|
@ -399,7 +399,7 @@
|
||||||
nullPointMode: 'null as zero',
|
nullPointMode: 'null as zero',
|
||||||
targets: [
|
targets: [
|
||||||
{
|
{
|
||||||
expr: 'histogram_quantile(0.99, sum(rate(%s_bucket%s[$__interval])) by (le)) * %s' % [metricName, selector, multiplier],
|
expr: 'histogram_quantile(0.99, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier],
|
||||||
format: 'time_series',
|
format: 'time_series',
|
||||||
intervalFactor: 2,
|
intervalFactor: 2,
|
||||||
legendFormat: '99th Percentile',
|
legendFormat: '99th Percentile',
|
||||||
|
@ -407,7 +407,7 @@
|
||||||
step: 10,
|
step: 10,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
expr: 'histogram_quantile(0.50, sum(rate(%s_bucket%s[$__interval])) by (le)) * %s' % [metricName, selector, multiplier],
|
expr: 'histogram_quantile(0.50, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier],
|
||||||
format: 'time_series',
|
format: 'time_series',
|
||||||
intervalFactor: 2,
|
intervalFactor: 2,
|
||||||
legendFormat: '50th Percentile',
|
legendFormat: '50th Percentile',
|
||||||
|
@ -415,7 +415,7 @@
|
||||||
step: 10,
|
step: 10,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
expr: 'sum(rate(%s_sum%s[$__interval])) * %s / sum(rate(%s_count%s[$__interval]))' % [metricName, selector, multiplier, metricName, selector],
|
expr: 'sum(rate(%s_sum%s[$__rate_interval])) * %s / sum(rate(%s_count%s[$__rate_interval]))' % [metricName, selector, multiplier, metricName, selector],
|
||||||
format: 'time_series',
|
format: 'time_series',
|
||||||
intervalFactor: 2,
|
intervalFactor: 2,
|
||||||
legendFormat: 'Average',
|
legendFormat: 'Average',
|
||||||
|
|
|
@ -6,6 +6,8 @@ approvers:
|
||||||
- metalmatze
|
- metalmatze
|
||||||
- tomwilkie
|
- tomwilkie
|
||||||
- s-urbaniak
|
- s-urbaniak
|
||||||
|
- povilasv
|
||||||
|
- paulfantom
|
||||||
|
|
||||||
reviewers:
|
reviewers:
|
||||||
- brancz
|
- brancz
|
||||||
|
@ -13,3 +15,5 @@ reviewers:
|
||||||
- metalmatze
|
- metalmatze
|
||||||
- tomwilkie
|
- tomwilkie
|
||||||
- s-urbaniak
|
- s-urbaniak
|
||||||
|
- povilasv
|
||||||
|
- paulfantom
|
||||||
|
|
|
@ -7,15 +7,17 @@ A set of Grafana dashboards and Prometheus alerts for Kubernetes.
|
||||||
|
|
||||||
## Releases
|
## Releases
|
||||||
|
|
||||||
| Release branch | Kubernetes Compatibility | Prometheus Compatibility |
|
| Release branch | Kubernetes Compatibility | Prometheus Compatibility | Kube-state-metrics Compatibility |
|
||||||
| ------- | -------------------------- | ------------------------ |
|
| -------------- | -------------------------- | ------------------------ | -------------------------------- |
|
||||||
| release-0.1 | v1.13 and before | |
|
| release-0.1 | v1.13 and before | | |
|
||||||
| release-0.2 | v1.14.1 and before | v2.11.0+ |
|
| release-0.2 | v1.14.1 and before | v2.11.0+ | |
|
||||||
| release-0.3 | v1.17 and before | v2.11.0+ |
|
| release-0.3 | v1.17 and before | v2.11.0+ | |
|
||||||
| release-0.4 | v1.18 | v2.11.0+ |
|
| release-0.4 | v1.18 | v2.11.0+ | |
|
||||||
| release-0.5 | v1.19 | v2.11.0+ |
|
| release-0.5 | v1.19 | v2.11.0+ | |
|
||||||
| release-0.6 | v1.19+ | v2.11.0+ |
|
| release-0.6 | v1.19+ | v2.11.0+ | |
|
||||||
| master | v1.19+ | v2.11.0+ |
|
| release-0.7 | v1.19+ | v2.11.0+ | v1.x |
|
||||||
|
| release-0.8 | v1.20+ | v2.11.0+ | v2.0+ |
|
||||||
|
| master | v1.20+ | v2.11.0+ | v2.0+ |
|
||||||
|
|
||||||
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
|
In Kubernetes 1.14 there was a major [metrics overhaul](https://github.com/kubernetes/enhancements/issues/1206) implemented.
|
||||||
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis.
|
Therefore v0.1.x of this repository is the last release to support Kubernetes 1.13 and previous versions on a best-effort basis.
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue