Tobias Brunner
caa6d8b011
All checks were successful
continuous-integration/drone/push Build is passing
58 lines
2 KiB
Plaintext
58 lines
2 KiB
Plaintext
{
|
|
prometheusAlerts+:: {
|
|
groups+: [
|
|
{
|
|
name: 'alertmanager.rules',
|
|
rules: [
|
|
{
|
|
alert: 'AlertmanagerConfigInconsistent',
|
|
annotations: {
|
|
message: |||
|
|
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
|
|
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
|
|
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
|
|
{{ end }}
|
|
|||,
|
|
},
|
|
expr: |||
|
|
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
|
|
||| % $._config,
|
|
'for': '5m',
|
|
labels: {
|
|
severity: 'critical',
|
|
},
|
|
},
|
|
{
|
|
alert: 'AlertmanagerFailedReload',
|
|
annotations: {
|
|
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
|
|
},
|
|
expr: |||
|
|
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
|
|
||| % $._config,
|
|
'for': '10m',
|
|
labels: {
|
|
severity: 'warning',
|
|
},
|
|
},
|
|
{
|
|
alert: 'AlertmanagerMembersInconsistent',
|
|
annotations: {
|
|
message: 'Alertmanager has not found all other members of the cluster.',
|
|
},
|
|
expr: |||
|
|
alertmanager_cluster_members{%(alertmanagerSelector)s}
|
|
!= on (service) GROUP_LEFT()
|
|
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
|
|
||| % $._config,
|
|
'for': '5m',
|
|
labels: {
|
|
severity: 'critical',
|
|
},
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
}
|