move k8up rules to monitoring

This commit is contained in:
Tobias Brunner 2020-03-07 20:52:58 +01:00
parent 43f69e9bc0
commit 4f9035453f
4 changed files with 42 additions and 1 deletions

View File

@ -2,6 +2,7 @@ build:
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
sudo chown -R tobru. manifests/
kubeseal --controller-namespace sealed-secrets -o yaml -n monitoring < ../../gitops-tbrnt-private/monitoring/alertmanager.yaml > manifests/alertmanager-tbrnt-config-secret.yaml
cp *.yaml manifests/
.PHONY: build
update:

View File

@ -6,7 +6,7 @@ metadata:
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgCB9ENTl3ptItT5IFDHKxONg+hJ20nAClt94b8ShwMWlaNNpUlxa92HIS8nKJ8sfciyvxPgMTDx/0euv8+RYZiuU5tYSjo6H6KZJUavsFykgxLDiNGN4qORkZyHGD/ZvXjJ4Ns+pDkOcxQ2xZD59Djbnu6CLk010NFENmwAn/b2jTy55fUwW9qPuOxGEmz2RpSaIiPDz5l6LsI2lvZV54hbuefBgpKDgGUS4EnbaU0db8w6APM/rWFrGIgBXDIfj/tM4BthrOCNbbK/clDuKsUkRoEjbApqvbXf36D41uUZIHhLlF2CTT2mi6nT6mTAxsmro1YdKO828wCmBZV879E+jJldsh5RUl7EWW7X7bb+XrVlmCxagRkKQjR/AwUEgi6Zd4XEOTcdC53f9R1e0xm3/0MQpqu24rZR4kIXkJbAmgJnOshKsscW6IlfeRGIiIHWdVXGp7mePcF+hvA8/0nQkxRC8JtNNR8buDYWmXRBihfcrr2zxn/zdhBxYcE9vcc9GEmaTnwqI3f/W0nBgOy6gkMnQ2z2RqoyzkgxNX1l1CoOCIzbyGsxnAhWna47xNvACqb/PvidNI+Ivc14/cUF/uGOaktHnLKzi0r71ebMFKtXSbI/a93qs7d5cghNbnnRrrxHBmtni+lVgAnbfrR7e4FgcjLeUvKhYjTUiGLz8gfSIF4dgDed9GIG+PxklE2na9SJTTWSv99C0JhAe7x+hCxnFnQgxOiB9ThBmwHI/3MBvn6qFvWAFSystOCcjbZ3wad9J/ndSrbwy8LojHIbHMzXf0feuTkmYNsdx3B+77sa/oiZV7ewHWlURMsBEslyNX0OuFNyaSVwL/sUjOxMmUqJe9q0uxfgdSr1W8S+/7v2s/ky7k3FZ9AqsAH5NGP8apZQe1SJzvxt0hFiw2FOFqwKQMNUvacHzXUtQyDeivvu3mPFCQZbwwCFrxnZviOdmrfGQr5dhCAYmxxnUVIetSbvj77sq0c/QXKXjJOiqyqTd7WjAFwKnZDZ6Yn4isSbsaDrnDrZpQ+O4MVwKSXY5q1S0NMqdn4cfjLZYRdsufwWsULeq7Gt+SaSCaI0BHn5grsKpNh89HgNMvgYUwkjJXXRd2/I2lcQDEaTYXEs7RcRnJk1a+8DljdVd51b2VN5FkT69aSL1olaDXrfEztShbJlz+nQUkYZeSYE9dhURfvtAILg1J7tTD0eDwNggJGc7hPI62Ir+Kk0t045WobbTESGElU3hbJ5/WMA7HC6b7Mbiw8ikjglIBK1ur9BBV/8lNrdqSBh6MqWGLpRf/qaaqlfG65jIM9O8Qe70E6hWFAbWFRPcuCNA878NMMiTGJ1PQZ/owm5lrEPn833FtJCKHkAVLIrezhalCIuDvc8OOFMD08vFmvsFwHp3YtPefgZXTNqAalQYuPKsPSDMtUJCCCYRzWI0vyj27dcHwtrOsP9ovRo1U6a20tsDnhBRaAlmUQEolC/fmOsmJrBcK7QBfj9awhtBovXacZKdud8cu0Mbxo1+1fJaF5cosR+s3qSvsWWCM+sCCr0bxqUOS1kqkHe+9AQNOUkwlLKPPNfZ+z8dvjJrtJeZ6dLnJuu3+G4aoB91x4sgAcr6LugEe6MW4fYMO+Td5pMqn51N/dwMUkFQy2pCvvF41xeJuxpu9m8eAZ/EyS2pNfPkq6EeiNwvVp33BtIKA5oAZ7JzJZ18ZrOWg==
alertmanager.yaml: AgBZGe4hWNMQ1DaNFdd2SPT8feODqYuIqqUT1TivOShoVy0XjpPAGQW2v5PNc6Q3qnn6/5CGCBAcLn0K4XxuB0Oi/39rbArAeuP0BsBAoI2KveEL3GIJ6BZ87xvJc9Exup5fJ3zeGb1Az0chgd1f0xzWtZQdgGO6Ba+KZGMtqj9BZLjxSTGLueWOBU/ZUN+q4Qj7fd730u187loSeSSdbGD1okDfEWFQPT8bXz2jeRWkEc8lg/nuyNibmCwWot9fMwUXgaOYN8bTpGHIc1nx+E7MZrvgSSsLbaGH9+UYcWjbKLAju1hOTxAtyHxfgJjNxkLDynYMaNDgfOdd5rY0oQN8HVQGRBSrb2ZFJMqP2m2LfjCy+CHWg/2EStmNeILLyJkdBzq4TqcpncOHfHUADOjuTPkAA2++K1DRwAT+KAA6bzkBuAS4bKhDD2QDjZ2jzfvrT/orAdXelTsljLULnVwQkDID3GxBPJ/7EebQNe/YloHN9KXtgqCyo6yvYUj2V9tFGEfZjzb1DEASINUWJ5GTu2yFVTPRK1s4kcpAs7jYs8d4eYRsYQbCLJdgH5O2fM6zPjSkifE15iGsZ77xDO73qa9XvX9/h/nhpyZRrWj0jwbKLe9ZdopjEV8VnT+vAec1DSHxBFYSHal60Pjhd6V8P6aMKPzWlqa2CI5i9oFY8MRuR4sil7sgGhjjts9FqtaMNFLUuYd5KiK5q02OC5zBRxg1xWisDTjXNGRjg1JVw8c2wxpuFjz0ROcAwGUmo0/xDcMWGl1dQ8hkqpZ2deUTPONLD8Sbz8luv0/UhaBiNY7jK878vMgMhBc/U2QpDbjfXjVx3V6+o4L7pp3ks6PSsXqobzPeMzQMpfitB5iVJ3YuhqtdLYH/VDPalKfsomTjcA+bPy/7XFfVsHoTbZ500TzejP/KBL46Nyw21svGe8+KeM6yA2nd3Jj6YetCsagOA9fJl405Lw6kBK4hiTdkcElVWG65Ro6dSL0cdSliU85lO85EmRwBi98jebTSRxcljndvl/fENxCtHjWbbpCKEz2yOaI0dMjZdg5bLRUkXcF5wOklsolKAb9j0yU6vyQcE8iYxyi7T9pYDWniIb4jchMQHOP8njgeIMPRMexcmOCHutBwMEItYcHMFzq8zHSgRPzpXyKQ1QHy6aDJ8hVxrOCVeOKQncPlZrGTNp+XXk4PfQhBaO7iS/eLB48N4vRevaJr4i0uffL7GvPMUjTeMZYI+7vDK3nMrpvls8RQe6OWVkei8AnBFG3211YLz320Z5FYrL6zj+aSFsko6ZDODRBD9lo949x1RArcxGdKPaNhMBTNEvOX+umMkWzEvtu+xhiLfwQwyKQbm98woFRa4KDcmk7xp49kQ0gIRRMZ0/g5n7wJk0YJyRXWy494YEJNJ4IumIg8lu/iT/wml1Y/+TiI4tt371PqczOhv7FljRST0LYurtHhM1g8SPQVavMddEGNFQCl31zh+S74dHSiABgTDMRp6g7HrEPGmXDoqgMCTlWttx89stFYos8DPtPspWZN2bbg56GKR0ih/tvbzadzzQa4BiIXliPibf1HIyWc3DgfHXjFr7hivliVrIVvgcEklj3KUftMNW6FydBaSXLhdAAaq+eBH81V8Lz+fD03b+fGv8nsKGQnlbVEoaWlBYSbnWnNvYNtlfwe9FaQVJIt/kd+2XNgNzgbKGciISpeZw==
template:
metadata:
creationTimestamp: null

View File

@ -0,0 +1,40 @@
# Alerting rules for K8up backup metrics, declared as a PrometheusRule
# resource (monitoring.coreos.com/v1).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8up
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # ruleSelector matches on — confirm against the Prometheus resource.
    prometheus: k8s
    role: alert-rules
spec:
  groups:
    - name: k8up.rules
      rules:
        # The last restic backup reported at least one error.
        - alert: baas_last_errors
          expr: baas_backup_restic_last_errors > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: Amount of errors of last restic backup
            description: This alert is fired when error number is > 0
        # The failed-job counter increased at some point in the last day.
        - alert: K8upBackupFailed
          expr: rate(k8up_jobs_failed_counter[1d]) > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
        # A namespace has schedules but no job ran in the last 25 hours.
        # The sum must keep the namespace label: a bare sum() drops every
        # label, so the `and on(namespace)` match would never find a pair
        # and `$labels.namespace` in the summary would be empty.
        - alert: K8upBackupNotRunning
          expr: sum by (namespace) (rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
        # Backup jobs have been queued for 24 hours in a namespace that has
        # schedules.
        - alert: K8upJobStuck
          expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
          for: 24h
          labels:
            severity: critical
          annotations:
            summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."