add some monitoring sauce
This commit is contained in:
parent
1341ebf585
commit
eafb22dcec
16
README.md
16
README.md
|
@ -57,9 +57,23 @@ provided which regularly pings [Healthchecks.io](https://healthchecks.io/).
|
||||||
A secret with the ping URL needs to be added before the CronJob does its work:
|
A secret with the ping URL needs to be added before the CronJob does its work:
|
||||||
|
|
||||||
```
|
```
|
||||||
kubectl -n hc create secret generic healthchecks-io --from-literal=HCURL=https://hc-ping.com/MYUUID
|
kubectl -n posmon create secret generic healthchecks-io --from-literal=HCURL=https://hc-ping.com/MYUUID
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Application and network monitoring
|
||||||
|
|
||||||
|
Application monitoring is done using Prometheus, Alertmanager and
|
||||||
|
Blackbox exporter. No application specific exporters are used, so
|
||||||
|
it's just basic monitoring to answer the question: "Is it up?".
|
||||||
|
|
||||||
|
1. Install [prometheus-operator](https://github.com/coreos/prometheus-operator)
|
||||||
|
For example: `kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/master/bundle.yaml`
|
||||||
|
2. Apply manifests: `kubectl apply -f contrib/posmon/`
|
||||||
|
3. Create secret for extra scrape config:
|
||||||
|
`kubectl -n posmon create secret generic additional-scrape-configs --from-file=contrib/pos-blackbox-exporter-scrape.yaml`
|
||||||
|
4. Create secret for Alertmanager config:
|
||||||
|
`kubectl -n posmon create secret generic alertmanager-posmon --from-file=contrib/alertmanager.yaml`
|
||||||
|
|
||||||
## Backup configuration
|
## Backup configuration
|
||||||
|
|
||||||
Example contents of `backup.env`:
|
Example contents of `backup.env`:
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
global:
|
||||||
|
resolve_timeout: 5m
|
||||||
|
route:
|
||||||
|
group_by: ['job']
|
||||||
|
group_wait: 30s
|
||||||
|
group_interval: 5m
|
||||||
|
repeat_interval: 12h
|
||||||
|
receiver: 'webhook'
|
||||||
|
receivers:
|
||||||
|
- name: 'webhook'
|
||||||
|
webhook_configs:
|
||||||
|
- url: 'http://alertmanagerwh:30500/'
|
|
@ -0,0 +1,48 @@
|
||||||
|
- job_name: 'blackbox_http'
|
||||||
|
metrics_path: /probe
|
||||||
|
scrape_interval: 1m
|
||||||
|
params:
|
||||||
|
module: [http_2xx]
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- http://odoo.pos.svc.cluster.local
|
||||||
|
- http://iotbox.pos.svc.cluster.local
|
||||||
|
- http://192.168.233.1
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: __param_target
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
target_label: instance
|
||||||
|
- target_label: __address__
|
||||||
|
replacement: blackbox-exporter:9115
|
||||||
|
- job_name: 'blackbox_tcp'
|
||||||
|
metrics_path: /probe
|
||||||
|
scrape_interval: 1m
|
||||||
|
params:
|
||||||
|
module: [tcp_connect]
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- db.pos.svc.cluster.local:5432
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: __param_target
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
target_label: instance
|
||||||
|
- target_label: __address__
|
||||||
|
replacement: blackbox-exporter:9115
|
||||||
|
- job_name: 'blackbox_icmp'
|
||||||
|
metrics_path: /probe
|
||||||
|
scrape_interval: 1m
|
||||||
|
params:
|
||||||
|
module: [icmp]
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- 192.168.233.3
|
||||||
|
- 192.168.233.5
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: __param_target
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
target_label: instance
|
||||||
|
- target_label: __address__
|
||||||
|
replacement: blackbox-exporter:9115
|
|
@ -0,0 +1,4 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: posmon
|
|
@ -0,0 +1,23 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: Alertmanager
|
||||||
|
metadata:
|
||||||
|
name: posmon
|
||||||
|
namespace: posmon
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: alertmanager-posmon
|
||||||
|
namespace: posmon
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
nodePort: 30903
|
||||||
|
port: 9093
|
||||||
|
protocol: TCP
|
||||||
|
targetPort: web
|
||||||
|
selector:
|
||||||
|
alertmanager: posmon
|
|
@ -0,0 +1,14 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: PrometheusRule
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
prometheus: posmon
|
||||||
|
role: alert-rules
|
||||||
|
name: posmon-rules
|
||||||
|
namespace: posmon
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: ./posmon.rules
|
||||||
|
rules:
|
||||||
|
- alert: TargetDown
|
||||||
|
expr: probe_success < 1
|
|
@ -2,7 +2,7 @@ apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: blackbox-exporter
|
name: blackbox-exporter
|
||||||
namespace: mon
|
namespace: posmon
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
|
@ -23,7 +23,9 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: blackbox-exporter
|
name: blackbox-exporter
|
||||||
namespace: mon
|
namespace: posmon
|
||||||
|
labels:
|
||||||
|
app: blackbox-exporter
|
||||||
spec:
|
spec:
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
|
@ -2,7 +2,7 @@ apiVersion: batch/v1beta1
|
||||||
kind: CronJob
|
kind: CronJob
|
||||||
metadata:
|
metadata:
|
||||||
name: healthchecks-io
|
name: healthchecks-io
|
||||||
namespace: hc
|
namespace: posmon
|
||||||
spec:
|
spec:
|
||||||
schedule: "*/1 * * * *"
|
schedule: "*/1 * * * *"
|
||||||
concurrencyPolicy: Forbid
|
concurrencyPolicy: Forbid
|
|
@ -0,0 +1,37 @@
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: prometheus
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources:
|
||||||
|
- nodes
|
||||||
|
- services
|
||||||
|
- endpoints
|
||||||
|
- pods
|
||||||
|
verbs: ["get", "list", "watch"]
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
verbs: ["get"]
|
||||||
|
- nonResourceURLs: ["/metrics"]
|
||||||
|
verbs: ["get"]
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: prometheus
|
||||||
|
namespace: posmon
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: prometheus
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: prometheus
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: prometheus
|
||||||
|
namespace: posmon
|
|
@ -0,0 +1,42 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: Prometheus
|
||||||
|
metadata:
|
||||||
|
name: posmon
|
||||||
|
namespace: posmon
|
||||||
|
spec:
|
||||||
|
serviceAccountName: prometheus
|
||||||
|
serviceMonitorSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: pos
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: 400Mi
|
||||||
|
enableAdminAPI: false
|
||||||
|
additionalScrapeConfigs:
|
||||||
|
name: additional-scrape-configs
|
||||||
|
key: pos-blackbox-exporter-scrape.yaml
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- namespace: posmon
|
||||||
|
name: alertmanager-posmon
|
||||||
|
port: web
|
||||||
|
ruleSelector:
|
||||||
|
matchLabels:
|
||||||
|
role: alert-rules
|
||||||
|
prometheus: posmon
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: prometheus-posmon
|
||||||
|
namespace: posmon
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
nodePort: 30909
|
||||||
|
port: 9090
|
||||||
|
protocol: TCP
|
||||||
|
targetPort: web
|
||||||
|
selector:
|
||||||
|
prometheus: posmon
|
|
@ -0,0 +1,13 @@
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: blackboxmon
|
||||||
|
namespace: posmon
|
||||||
|
labels:
|
||||||
|
app: pos
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: blackbox-exporter
|
||||||
|
endpoints:
|
||||||
|
- port: http
|
Reference in New Issue