add some monitoring sauce
This commit is contained in:
parent
1341ebf585
commit
eafb22dcec
16
README.md
16
README.md
|
@ -57,9 +57,23 @@ provided which regularly pings [Healthchecks.io](https://healthchecks.io/).
|
|||
A secret with the ping URL needs to be added before the CronJob can do its work:
|
||||
|
||||
```
|
||||
kubectl -n hc create secret generic healthchecks-io --from-literal=HCURL=https://hc-ping.com/MYUUID
|
||||
kubectl -n posmon create secret generic healthchecks-io --from-literal=HCURL=https://hc-ping.com/MYUUID
|
||||
```
|
||||
|
||||
### Application and network monitoring
|
||||
|
||||
Application monitoring is done using Prometheus, Alertmanager and
|
||||
Blackbox exporter. No application specific exporters are used, so
|
||||
it's just basic monitoring to answer the question: "Is it up?".
|
||||
|
||||
1. Install [prometheus-operator](https://github.com/coreos/prometheus-operator)
|
||||
For example: `kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/master/bundle.yaml`
|
||||
2. Apply manifests: `kubectl apply -f contrib/posmon/`
|
||||
3. Create secret for extra scrape config:
|
||||
`kubectl -n posmon create secret generic additional-scrape-configs --from-file=contrib/pos-blackbox-exporter-scrape.yaml`
|
||||
4. Create secret for Alertmanager config:
|
||||
`kubectl -n posmon create secret generic alertmanager-posmon --from-file=contrib/alertmanager.yaml`
|
||||
|
||||
## Backup configuration
|
||||
|
||||
Example contents of `backup.env`:
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Alertmanager configuration: every alert goes to a single webhook receiver.
global:
  resolve_timeout: 5m

route:
  # All alerts are delivered to the one receiver defined below.
  receiver: webhook
  # Alerts sharing the same "job" label are batched into one notification.
  group_by:
    - job
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h

receivers:
  - name: webhook
    webhook_configs:
      - url: 'http://alertmanagerwh:30500/'
|
|
@ -0,0 +1,48 @@
|
|||
# HTTP probes: each target is passed to the blackbox exporter's /probe
# endpoint using the http_2xx module.
- job_name: blackbox_http
  scrape_interval: 1m
  metrics_path: /probe
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - 'http://odoo.pos.svc.cluster.local'
        - 'http://iotbox.pos.svc.cluster.local'
        - 'http://192.168.233.1'
  relabel_configs:
    # Hand the listed target to the exporter as the ?target= parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed target as the "instance" label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape request to the blackbox exporter.
    - target_label: __address__
      replacement: 'blackbox-exporter:9115'
|
||||
# TCP connect probes: each target is passed to the blackbox exporter's /probe
# endpoint using the tcp_connect module.
- job_name: blackbox_tcp
  scrape_interval: 1m
  metrics_path: /probe
  params:
    module:
      - tcp_connect
  static_configs:
    - targets:
        - 'db.pos.svc.cluster.local:5432'
  relabel_configs:
    # Hand the listed target to the exporter as the ?target= parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed target as the "instance" label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape request to the blackbox exporter.
    - target_label: __address__
      replacement: 'blackbox-exporter:9115'
|
||||
# ICMP probes: each target is passed to the blackbox exporter's /probe
# endpoint using the icmp module.
- job_name: blackbox_icmp
  scrape_interval: 1m
  metrics_path: /probe
  params:
    module:
      - icmp
  static_configs:
    - targets:
        - '192.168.233.3'
        - '192.168.233.5'
  relabel_configs:
    # Hand the listed target to the exporter as the ?target= parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed target as the "instance" label.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape request to the blackbox exporter.
    - target_label: __address__
      replacement: 'blackbox-exporter:9115'
|
|
@ -0,0 +1,4 @@
|
|||
---
# Namespace that holds the monitoring resources.
kind: Namespace
apiVersion: v1
metadata:
  name: posmon
|
|
@ -0,0 +1,23 @@
|
|||
---
# Single-replica Alertmanager instance (prometheus-operator custom resource).
kind: Alertmanager
apiVersion: monitoring.coreos.com/v1
metadata:
  namespace: posmon
  name: posmon
spec:
  replicas: 1
|
||||
---
|
||||
# NodePort Service for the Alertmanager web port (9093, node port 30903).
kind: Service
apiVersion: v1
metadata:
  namespace: posmon
  name: alertmanager-posmon
spec:
  type: NodePort
  # Routes to pods carrying the label alertmanager=posmon.
  selector:
    alertmanager: posmon
  ports:
    - name: web
      protocol: TCP
      port: 9093
      targetPort: web
      nodePort: 30903
|
|
@ -0,0 +1,14 @@
|
|||
---
# Alerting rules for the blackbox probes. The labels "prometheus: posmon" and
# "role: alert-rules" are what a Prometheus ruleSelector has to match in order
# to load this rule set.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: posmon-rules
  namespace: posmon
  labels:
    prometheus: posmon
    role: alert-rules
spec:
  groups:
    - name: ./posmon.rules
      rules:
        # Fires when a probe reports failure (probe_success below 1).
        - alert: TargetDown
          expr: probe_success < 1
          # Require the failure to persist before firing, so that a single
          # lost probe does not trigger a notification right away.
          for: 2m
          # Human-readable context for the notification payload.
          annotations:
            summary: 'Probe target {{ $labels.instance }} is down'
            description: >-
              Probe of {{ $labels.instance }} (job {{ $labels.job }}) has been
              failing for more than 2 minutes.
|
|
@ -2,7 +2,7 @@ apiVersion: apps/v1
|
|||
kind: Deployment
|
||||
metadata:
|
||||
name: blackbox-exporter
|
||||
namespace: mon
|
||||
namespace: posmon
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
|
@ -23,7 +23,9 @@ apiVersion: v1
|
|||
kind: Service
|
||||
metadata:
|
||||
name: blackbox-exporter
|
||||
namespace: mon
|
||||
namespace: posmon
|
||||
labels:
|
||||
app: blackbox-exporter
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
|
@ -2,7 +2,7 @@ apiVersion: batch/v1beta1
|
|||
kind: CronJob
|
||||
metadata:
|
||||
name: healthchecks-io
|
||||
namespace: hc
|
||||
namespace: posmon
|
||||
spec:
|
||||
schedule: "*/1 * * * *"
|
||||
concurrencyPolicy: Forbid
|
|
@ -0,0 +1,37 @@
|
|||
---
# Read-only cluster permissions granted to Prometheus.
# Uses the stable rbac.authorization.k8s.io/v1 API: the v1beta1 version is
# deprecated and no longer served from Kubernetes 1.22 on.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Read access to the core objects listed here.
  - apiGroups: [""]
    resources:
      - nodes
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  # ConfigMaps may only be fetched individually.
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  # Permission to GET the non-resource /metrics URL.
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
|
||||
---
|
||||
# Service account named "prometheus" in the posmon namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: posmon
  name: prometheus
|
||||
---
|
||||
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount in the
# posmon namespace.
# Uses the stable rbac.authorization.k8s.io/v1 API: the v1beta1 version is
# deprecated and no longer served from Kubernetes 1.22 on.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: posmon
|
|
@ -0,0 +1,42 @@
|
|||
---
# Prometheus instance (prometheus-operator custom resource) for the posmon
# namespace.
kind: Prometheus
apiVersion: monitoring.coreos.com/v1
metadata:
  namespace: posmon
  name: posmon
spec:
  serviceAccountName: prometheus
  enableAdminAPI: false
  resources:
    requests:
      memory: 400Mi
  # Only ServiceMonitors labelled app=pos are picked up.
  serviceMonitorSelector:
    matchLabels:
      app: pos
  # Only rule objects carrying both of these labels are loaded.
  ruleSelector:
    matchLabels:
      prometheus: posmon
      role: alert-rules
  # Extra scrape jobs are read from this key of the named Secret.
  additionalScrapeConfigs:
    name: additional-scrape-configs
    key: pos-blackbox-exporter-scrape.yaml
  # Alerts are sent to the "web" port of the alertmanager-posmon Service.
  alerting:
    alertmanagers:
      - name: alertmanager-posmon
        namespace: posmon
        port: web
|
||||
---
|
||||
# NodePort Service for the Prometheus web port (9090, node port 30909).
kind: Service
apiVersion: v1
metadata:
  namespace: posmon
  name: prometheus-posmon
spec:
  type: NodePort
  # Routes to pods carrying the label prometheus=posmon.
  selector:
    prometheus: posmon
  ports:
    - name: web
      protocol: TCP
      port: 9090
      targetPort: web
      nodePort: 30909
|
|
@ -0,0 +1,13 @@
|
|||
---
# ServiceMonitor that scrapes the "http" port of Services labelled
# app=blackbox-exporter. It carries the label app=pos itself.
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
  namespace: posmon
  name: blackboxmon
  labels:
    app: pos
spec:
  endpoints:
    - port: http
  selector:
    matchLabels:
      app: blackbox-exporter
|
Reference in New Issue