replace custom monitoring with helm chart
This commit is contained in:
parent
ad580edfa8
commit
8996c7a963
|
@ -9,15 +9,41 @@ spec:
|
||||||
destination:
|
destination:
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
server: https://kubernetes.default.svc
|
server: https://kubernetes.default.svc
|
||||||
project: system
|
|
||||||
source:
|
source:
|
||||||
path: monitoring/manifests
|
chart: kube-prometheus-stack
|
||||||
repoURL: https://git.tbrnt.ch/tobru/gitops-tbrnt.git
|
repoURL: https://prometheus-community.github.io/helm-charts
|
||||||
targetRevision: HEAD
|
targetRevision: 18.0.1
|
||||||
directory:
|
helm:
|
||||||
recurse: true
|
values: |
|
||||||
|
kubeApiServer:
|
||||||
|
enabled: true
|
||||||
|
kubeControllerManager:
|
||||||
|
enabled: true
|
||||||
|
endpoints:
|
||||||
|
- 185.95.218.11
|
||||||
|
kubeScheduler:
|
||||||
|
enabled: true
|
||||||
|
endpoints:
|
||||||
|
- 185.95.218.11
|
||||||
|
kubeProxy:
|
||||||
|
enabled: true
|
||||||
|
endpoints:
|
||||||
|
- 185.95.218.11
|
||||||
|
kubeEtcd:
|
||||||
|
enabled: true
|
||||||
|
endpoints:
|
||||||
|
- 185.95.218.11
|
||||||
|
service:
|
||||||
|
enabled: true
|
||||||
|
port: 2381
|
||||||
|
targetPort: 2381
|
||||||
|
project: system
|
||||||
|
syncPolicy:
|
||||||
|
syncOptions:
|
||||||
|
- CreateNamespace=true
|
||||||
ignoreDifferences:
|
ignoreDifferences:
|
||||||
- group: apiextensions.k8s.io
|
- group: apiextensions.k8s.io
|
||||||
kind: CustomResourceDefinition
|
kind: CustomResourceDefinition
|
||||||
jsonPointers:
|
jsonPointers:
|
||||||
- /status
|
- /status
|
||||||
|
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
build:
|
|
||||||
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
|
|
||||||
sudo chown -R tobru. manifests/
|
|
||||||
kubeseal --controller-namespace sealed-secrets -o yaml -n monitoring < ../../gitops-tbrnt-private/monitoring/alertmanager.yaml > manifests/alertmanager-tbrnt-config-secret.yaml
|
|
||||||
cp *.yaml manifests/
|
|
||||||
.PHONY: build
|
|
||||||
|
|
||||||
update:
|
|
||||||
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci jb update
|
|
||||||
sudo chown -R tobru. vendor/
|
|
||||||
make build
|
|
||||||
.PHONY: update
|
|
|
@ -1,17 +0,0 @@
|
||||||
# Cluster Monitoring
|
|
||||||
|
|
||||||
Source: [kube-prometheus](https://github.com/coreos/kube-prometheus).
|
|
||||||
|
|
||||||
## Build
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
|
|
||||||
```
|
|
||||||
|
|
||||||
## Update libs
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci jb update
|
|
||||||
```
|
|
||||||
|
|
||||||
Then build again, obviously
|
|
|
@ -1,15 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
|
|
||||||
|
|
||||||
set -e
|
|
||||||
set -x
|
|
||||||
# only exit with zero if all commands of the pipeline exit successfully
|
|
||||||
set -o pipefail
|
|
||||||
|
|
||||||
# Make sure to start with a clean 'manifests' dir
|
|
||||||
rm -rf manifests
|
|
||||||
mkdir -p manifests/setup
|
|
||||||
|
|
||||||
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
|
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
apiVersion: batch/v1beta1
|
|
||||||
kind: CronJob
|
|
||||||
metadata:
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
schedule: "*/1 * * * *"
|
|
||||||
concurrencyPolicy: Forbid
|
|
||||||
successfulJobsHistoryLimit: 1
|
|
||||||
failedJobsHistoryLimit: 1
|
|
||||||
startingDeadlineSeconds: 200
|
|
||||||
jobTemplate:
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: pinghc
|
|
||||||
env:
|
|
||||||
- name: HCURL
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: healthchecks-io
|
|
||||||
key: HCURL
|
|
||||||
image: busybox
|
|
||||||
args:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- "date && echo $HCURL && /bin/wget -q -O - --no-check-certificate $HCURL"
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: bitnami.com/v1alpha1
|
|
||||||
kind: SealedSecret
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
encryptedData:
|
|
||||||
HCURL: AgBEpwET1Qa1hQqAmwrNGBv4sL0ml8pGYPwgq9Aps3tYhBVqsXjV7U5RQa/txldg1umw2Zqx8MfvZTN2kmFk6bJTROCWqTxmxd4rHgnJYqRR0+Opn/BtDhVx4WTnehyM/il9ymddhMD+WRQDr/Wfxq/0UQdsy+IEYyVMQuOKEihZabxmXRyNeAl5ZBeQ0W1T29biJPx3rifS37RbGlJtCIYuNPh82d0KAMu1dszDnkln8k5CBv6mPD8BVHg+Z/y1v1jFhTIE3YOlGzCIjb8RrJj6MVm7zlauj8zrl30JvF2OAWDGGZDOL3b0G3IKd0Qp/eagT33Sx7vbppY/l1Vci6UQcVpde3u2+ATMbysRej04Mvcodq5OgkBFqbgCzx0UFTIq0wER/GuCoYbt+k8b3TouK5ChQet8EP0W/c7rLHcMY3c0UR00N7m5UeKZAzAkXSGV+u3M9K6PMp8pl0VuDo+IVgEIY7ku9rtzL7SPIfXS4u5w7fte13fOtKB/2sa11dNqAbHmidF+IO6ycjm8SZibC7NKyCxgIKWPfsFXhNUT2Nx7eBRrzR1QlqThIGRsDpX1RVplTwe/OLsBz0K99AyGDUkSBJdOZLaRT/b3T0nS8DE5x/e8MvFsbbDdGE2U/YhVrbfn072u/X979/RIm0oCjipvByZXhFmobRj9SP9RcK2UfjBSY7xyKnd2rjj1mnIs2S0CmwGFdJqoywHckJJOu3YP2oN2Q1U7+Fe4yciupAshgdszY2okHMtd4aDDJJKeKKFHpjpsuA==
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
type: Opaque
|
|
||||||
status: {}
|
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
{
|
|
||||||
"version": 1,
|
|
||||||
"dependencies": [
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus-operator/kube-prometheus",
|
|
||||||
"subdir": "jsonnet/kube-prometheus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "main",
|
|
||||||
"name": "kube-prometheus"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
|
|
||||||
"subdir": "prometheus-pushgateway"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "master",
|
|
||||||
"name": "prometheus-pushgateway"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"legacyImports": true
|
|
||||||
}
|
|
|
@ -1,181 +0,0 @@
|
||||||
{
|
|
||||||
"version": 1,
|
|
||||||
"dependencies": [
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/brancz/kubernetes-grafana",
|
|
||||||
"subdir": "grafana"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "8ea4e7bc04b1bf5e9bd99918ca28c6271b42be0e",
|
|
||||||
"sum": "muenICtKXABk6MZZHCZD2wCbmtiE96GwWRMGa1Rg+wA="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/etcd-io/etcd",
|
|
||||||
"subdir": "contrib/mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "562d645ac923388ff5b8d270b0536764d34b0e0f",
|
|
||||||
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/grafana/grafonnet-lib",
|
|
||||||
"subdir": "grafonnet"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98",
|
|
||||||
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/grafana/jsonnet-libs",
|
|
||||||
"subdir": "grafana-builder"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "dbf1211d003d20c7adcdee942c477e648507a398",
|
|
||||||
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
|
||||||
"subdir": ""
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
|
|
||||||
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
|
|
||||||
"name": "ksonnet"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
|
||||||
"subdir": ""
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "c67c0f19e869f1da34d79b6507c1fa37c23a6e4e",
|
|
||||||
"sum": "F+RxcI26zeoeI81uot39Jv6IpQ6BOz+xlSHlElJYsz8="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
|
||||||
"subdir": "lib/promgrafonnet"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "39a9cda705b5201c35105bd1f24c83923fa839ef",
|
|
||||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
|
||||||
"subdir": "jsonnet/kube-state-metrics"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
|
||||||
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/kubernetes/kube-state-metrics",
|
|
||||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
|
||||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus-operator/kube-prometheus",
|
|
||||||
"subdir": "jsonnet/kube-prometheus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "5b2740d517095a6ae9ad51bcb9c53e5ef28c62a0",
|
|
||||||
"sum": "+6VkkR44AC3Qnwfr9cWYCKs+uRi5JaIOda/3X1JEzAg="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus-operator/prometheus-operator",
|
|
||||||
"subdir": "jsonnet/mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
|
||||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
|
|
||||||
"name": "prometheus-operator-mixin"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus-operator/prometheus-operator",
|
|
||||||
"subdir": "jsonnet/prometheus-operator"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
|
||||||
"sum": "MRwyChXdKG3anL2OWpbUu3qWc97w9J6YsjUWjLFQyB0="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus/alertmanager",
|
|
||||||
"subdir": "doc/alertmanager-mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "99f64e944b1043c790784cf5373c8fb349816fc4",
|
|
||||||
"sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=",
|
|
||||||
"name": "alertmanager"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus/node_exporter",
|
|
||||||
"subdir": "docs/node-mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "b597c1244d7bef49e6f3359c87a56dd7707f6719",
|
|
||||||
"sum": "cZTNXQMUCLB5FGYpMn845dcqGdkcYt58qCqOFIV/BoQ="
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/prometheus/prometheus",
|
|
||||||
"subdir": "documentation/prometheus-mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "3cafc58827d1ebd1a67749f88be4218f0bab3d8d",
|
|
||||||
"sum": "VK0c3sQ3ksiM6JQsAVfWmL5NbzGv9llMfXFNXfFdJ+A=",
|
|
||||||
"name": "prometheus"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/thanos-io/thanos",
|
|
||||||
"subdir": "mixin"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "ba6c5c4726ff52807c7383c68f2159b1af7980bb",
|
|
||||||
"sum": "XP3uq7xcfKHsnWsz1v992csZhhZR3jQma6hFOfSViTs=",
|
|
||||||
"name": "thanos-mixin"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
|
|
||||||
"subdir": "prometheus-pushgateway"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "7bb93ca3ddf3b83f1fdfe95d9bad415b57d0fe4b",
|
|
||||||
"sum": "6nOJeHJExjYyTSovZvU6xbGjWS88oUfGnF1DAo+Q6tg="
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"legacyImports": false
|
|
||||||
}
|
|
|
@ -1,87 +0,0 @@
|
||||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|
||||||
local endpoints = k.core.v1.endpoints;
|
|
||||||
local endpointSubset = endpoints.subsetsType;
|
|
||||||
local endpointPort = endpointSubset.portsType;
|
|
||||||
local service = k.core.v1.service;
|
|
||||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
|
||||||
local masterIP = '185.95.218.11';
|
|
||||||
|
|
||||||
{
|
|
||||||
prometheus+:: {
|
|
||||||
kubeSchedulerPrometheusDiscoveryService:
|
|
||||||
local p = servicePort.newNamed('http-metrics', 10251, 10251);
|
|
||||||
service.new('kube-scheduler', { 'k8s-app': 'kube-scheduler' }, p) +
|
|
||||||
service.mixin.metadata.withNamespace('kube-system') +
|
|
||||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
|
||||||
service.mixin.spec.withClusterIp('None') +
|
|
||||||
service.mixin.spec.withSelector({}),
|
|
||||||
kubeSchedulerPrometheusDiscoveryEndpoints:
|
|
||||||
local port = endpointPort.new() +
|
|
||||||
endpointPort.withName('http-metrics') +
|
|
||||||
endpointPort.withPort(10251) +
|
|
||||||
endpointPort.withProtocol('TCP');
|
|
||||||
local subset = endpointSubset.new() +
|
|
||||||
endpointSubset.withAddresses([
|
|
||||||
{ ip: masterIP },
|
|
||||||
]) +
|
|
||||||
endpointSubset.withPorts(port);
|
|
||||||
|
|
||||||
endpoints.new() +
|
|
||||||
endpoints.mixin.metadata.withName('kube-scheduler') +
|
|
||||||
endpoints.mixin.metadata.withNamespace('kube-system') +
|
|
||||||
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
|
||||||
endpoints.withSubsets(subset),
|
|
||||||
kubeControllerManagerPrometheusDiscoveryService:
|
|
||||||
local p = servicePort.newNamed('http-metrics', 10252, 10252);
|
|
||||||
service.new('kube-controller-manager', { 'k8s-app': 'kube-controller-manager' }, p) +
|
|
||||||
service.mixin.metadata.withNamespace('kube-system') +
|
|
||||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
|
||||||
service.mixin.spec.withClusterIp('None') +
|
|
||||||
service.mixin.spec.withSelector({}),
|
|
||||||
kubeControllerManagerPrometheusDiscoveryEndpoints:
|
|
||||||
local port = endpointPort.new() +
|
|
||||||
endpointPort.withName('http-metrics') +
|
|
||||||
endpointPort.withPort(10252) +
|
|
||||||
endpointPort.withProtocol('TCP');
|
|
||||||
local subset = endpointSubset.new() +
|
|
||||||
endpointSubset.withAddresses([
|
|
||||||
{ ip: masterIP },
|
|
||||||
]) +
|
|
||||||
endpointSubset.withPorts(port);
|
|
||||||
|
|
||||||
endpoints.new() +
|
|
||||||
endpoints.mixin.metadata.withName('kube-controller-manager') +
|
|
||||||
endpoints.mixin.metadata.withNamespace('kube-system') +
|
|
||||||
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
|
||||||
endpoints.withSubsets(subset),
|
|
||||||
serviceMonitorKubeScheduler+:
|
|
||||||
{
|
|
||||||
spec+: {
|
|
||||||
endpoints: [
|
|
||||||
{
|
|
||||||
port: 'http-metrics',
|
|
||||||
interval: '30s',
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
serviceMonitorKubeControllerManager+:
|
|
||||||
{
|
|
||||||
spec+: {
|
|
||||||
endpoints: [
|
|
||||||
{
|
|
||||||
port: 'http-metrics',
|
|
||||||
interval: '30s',
|
|
||||||
metricRelabelings: (import 'kube-prometheus/addons/dropping-deprecated-metrics-relabelings.libsonnet') + [
|
|
||||||
{
|
|
||||||
sourceLabels: ['__name__'],
|
|
||||||
regex: 'etcd_(debugging|disk|request|server).*',
|
|
||||||
action: 'drop',
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
|
@ -1,40 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
name: k8up
|
|
||||||
labels:
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: k8up.rules
|
|
||||||
rules:
|
|
||||||
- alert: baas_last_errors
|
|
||||||
expr: baas_backup_restic_last_errors > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Amount of errors of last restic backup
|
|
||||||
description: This alert is fired when error number is > 0
|
|
||||||
- alert: K8upBackupFailed
|
|
||||||
expr: rate(k8up_jobs_failed_counter[1d]) > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
|
|
||||||
- alert: K8upBackupNotRunning
|
|
||||||
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
|
|
||||||
- alert: K8upJobStuck
|
|
||||||
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
|
|
||||||
for: 24h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."
|
|
|
@ -1,36 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: Alertmanager
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: main
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
configSecret: alertmanager-tbrnt-config
|
|
||||||
image: quay.io/prometheus/alertmanager:v0.21.0
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
podMetadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
replicas: 3
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 100Mi
|
|
||||||
requests:
|
|
||||||
cpu: 4m
|
|
||||||
memory: 100Mi
|
|
||||||
securityContext:
|
|
||||||
fsGroup: 2000
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
serviceAccountName: alertmanager-main
|
|
||||||
version: 0.21.0
|
|
|
@ -1,18 +0,0 @@
|
||||||
apiVersion: policy/v1beta1
|
|
||||||
kind: PodDisruptionBudget
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: alertmanager-main
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
maxUnavailable: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,156 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: alertmanager-main-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: alertmanager.rules
|
|
||||||
rules:
|
|
||||||
- alert: AlertmanagerFailedReload
|
|
||||||
annotations:
|
|
||||||
description: Configuration has failed to load for {{ $labels.namespace }}/{{
|
|
||||||
$labels.pod}}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
|
|
||||||
summary: Reloading an Alertmanager configuration has failed.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="default"}[5m]) == 0
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: AlertmanagerMembersInconsistent
|
|
||||||
annotations:
|
|
||||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
|
|
||||||
found {{ $value }} members of the {{$labels.job}} cluster.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
|
|
||||||
summary: A member of an Alertmanager cluster has not found all other cluster
|
|
||||||
members.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m])
|
|
||||||
< on (namespace,service) group_left
|
|
||||||
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m]))
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: AlertmanagerFailedToSendAlerts
|
|
||||||
annotations:
|
|
||||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
|
|
||||||
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
|
|
||||||
}}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
|
|
||||||
summary: An Alertmanager instance failed to send notifications.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default"}[5m])
|
|
||||||
/
|
|
||||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default"}[5m])
|
|
||||||
)
|
|
||||||
> 0.01
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
|
||||||
annotations:
|
|
||||||
description: The minimum notification failure rate to {{ $labels.integration
|
|
||||||
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
|
|
||||||
humanizePercentage }}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
|
|
||||||
summary: All Alertmanager instances in a cluster failed to send notifications
|
|
||||||
to a critical integration.
|
|
||||||
expr: |
|
|
||||||
min by (namespace,service, integration) (
|
|
||||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
|
|
||||||
/
|
|
||||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
|
|
||||||
)
|
|
||||||
> 0.01
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
|
||||||
annotations:
|
|
||||||
description: The minimum notification failure rate to {{ $labels.integration
|
|
||||||
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
|
|
||||||
humanizePercentage }}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
|
|
||||||
summary: All Alertmanager instances in a cluster failed to send notifications
|
|
||||||
to a non-critical integration.
|
|
||||||
expr: |
|
|
||||||
min by (namespace,service, integration) (
|
|
||||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
|
|
||||||
/
|
|
||||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
|
|
||||||
)
|
|
||||||
> 0.01
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: AlertmanagerConfigInconsistent
|
|
||||||
annotations:
|
|
||||||
description: Alertmanager instances within the {{$labels.job}} cluster have
|
|
||||||
different configurations.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
|
|
||||||
summary: Alertmanager instances within the same cluster have different configurations.
|
|
||||||
expr: |
|
|
||||||
count by (namespace,service) (
|
|
||||||
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="default"})
|
|
||||||
)
|
|
||||||
!= 1
|
|
||||||
for: 20m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: AlertmanagerClusterDown
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
|
||||||
within the {{$labels.job}} cluster have been up for less than half of the
|
|
||||||
last 5m.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
|
|
||||||
summary: Half or more of the Alertmanager instances within the same cluster
|
|
||||||
are down.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
count by (namespace,service) (
|
|
||||||
avg_over_time(up{job="alertmanager-main",namespace="default"}[5m]) < 0.5
|
|
||||||
)
|
|
||||||
/
|
|
||||||
count by (namespace,service) (
|
|
||||||
up{job="alertmanager-main",namespace="default"}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
>= 0.5
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: AlertmanagerClusterCrashlooping
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances
|
|
||||||
within the {{$labels.job}} cluster have restarted at least 5 times in the
|
|
||||||
last 10m.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
|
|
||||||
summary: Half or more of the Alertmanager instances within the same cluster
|
|
||||||
are crashlooping.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
count by (namespace,service) (
|
|
||||||
changes(process_start_time_seconds{job="alertmanager-main",namespace="default"}[10m]) > 4
|
|
||||||
)
|
|
||||||
/
|
|
||||||
count by (namespace,service) (
|
|
||||||
up{job="alertmanager-main",namespace="default"}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
>= 0.5
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
|
@ -1,49 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: alertmanager-main
|
|
||||||
namespace: default
|
|
||||||
stringData:
|
|
||||||
alertmanager.yaml: |-
|
|
||||||
"global":
|
|
||||||
"resolve_timeout": "5m"
|
|
||||||
"inhibit_rules":
|
|
||||||
- "equal":
|
|
||||||
- "namespace"
|
|
||||||
- "alertname"
|
|
||||||
"source_match":
|
|
||||||
"severity": "critical"
|
|
||||||
"target_match_re":
|
|
||||||
"severity": "warning|info"
|
|
||||||
- "equal":
|
|
||||||
- "namespace"
|
|
||||||
- "alertname"
|
|
||||||
"source_match":
|
|
||||||
"severity": "warning"
|
|
||||||
"target_match_re":
|
|
||||||
"severity": "info"
|
|
||||||
"receivers":
|
|
||||||
- "name": "Default"
|
|
||||||
- "name": "Watchdog"
|
|
||||||
- "name": "Critical"
|
|
||||||
"route":
|
|
||||||
"group_by":
|
|
||||||
- "namespace"
|
|
||||||
"group_interval": "5m"
|
|
||||||
"group_wait": "30s"
|
|
||||||
"receiver": "Default"
|
|
||||||
"repeat_interval": "12h"
|
|
||||||
"routes":
|
|
||||||
- "match":
|
|
||||||
"alertname": "Watchdog"
|
|
||||||
"receiver": "Watchdog"
|
|
||||||
- "match":
|
|
||||||
"severity": "critical"
|
|
||||||
"receiver": "Critical"
|
|
||||||
type: Opaque
|
|
|
@ -1,23 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: alertmanager-main
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: web
|
|
||||||
port: 9093
|
|
||||||
targetPort: web
|
|
||||||
selector:
|
|
||||||
alertmanager: main
|
|
||||||
app: alertmanager
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
sessionAffinity: ClientIP
|
|
|
@ -1,11 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: alertmanager-main
|
|
||||||
namespace: default
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.21.0
|
|
||||||
name: alertmanager
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
port: web
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
alertmanager: main
|
|
||||||
app.kubernetes.io/component: alert-router
|
|
||||||
app.kubernetes.io/name: alertmanager
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: bitnami.com/v1alpha1
|
|
||||||
kind: SealedSecret
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: alertmanager-tbrnt-config
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
encryptedData:
|
|
||||||
alertmanager.yaml: AgBj3KqLiF7EAnGK6c4+Thferv3Sur+fhlwE4wpXD1PBtwlJQsMCqjsLRRFNAESH9/8vhI9E9D8wLJiauNS7CGw5jd5KU1cvxo5EyGeFoVyAB4bHSy/pxptSFq+rn00E99/Tqkbdsgduwusfpi0I9F1+zNucyyamJsEzIcsyHMlBbACz+9KQV9SdbgVEmIeqabrAP9VQaQ+i69yurhPdV7VkZzr0WKcGg3x27+slmtjlJz5fwtv1qmbYt/MQnijF2tc6tJeq19Cm0O4zuQ09meW6DwAZ9SOIFU6LxrqJlKbuleaWmfIE3AQYA6Z+qXyBjT1ILW36RwGyg3YK7nm0MNDQxd6LN3zR0eifPqrPsm7O6LE+NAg4FkurV3lJlrBoU3lSSc+sQZZr00ct9Gp57EEvg9T2TaM1B/KHQNmIhpDGntD4+yTcvK3nU7+sxqG/c4Wk5xiUQyLYnigNy5qYCcsM+t9iCoGxP7uU8GrsvIkojTxzhdc6e5LduThKdGE9jI3R6nCP6kmsU6XyUzgKmxYJVVzhSrm9yxFVDPHriNaEM2hgEd3wStwmRjGjwAPUjQZfSJtmxY6+RQ/77TYGjiskDm6gAZuzkGjdptL2t5F+54y3uePaLHNspMgtZsTARCo3kAhgf61Gk2nvnEY/ws5qFjAnsUEXs86wAk2S+w401QKPKTcDr6e/rnve8IrXW0FPvzR3rzdWOcU8v0Z0sSFijIfXdx+A9WGCJuHNo65FbKSgWhlHBfvWB1qWBnDd/VVHIA2wR8gevAPJHSc0f1WdUDc4w2w8tc/qum1SZo2lWkMopvLaiVHU5dCGtG6+4qsC1DmFzIRGZN4AbdVd5k+OY15Fp+ysjuTpA/HuZ/N0kz/5BXzNbY3u7YKi3EV3Up+eZIi2jlG0XBXWdCouuxRW40qHuShivtbrBgey32kFo85dsrDqN7F2kBVnumOB7kvFOaCkL2AtsakVjUzGoh5eXCSHl0ZcqmW2UjInzZIirBChMW/G4yL/TwpVYbBLqWPfdVMFmq7I4srY2+hUP/5UBt/DKZi5zPlLR8H3q4i02zsNpqdhSa9o6ThhFtVX9/te/DMpyN1fJ1Hn2p3cDhoTsiTLPkvflVOx70flap0v2zzPoDm+yXhFllpWp/5avHy9pKf/RzpAodbNr/EydkC+KDKI88MhVUtxS27WbKFsq+vUkmHQj+KtGyRFjg2/CnmM8YbdRsMe8p39PVGLxj1RTnyYzlMltOTbJo3rhDzjmpzGVUpWokwTMGC1WgTenrS4IcCK61ri9bsBIL9n9sMLF1lT8NVKnQfluDTaHNzsQgJ1HTSwQOcAfugqlUrSeTLt3q6U4pSjjlF8P7wYpqzWc+bhOaHed9NxrGXFBC5Wh6+BULuCaCA6TtkLpUfABYHVUa4OS3huNsOeBhZ3aCCQXrc0jOOq2DQzxvdGu4YAQnvMHwJRVyKVcw0pOS5RjIqJW6IOn0MGHzAo7qNv6LUyJ9a7huT2W4ibrHFkMck1zKxbBekPQ9FxpufSXrEqEqNuB3j7Gi7lVDVbPySr1rr2KXLzOLsnZhpTpMq2RejglIAMF7WfIMfvHQ2mnjNuYNNQnXx8hPLm88GSxFYKHpUnAswgYuo4XX2drYMzzq3GWDMIHZ/kpLySU+eJGo6VGeFUV1DgaGksLXE3oCfrA1OCUyZ/qke3tzj8ixjwuprCmFPWsg==
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: alertmanager-tbrnt-config
|
|
||||||
namespace: monitoring
|
|
||||||
type: Opaque
|
|
||||||
status: {}
|
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
data:
|
|
||||||
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLmRlZmF1bHQuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
name: grafana-datasources
|
|
||||||
namespace: default
|
|
||||||
type: Opaque
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,26 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
data:
|
|
||||||
dashboards.yaml: |-
|
|
||||||
{
|
|
||||||
"apiVersion": 1,
|
|
||||||
"providers": [
|
|
||||||
{
|
|
||||||
"folder": "Default",
|
|
||||||
"name": "0",
|
|
||||||
"options": {
|
|
||||||
"path": "/grafana-dashboard-definitions/0"
|
|
||||||
},
|
|
||||||
"orgId": 1,
|
|
||||||
"type": "file"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
name: grafana-dashboards
|
|
||||||
namespace: default
|
|
|
@ -1,209 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
name: grafana
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
checksum/grafana-datasources: b822d7b1a1070f322d0773c043985b4a
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- env: []
|
|
||||||
image: grafana/grafana:7.5.4
|
|
||||||
name: grafana
|
|
||||||
ports:
|
|
||||||
- containerPort: 3000
|
|
||||||
name: http
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /api/health
|
|
||||||
port: http
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 200Mi
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 100Mi
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /var/lib/grafana
|
|
||||||
name: grafana-storage
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /etc/grafana/provisioning/datasources
|
|
||||||
name: grafana-datasources
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /etc/grafana/provisioning/dashboards
|
|
||||||
name: grafana-dashboards
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/apiserver
|
|
||||||
name: grafana-dashboard-apiserver
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/cluster-total
|
|
||||||
name: grafana-dashboard-cluster-total
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/controller-manager
|
|
||||||
name: grafana-dashboard-controller-manager
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
|
|
||||||
name: grafana-dashboard-k8s-resources-cluster
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
|
|
||||||
name: grafana-dashboard-k8s-resources-namespace
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
|
|
||||||
name: grafana-dashboard-k8s-resources-node
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
|
|
||||||
name: grafana-dashboard-k8s-resources-pod
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload
|
|
||||||
name: grafana-dashboard-k8s-resources-workload
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
|
|
||||||
name: grafana-dashboard-k8s-resources-workloads-namespace
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/kubelet
|
|
||||||
name: grafana-dashboard-kubelet
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
|
|
||||||
name: grafana-dashboard-namespace-by-pod
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
|
|
||||||
name: grafana-dashboard-namespace-by-workload
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
|
|
||||||
name: grafana-dashboard-node-cluster-rsrc-use
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
|
|
||||||
name: grafana-dashboard-node-rsrc-use
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/nodes
|
|
||||||
name: grafana-dashboard-nodes
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
|
|
||||||
name: grafana-dashboard-persistentvolumesusage
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/pod-total
|
|
||||||
name: grafana-dashboard-pod-total
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
|
|
||||||
name: grafana-dashboard-prometheus-remote-write
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/prometheus
|
|
||||||
name: grafana-dashboard-prometheus
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/proxy
|
|
||||||
name: grafana-dashboard-proxy
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/scheduler
|
|
||||||
name: grafana-dashboard-scheduler
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
|
||||||
name: grafana-dashboard-statefulset
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
|
||||||
name: grafana-dashboard-workload-total
|
|
||||||
readOnly: false
|
|
||||||
nodeSelector:
|
|
||||||
beta.kubernetes.io/os: linux
|
|
||||||
securityContext:
|
|
||||||
fsGroup: 65534
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65534
|
|
||||||
serviceAccountName: grafana
|
|
||||||
volumes:
|
|
||||||
- emptyDir: {}
|
|
||||||
name: grafana-storage
|
|
||||||
- name: grafana-datasources
|
|
||||||
secret:
|
|
||||||
secretName: grafana-datasources
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboards
|
|
||||||
name: grafana-dashboards
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-apiserver
|
|
||||||
name: grafana-dashboard-apiserver
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-cluster-total
|
|
||||||
name: grafana-dashboard-cluster-total
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-controller-manager
|
|
||||||
name: grafana-dashboard-controller-manager
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-cluster
|
|
||||||
name: grafana-dashboard-k8s-resources-cluster
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-namespace
|
|
||||||
name: grafana-dashboard-k8s-resources-namespace
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-node
|
|
||||||
name: grafana-dashboard-k8s-resources-node
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-pod
|
|
||||||
name: grafana-dashboard-k8s-resources-pod
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-workload
|
|
||||||
name: grafana-dashboard-k8s-resources-workload
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-k8s-resources-workloads-namespace
|
|
||||||
name: grafana-dashboard-k8s-resources-workloads-namespace
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-kubelet
|
|
||||||
name: grafana-dashboard-kubelet
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-namespace-by-pod
|
|
||||||
name: grafana-dashboard-namespace-by-pod
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-namespace-by-workload
|
|
||||||
name: grafana-dashboard-namespace-by-workload
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-node-cluster-rsrc-use
|
|
||||||
name: grafana-dashboard-node-cluster-rsrc-use
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-node-rsrc-use
|
|
||||||
name: grafana-dashboard-node-rsrc-use
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-nodes
|
|
||||||
name: grafana-dashboard-nodes
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-persistentvolumesusage
|
|
||||||
name: grafana-dashboard-persistentvolumesusage
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-pod-total
|
|
||||||
name: grafana-dashboard-pod-total
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-prometheus-remote-write
|
|
||||||
name: grafana-dashboard-prometheus-remote-write
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-prometheus
|
|
||||||
name: grafana-dashboard-prometheus
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-proxy
|
|
||||||
name: grafana-dashboard-proxy
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-scheduler
|
|
||||||
name: grafana-dashboard-scheduler
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-statefulset
|
|
||||||
name: grafana-dashboard-statefulset
|
|
||||||
- configMap:
|
|
||||||
name: grafana-dashboard-workload-total
|
|
||||||
name: grafana-dashboard-workload-total
|
|
|
@ -1,19 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
name: grafana
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 3000
|
|
||||||
targetPort: http
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,5 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: grafana
|
|
||||||
namespace: default
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: grafana
|
|
||||||
app.kubernetes.io/name: grafana
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 7.5.4
|
|
||||||
name: grafana
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 15s
|
|
||||||
port: http
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: grafana
|
|
|
@ -1,31 +0,0 @@
|
||||||
apiVersion: batch/v1beta1
|
|
||||||
kind: CronJob
|
|
||||||
metadata:
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
schedule: "*/1 * * * *"
|
|
||||||
concurrencyPolicy: Forbid
|
|
||||||
successfulJobsHistoryLimit: 1
|
|
||||||
failedJobsHistoryLimit: 1
|
|
||||||
startingDeadlineSeconds: 200
|
|
||||||
jobTemplate:
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: pinghc
|
|
||||||
env:
|
|
||||||
- name: HCURL
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: healthchecks-io
|
|
||||||
key: HCURL
|
|
||||||
image: busybox
|
|
||||||
args:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- "date && echo $HCURL && /bin/wget -q -O - --no-check-certificate $HCURL"
|
|
||||||
restartPolicy: OnFailure
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: bitnami.com/v1alpha1
|
|
||||||
kind: SealedSecret
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
encryptedData:
|
|
||||||
HCURL: AgBEpwET1Qa1hQqAmwrNGBv4sL0ml8pGYPwgq9Aps3tYhBVqsXjV7U5RQa/txldg1umw2Zqx8MfvZTN2kmFk6bJTROCWqTxmxd4rHgnJYqRR0+Opn/BtDhVx4WTnehyM/il9ymddhMD+WRQDr/Wfxq/0UQdsy+IEYyVMQuOKEihZabxmXRyNeAl5ZBeQ0W1T29biJPx3rifS37RbGlJtCIYuNPh82d0KAMu1dszDnkln8k5CBv6mPD8BVHg+Z/y1v1jFhTIE3YOlGzCIjb8RrJj6MVm7zlauj8zrl30JvF2OAWDGGZDOL3b0G3IKd0Qp/eagT33Sx7vbppY/l1Vci6UQcVpde3u2+ATMbysRej04Mvcodq5OgkBFqbgCzx0UFTIq0wER/GuCoYbt+k8b3TouK5ChQet8EP0W/c7rLHcMY3c0UR00N7m5UeKZAzAkXSGV+u3M9K6PMp8pl0VuDo+IVgEIY7ku9rtzL7SPIfXS4u5w7fte13fOtKB/2sa11dNqAbHmidF+IO6ycjm8SZibC7NKyCxgIKWPfsFXhNUT2Nx7eBRrzR1QlqThIGRsDpX1RVplTwe/OLsBz0K99AyGDUkSBJdOZLaRT/b3T0nS8DE5x/e8MvFsbbDdGE2U/YhVrbfn072u/X979/RIm0oCjipvByZXhFmobRj9SP9RcK2UfjBSY7xyKnd2rjj1mnIs2S0CmwGFdJqoywHckJJOu3YP2oN2Q1U7+Fe4yciupAshgdszY2okHMtd4aDDJJKeKKFHpjpsuA==
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
creationTimestamp: null
|
|
||||||
name: healthchecks-io
|
|
||||||
namespace: monitoring
|
|
||||||
type: Opaque
|
|
||||||
status: {}
|
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: extensions/v1beta1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
|
||||||
ingress.kubernetes.io/ssl-redirect: "true"
|
|
||||||
name: grafana
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
rules:
|
|
||||||
- host: grafana.knurrli.tbrnt.ch
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- backend:
|
|
||||||
serviceName: grafana
|
|
||||||
servicePort: http
|
|
||||||
tls:
|
|
||||||
- hosts:
|
|
||||||
- grafana.knurrli.tbrnt.ch
|
|
||||||
secretName: grafana-ingress-cert
|
|
|
@ -1,40 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
name: k8up
|
|
||||||
labels:
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: k8up.rules
|
|
||||||
rules:
|
|
||||||
- alert: baas_last_errors
|
|
||||||
expr: baas_backup_restic_last_errors > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Amount of errors of last restic backup
|
|
||||||
description: This alert is fired when error number is > 0
|
|
||||||
- alert: K8upBackupFailed
|
|
||||||
expr: rate(k8up_jobs_failed_counter[1d]) > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
|
|
||||||
- alert: K8upBackupNotRunning
|
|
||||||
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
|
|
||||||
- alert: K8upJobStuck
|
|
||||||
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
|
|
||||||
for: 24h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."
|
|
|
@ -1,110 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- configmaps
|
|
||||||
- secrets
|
|
||||||
- nodes
|
|
||||||
- pods
|
|
||||||
- services
|
|
||||||
- resourcequotas
|
|
||||||
- replicationcontrollers
|
|
||||||
- limitranges
|
|
||||||
- persistentvolumeclaims
|
|
||||||
- persistentvolumes
|
|
||||||
- namespaces
|
|
||||||
- endpoints
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- apps
|
|
||||||
resources:
|
|
||||||
- statefulsets
|
|
||||||
- daemonsets
|
|
||||||
- deployments
|
|
||||||
- replicasets
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- batch
|
|
||||||
resources:
|
|
||||||
- cronjobs
|
|
||||||
- jobs
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- autoscaling
|
|
||||||
resources:
|
|
||||||
- horizontalpodautoscalers
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- authentication.k8s.io
|
|
||||||
resources:
|
|
||||||
- tokenreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
||||||
- apiGroups:
|
|
||||||
- authorization.k8s.io
|
|
||||||
resources:
|
|
||||||
- subjectaccessreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
||||||
- apiGroups:
|
|
||||||
- policy
|
|
||||||
resources:
|
|
||||||
- poddisruptionbudgets
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- certificates.k8s.io
|
|
||||||
resources:
|
|
||||||
- certificatesigningrequests
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- storage.k8s.io
|
|
||||||
resources:
|
|
||||||
- storageclasses
|
|
||||||
- volumeattachments
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- admissionregistration.k8s.io
|
|
||||||
resources:
|
|
||||||
- mutatingwebhookconfigurations
|
|
||||||
- validatingwebhookconfigurations
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- networking.k8s.io
|
|
||||||
resources:
|
|
||||||
- networkpolicies
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- coordination.k8s.io
|
|
||||||
resources:
|
|
||||||
- leases
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: kube-state-metrics
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: default
|
|
|
@ -1,89 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
kubectl.kubernetes.io/default-container: kube-state-metrics
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- args:
|
|
||||||
- --host=127.0.0.1
|
|
||||||
- --port=8081
|
|
||||||
- --telemetry-host=127.0.0.1
|
|
||||||
- --telemetry-port=8082
|
|
||||||
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 250Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 190Mi
|
|
||||||
securityContext:
|
|
||||||
runAsUser: 65534
|
|
||||||
- args:
|
|
||||||
- --logtostderr
|
|
||||||
- --secure-listen-address=:8443
|
|
||||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
|
||||||
- --upstream=http://127.0.0.1:8081/
|
|
||||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
|
||||||
name: kube-rbac-proxy-main
|
|
||||||
ports:
|
|
||||||
- containerPort: 8443
|
|
||||||
name: https-main
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 40m
|
|
||||||
memory: 40Mi
|
|
||||||
requests:
|
|
||||||
cpu: 20m
|
|
||||||
memory: 20Mi
|
|
||||||
securityContext:
|
|
||||||
runAsGroup: 65532
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65532
|
|
||||||
- args:
|
|
||||||
- --logtostderr
|
|
||||||
- --secure-listen-address=:9443
|
|
||||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
|
||||||
- --upstream=http://127.0.0.1:8082/
|
|
||||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
|
||||||
name: kube-rbac-proxy-self
|
|
||||||
ports:
|
|
||||||
- containerPort: 9443
|
|
||||||
name: https-self
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 20m
|
|
||||||
memory: 40Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 20Mi
|
|
||||||
securityContext:
|
|
||||||
runAsGroup: 65532
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65532
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
serviceAccountName: kube-state-metrics
|
|
|
@ -1,46 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: kube-state-metrics-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: kube-state-metrics
|
|
||||||
rules:
|
|
||||||
- alert: KubeStateMetricsListErrors
|
|
||||||
annotations:
|
|
||||||
description: kube-state-metrics is experiencing errors at an elevated rate
|
|
||||||
in list operations. This is likely causing it to not be able to expose metrics
|
|
||||||
about Kubernetes objects correctly or at all.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
|
|
||||||
summary: kube-state-metrics is experiencing errors in list operations.
|
|
||||||
expr: |
|
|
||||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
|
||||||
/
|
|
||||||
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
|
|
||||||
> 0.01
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: KubeStateMetricsWatchErrors
|
|
||||||
annotations:
|
|
||||||
description: kube-state-metrics is experiencing errors at an elevated rate
|
|
||||||
in watch operations. This is likely causing it to not be able to expose
|
|
||||||
metrics about Kubernetes objects correctly or at all.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
|
|
||||||
summary: kube-state-metrics is experiencing errors in watch operations.
|
|
||||||
expr: |
|
|
||||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
|
||||||
/
|
|
||||||
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
|
|
||||||
> 0.01
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
|
@ -1,23 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: https-main
|
|
||||||
port: 8443
|
|
||||||
targetPort: https-main
|
|
||||||
- name: https-self
|
|
||||||
port: 9443
|
|
||||||
targetPort: https-self
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,10 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: default
|
|
|
@ -1,35 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.0.0
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
honorLabels: true
|
|
||||||
interval: 30s
|
|
||||||
port: https-main
|
|
||||||
relabelings:
|
|
||||||
- action: labeldrop
|
|
||||||
regex: (pod|service|endpoint|namespace)
|
|
||||||
scheme: https
|
|
||||||
scrapeTimeout: 30s
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
interval: 30s
|
|
||||||
port: https-self
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
jobLabel: app.kubernetes.io/name
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,22 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- authentication.k8s.io
|
|
||||||
resources:
|
|
||||||
- tokenreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
||||||
- apiGroups:
|
|
||||||
- authorization.k8s.io
|
|
||||||
resources:
|
|
||||||
- subjectaccessreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: node-exporter
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: node-exporter
|
|
||||||
namespace: default
|
|
|
@ -1,100 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: DaemonSet
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- args:
|
|
||||||
- --web.listen-address=127.0.0.1:9100
|
|
||||||
- --path.sysfs=/host/sys
|
|
||||||
- --path.rootfs=/host/root
|
|
||||||
- --no-collector.wifi
|
|
||||||
- --no-collector.hwmon
|
|
||||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
|
||||||
- --collector.netclass.ignored-devices=^(veth.*)$
|
|
||||||
- --collector.netdev.device-exclude=^(veth.*)$
|
|
||||||
image: quay.io/prometheus/node-exporter:v1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 250m
|
|
||||||
memory: 180Mi
|
|
||||||
requests:
|
|
||||||
cpu: 102m
|
|
||||||
memory: 180Mi
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /host/sys
|
|
||||||
mountPropagation: HostToContainer
|
|
||||||
name: sys
|
|
||||||
readOnly: true
|
|
||||||
- mountPath: /host/root
|
|
||||||
mountPropagation: HostToContainer
|
|
||||||
name: root
|
|
||||||
readOnly: true
|
|
||||||
- args:
|
|
||||||
- --logtostderr
|
|
||||||
- --secure-listen-address=[$(IP)]:9100
|
|
||||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
|
||||||
- --upstream=http://127.0.0.1:9100/
|
|
||||||
env:
|
|
||||||
- name: IP
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: status.podIP
|
|
||||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
|
||||||
name: kube-rbac-proxy
|
|
||||||
ports:
|
|
||||||
- containerPort: 9100
|
|
||||||
hostPort: 9100
|
|
||||||
name: https
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 20m
|
|
||||||
memory: 40Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 20Mi
|
|
||||||
securityContext:
|
|
||||||
runAsGroup: 65532
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65532
|
|
||||||
hostNetwork: true
|
|
||||||
hostPID: true
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
securityContext:
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65534
|
|
||||||
serviceAccountName: node-exporter
|
|
||||||
tolerations:
|
|
||||||
- operator: Exists
|
|
||||||
volumes:
|
|
||||||
- hostPath:
|
|
||||||
path: /sys
|
|
||||||
name: sys
|
|
||||||
- hostPath:
|
|
||||||
path: /
|
|
||||||
name: root
|
|
||||||
updateStrategy:
|
|
||||||
rollingUpdate:
|
|
||||||
maxUnavailable: 10%
|
|
||||||
type: RollingUpdate
|
|
|
@ -1,301 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: node-exporter-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: node-exporter
|
|
||||||
rules:
|
|
||||||
- alert: NodeFilesystemSpaceFillingUp
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
|
||||||
up.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
|
||||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
|
|
||||||
and
|
|
||||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeFilesystemSpaceFillingUp
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
|
||||||
up fast.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
|
|
||||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
|
|
||||||
and
|
|
||||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: NodeFilesystemAlmostOutOfSpace
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available space left.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
|
||||||
summary: Filesystem has less than 5% space left.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeFilesystemAlmostOutOfSpace
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available space left.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
|
|
||||||
summary: Filesystem has less than 3% space left.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: NodeFilesystemFilesFillingUp
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
|
||||||
up.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
|
||||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
|
|
||||||
and
|
|
||||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeFilesystemFilesFillingUp
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
|
||||||
up fast.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
|
|
||||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
|
|
||||||
and
|
|
||||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: NodeFilesystemAlmostOutOfFiles
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
|
||||||
summary: Filesystem has less than 5% inodes left.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeFilesystemAlmostOutOfFiles
|
|
||||||
annotations:
|
|
||||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
|
||||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
|
|
||||||
summary: Filesystem has less than 3% inodes left.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
|
|
||||||
and
|
|
||||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
|
||||||
)
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: NodeNetworkReceiveErrs
|
|
||||||
annotations:
|
|
||||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
|
||||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
|
|
||||||
summary: Network interface is reporting many receive errors.
|
|
||||||
expr: |
|
|
||||||
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeNetworkTransmitErrs
|
|
||||||
annotations:
|
|
||||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
|
||||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
|
|
||||||
summary: Network interface is reporting many transmit errors.
|
|
||||||
expr: |
|
|
||||||
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeHighNumberConntrackEntriesUsed
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
|
|
||||||
summary: Number of conntrack entries is getting close to the limit.
|
|
||||||
expr: |
|
|
||||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeTextFileCollectorScrapeError
|
|
||||||
annotations:
|
|
||||||
description: Node Exporter text file collector failed to scrape.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
|
|
||||||
summary: Node Exporter text file collector failed to scrape.
|
|
||||||
expr: |
|
|
||||||
node_textfile_scrape_error{job="node-exporter"} == 1
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeClockSkewDetected
|
|
||||||
annotations:
|
|
||||||
description: Clock on {{ $labels.instance }} is out of sync by more than 0.05s.
|
|
||||||
Ensure NTP is configured correctly on this host.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
|
|
||||||
summary: Clock skew detected.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
node_timex_offset_seconds > 0.05
|
|
||||||
and
|
|
||||||
deriv(node_timex_offset_seconds[5m]) >= 0
|
|
||||||
)
|
|
||||||
or
|
|
||||||
(
|
|
||||||
node_timex_offset_seconds < -0.05
|
|
||||||
and
|
|
||||||
deriv(node_timex_offset_seconds[5m]) <= 0
|
|
||||||
)
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeClockNotSynchronising
|
|
||||||
annotations:
|
|
||||||
description: Clock on {{ $labels.instance }} is not synchronising. Ensure
|
|
||||||
NTP is configured on this host.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
|
|
||||||
summary: Clock not synchronising.
|
|
||||||
expr: |
|
|
||||||
min_over_time(node_timex_sync_status[5m]) == 0
|
|
||||||
and
|
|
||||||
node_timex_maxerror_seconds >= 16
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: NodeRAIDDegraded
|
|
||||||
annotations:
|
|
||||||
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
|
|
||||||
in degraded state due to one or more disk failures. Number of spare drives
|
|
||||||
is insufficient to fix issue automatically.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
|
|
||||||
summary: RAID Array is degraded
|
|
||||||
expr: |
|
|
||||||
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: NodeRAIDDiskFailure
|
|
||||||
annotations:
|
|
||||||
description: At least one device in RAID array on {{ $labels.instance }} failed.
|
|
||||||
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
|
|
||||||
summary: Failed device in RAID array
|
|
||||||
expr: |
|
|
||||||
node_md_disks{state="failed"} > 0
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- name: node-exporter.rules
|
|
||||||
rules:
|
|
||||||
- expr: |
|
|
||||||
count without (cpu) (
|
|
||||||
count without (mode) (
|
|
||||||
node_cpu_seconds_total{job="node-exporter"}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
record: instance:node_num_cpu:sum
|
|
||||||
- expr: |
|
|
||||||
1 - avg without (cpu, mode) (
|
|
||||||
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
|
|
||||||
)
|
|
||||||
record: instance:node_cpu_utilisation:rate1m
|
|
||||||
- expr: |
|
|
||||||
(
|
|
||||||
node_load1{job="node-exporter"}
|
|
||||||
/
|
|
||||||
instance:node_num_cpu:sum{job="node-exporter"}
|
|
||||||
)
|
|
||||||
record: instance:node_load1_per_cpu:ratio
|
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
node_memory_MemAvailable_bytes{job="node-exporter"}
|
|
||||||
/
|
|
||||||
node_memory_MemTotal_bytes{job="node-exporter"}
|
|
||||||
)
|
|
||||||
record: instance:node_memory_utilisation:ratio
|
|
||||||
- expr: |
|
|
||||||
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
|
|
||||||
record: instance:node_vmstat_pgmajfault:rate1m
|
|
||||||
- expr: |
|
|
||||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
|
||||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
|
||||||
- expr: |
|
|
||||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
|
||||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
|
||||||
- expr: |
|
|
||||||
sum without (device) (
|
|
||||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
|
|
||||||
)
|
|
||||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
|
||||||
- expr: |
|
|
||||||
sum without (device) (
|
|
||||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
|
|
||||||
)
|
|
||||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
|
||||||
- expr: |
|
|
||||||
sum without (device) (
|
|
||||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
|
|
||||||
)
|
|
||||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
|
||||||
- expr: |
|
|
||||||
sum without (device) (
|
|
||||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
|
||||||
)
|
|
||||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: https
|
|
||||||
port: 9100
|
|
||||||
targetPort: https
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,10 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
namespace: default
|
|
|
@ -1,31 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 1.1.2
|
|
||||||
name: node-exporter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
interval: 15s
|
|
||||||
port: https
|
|
||||||
relabelings:
|
|
||||||
- action: replace
|
|
||||||
regex: (.*)
|
|
||||||
replacement: $1
|
|
||||||
sourceLabels:
|
|
||||||
- __meta_kubernetes_pod_node_name
|
|
||||||
targetLabel: instance
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
jobLabel: app.kubernetes.io/name
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: node-exporter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,18 +0,0 @@
|
||||||
apiVersion: apiregistration.k8s.io/v1
|
|
||||||
kind: APIService
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: v1beta1.metrics.k8s.io
|
|
||||||
spec:
|
|
||||||
group: metrics.k8s.io
|
|
||||||
groupPriorityMinimum: 100
|
|
||||||
insecureSkipTLSVerify: true
|
|
||||||
service:
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
||||||
version: v1beta1
|
|
||||||
versionPriority: 100
|
|
|
@ -1,21 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- nodes
|
|
||||||
- namespaces
|
|
||||||
- pods
|
|
||||||
- services
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
|
@ -1,22 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
rbac.authorization.k8s.io/aggregate-to-admin: "true"
|
|
||||||
rbac.authorization.k8s.io/aggregate-to-edit: "true"
|
|
||||||
rbac.authorization.k8s.io/aggregate-to-view: "true"
|
|
||||||
name: system:aggregated-metrics-reader
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- metrics.k8s.io
|
|
||||||
resources:
|
|
||||||
- pods
|
|
||||||
- nodes
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: prometheus-adapter
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: resource-metrics:system:auth-delegator
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: system:auth-delegator
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
|
@ -1,16 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: resource-metrics-server-resources
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- metrics.k8s.io
|
|
||||||
resources:
|
|
||||||
- '*'
|
|
||||||
verbs:
|
|
||||||
- '*'
|
|
|
@ -1,38 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
data:
|
|
||||||
config.yaml: |-
|
|
||||||
"resourceRules":
|
|
||||||
"cpu":
|
|
||||||
"containerLabel": "container"
|
|
||||||
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
|
|
||||||
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode=\"idle\", job=\"windows-exporter\",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)"
|
|
||||||
"resources":
|
|
||||||
"overrides":
|
|
||||||
"namespace":
|
|
||||||
"resource": "namespace"
|
|
||||||
"node":
|
|
||||||
"resource": "node"
|
|
||||||
"pod":
|
|
||||||
"resource": "pod"
|
|
||||||
"memory":
|
|
||||||
"containerLabel": "container"
|
|
||||||
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
|
|
||||||
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum(windows_cs_physical_memory_bytes{job=\"windows-exporter\",<<.LabelMatchers>>} - windows_memory_available_bytes{job=\"windows-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
|
||||||
"resources":
|
|
||||||
"overrides":
|
|
||||||
"instance":
|
|
||||||
"resource": "node"
|
|
||||||
"namespace":
|
|
||||||
"resource": "namespace"
|
|
||||||
"pod":
|
|
||||||
"resource": "pod"
|
|
||||||
"window": "5m"
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: adapter-config
|
|
||||||
namespace: default
|
|
|
@ -1,62 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
replicas: 2
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
strategy:
|
|
||||||
rollingUpdate:
|
|
||||||
maxSurge: 1
|
|
||||||
maxUnavailable: 1
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- args:
|
|
||||||
- --cert-dir=/var/run/serving-cert
|
|
||||||
- --config=/etc/adapter/config.yaml
|
|
||||||
- --logtostderr=true
|
|
||||||
- --metrics-relist-interval=1m
|
|
||||||
- --prometheus-url=http://prometheus-k8s.default.svc.cluster.local:9090/
|
|
||||||
- --secure-port=6443
|
|
||||||
image: directxman12/k8s-prometheus-adapter:v0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
ports:
|
|
||||||
- containerPort: 6443
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /tmp
|
|
||||||
name: tmpfs
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /var/run/serving-cert
|
|
||||||
name: volume-serving-cert
|
|
||||||
readOnly: false
|
|
||||||
- mountPath: /etc/adapter
|
|
||||||
name: config
|
|
||||||
readOnly: false
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
serviceAccountName: prometheus-adapter
|
|
||||||
volumes:
|
|
||||||
- emptyDir: {}
|
|
||||||
name: tmpfs
|
|
||||||
- emptyDir: {}
|
|
||||||
name: volume-serving-cert
|
|
||||||
- configMap:
|
|
||||||
name: adapter-config
|
|
||||||
name: config
|
|
|
@ -1,18 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: resource-metrics-auth-reader
|
|
||||||
namespace: kube-system
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: extension-apiserver-authentication-reader
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
|
@ -1,19 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: https
|
|
||||||
port: 443
|
|
||||||
targetPort: 6443
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,10 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
|
@ -1,23 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.8.4
|
|
||||||
name: prometheus-adapter
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
interval: 30s
|
|
||||||
port: https
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: metrics-adapter
|
|
||||||
app.kubernetes.io/name: prometheus-adapter
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- nodes/metrics
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- nonResourceURLs:
|
|
||||||
- /metrics
|
|
||||||
verbs:
|
|
||||||
- get
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
|
@ -1,14 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Endpoints
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-controller-manager
|
|
||||||
name: kube-controller-manager
|
|
||||||
namespace: kube-system
|
|
||||||
subsets:
|
|
||||||
- addresses:
|
|
||||||
- ip: 185.95.218.11
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
port: 10252
|
|
||||||
protocol: TCP
|
|
|
@ -1,14 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-controller-manager
|
|
||||||
name: kube-controller-manager
|
|
||||||
namespace: kube-system
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
port: 10252
|
|
||||||
targetPort: 10252
|
|
||||||
selector: {}
|
|
|
@ -1,14 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Endpoints
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-scheduler
|
|
||||||
name: kube-scheduler
|
|
||||||
namespace: kube-system
|
|
||||||
subsets:
|
|
||||||
- addresses:
|
|
||||||
- ip: 185.95.218.11
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
port: 10251
|
|
||||||
protocol: TCP
|
|
|
@ -1,14 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-scheduler
|
|
||||||
name: kube-scheduler
|
|
||||||
namespace: kube-system
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
port: 10251
|
|
||||||
targetPort: 10251
|
|
||||||
selector: {}
|
|
|
@ -1,24 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
honorLabels: true
|
|
||||||
port: https
|
|
||||||
scheme: https
|
|
||||||
tlsConfig:
|
|
||||||
insecureSkipVerify: true
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
|
@ -1,18 +0,0 @@
|
||||||
apiVersion: policy/v1beta1
|
|
||||||
kind: PodDisruptionBudget
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
minAvailable: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
prometheus: k8s
|
|
|
@ -1,73 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: Prometheus
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
prometheus: k8s
|
|
||||||
name: k8s
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- apiVersion: v2
|
|
||||||
name: alertmanager-main
|
|
||||||
namespace: default
|
|
||||||
port: web
|
|
||||||
externalLabels: {}
|
|
||||||
externalUrl: http://prometheus-k8s.monitoring:9090
|
|
||||||
image: quay.io/prometheus/prometheus:v2.26.0
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
podMetadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
podMonitorNamespaceSelector:
|
|
||||||
matchExpressions:
|
|
||||||
- key: prometheus
|
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- "yes"
|
|
||||||
- "true"
|
|
||||||
podMonitorSelector: {}
|
|
||||||
probeNamespaceSelector: {}
|
|
||||||
probeSelector: {}
|
|
||||||
replicas: 2
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
memory: 400Mi
|
|
||||||
retention: 7d
|
|
||||||
ruleSelector:
|
|
||||||
matchLabels:
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
securityContext:
|
|
||||||
fsGroup: 2000
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
serviceAccountName: prometheus-k8s
|
|
||||||
serviceMonitorNamespaceSelector:
|
|
||||||
matchExpressions:
|
|
||||||
- key: prometheus
|
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- "yes"
|
|
||||||
- "true"
|
|
||||||
serviceMonitorSelector: {}
|
|
||||||
storage:
|
|
||||||
volumeClaimTemplate:
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 10Gi
|
|
||||||
storageClassName: local-path
|
|
||||||
version: 2.26.0
|
|
|
@ -1,256 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: prometheus-k8s-prometheus-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: prometheus
|
|
||||||
rules:
|
|
||||||
- alert: PrometheusBadConfig
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
|
||||||
reload its configuration.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusbadconfig
|
|
||||||
summary: Failed Prometheus configuration reload.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="default"}[5m]) == 0
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: PrometheusNotificationQueueRunningFull
|
|
||||||
annotations:
|
|
||||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
|
|
||||||
is running full.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotificationqueuerunningfull
|
|
||||||
summary: Prometheus alert notification queue predicted to run full in less
|
|
||||||
than 30m.
|
|
||||||
expr: |
|
|
||||||
# Without min_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
(
|
|
||||||
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="default"}[5m], 60 * 30)
|
|
||||||
>
|
|
||||||
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
)
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
|
|
||||||
annotations:
|
|
||||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
|
|
||||||
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstosomealertmanagers
|
|
||||||
summary: Prometheus has encountered more than 1% errors sending alerts to
|
|
||||||
a specific Alertmanager.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
/
|
|
||||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
)
|
|
||||||
* 100
|
|
||||||
> 1
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusNotConnectedToAlertmanagers
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
|
|
||||||
to any Alertmanagers.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotconnectedtoalertmanagers
|
|
||||||
summary: Prometheus is not connected to any Alertmanagers.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="default"}[5m]) < 1
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusTSDBReloadsFailing
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
|
||||||
{{$value | humanize}} reload failures over the last 3h.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbreloadsfailing
|
|
||||||
summary: Prometheus has issues reloading blocks from disk.
|
|
||||||
expr: |
|
|
||||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
|
|
||||||
for: 4h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusTSDBCompactionsFailing
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
|
||||||
{{$value | humanize}} compaction failures over the last 3h.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbcompactionsfailing
|
|
||||||
summary: Prometheus has issues compacting blocks.
|
|
||||||
expr: |
|
|
||||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
|
|
||||||
for: 4h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusNotIngestingSamples
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
|
|
||||||
samples.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotingestingsamples
|
|
||||||
summary: Prometheus is not ingesting samples.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="default"}[5m]) <= 0
|
|
||||||
and
|
|
||||||
(
|
|
||||||
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="default"}) > 0
|
|
||||||
or
|
|
||||||
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="default"}) > 0
|
|
||||||
)
|
|
||||||
)
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusDuplicateTimestamps
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
|
||||||
{{ printf "%.4g" $value }} samples/s with different values but duplicated
|
|
||||||
timestamp.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusduplicatetimestamps
|
|
||||||
summary: Prometheus is dropping samples with duplicate timestamps.
|
|
||||||
expr: |
|
|
||||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOutOfOrderTimestamps
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
|
||||||
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoutofordertimestamps
|
|
||||||
summary: Prometheus drops samples with out-of-order timestamps.
|
|
||||||
expr: |
|
|
||||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusRemoteStorageFailures
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
|
|
||||||
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
|
|
||||||
$labels.url }}
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotestoragefailures
|
|
||||||
summary: Prometheus fails to send samples to remote storage.
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
|
|
||||||
/
|
|
||||||
(
|
|
||||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
|
|
||||||
+
|
|
||||||
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="default"}[5m]))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
* 100
|
|
||||||
> 1
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: PrometheusRemoteWriteBehind
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
|
||||||
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
|
|
||||||
}}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritebehind
|
|
||||||
summary: Prometheus remote write is behind.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
(
|
|
||||||
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
- ignoring(remote_name, url) group_right
|
|
||||||
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
)
|
|
||||||
> 120
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: PrometheusRemoteWriteDesiredShards
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
|
||||||
desired shards calculation wants to run {{ $value }} shards for queue {{
|
|
||||||
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
|
|
||||||
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="default"}`
|
|
||||||
$labels.instance | query | first | value }}.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritedesiredshards
|
|
||||||
summary: Prometheus remote write desired shards calculation wants to run more
|
|
||||||
than configured max shards.
|
|
||||||
expr: |
|
|
||||||
# Without max_over_time, failed scrapes could create false negatives, see
|
|
||||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
|
||||||
(
|
|
||||||
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
>
|
|
||||||
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="default"}[5m])
|
|
||||||
)
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusRuleFailures
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
|
||||||
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusrulefailures
|
|
||||||
summary: Prometheus is failing rule evaluations.
|
|
||||||
expr: |
|
|
||||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
- alert: PrometheusMissingRuleEvaluations
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
|
|
||||||
printf "%.0f" $value }} rule group evaluations in the last 5m.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusmissingruleevaluations
|
|
||||||
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
|
||||||
expr: |
|
|
||||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusTargetLimitHit
|
|
||||||
annotations:
|
|
||||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
|
|
||||||
{{ printf "%.0f" $value }} targets because the number of targets exceeded
|
|
||||||
the configured target_limit.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustargetlimithit
|
|
||||||
summary: Prometheus has dropped targets because some scrape configs have exceeded
|
|
||||||
the targets limit.
|
|
||||||
expr: |
|
|
||||||
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
|
||||||
annotations:
|
|
||||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
|
||||||
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstoanyalertmanager
|
|
||||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
|
||||||
expr: |
|
|
||||||
min without (alertmanager) (
|
|
||||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
|
|
||||||
/
|
|
||||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
|
|
||||||
)
|
|
||||||
* 100
|
|
||||||
> 3
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
|
@ -1,42 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: prometheus-pushgateway
|
|
||||||
name: prometheus-pushgateway
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: prometheus-pushgateway
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: prometheus-pushgateway
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- image: prom/pushgateway:v1.1.0
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /#/status
|
|
||||||
port: 9091
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
timeoutSeconds: 10
|
|
||||||
name: pushgateway
|
|
||||||
ports:
|
|
||||||
- containerPort: 9091
|
|
||||||
name: metrics
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /#/status
|
|
||||||
port: 9091
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
timeoutSeconds: 10
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 50m
|
|
||||||
memory: 100Mi
|
|
||||||
requests:
|
|
||||||
cpu: 50m
|
|
||||||
memory: 100Mi
|
|
|
@ -1,15 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: prometheus-pushgateway
|
|
||||||
name: prometheus-pushgateway
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 9091
|
|
||||||
targetPort: metrics
|
|
||||||
selector:
|
|
||||||
app: prometheus-pushgateway
|
|
||||||
type: ClusterIP
|
|
|
@ -1,15 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
prometheus: k8s
|
|
||||||
name: prometheus-pushgateway
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- honorLabels: true
|
|
||||||
port: http
|
|
||||||
jobLabel: k8s-app
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: prometheus-pushgateway
|
|
|
@ -1,18 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s-config
|
|
||||||
namespace: default
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s-config
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
|
@ -1,57 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
items:
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: kube-system
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: prometheus-k8s
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
kind: RoleBindingList
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s-config
|
|
||||||
namespace: default
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- configmaps
|
|
||||||
verbs:
|
|
||||||
- get
|
|
|
@ -1,114 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
items:
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- networking.k8s.io
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: kube-system
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- networking.k8s.io
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- extensions
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- networking.k8s.io
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
kind: RoleList
|
|
|
@ -1,23 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
prometheus: k8s
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: web
|
|
||||||
port: 9090
|
|
||||||
targetPort: web
|
|
||||||
selector:
|
|
||||||
app: prometheus
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
prometheus: k8s
|
|
||||||
sessionAffinity: ClientIP
|
|
|
@ -1,10 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 2.26.0
|
|
||||||
name: prometheus-k8s
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
port: web
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: prometheus
|
|
||||||
app.kubernetes.io/name: prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
prometheus: k8s
|
|
|
@ -1,41 +0,0 @@
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
metricRelabelings:
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
- action: drop
|
|
||||||
regex: etcd_(debugging|disk|request|server).*
|
|
||||||
sourceLabels:
|
|
||||||
- __name__
|
|
||||||
port: http-metrics
|
|
|
@ -1,4 +0,0 @@
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
port: http-metrics
|
|
|
@ -1,4 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: default
|
|
|
@ -1,76 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: exporter
|
|
||||||
app.kubernetes.io/name: kube-prometheus
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: kube-prometheus-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: general.rules
|
|
||||||
rules:
|
|
||||||
- alert: TargetDown
|
|
||||||
annotations:
|
|
||||||
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
|
||||||
}} targets in {{ $labels.namespace }} namespace are down.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/targetdown
|
|
||||||
summary: One or more targets are unreachable.
|
|
||||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
|
|
||||||
namespace, service)) > 10
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: Watchdog
|
|
||||||
annotations:
|
|
||||||
description: |
|
|
||||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
|
||||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
|
||||||
and always fire against a receiver. There are integrations with various notification
|
|
||||||
mechanisms that send a notification when this alert is not firing. For example the
|
|
||||||
"DeadMansSnitch" integration in PagerDuty.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/watchdog
|
|
||||||
summary: An alert that should always be firing to certify that Alertmanager
|
|
||||||
is working properly.
|
|
||||||
expr: vector(1)
|
|
||||||
labels:
|
|
||||||
severity: none
|
|
||||||
- name: node-network
|
|
||||||
rules:
|
|
||||||
- alert: NodeNetworkInterfaceFlapping
|
|
||||||
annotations:
|
|
||||||
message: Network interface "{{ $labels.device }}" changing it's up status
|
|
||||||
often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkinterfaceflapping
|
|
||||||
expr: |
|
|
||||||
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- name: kube-prometheus-node-recording.rules
|
|
||||||
rules:
|
|
||||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
|
|
||||||
BY (instance)
|
|
||||||
record: instance:node_cpu:rate:sum
|
|
||||||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
|
||||||
record: instance:node_network_receive_bytes:rate:sum
|
|
||||||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
|
||||||
record: instance:node_network_transmit_bytes:rate:sum
|
|
||||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
|
||||||
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
|
||||||
BY (instance, cpu)) BY (instance)
|
|
||||||
record: instance:node_cpu:ratio
|
|
||||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
|
||||||
record: cluster:node_cpu:sum_rate5m
|
|
||||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
|
||||||
BY (instance, cpu))
|
|
||||||
record: cluster:node_cpu:ratio
|
|
||||||
- name: kube-prometheus-general.rules
|
|
||||||
rules:
|
|
||||||
- expr: count without(instance, pod, node) (up == 1)
|
|
||||||
record: count:up1
|
|
||||||
- expr: count without(instance, pod, node) (up == 0)
|
|
||||||
record: count:up0
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,452 +0,0 @@
|
||||||
apiVersion: apiextensions.k8s.io/v1
|
|
||||||
kind: CustomResourceDefinition
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
controller-gen.kubebuilder.io/version: v0.4.1
|
|
||||||
creationTimestamp: null
|
|
||||||
name: podmonitors.monitoring.coreos.com
|
|
||||||
spec:
|
|
||||||
group: monitoring.coreos.com
|
|
||||||
names:
|
|
||||||
categories:
|
|
||||||
- prometheus-operator
|
|
||||||
kind: PodMonitor
|
|
||||||
listKind: PodMonitorList
|
|
||||||
plural: podmonitors
|
|
||||||
singular: podmonitor
|
|
||||||
scope: Namespaced
|
|
||||||
versions:
|
|
||||||
- name: v1
|
|
||||||
schema:
|
|
||||||
openAPIV3Schema:
|
|
||||||
description: PodMonitor defines monitoring for a set of pods.
|
|
||||||
properties:
|
|
||||||
apiVersion:
|
|
||||||
description: 'APIVersion defines the versioned schema of this representation
|
|
||||||
of an object. Servers should convert recognized schemas to the latest
|
|
||||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
|
||||||
type: string
|
|
||||||
kind:
|
|
||||||
description: 'Kind is a string value representing the REST resource this
|
|
||||||
object represents. Servers may infer this from the endpoint the client
|
|
||||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
|
||||||
type: string
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
spec:
|
|
||||||
description: Specification of desired Pod selection for target discovery
|
|
||||||
by Prometheus.
|
|
||||||
properties:
|
|
||||||
jobLabel:
|
|
||||||
description: The label to use to retrieve the job name from.
|
|
||||||
type: string
|
|
||||||
namespaceSelector:
|
|
||||||
description: Selector to select which namespaces the Endpoints objects
|
|
||||||
are discovered from.
|
|
||||||
properties:
|
|
||||||
any:
|
|
||||||
description: Boolean describing whether all namespaces are selected
|
|
||||||
in contrast to a list restricting them.
|
|
||||||
type: boolean
|
|
||||||
matchNames:
|
|
||||||
description: List of namespace names.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type: object
|
|
||||||
podMetricsEndpoints:
|
|
||||||
description: A list of endpoints allowed as part of this PodMonitor.
|
|
||||||
items:
|
|
||||||
description: PodMetricsEndpoint defines a scrapeable endpoint of
|
|
||||||
a Kubernetes Pod serving Prometheus metrics.
|
|
||||||
properties:
|
|
||||||
basicAuth:
|
|
||||||
description: 'BasicAuth allow an endpoint to authenticate over
|
|
||||||
basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
|
||||||
properties:
|
|
||||||
password:
|
|
||||||
description: The secret in the service monitor namespace
|
|
||||||
that contains the password for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
username:
|
|
||||||
description: The secret in the service monitor namespace
|
|
||||||
that contains the username for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
bearerTokenSecret:
|
|
||||||
description: Secret to mount to read bearer token for scraping
|
|
||||||
targets. The secret needs to be in the same namespace as the
|
|
||||||
pod monitor and accessible by the Prometheus Operator.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
honorLabels:
|
|
||||||
description: HonorLabels chooses the metric's labels on collisions
|
|
||||||
with target labels.
|
|
||||||
type: boolean
|
|
||||||
honorTimestamps:
|
|
||||||
description: HonorTimestamps controls whether Prometheus respects
|
|
||||||
the timestamps present in scraped data.
|
|
||||||
type: boolean
|
|
||||||
interval:
|
|
||||||
description: Interval at which metrics should be scraped
|
|
||||||
type: string
|
|
||||||
metricRelabelings:
|
|
||||||
description: MetricRelabelConfigs to apply to samples before
|
|
||||||
ingestion.
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
|
||||||
label set, being applied to samples before ingestion. It
|
|
||||||
defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex replace
|
|
||||||
is performed if the regular expression matches. Regex
|
|
||||||
capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
params:
|
|
||||||
additionalProperties:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
description: Optional HTTP URL parameters
|
|
||||||
type: object
|
|
||||||
path:
|
|
||||||
description: HTTP path to scrape for metrics.
|
|
||||||
type: string
|
|
||||||
port:
|
|
||||||
description: Name of the pod port this endpoint refers to. Mutually
|
|
||||||
exclusive with targetPort.
|
|
||||||
type: string
|
|
||||||
proxyUrl:
|
|
||||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
|
||||||
to proxy through this endpoint.
|
|
||||||
type: string
|
|
||||||
relabelings:
|
|
||||||
description: 'RelabelConfigs to apply to samples before scraping.
|
|
||||||
Prometheus Operator automatically adds relabelings for a few
|
|
||||||
standard Kubernetes fields and replaces original scrape job
|
|
||||||
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
|
||||||
label set, being applied to samples before ingestion. It
|
|
||||||
defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex replace
|
|
||||||
is performed if the regular expression matches. Regex
|
|
||||||
capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
scheme:
|
|
||||||
description: HTTP scheme to use for scraping.
|
|
||||||
type: string
|
|
||||||
scrapeTimeout:
|
|
||||||
description: Timeout after which the scrape is ended
|
|
||||||
type: string
|
|
||||||
targetPort:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: string
|
|
||||||
description: 'Deprecated: Use ''port'' instead.'
|
|
||||||
x-kubernetes-int-or-string: true
|
|
||||||
tlsConfig:
|
|
||||||
description: TLS configuration to use when scraping the endpoint.
|
|
||||||
properties:
|
|
||||||
ca:
|
|
||||||
description: Struct containing the CA cert to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its
|
|
||||||
key must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
cert:
|
|
||||||
description: Struct containing the client cert file for
|
|
||||||
the targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its
|
|
||||||
key must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
insecureSkipVerify:
|
|
||||||
description: Disable target certificate validation.
|
|
||||||
type: boolean
|
|
||||||
keySecret:
|
|
||||||
description: Secret containing the client key file for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
serverName:
|
|
||||||
description: Used to verify the hostname for the targets.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
podTargetLabels:
|
|
||||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
|
||||||
onto the target.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
sampleLimit:
|
|
||||||
description: SampleLimit defines per-scrape limit on number of scraped
|
|
||||||
samples that will be accepted.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
selector:
|
|
||||||
description: Selector to select Pod objects.
|
|
||||||
properties:
|
|
||||||
matchExpressions:
|
|
||||||
description: matchExpressions is a list of label selector requirements.
|
|
||||||
The requirements are ANDed.
|
|
||||||
items:
|
|
||||||
description: A label selector requirement is a selector that
|
|
||||||
contains values, a key, and an operator that relates the key
|
|
||||||
and values.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: key is the label key that the selector applies
|
|
||||||
to.
|
|
||||||
type: string
|
|
||||||
operator:
|
|
||||||
description: operator represents a key's relationship to
|
|
||||||
a set of values. Valid operators are In, NotIn, Exists
|
|
||||||
and DoesNotExist.
|
|
||||||
type: string
|
|
||||||
values:
|
|
||||||
description: values is an array of string values. If the
|
|
||||||
operator is In or NotIn, the values array must be non-empty.
|
|
||||||
If the operator is Exists or DoesNotExist, the values
|
|
||||||
array must be empty. This array is replaced during a strategic
|
|
||||||
merge patch.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- operator
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
matchLabels:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
description: matchLabels is a map of {key,value} pairs. A single
|
|
||||||
{key,value} in the matchLabels map is equivalent to an element
|
|
||||||
of matchExpressions, whose key field is "key", the operator
|
|
||||||
is "In", and the values array contains only "value". The requirements
|
|
||||||
are ANDed.
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
targetLimit:
|
|
||||||
description: TargetLimit defines a limit on the number of scraped
|
|
||||||
targets that will be accepted.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
required:
|
|
||||||
- podMetricsEndpoints
|
|
||||||
- selector
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- spec
|
|
||||||
type: object
|
|
||||||
served: true
|
|
||||||
storage: true
|
|
||||||
status:
|
|
||||||
acceptedNames:
|
|
||||||
kind: ""
|
|
||||||
plural: ""
|
|
||||||
conditions: []
|
|
||||||
storedVersions: []
|
|
|
@ -1,428 +0,0 @@
|
||||||
apiVersion: apiextensions.k8s.io/v1
|
|
||||||
kind: CustomResourceDefinition
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
controller-gen.kubebuilder.io/version: v0.4.1
|
|
||||||
creationTimestamp: null
|
|
||||||
name: probes.monitoring.coreos.com
|
|
||||||
spec:
|
|
||||||
group: monitoring.coreos.com
|
|
||||||
names:
|
|
||||||
categories:
|
|
||||||
- prometheus-operator
|
|
||||||
kind: Probe
|
|
||||||
listKind: ProbeList
|
|
||||||
plural: probes
|
|
||||||
singular: probe
|
|
||||||
scope: Namespaced
|
|
||||||
versions:
|
|
||||||
- name: v1
|
|
||||||
schema:
|
|
||||||
openAPIV3Schema:
|
|
||||||
description: Probe defines monitoring for a set of static targets or ingresses.
|
|
||||||
properties:
|
|
||||||
apiVersion:
|
|
||||||
description: 'APIVersion defines the versioned schema of this representation
|
|
||||||
of an object. Servers should convert recognized schemas to the latest
|
|
||||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
|
||||||
type: string
|
|
||||||
kind:
|
|
||||||
description: 'Kind is a string value representing the REST resource this
|
|
||||||
object represents. Servers may infer this from the endpoint the client
|
|
||||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
|
||||||
type: string
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
spec:
|
|
||||||
description: Specification of desired Ingress selection for target discovery
|
|
||||||
by Prometheus.
|
|
||||||
properties:
|
|
||||||
basicAuth:
|
|
||||||
description: 'BasicAuth allow an endpoint to authenticate over basic
|
|
||||||
authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
|
||||||
properties:
|
|
||||||
password:
|
|
||||||
description: The secret in the service monitor namespace that
|
|
||||||
contains the password for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must be
|
|
||||||
a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must be
|
|
||||||
defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
username:
|
|
||||||
description: The secret in the service monitor namespace that
|
|
||||||
contains the username for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must be
|
|
||||||
a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must be
|
|
||||||
defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
bearerTokenSecret:
|
|
||||||
description: Secret to mount to read bearer token for scraping targets.
|
|
||||||
The secret needs to be in the same namespace as the probe and accessible
|
|
||||||
by the Prometheus Operator.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must be a
|
|
||||||
valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
interval:
|
|
||||||
description: Interval at which targets are probed using the configured
|
|
||||||
prober. If not specified Prometheus' global scrape interval is used.
|
|
||||||
type: string
|
|
||||||
jobName:
|
|
||||||
description: The job name assigned to scraped metrics by default.
|
|
||||||
type: string
|
|
||||||
module:
|
|
||||||
description: 'The module to use for probing specifying how to probe
|
|
||||||
the target. Example module configuring in the blackbox exporter:
|
|
||||||
https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
|
|
||||||
type: string
|
|
||||||
prober:
|
|
||||||
description: Specification for the prober to use for probing targets.
|
|
||||||
The prober.URL parameter is required. Targets cannot be probed if
|
|
||||||
left empty.
|
|
||||||
properties:
|
|
||||||
path:
|
|
||||||
description: Path to collect metrics from. Defaults to `/probe`.
|
|
||||||
type: string
|
|
||||||
scheme:
|
|
||||||
description: HTTP scheme to use for scraping. Defaults to `http`.
|
|
||||||
type: string
|
|
||||||
url:
|
|
||||||
description: Mandatory URL of the prober.
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- url
|
|
||||||
type: object
|
|
||||||
scrapeTimeout:
|
|
||||||
description: Timeout for scraping metrics from the Prometheus exporter.
|
|
||||||
type: string
|
|
||||||
targets:
|
|
||||||
description: Targets defines a set of static and/or dynamically discovered
|
|
||||||
targets to be probed using the prober.
|
|
||||||
properties:
|
|
||||||
ingress:
|
|
||||||
description: Ingress defines the set of dynamically discovered
|
|
||||||
ingress objects which hosts are considered for probing.
|
|
||||||
properties:
|
|
||||||
namespaceSelector:
|
|
||||||
description: Select Ingress objects by namespace.
|
|
||||||
properties:
|
|
||||||
any:
|
|
||||||
description: Boolean describing whether all namespaces
|
|
||||||
are selected in contrast to a list restricting them.
|
|
||||||
type: boolean
|
|
||||||
matchNames:
|
|
||||||
description: List of namespace names.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type: object
|
|
||||||
relabelingConfigs:
|
|
||||||
description: 'RelabelConfigs to apply to samples before ingestion.
|
|
||||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of
|
|
||||||
the label set, being applied to samples before ingestion.
|
|
||||||
It defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex
|
|
||||||
replace is performed if the regular expression matches.
|
|
||||||
Regex capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
selector:
|
|
||||||
description: Select Ingress objects by labels.
|
|
||||||
properties:
|
|
||||||
matchExpressions:
|
|
||||||
description: matchExpressions is a list of label selector
|
|
||||||
requirements. The requirements are ANDed.
|
|
||||||
items:
|
|
||||||
description: A label selector requirement is a selector
|
|
||||||
that contains values, a key, and an operator that
|
|
||||||
relates the key and values.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: key is the label key that the selector
|
|
||||||
applies to.
|
|
||||||
type: string
|
|
||||||
operator:
|
|
||||||
description: operator represents a key's relationship
|
|
||||||
to a set of values. Valid operators are In, NotIn,
|
|
||||||
Exists and DoesNotExist.
|
|
||||||
type: string
|
|
||||||
values:
|
|
||||||
description: values is an array of string values.
|
|
||||||
If the operator is In or NotIn, the values array
|
|
||||||
must be non-empty. If the operator is Exists or
|
|
||||||
DoesNotExist, the values array must be empty.
|
|
||||||
This array is replaced during a strategic merge
|
|
||||||
patch.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- operator
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
matchLabels:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
description: matchLabels is a map of {key,value} pairs.
|
|
||||||
A single {key,value} in the matchLabels map is equivalent
|
|
||||||
to an element of matchExpressions, whose key field is
|
|
||||||
"key", the operator is "In", and the values array contains
|
|
||||||
only "value". The requirements are ANDed.
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
staticConfig:
|
|
||||||
description: 'StaticConfig defines static targets which are considers
|
|
||||||
for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
|
|
||||||
properties:
|
|
||||||
labels:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
description: Labels assigned to all metrics scraped from the
|
|
||||||
targets.
|
|
||||||
type: object
|
|
||||||
relabelingConfigs:
|
|
||||||
description: 'RelabelConfigs to apply to samples before ingestion.
|
|
||||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of
|
|
||||||
the label set, being applied to samples before ingestion.
|
|
||||||
It defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex
|
|
||||||
replace is performed if the regular expression matches.
|
|
||||||
Regex capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
static:
|
|
||||||
description: Targets is a list of URLs to probe using the
|
|
||||||
configured prober.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
tlsConfig:
|
|
||||||
description: TLS configuration to use when scraping the endpoint.
|
|
||||||
properties:
|
|
||||||
ca:
|
|
||||||
description: Struct containing the CA cert to use for the targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
cert:
|
|
||||||
description: Struct containing the client cert file for the targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
insecureSkipVerify:
|
|
||||||
description: Disable target certificate validation.
|
|
||||||
type: boolean
|
|
||||||
keySecret:
|
|
||||||
description: Secret containing the client key file for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must be
|
|
||||||
a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must be
|
|
||||||
defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
serverName:
|
|
||||||
description: Used to verify the hostname for the targets.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- spec
|
|
||||||
type: object
|
|
||||||
served: true
|
|
||||||
storage: true
|
|
||||||
status:
|
|
||||||
acceptedNames:
|
|
||||||
kind: ""
|
|
||||||
plural: ""
|
|
||||||
conditions: []
|
|
||||||
storedVersions: []
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,95 +0,0 @@
|
||||||
apiVersion: apiextensions.k8s.io/v1
|
|
||||||
kind: CustomResourceDefinition
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
controller-gen.kubebuilder.io/version: v0.4.1
|
|
||||||
creationTimestamp: null
|
|
||||||
name: prometheusrules.monitoring.coreos.com
|
|
||||||
spec:
|
|
||||||
group: monitoring.coreos.com
|
|
||||||
names:
|
|
||||||
kind: PrometheusRule
|
|
||||||
listKind: PrometheusRuleList
|
|
||||||
plural: prometheusrules
|
|
||||||
singular: prometheusrule
|
|
||||||
scope: Namespaced
|
|
||||||
versions:
|
|
||||||
- name: v1
|
|
||||||
schema:
|
|
||||||
openAPIV3Schema:
|
|
||||||
description: PrometheusRule defines recording and alerting rules for a Prometheus
|
|
||||||
instance
|
|
||||||
properties:
|
|
||||||
apiVersion:
|
|
||||||
description: 'APIVersion defines the versioned schema of this representation
|
|
||||||
of an object. Servers should convert recognized schemas to the latest
|
|
||||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
|
||||||
type: string
|
|
||||||
kind:
|
|
||||||
description: 'Kind is a string value representing the REST resource this
|
|
||||||
object represents. Servers may infer this from the endpoint the client
|
|
||||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
|
||||||
type: string
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
spec:
|
|
||||||
description: Specification of desired alerting rule definitions for Prometheus.
|
|
||||||
properties:
|
|
||||||
groups:
|
|
||||||
description: Content of Prometheus rule file
|
|
||||||
items:
|
|
||||||
description: 'RuleGroup is a list of sequentially evaluated recording
|
|
||||||
and alerting rules. Note: PartialResponseStrategy is only used
|
|
||||||
by ThanosRuler and will be ignored by Prometheus instances. Valid
|
|
||||||
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
|
||||||
properties:
|
|
||||||
interval:
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
partial_response_strategy:
|
|
||||||
type: string
|
|
||||||
rules:
|
|
||||||
items:
|
|
||||||
description: Rule describes an alerting or recording rule.
|
|
||||||
properties:
|
|
||||||
alert:
|
|
||||||
type: string
|
|
||||||
annotations:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
expr:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: string
|
|
||||||
x-kubernetes-int-or-string: true
|
|
||||||
for:
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
record:
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- expr
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- rules
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- spec
|
|
||||||
type: object
|
|
||||||
served: true
|
|
||||||
storage: true
|
|
||||||
status:
|
|
||||||
acceptedNames:
|
|
||||||
kind: ""
|
|
||||||
plural: ""
|
|
||||||
conditions: []
|
|
||||||
storedVersions: []
|
|
|
@ -1,475 +0,0 @@
|
||||||
apiVersion: apiextensions.k8s.io/v1
|
|
||||||
kind: CustomResourceDefinition
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
controller-gen.kubebuilder.io/version: v0.4.1
|
|
||||||
creationTimestamp: null
|
|
||||||
name: servicemonitors.monitoring.coreos.com
|
|
||||||
spec:
|
|
||||||
group: monitoring.coreos.com
|
|
||||||
names:
|
|
||||||
categories:
|
|
||||||
- prometheus-operator
|
|
||||||
kind: ServiceMonitor
|
|
||||||
listKind: ServiceMonitorList
|
|
||||||
plural: servicemonitors
|
|
||||||
singular: servicemonitor
|
|
||||||
scope: Namespaced
|
|
||||||
versions:
|
|
||||||
- name: v1
|
|
||||||
schema:
|
|
||||||
openAPIV3Schema:
|
|
||||||
description: ServiceMonitor defines monitoring for a set of services.
|
|
||||||
properties:
|
|
||||||
apiVersion:
|
|
||||||
description: 'APIVersion defines the versioned schema of this representation
|
|
||||||
of an object. Servers should convert recognized schemas to the latest
|
|
||||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
|
||||||
type: string
|
|
||||||
kind:
|
|
||||||
description: 'Kind is a string value representing the REST resource this
|
|
||||||
object represents. Servers may infer this from the endpoint the client
|
|
||||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
|
||||||
type: string
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
spec:
|
|
||||||
description: Specification of desired Service selection for target discovery
|
|
||||||
by Prometheus.
|
|
||||||
properties:
|
|
||||||
endpoints:
|
|
||||||
description: A list of endpoints allowed as part of this ServiceMonitor.
|
|
||||||
items:
|
|
||||||
description: Endpoint defines a scrapeable endpoint serving Prometheus
|
|
||||||
metrics.
|
|
||||||
properties:
|
|
||||||
basicAuth:
|
|
||||||
description: 'BasicAuth allow an endpoint to authenticate over
|
|
||||||
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
|
||||||
properties:
|
|
||||||
password:
|
|
||||||
description: The secret in the service monitor namespace
|
|
||||||
that contains the password for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
username:
|
|
||||||
description: The secret in the service monitor namespace
|
|
||||||
that contains the username for authentication.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
bearerTokenFile:
|
|
||||||
description: File to read bearer token for scraping targets.
|
|
||||||
type: string
|
|
||||||
bearerTokenSecret:
|
|
||||||
description: Secret to mount to read bearer token for scraping
|
|
||||||
targets. The secret needs to be in the same namespace as the
|
|
||||||
service monitor and accessible by the Prometheus Operator.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
honorLabels:
|
|
||||||
description: HonorLabels chooses the metric's labels on collisions
|
|
||||||
with target labels.
|
|
||||||
type: boolean
|
|
||||||
honorTimestamps:
|
|
||||||
description: HonorTimestamps controls whether Prometheus respects
|
|
||||||
the timestamps present in scraped data.
|
|
||||||
type: boolean
|
|
||||||
interval:
|
|
||||||
description: Interval at which metrics should be scraped
|
|
||||||
type: string
|
|
||||||
metricRelabelings:
|
|
||||||
description: MetricRelabelConfigs to apply to samples before
|
|
||||||
ingestion.
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
|
||||||
label set, being applied to samples before ingestion. It
|
|
||||||
defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex replace
|
|
||||||
is performed if the regular expression matches. Regex
|
|
||||||
capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
params:
|
|
||||||
additionalProperties:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
description: Optional HTTP URL parameters
|
|
||||||
type: object
|
|
||||||
path:
|
|
||||||
description: HTTP path to scrape for metrics.
|
|
||||||
type: string
|
|
||||||
port:
|
|
||||||
description: Name of the service port this endpoint refers to.
|
|
||||||
Mutually exclusive with targetPort.
|
|
||||||
type: string
|
|
||||||
proxyUrl:
|
|
||||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
|
||||||
to proxy through this endpoint.
|
|
||||||
type: string
|
|
||||||
relabelings:
|
|
||||||
description: 'RelabelConfigs to apply to samples before scraping.
|
|
||||||
Prometheus Operator automatically adds relabelings for a few
|
|
||||||
standard Kubernetes fields and replaces original scrape job
|
|
||||||
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
|
||||||
items:
|
|
||||||
description: 'RelabelConfig allows dynamic rewriting of the
|
|
||||||
label set, being applied to samples before ingestion. It
|
|
||||||
defines `<metric_relabel_configs>`-section of Prometheus
|
|
||||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
|
||||||
properties:
|
|
||||||
action:
|
|
||||||
description: Action to perform based on regex matching.
|
|
||||||
Default is 'replace'
|
|
||||||
type: string
|
|
||||||
modulus:
|
|
||||||
description: Modulus to take of the hash of the source
|
|
||||||
label values.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
regex:
|
|
||||||
description: Regular expression against which the extracted
|
|
||||||
value is matched. Default is '(.*)'
|
|
||||||
type: string
|
|
||||||
replacement:
|
|
||||||
description: Replacement value against which a regex replace
|
|
||||||
is performed if the regular expression matches. Regex
|
|
||||||
capture groups are available. Default is '$1'
|
|
||||||
type: string
|
|
||||||
separator:
|
|
||||||
description: Separator placed between concatenated source
|
|
||||||
label values. default is ';'.
|
|
||||||
type: string
|
|
||||||
sourceLabels:
|
|
||||||
description: The source labels select values from existing
|
|
||||||
labels. Their content is concatenated using the configured
|
|
||||||
separator and matched against the configured regular
|
|
||||||
expression for the replace, keep, and drop actions.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLabel:
|
|
||||||
description: Label to which the resulting value is written
|
|
||||||
in a replace action. It is mandatory for replace actions.
|
|
||||||
Regex capture groups are available.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
scheme:
|
|
||||||
description: HTTP scheme to use for scraping.
|
|
||||||
type: string
|
|
||||||
scrapeTimeout:
|
|
||||||
description: Timeout after which the scrape is ended
|
|
||||||
type: string
|
|
||||||
targetPort:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: string
|
|
||||||
description: Name or number of the target port of the Pod behind
|
|
||||||
the Service, the port must be specified with container port
|
|
||||||
property. Mutually exclusive with port.
|
|
||||||
x-kubernetes-int-or-string: true
|
|
||||||
tlsConfig:
|
|
||||||
description: TLS configuration to use when scraping the endpoint
|
|
||||||
properties:
|
|
||||||
ca:
|
|
||||||
description: Struct containing the CA cert to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its
|
|
||||||
key must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
caFile:
|
|
||||||
description: Path to the CA cert in the Prometheus container
|
|
||||||
to use for the targets.
|
|
||||||
type: string
|
|
||||||
cert:
|
|
||||||
description: Struct containing the client cert file for
|
|
||||||
the targets.
|
|
||||||
properties:
|
|
||||||
configMap:
|
|
||||||
description: ConfigMap containing data to use for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key to select.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the ConfigMap or its
|
|
||||||
key must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
secret:
|
|
||||||
description: Secret containing data to use for the targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind,
|
|
||||||
uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key
|
|
||||||
must be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
certFile:
|
|
||||||
description: Path to the client cert file in the Prometheus
|
|
||||||
container for the targets.
|
|
||||||
type: string
|
|
||||||
insecureSkipVerify:
|
|
||||||
description: Disable target certificate validation.
|
|
||||||
type: boolean
|
|
||||||
keyFile:
|
|
||||||
description: Path to the client key file in the Prometheus
|
|
||||||
container for the targets.
|
|
||||||
type: string
|
|
||||||
keySecret:
|
|
||||||
description: Secret containing the client key file for the
|
|
||||||
targets.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: The key of the secret to select from. Must
|
|
||||||
be a valid secret key.
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
|
||||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
|
||||||
type: string
|
|
||||||
optional:
|
|
||||||
description: Specify whether the Secret or its key must
|
|
||||||
be defined
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
type: object
|
|
||||||
serverName:
|
|
||||||
description: Used to verify the hostname for the targets.
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
jobLabel:
|
|
||||||
description: The label to use to retrieve the job name from.
|
|
||||||
type: string
|
|
||||||
namespaceSelector:
|
|
||||||
description: Selector to select which namespaces the Endpoints objects
|
|
||||||
are discovered from.
|
|
||||||
properties:
|
|
||||||
any:
|
|
||||||
description: Boolean describing whether all namespaces are selected
|
|
||||||
in contrast to a list restricting them.
|
|
||||||
type: boolean
|
|
||||||
matchNames:
|
|
||||||
description: List of namespace names.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
type: object
|
|
||||||
podTargetLabels:
|
|
||||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
|
||||||
onto the target.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
sampleLimit:
|
|
||||||
description: SampleLimit defines per-scrape limit on number of scraped
|
|
||||||
samples that will be accepted.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
selector:
|
|
||||||
description: Selector to select Endpoints objects.
|
|
||||||
properties:
|
|
||||||
matchExpressions:
|
|
||||||
description: matchExpressions is a list of label selector requirements.
|
|
||||||
The requirements are ANDed.
|
|
||||||
items:
|
|
||||||
description: A label selector requirement is a selector that
|
|
||||||
contains values, a key, and an operator that relates the key
|
|
||||||
and values.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
description: key is the label key that the selector applies
|
|
||||||
to.
|
|
||||||
type: string
|
|
||||||
operator:
|
|
||||||
description: operator represents a key's relationship to
|
|
||||||
a set of values. Valid operators are In, NotIn, Exists
|
|
||||||
and DoesNotExist.
|
|
||||||
type: string
|
|
||||||
values:
|
|
||||||
description: values is an array of string values. If the
|
|
||||||
operator is In or NotIn, the values array must be non-empty.
|
|
||||||
If the operator is Exists or DoesNotExist, the values
|
|
||||||
array must be empty. This array is replaced during a strategic
|
|
||||||
merge patch.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- operator
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
matchLabels:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
description: matchLabels is a map of {key,value} pairs. A single
|
|
||||||
{key,value} in the matchLabels map is equivalent to an element
|
|
||||||
of matchExpressions, whose key field is "key", the operator
|
|
||||||
is "In", and the values array contains only "value". The requirements
|
|
||||||
are ANDed.
|
|
||||||
type: object
|
|
||||||
type: object
|
|
||||||
targetLabels:
|
|
||||||
description: TargetLabels transfers labels on the Kubernetes Service
|
|
||||||
onto the target.
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
targetLimit:
|
|
||||||
description: TargetLimit defines a limit on the number of scraped
|
|
||||||
targets that will be accepted.
|
|
||||||
format: int64
|
|
||||||
type: integer
|
|
||||||
required:
|
|
||||||
- endpoints
|
|
||||||
- selector
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- spec
|
|
||||||
type: object
|
|
||||||
served: true
|
|
||||||
storage: true
|
|
||||||
status:
|
|
||||||
acceptedNames:
|
|
||||||
kind: ""
|
|
||||||
plural: ""
|
|
||||||
conditions: []
|
|
||||||
storedVersions: []
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,92 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- monitoring.coreos.com
|
|
||||||
resources:
|
|
||||||
- alertmanagers
|
|
||||||
- alertmanagers/finalizers
|
|
||||||
- alertmanagerconfigs
|
|
||||||
- prometheuses
|
|
||||||
- prometheuses/finalizers
|
|
||||||
- thanosrulers
|
|
||||||
- thanosrulers/finalizers
|
|
||||||
- servicemonitors
|
|
||||||
- podmonitors
|
|
||||||
- probes
|
|
||||||
- prometheusrules
|
|
||||||
verbs:
|
|
||||||
- '*'
|
|
||||||
- apiGroups:
|
|
||||||
- apps
|
|
||||||
resources:
|
|
||||||
- statefulsets
|
|
||||||
verbs:
|
|
||||||
- '*'
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- configmaps
|
|
||||||
- secrets
|
|
||||||
verbs:
|
|
||||||
- '*'
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- delete
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- services
|
|
||||||
- services/finalizers
|
|
||||||
- endpoints
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- create
|
|
||||||
- update
|
|
||||||
- delete
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- nodes
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- namespaces
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- networking.k8s.io
|
|
||||||
resources:
|
|
||||||
- ingresses
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- authentication.k8s.io
|
|
||||||
resources:
|
|
||||||
- tokenreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
||||||
- apiGroups:
|
|
||||||
- authorization.k8s.io
|
|
||||||
resources:
|
|
||||||
- subjectaccessreviews
|
|
||||||
verbs:
|
|
||||||
- create
|
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: prometheus-operator
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus-operator
|
|
||||||
namespace: default
|
|
|
@ -1,70 +0,0 @@
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- args:
|
|
||||||
- --kubelet-service=kube-system/kubelet
|
|
||||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.47.0
|
|
||||||
image: quay.io/prometheus-operator/prometheus-operator:v0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
ports:
|
|
||||||
- containerPort: 8080
|
|
||||||
name: http
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 200Mi
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 100Mi
|
|
||||||
securityContext:
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
- args:
|
|
||||||
- --logtostderr
|
|
||||||
- --secure-listen-address=:8443
|
|
||||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
|
||||||
- --upstream=http://127.0.0.1:8080/
|
|
||||||
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
|
|
||||||
name: kube-rbac-proxy
|
|
||||||
ports:
|
|
||||||
- containerPort: 8443
|
|
||||||
name: https
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 20m
|
|
||||||
memory: 40Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 20Mi
|
|
||||||
securityContext:
|
|
||||||
runAsGroup: 65532
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65532
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/os: linux
|
|
||||||
securityContext:
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 65534
|
|
||||||
serviceAccountName: prometheus-operator
|
|
|
@ -1,95 +0,0 @@
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: PrometheusRule
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
prometheus: k8s
|
|
||||||
role: alert-rules
|
|
||||||
name: prometheus-operator-rules
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
groups:
|
|
||||||
- name: prometheus-operator
|
|
||||||
rules:
|
|
||||||
- alert: PrometheusOperatorListErrors
|
|
||||||
annotations:
|
|
||||||
description: Errors while performing List operations in controller {{$labels.controller}}
|
|
||||||
in {{$labels.namespace}} namespace.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
|
|
||||||
summary: Errors while performing list operations in controller.
|
|
||||||
expr: |
|
|
||||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorWatchErrors
|
|
||||||
annotations:
|
|
||||||
description: Errors while performing watch operations in controller {{$labels.controller}}
|
|
||||||
in {{$labels.namespace}} namespace.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
|
|
||||||
summary: Errors while performing watch operations in controller.
|
|
||||||
expr: |
|
|
||||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorSyncFailed
|
|
||||||
annotations:
|
|
||||||
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
|
|
||||||
namespace fails to reconcile {{ $value }} objects.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
|
|
||||||
summary: Last controller reconciliation failed
|
|
||||||
expr: |
|
|
||||||
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="default"}[5m]) > 0
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorReconcileErrors
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value | humanizePercentage }} of reconciling operations
|
|
||||||
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
|
|
||||||
namespace.'
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
|
|
||||||
summary: Errors while reconciling controller.
|
|
||||||
expr: |
|
|
||||||
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="default"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="default"}[5m]))) > 0.1
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorNodeLookupErrors
|
|
||||||
annotations:
|
|
||||||
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
|
|
||||||
Namespace.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
|
|
||||||
summary: Errors while reconciling Prometheus.
|
|
||||||
expr: |
|
|
||||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="default"}[5m]) > 0.1
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorNotReady
|
|
||||||
annotations:
|
|
||||||
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
|
|
||||||
ready to reconcile {{ $labels.controller }} resources.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
|
|
||||||
summary: Prometheus operator not ready
|
|
||||||
expr: |
|
|
||||||
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="default"}[5m]) == 0)
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorRejectedResources
|
|
||||||
annotations:
|
|
||||||
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
|
|
||||||
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
|
|
||||||
}} resources.
|
|
||||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
|
|
||||||
summary: Resources rejected by Prometheus operator
|
|
||||||
expr: |
|
|
||||||
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="default"}[5m]) > 0
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
|
@ -1,20 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
namespace: default
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: https
|
|
||||||
port: 8443
|
|
||||||
targetPort: https
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
|
@ -1,10 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/component: controller
|
|
||||||
app.kubernetes.io/name: prometheus-operator
|
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
|
||||||
app.kubernetes.io/version: 0.47.0
|
|
||||||
name: prometheus-operator
|
|
||||||
namespace: default
|
|
|
@ -1,17 +0,0 @@
|
||||||
---
|
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
name: traefik
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
path: /metrics
|
|
||||||
port: metrics
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: traefik
|
|
|
@ -1,109 +0,0 @@
|
||||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
|
||||||
local pvc = k.core.v1.persistentVolumeClaim;
|
|
||||||
local ingress = k.extensions.v1beta1.ingress;
|
|
||||||
local ingressTls = ingress.mixin.spec.tlsType;
|
|
||||||
local ingressRule = ingress.mixin.spec.rulesType;
|
|
||||||
local httpIngressPath = ingressRule.mixin.http.pathsType;
|
|
||||||
local statefulSet = k.apps.v1.statefulSet;
|
|
||||||
local selector = statefulSet.mixin.spec.selectorType;
|
|
||||||
|
|
||||||
local kp =
|
|
||||||
(import 'kube-prometheus/main.libsonnet') +
|
|
||||||
(import 'prometheus-pushgateway/pushgateway.libsonnet') +
|
|
||||||
(import 'k3s.libsonnet')
|
|
||||||
|
|
||||||
{
|
|
||||||
_config+:: {
|
|
||||||
namespace: 'monitoring',
|
|
||||||
versions+:: {
|
|
||||||
pushgateway: 'v1.1.0',
|
|
||||||
},
|
|
||||||
prometheus+:: {
|
|
||||||
names: 'k8s',
|
|
||||||
replicas: 1,
|
|
||||||
namespaces+: ['k8up', 'owntracks'],
|
|
||||||
},
|
|
||||||
alertmanager+:: {
|
|
||||||
replicas: 1,
|
|
||||||
},
|
|
||||||
grafana+: {
|
|
||||||
plugins: ['grafana-piechart-panel'],
|
|
||||||
datasources+: [{
|
|
||||||
name: 'Loki',
|
|
||||||
type: 'loki',
|
|
||||||
access: 'proxy',
|
|
||||||
orgId: 1,
|
|
||||||
url: 'http://loki.loki:3100',
|
|
||||||
version: 1,
|
|
||||||
editable: false,
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
alertmanager+:: {
|
|
||||||
alertmanager+: {
|
|
||||||
spec+: {
|
|
||||||
configSecret: 'alertmanager-tbrnt-config',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
prometheus+:: {
|
|
||||||
prometheus+: {
|
|
||||||
spec+: {
|
|
||||||
retention: '7d',
|
|
||||||
externalUrl: 'http://prometheus-k8s.monitoring:9090',
|
|
||||||
serviceMonitorNamespaceSelector: selector.withMatchExpressions({ key: 'prometheus', operator: 'In', values: ['yes', 'true'] }),
|
|
||||||
podMonitorNamespaceSelector: selector.withMatchExpressions({ key: 'prometheus', operator: 'In', values: ['yes', 'true'] }),
|
|
||||||
storage: {
|
|
||||||
volumeClaimTemplate:
|
|
||||||
pvc.new() +
|
|
||||||
pvc.mixin.spec.withAccessModes('ReadWriteOnce') +
|
|
||||||
pvc.mixin.spec.resources.withRequests({ storage: '10Gi' }) +
|
|
||||||
pvc.mixin.spec.withStorageClassName('local-path'),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
ingress+:: {
|
|
||||||
grafana:
|
|
||||||
ingress.new() +
|
|
||||||
ingress.mixin.metadata.withName('grafana') +
|
|
||||||
ingress.mixin.metadata.withNamespace($._config.namespace) +
|
|
||||||
ingress.mixin.metadata.withAnnotations({
|
|
||||||
'cert-manager.io/cluster-issuer': 'letsencrypt-prod',
|
|
||||||
'ingress.kubernetes.io/ssl-redirect': 'true',
|
|
||||||
}) +
|
|
||||||
ingress.mixin.spec.withRules(
|
|
||||||
ingressRule.new() +
|
|
||||||
ingressRule.withHost('grafana.knurrli.tbrnt.ch') +
|
|
||||||
ingressRule.mixin.http.withPaths(
|
|
||||||
httpIngressPath.new() +
|
|
||||||
httpIngressPath.mixin.backend.withServiceName('grafana') +
|
|
||||||
httpIngressPath.mixin.backend.withServicePort('http')
|
|
||||||
),
|
|
||||||
) +
|
|
||||||
ingress.mixin.spec.withTls(
|
|
||||||
ingressTls.new() +
|
|
||||||
ingressTls.withHosts('grafana.knurrli.tbrnt.ch') +
|
|
||||||
ingressTls.withSecretName('grafana-ingress-cert')
|
|
||||||
),
|
|
||||||
},
|
|
||||||
grafanaDashboards+:: {
|
|
||||||
'traefik.json': (import 'traefik-grafana-dashboard.json'),
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
|
||||||
{
|
|
||||||
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
|
|
||||||
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
|
|
||||||
} +
|
|
||||||
// serviceMonitor is separated so that it can be created after the CRDs are ready
|
|
||||||
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
|
|
||||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
|
||||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
|
||||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
|
||||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
|
||||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
|
||||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
|
|
||||||
{ ['prometheus-pushgateway-' + name]: kp.pushgateway[name] for name in std.objectFields(kp.pushgateway) } +
|
|
||||||
{ ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) }
|
|
|
@ -1,720 +0,0 @@
|
||||||
{
|
|
||||||
"__inputs": [
|
|
||||||
{
|
|
||||||
"name": "DS_PROMETHEUS",
|
|
||||||
"label": "Prometheus",
|
|
||||||
"description": "",
|
|
||||||
"type": "datasource",
|
|
||||||
"pluginId": "prometheus",
|
|
||||||
"pluginName": "Prometheus"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"__requires": [
|
|
||||||
{
|
|
||||||
"type": "grafana",
|
|
||||||
"id": "grafana",
|
|
||||||
"name": "Grafana",
|
|
||||||
"version": "5.2.4"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "panel",
|
|
||||||
"id": "grafana-piechart-panel",
|
|
||||||
"name": "Pie Chart",
|
|
||||||
"version": "1.1.6"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "panel",
|
|
||||||
"id": "graph",
|
|
||||||
"name": "Graph",
|
|
||||||
"version": "5.0.0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "datasource",
|
|
||||||
"id": "prometheus",
|
|
||||||
"name": "Prometheus",
|
|
||||||
"version": "5.0.0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "panel",
|
|
||||||
"id": "singlestat",
|
|
||||||
"name": "Singlestat",
|
|
||||||
"version": "5.0.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"annotations": {
|
|
||||||
"list": [
|
|
||||||
{
|
|
||||||
"builtIn": 1,
|
|
||||||
"datasource": "-- Grafana --",
|
|
||||||
"enable": true,
|
|
||||||
"hide": true,
|
|
||||||
"iconColor": "rgba(0, 211, 255, 1)",
|
|
||||||
"name": "Annotations & Alerts",
|
|
||||||
"type": "dashboard"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"description": "Traefik dashboard prometheus",
|
|
||||||
"editable": true,
|
|
||||||
"gnetId": 4475,
|
|
||||||
"graphTooltip": 0,
|
|
||||||
"id": null,
|
|
||||||
"iteration": 1538662098977,
|
|
||||||
"links": [],
|
|
||||||
"panels": [
|
|
||||||
{
|
|
||||||
"gridPos": {
|
|
||||||
"h": 1,
|
|
||||||
"w": 24,
|
|
||||||
"x": 0,
|
|
||||||
"y": 0
|
|
||||||
},
|
|
||||||
"id": 10,
|
|
||||||
"title": "$backend stats",
|
|
||||||
"type": "row"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cacheTimeout": null,
|
|
||||||
"colorBackground": false,
|
|
||||||
"colorValue": true,
|
|
||||||
"colors": [
|
|
||||||
"#d44a3a",
|
|
||||||
"rgba(237, 129, 40, 0.89)",
|
|
||||||
"#299c46"
|
|
||||||
],
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"format": "none",
|
|
||||||
"gauge": {
|
|
||||||
"maxValue": 100,
|
|
||||||
"minValue": 0,
|
|
||||||
"show": false,
|
|
||||||
"thresholdLabels": false,
|
|
||||||
"thresholdMarkers": true
|
|
||||||
},
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 8,
|
|
||||||
"x": 0,
|
|
||||||
"y": 1
|
|
||||||
},
|
|
||||||
"id": 1,
|
|
||||||
"interval": null,
|
|
||||||
"links": [],
|
|
||||||
"mappingType": 1,
|
|
||||||
"mappingTypes": [
|
|
||||||
{
|
|
||||||
"name": "value to text",
|
|
||||||
"value": 1
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "range to text",
|
|
||||||
"value": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"maxDataPoints": 100,
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"nullText": null,
|
|
||||||
"postfix": "",
|
|
||||||
"postfixFontSize": "50%",
|
|
||||||
"prefix": "",
|
|
||||||
"prefixFontSize": "50%",
|
|
||||||
"rangeMaps": [
|
|
||||||
{
|
|
||||||
"from": "null",
|
|
||||||
"text": "N/A",
|
|
||||||
"to": "null"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"sparkline": {
|
|
||||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
||||||
"full": false,
|
|
||||||
"lineColor": "rgb(31, 120, 193)",
|
|
||||||
"show": false
|
|
||||||
},
|
|
||||||
"tableColumn": "",
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(traefik_backend_server_up{backend=\"$backend\"})/count(traefik_config_reloads_total)",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thresholds": "0,1",
|
|
||||||
"title": "$backend status",
|
|
||||||
"type": "singlestat",
|
|
||||||
"valueFontSize": "80%",
|
|
||||||
"valueMaps": [
|
|
||||||
{
|
|
||||||
"op": "=",
|
|
||||||
"text": "OK",
|
|
||||||
"value": "1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"valueName": "current"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"breakPoint": "50%",
|
|
||||||
"cacheTimeout": null,
|
|
||||||
"combine": {
|
|
||||||
"label": "Others",
|
|
||||||
"threshold": 0
|
|
||||||
},
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fontSize": "80%",
|
|
||||||
"format": "short",
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 8,
|
|
||||||
"x": 8,
|
|
||||||
"y": 1
|
|
||||||
},
|
|
||||||
"id": 2,
|
|
||||||
"interval": null,
|
|
||||||
"legend": {
|
|
||||||
"percentage": true,
|
|
||||||
"show": true,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"legendType": "Right side",
|
|
||||||
"links": [],
|
|
||||||
"maxDataPoints": 3,
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"pieType": "pie",
|
|
||||||
"strokeWidth": 1,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "traefik_backend_requests_total{backend=\"$backend\"}",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{method}} : {{code}}",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "$backend return code",
|
|
||||||
"type": "grafana-piechart-panel",
|
|
||||||
"valueName": "current"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cacheTimeout": null,
|
|
||||||
"colorBackground": false,
|
|
||||||
"colorValue": false,
|
|
||||||
"colors": [
|
|
||||||
"#299c46",
|
|
||||||
"rgba(237, 129, 40, 0.89)",
|
|
||||||
"#d44a3a"
|
|
||||||
],
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"format": "ms",
|
|
||||||
"gauge": {
|
|
||||||
"maxValue": 100,
|
|
||||||
"minValue": 0,
|
|
||||||
"show": false,
|
|
||||||
"thresholdLabels": false,
|
|
||||||
"thresholdMarkers": true
|
|
||||||
},
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 8,
|
|
||||||
"x": 16,
|
|
||||||
"y": 1
|
|
||||||
},
|
|
||||||
"id": 4,
|
|
||||||
"interval": null,
|
|
||||||
"links": [],
|
|
||||||
"mappingType": 1,
|
|
||||||
"mappingTypes": [
|
|
||||||
{
|
|
||||||
"name": "value to text",
|
|
||||||
"value": 1
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "range to text",
|
|
||||||
"value": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"maxDataPoints": 100,
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"nullText": null,
|
|
||||||
"postfix": "",
|
|
||||||
"postfixFontSize": "50%",
|
|
||||||
"prefix": "",
|
|
||||||
"prefixFontSize": "50%",
|
|
||||||
"rangeMaps": [
|
|
||||||
{
|
|
||||||
"from": "null",
|
|
||||||
"text": "N/A",
|
|
||||||
"to": "null"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"sparkline": {
|
|
||||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
||||||
"full": false,
|
|
||||||
"lineColor": "rgb(31, 120, 193)",
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
"tableColumn": "",
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(traefik_backend_request_duration_seconds_sum{backend=\"$backend\"}) / sum(traefik_backend_requests_total{backend=\"$backend\"}) * 1000",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thresholds": "",
|
|
||||||
"title": "$backend response time",
|
|
||||||
"type": "singlestat",
|
|
||||||
"valueFontSize": "80%",
|
|
||||||
"valueMaps": [
|
|
||||||
{
|
|
||||||
"op": "=",
|
|
||||||
"text": "N/A",
|
|
||||||
"value": "null"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"valueName": "avg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": true,
|
|
||||||
"dashLength": 10,
|
|
||||||
"dashes": false,
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fill": 1,
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 24,
|
|
||||||
"x": 0,
|
|
||||||
"y": 8
|
|
||||||
},
|
|
||||||
"id": 3,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": true,
|
|
||||||
"current": false,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": false,
|
|
||||||
"linewidth": 1,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "null",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"spaceLength": 10,
|
|
||||||
"stack": false,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(rate(traefik_backend_requests_total{backend=\"$backend\"}[5m]))",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "Total requests $backend",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thresholds": [],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Total requests over 5min $backend",
|
|
||||||
"tooltip": {
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "individual"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"buckets": null,
|
|
||||||
"mode": "time",
|
|
||||||
"name": null,
|
|
||||||
"show": true,
|
|
||||||
"values": []
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"yaxis": {
|
|
||||||
"align": false,
|
|
||||||
"alignLevel": null
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"collapsed": false,
|
|
||||||
"gridPos": {
|
|
||||||
"h": 1,
|
|
||||||
"w": 24,
|
|
||||||
"x": 0,
|
|
||||||
"y": 15
|
|
||||||
},
|
|
||||||
"id": 12,
|
|
||||||
"panels": [],
|
|
||||||
"title": "Global stats",
|
|
||||||
"type": "row"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": true,
|
|
||||||
"dashLength": 10,
|
|
||||||
"dashes": false,
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fill": 1,
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 12,
|
|
||||||
"x": 0,
|
|
||||||
"y": 16
|
|
||||||
},
|
|
||||||
"id": 5,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": false,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": false,
|
|
||||||
"linewidth": 1,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "null",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"spaceLength": 10,
|
|
||||||
"stack": true,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{method}} : {{code}}",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thresholds": [],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Status code 200 over 5min",
|
|
||||||
"tooltip": {
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "individual"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"buckets": null,
|
|
||||||
"mode": "time",
|
|
||||||
"name": null,
|
|
||||||
"show": true,
|
|
||||||
"values": []
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"yaxis": {
|
|
||||||
"align": false,
|
|
||||||
"alignLevel": null
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": true,
|
|
||||||
"dashLength": 10,
|
|
||||||
"dashes": false,
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fill": 1,
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 12,
|
|
||||||
"x": 12,
|
|
||||||
"y": 16
|
|
||||||
},
|
|
||||||
"id": 6,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": false,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": false,
|
|
||||||
"linewidth": 1,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "null",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"spaceLength": 10,
|
|
||||||
"stack": true,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m])",
|
|
||||||
"format": "time_series",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{ method }} : {{code}}",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thresholds": [],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Others status code over 5min",
|
|
||||||
"tooltip": {
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "individual"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"buckets": null,
|
|
||||||
"mode": "time",
|
|
||||||
"name": null,
|
|
||||||
"show": true,
|
|
||||||
"values": []
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"yaxis": {
|
|
||||||
"align": false,
|
|
||||||
"alignLevel": null
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"breakPoint": "50%",
|
|
||||||
"cacheTimeout": null,
|
|
||||||
"combine": {
|
|
||||||
"label": "Others",
|
|
||||||
"threshold": 0
|
|
||||||
},
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fontSize": "80%",
|
|
||||||
"format": "short",
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 12,
|
|
||||||
"x": 0,
|
|
||||||
"y": 23
|
|
||||||
},
|
|
||||||
"id": 7,
|
|
||||||
"interval": null,
|
|
||||||
"legend": {
|
|
||||||
"show": true,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"legendType": "Right side",
|
|
||||||
"links": [],
|
|
||||||
"maxDataPoints": 3,
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"pieType": "pie",
|
|
||||||
"strokeWidth": 1,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(rate(traefik_backend_requests_total[5m])) by (backend) ",
|
|
||||||
"format": "time_series",
|
|
||||||
"interval": "",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{ backend }}",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Requests by service",
|
|
||||||
"type": "grafana-piechart-panel",
|
|
||||||
"valueName": "total"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"breakPoint": "50%",
|
|
||||||
"cacheTimeout": null,
|
|
||||||
"combine": {
|
|
||||||
"label": "Others",
|
|
||||||
"threshold": 0
|
|
||||||
},
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"fontSize": "80%",
|
|
||||||
"format": "short",
|
|
||||||
"gridPos": {
|
|
||||||
"h": 7,
|
|
||||||
"w": 12,
|
|
||||||
"x": 12,
|
|
||||||
"y": 23
|
|
||||||
},
|
|
||||||
"id": 8,
|
|
||||||
"interval": null,
|
|
||||||
"legend": {
|
|
||||||
"show": true,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"legendType": "Right side",
|
|
||||||
"links": [],
|
|
||||||
"maxDataPoints": 3,
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"pieType": "pie",
|
|
||||||
"strokeWidth": 1,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ",
|
|
||||||
"format": "time_series",
|
|
||||||
"interval": "",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{ entrypoint }}",
|
|
||||||
"refId": "A"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Requests by protocol",
|
|
||||||
"type": "grafana-piechart-panel",
|
|
||||||
"valueName": "total"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"schemaVersion": 16,
|
|
||||||
"style": "dark",
|
|
||||||
"tags": [
|
|
||||||
"traefik",
|
|
||||||
"prometheus"
|
|
||||||
],
|
|
||||||
"templating": {
|
|
||||||
"list": [
|
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"current": {},
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"hide": 0,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "backend",
|
|
||||||
"options": [],
|
|
||||||
"query": "label_values(backend)",
|
|
||||||
"refresh": 1,
|
|
||||||
"regex": "",
|
|
||||||
"sort": 0,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "query",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"current": {},
|
|
||||||
"datasource": "prometheus",
|
|
||||||
"hide": 0,
|
|
||||||
"includeAll": true,
|
|
||||||
"label": null,
|
|
||||||
"multi": true,
|
|
||||||
"name": "entrypoint",
|
|
||||||
"options": [],
|
|
||||||
"query": "label_values(entrypoint)",
|
|
||||||
"refresh": 1,
|
|
||||||
"regex": "",
|
|
||||||
"sort": 0,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "query",
|
|
||||||
"useTags": false
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"time": {
|
|
||||||
"from": "now-1h",
|
|
||||||
"to": "now"
|
|
||||||
},
|
|
||||||
"timepicker": {
|
|
||||||
"refresh_intervals": [
|
|
||||||
"5s",
|
|
||||||
"10s",
|
|
||||||
"30s",
|
|
||||||
"1m",
|
|
||||||
"5m",
|
|
||||||
"15m",
|
|
||||||
"30m",
|
|
||||||
"1h",
|
|
||||||
"2h",
|
|
||||||
"1d"
|
|
||||||
],
|
|
||||||
"time_options": [
|
|
||||||
"5m",
|
|
||||||
"15m",
|
|
||||||
"1h",
|
|
||||||
"6h",
|
|
||||||
"12h",
|
|
||||||
"24h",
|
|
||||||
"2d",
|
|
||||||
"7d",
|
|
||||||
"30d"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"timezone": "",
|
|
||||||
"title": "Traefik",
|
|
||||||
"uid": "qPdAviJmz",
|
|
||||||
"version": 5
|
|
||||||
}
|
|
|
@ -1,17 +0,0 @@
|
||||||
---
|
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
name: traefik
|
|
||||||
namespace: monitoring
|
|
||||||
spec:
|
|
||||||
endpoints:
|
|
||||||
- interval: 30s
|
|
||||||
path: /metrics
|
|
||||||
port: metrics
|
|
||||||
namespaceSelector:
|
|
||||||
matchNames:
|
|
||||||
- kube-system
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: traefik
|
|
1
monitoring/vendor/alertmanager
vendored
1
monitoring/vendor/alertmanager
vendored
|
@ -1 +0,0 @@
|
||||||
github.com/prometheus/alertmanager/doc/alertmanager-mixin
|
|
349
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
349
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet
generated
vendored
|
@ -1,349 +0,0 @@
|
||||||
{
|
|
||||||
_config+:: {
|
|
||||||
namespace: 'default',
|
|
||||||
|
|
||||||
versions+:: {
|
|
||||||
grafana: '7.3.4',
|
|
||||||
},
|
|
||||||
|
|
||||||
imageRepos+:: {
|
|
||||||
grafana: 'docker.io/grafana/grafana',
|
|
||||||
},
|
|
||||||
|
|
||||||
prometheus+:: {
|
|
||||||
name: 'k8s',
|
|
||||||
serviceName: 'prometheus-' + $._config.prometheus.name,
|
|
||||||
},
|
|
||||||
|
|
||||||
grafana+:: {
|
|
||||||
labels: {
|
|
||||||
'app.kubernetes.io/name': 'grafana',
|
|
||||||
'app.kubernetes.io/version': $._config.versions.grafana,
|
|
||||||
'app.kubernetes.io/component': 'grafana',
|
|
||||||
},
|
|
||||||
dashboards: {},
|
|
||||||
rawDashboards: {},
|
|
||||||
folderDashboards: {},
|
|
||||||
datasources: [{
|
|
||||||
name: 'prometheus',
|
|
||||||
type: 'prometheus',
|
|
||||||
access: 'proxy',
|
|
||||||
orgId: 1,
|
|
||||||
url: 'http://' + $._config.prometheus.serviceName + '.' + $._config.namespace + '.svc:9090',
|
|
||||||
version: 1,
|
|
||||||
editable: false,
|
|
||||||
}],
|
|
||||||
// Forces pod restarts when dashboards are changed
|
|
||||||
dashboardsChecksum: false,
|
|
||||||
config: {},
|
|
||||||
ldap: null,
|
|
||||||
plugins: [],
|
|
||||||
env: [],
|
|
||||||
port: 3000,
|
|
||||||
resources: {
|
|
||||||
requests: { cpu: '100m', memory: '100Mi' },
|
|
||||||
limits: { cpu: '200m', memory: '200Mi' },
|
|
||||||
},
|
|
||||||
containers: [],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
grafanaDashboards: {},
|
|
||||||
grafana+: {
|
|
||||||
[if std.length($._config.grafana.config) > 0 then 'config']:
|
|
||||||
{
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'Secret',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana-config',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
type: 'Opaque',
|
|
||||||
data: {
|
|
||||||
'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))),
|
|
||||||
} +
|
|
||||||
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {},
|
|
||||||
},
|
|
||||||
dashboardDefinitions:
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'ConfigMap',
|
|
||||||
metadata: {
|
|
||||||
name: dashboardName,
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.dashboards)
|
|
||||||
] + [
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'ConfigMap',
|
|
||||||
metadata: {
|
|
||||||
name: dashboardName,
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
|
|
||||||
}
|
|
||||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
|
||||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
|
||||||
] + (
|
|
||||||
if std.length($._config.grafana.rawDashboards) > 0 then
|
|
||||||
[
|
|
||||||
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'ConfigMap',
|
|
||||||
metadata: {
|
|
||||||
name: dashboardName,
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
data: { [name]: $._config.grafana.rawDashboards[name] },
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
|
||||||
]
|
|
||||||
else
|
|
||||||
[]
|
|
||||||
),
|
|
||||||
dashboardSources:
|
|
||||||
local dashboardSources = {
|
|
||||||
apiVersion: 1,
|
|
||||||
providers:
|
|
||||||
(
|
|
||||||
if std.length($._config.grafana.dashboards) +
|
|
||||||
std.length($._config.grafana.rawDashboards) > 0 then [
|
|
||||||
{
|
|
||||||
name: '0',
|
|
||||||
orgId: 1,
|
|
||||||
folder: 'Default',
|
|
||||||
type: 'file',
|
|
||||||
options: {
|
|
||||||
path: '/grafana-dashboard-definitions/0',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
] else []
|
|
||||||
) +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
name: folder,
|
|
||||||
orgId: 1,
|
|
||||||
folder: folder,
|
|
||||||
type: 'file',
|
|
||||||
options: {
|
|
||||||
path: '/grafana-dashboard-definitions/' + folder,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
|
||||||
],
|
|
||||||
};
|
|
||||||
|
|
||||||
{
|
|
||||||
kind: 'ConfigMap',
|
|
||||||
apiVersion: 'v1',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana-dashboards',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
|
|
||||||
},
|
|
||||||
dashboardDatasources:
|
|
||||||
{
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'Secret',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana-datasources',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
type: 'Opaque',
|
|
||||||
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
|
|
||||||
apiVersion: 1,
|
|
||||||
datasources: $._config.grafana.datasources,
|
|
||||||
}, ' '))) },
|
|
||||||
},
|
|
||||||
service:
|
|
||||||
{
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'Service',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: $._config.grafana.labels,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
selector: $.grafana.deployment.spec.selector.matchLabels,
|
|
||||||
ports: [
|
|
||||||
{ name: 'http', targetPort: 'http', port: 3000 },
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
serviceAccount:
|
|
||||||
{
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'ServiceAccount',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
deployment:
|
|
||||||
local targetPort = $._config.grafana.port;
|
|
||||||
local portName = 'http';
|
|
||||||
local podLabels = $._config.grafana.labels;
|
|
||||||
local podSelectorLabels = {
|
|
||||||
[labelName]: podLabels[labelName]
|
|
||||||
for labelName in std.objectFields(podLabels)
|
|
||||||
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
|
||||||
};
|
|
||||||
|
|
||||||
local configVolumeName = 'grafana-config';
|
|
||||||
local configSecretName = 'grafana-config';
|
|
||||||
local configVolume = { name: configVolumeName, secret: { secretName: configSecretName } };
|
|
||||||
local configVolumeMount = { name: configVolumeName, mountPath: '/etc/grafana', readOnly: false };
|
|
||||||
|
|
||||||
local storageVolumeName = 'grafana-storage';
|
|
||||||
local storageVolume = { name: storageVolumeName, emptyDir: {} };
|
|
||||||
local storageVolumeMount = { name: storageVolumeName, mountPath: '/var/lib/grafana', readOnly: false };
|
|
||||||
|
|
||||||
local datasourcesVolumeName = 'grafana-datasources';
|
|
||||||
local datasourcesSecretName = 'grafana-datasources';
|
|
||||||
local datasourcesVolume = { name: datasourcesVolumeName, secret: { secretName: datasourcesSecretName } };
|
|
||||||
local datasourcesVolumeMount = { name: datasourcesVolumeName, mountPath: '/etc/grafana/provisioning/datasources', readOnly: false };
|
|
||||||
|
|
||||||
local dashboardsVolumeName = 'grafana-dashboards';
|
|
||||||
local dashboardsConfigMapName = 'grafana-dashboards';
|
|
||||||
local dashboardsVolume = { name: dashboardsVolumeName, configMap: { name: dashboardsConfigMapName } };
|
|
||||||
local dashboardsVolumeMount = { name: dashboardsVolumeName, mountPath: '/etc/grafana/provisioning/dashboards', readOnly: false };
|
|
||||||
|
|
||||||
local volumeMounts =
|
|
||||||
[
|
|
||||||
storageVolumeMount,
|
|
||||||
datasourcesVolumeMount,
|
|
||||||
dashboardsVolumeMount,
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = std.strReplace(name, '.json', ''),
|
|
||||||
name: 'grafana-dashboard-' + dashboardName,
|
|
||||||
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
|
|
||||||
readOnly: false,
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.dashboards)
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = std.strReplace(name, '.json', ''),
|
|
||||||
name: 'grafana-dashboard-' + dashboardName,
|
|
||||||
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
|
|
||||||
readOnly: false,
|
|
||||||
}
|
|
||||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
|
||||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
|
|
||||||
local dashboardName = std.strReplace(name, '.json', ''),
|
|
||||||
name: 'grafana-dashboard-' + dashboardName,
|
|
||||||
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
|
|
||||||
readOnly: false,
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
|
||||||
] + (
|
|
||||||
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []
|
|
||||||
);
|
|
||||||
|
|
||||||
local volumes =
|
|
||||||
[
|
|
||||||
storageVolume,
|
|
||||||
datasourcesVolume,
|
|
||||||
dashboardsVolume,
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
name: dashboardName,
|
|
||||||
configMap: { name: dashboardName },
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.dashboards)
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
name: dashboardName,
|
|
||||||
configMap: { name: dashboardName },
|
|
||||||
}
|
|
||||||
for folder in std.objectFields($._config.grafana.folderDashboards)
|
|
||||||
for name in std.objectFields($._config.grafana.folderDashboards[folder])
|
|
||||||
] +
|
|
||||||
[
|
|
||||||
{
|
|
||||||
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
|
|
||||||
name: dashboardName,
|
|
||||||
configMap: { name: dashboardName },
|
|
||||||
}
|
|
||||||
for name in std.objectFields($._config.grafana.rawDashboards)
|
|
||||||
] +
|
|
||||||
if std.length($._config.grafana.config) > 0 then [configVolume] else [];
|
|
||||||
|
|
||||||
local plugins = (
|
|
||||||
if std.length($._config.grafana.plugins) == 0 then
|
|
||||||
[]
|
|
||||||
else
|
|
||||||
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', $._config.grafana.plugins) }]
|
|
||||||
);
|
|
||||||
|
|
||||||
local c = [{
|
|
||||||
name: 'grafana',
|
|
||||||
image: $._config.imageRepos.grafana + ':' + $._config.versions.grafana,
|
|
||||||
env: $._config.grafana.env + plugins,
|
|
||||||
volumeMounts: volumeMounts,
|
|
||||||
ports: [{ name: portName, containerPort: targetPort }],
|
|
||||||
readinessProbe: {
|
|
||||||
httpGet: { path: '/api/health', port: portName },
|
|
||||||
},
|
|
||||||
resources: $._config.grafana.resources,
|
|
||||||
}] + $._config.grafana.containers;
|
|
||||||
|
|
||||||
{
|
|
||||||
apiVersion: 'apps/v1',
|
|
||||||
kind: 'Deployment',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana',
|
|
||||||
namespace: $._config.namespace,
|
|
||||||
labels: podLabels,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
replicas: 1,
|
|
||||||
selector: {
|
|
||||||
matchLabels: podSelectorLabels,
|
|
||||||
},
|
|
||||||
template: {
|
|
||||||
metadata: {
|
|
||||||
labels: podLabels,
|
|
||||||
annotations: {
|
|
||||||
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
|
|
||||||
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
|
|
||||||
[if $._config.grafana.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString($.grafana.dashboardDefinitions)),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
containers: c,
|
|
||||||
volumes: volumes,
|
|
||||||
serviceAccountName: $.grafana.serviceAccount.metadata.name,
|
|
||||||
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
|
|
||||||
securityContext: { fsGroup: 65534, runAsNonRoot: true, runAsUser: 65534 },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
15
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/jsonnetfile.json
generated
vendored
15
monitoring/vendor/github.com/brancz/kubernetes-grafana/grafana/jsonnetfile.json
generated
vendored
|
@ -1,15 +0,0 @@
|
||||||
{
|
|
||||||
"version": 1,
|
|
||||||
"dependencies": [
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"git": {
|
|
||||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
|
||||||
"subdir": "grafonnet"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"version": "master"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"legacyImports": false
|
|
||||||
}
|
|
Some files were not shown because too many files have changed in this diff Show more
Reference in a new issue