replace custom monitoring with helm chart

Tobias Brunner 2021-08-26 23:09:00 +02:00
parent ad580edfa8
commit 8996c7a963
953 changed files with 32 additions and 267637 deletions

@@ -9,15 +9,41 @@ spec:
   destination:
     namespace: monitoring
     server: https://kubernetes.default.svc
-  project: system
   source:
-    path: monitoring/manifests
-    repoURL: https://git.tbrnt.ch/tobru/gitops-tbrnt.git
-    targetRevision: HEAD
-    directory:
-      recurse: true
+    chart: kube-prometheus-stack
+    repoURL: https://prometheus-community.github.io/helm-charts
+    targetRevision: 18.0.1
+    helm:
+      values: |
+        kubeApiServer:
+          enabled: true
+
+        kubeControllerManager:
+          enabled: true
+          endpoints:
+          - 185.95.218.11
+
+        kubeScheduler:
+          enabled: true
+          endpoints:
+          - 185.95.218.11
+
+        kubeProxy:
+          enabled: true
+          endpoints:
+          - 185.95.218.11
+
+        kubeEtcd:
+          enabled: true
+          endpoints:
+          - 185.95.218.11
+          service:
+            enabled: true
+            port: 2381
+            targetPort: 2381
+  project: system
   syncPolicy:
     syncOptions:
     - CreateNamespace=true
   ignoreDifferences:
   - group: apiextensions.k8s.io
     kind: CustomResourceDefinition
     jsonPointers:
     - /status
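The chart and inline values above can be rendered locally to preview what Argo CD will apply. A minimal sketch, assuming the helm CLI is installed and the inlined values block is saved to a local values.yaml:

```
# Render the pinned chart version with the Application's values (no cluster needed).
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm template kube-prometheus-stack prometheus-community/kube-prometheus-stack \
  --version 18.0.1 --namespace monitoring -f values.yaml | less
```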

@@ -1,12 +0,0 @@
build:
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
sudo chown -R tobru. manifests/
kubeseal --controller-namespace sealed-secrets -o yaml -n monitoring < ../../gitops-tbrnt-private/monitoring/alertmanager.yaml > manifests/alertmanager-tbrnt-config-secret.yaml
cp *.yaml manifests/
.PHONY: build
update:
docker run --rm -v $(shell pwd):$(shell pwd) --workdir $(shell pwd) quay.io/coreos/jsonnet-ci jb update
sudo chown -R tobru. vendor/
make build
.PHONY: update

@@ -1,17 +0,0 @@
# Cluster Monitoring
Source: [kube-prometheus](https://github.com/coreos/kube-prometheus).
## Build
```
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
```
## Update libs
```
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci jb update
```
Then build again, obviously

@@ -1,15 +0,0 @@
#!/usr/bin/env bash
# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
set -e
set -x
# only exit with zero if all commands of the pipeline exit successfully
set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir -p manifests/setup
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
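The xargs one-liner is dense; unrolled, it does roughly the following (a sketch assuming jsonnet and gojsontoyaml are on PATH, as they are in the jsonnet-ci image):

```
# Render all manifests as JSON files into manifests/ ...
jsonnet -J vendor -m manifests monitoring.jsonnet
# ... then convert each one to YAML and drop the intermediate JSON.
for f in $(find manifests -type f ! -name '*.yaml'); do
  gojsontoyaml < "$f" > "$f.yaml" && rm -f "$f"
done
```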

@@ -1,31 +0,0 @@
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: healthchecks-io
namespace: monitoring
spec:
schedule: "*/1 * * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
startingDeadlineSeconds: 200
jobTemplate:
spec:
template:
spec:
containers:
- name: pinghc
env:
- name: HCURL
valueFrom:
secretKeyRef:
name: healthchecks-io
key: HCURL
image: busybox
args:
- /bin/sh
- -c
- "date && echo $HCURL && /bin/wget -q -O - --no-check-certificate $HCURL"
restartPolicy: OnFailure
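The job wgets the healthchecks.io URL from the Secret once a minute, acting as a dead man's switch for the whole cluster. A one-off run can be triggered straight from the CronJob (a sketch; assumes kubectl access, and the job name manual-ping is arbitrary):

```
kubectl -n monitoring create job manual-ping --from=cronjob/healthchecks-io
kubectl -n monitoring logs job/manual-ping
```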

@@ -1,17 +0,0 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: healthchecks-io
namespace: monitoring
spec:
encryptedData:
HCURL: AgBEpwET1Qa1hQqAmwrNGBv4sL0ml8pGYPwgq9Aps3tYhBVqsXjV7U5RQa/txldg1umw2Zqx8MfvZTN2kmFk6bJTROCWqTxmxd4rHgnJYqRR0+Opn/BtDhVx4WTnehyM/il9ymddhMD+WRQDr/Wfxq/0UQdsy+IEYyVMQuOKEihZabxmXRyNeAl5ZBeQ0W1T29biJPx3rifS37RbGlJtCIYuNPh82d0KAMu1dszDnkln8k5CBv6mPD8BVHg+Z/y1v1jFhTIE3YOlGzCIjb8RrJj6MVm7zlauj8zrl30JvF2OAWDGGZDOL3b0G3IKd0Qp/eagT33Sx7vbppY/l1Vci6UQcVpde3u2+ATMbysRej04Mvcodq5OgkBFqbgCzx0UFTIq0wER/GuCoYbt+k8b3TouK5ChQet8EP0W/c7rLHcMY3c0UR00N7m5UeKZAzAkXSGV+u3M9K6PMp8pl0VuDo+IVgEIY7ku9rtzL7SPIfXS4u5w7fte13fOtKB/2sa11dNqAbHmidF+IO6ycjm8SZibC7NKyCxgIKWPfsFXhNUT2Nx7eBRrzR1QlqThIGRsDpX1RVplTwe/OLsBz0K99AyGDUkSBJdOZLaRT/b3T0nS8DE5x/e8MvFsbbDdGE2U/YhVrbfn072u/X979/RIm0oCjipvByZXhFmobRj9SP9RcK2UfjBSY7xyKnd2rjj1mnIs2S0CmwGFdJqoywHckJJOu3YP2oN2Q1U7+Fe4yciupAshgdszY2okHMtd4aDDJJKeKKFHpjpsuA==
template:
metadata:
creationTimestamp: null
name: healthchecks-io
namespace: monitoring
type: Opaque
status: {}
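SealedSecrets like this one are generated from a plain Secret kept outside the public repo, using the same kubeseal flags as the Makefile above; the input path and filenames here are hypothetical:

```
# Encrypt the plain Secret so only the in-cluster controller can decrypt it.
kubeseal --controller-namespace sealed-secrets -o yaml -n monitoring \
  < ../gitops-tbrnt-private/monitoring/healthchecks-io.yaml \
  > healthchecks-io-sealedsecret.yaml
```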

@@ -1,26 +0,0 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "main",
"name": "kube-prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
"subdir": "prometheus-pushgateway"
}
},
"version": "master",
"name": "prometheus-pushgateway"
}
],
"legacyImports": true
}
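jsonnetfile.json pins the two jsonnet bundles; jsonnet-bundler (jb) manages them and writes the resolved versions to the lock file below. Run locally, the Makefile's update target boils down to roughly this (a sketch, assuming jb is installed):

```
# Re-pin a dependency and refresh everything; jb rewrites jsonnetfile.lock.json.
jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main
jb update
```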

@@ -1,181 +0,0 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "8ea4e7bc04b1bf5e9bd99918ca28c6271b42be0e",
"sum": "muenICtKXABk6MZZHCZD2wCbmtiE96GwWRMGa1Rg+wA="
},
{
"source": {
"git": {
"remote": "https://github.com/etcd-io/etcd",
"subdir": "contrib/mixin"
}
},
"version": "562d645ac923388ff5b8d270b0536764d34b0e0f",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98",
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs",
"subdir": "grafana-builder"
}
},
"version": "dbf1211d003d20c7adcdee942c477e648507a398",
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
},
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
"name": "ksonnet"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "c67c0f19e869f1da34d79b6507c1fa37c23a6e4e",
"sum": "F+RxcI26zeoeI81uot39Jv6IpQ6BOz+xlSHlElJYsz8="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet"
}
},
"version": "39a9cda705b5201c35105bd1f24c83923fa839ef",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "5b2740d517095a6ae9ad51bcb9c53e5ef28c62a0",
"sum": "+6VkkR44AC3Qnwfr9cWYCKs+uRi5JaIOda/3X1JEzAg="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/mixin"
}
},
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
"name": "prometheus-operator-mixin"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
"sum": "MRwyChXdKG3anL2OWpbUu3qWc97w9J6YsjUWjLFQyB0="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "99f64e944b1043c790784cf5373c8fb349816fc4",
"sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=",
"name": "alertmanager"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "b597c1244d7bef49e6f3359c87a56dd7707f6719",
"sum": "cZTNXQMUCLB5FGYpMn845dcqGdkcYt58qCqOFIV/BoQ="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "3cafc58827d1ebd1a67749f88be4218f0bab3d8d",
"sum": "VK0c3sQ3ksiM6JQsAVfWmL5NbzGv9llMfXFNXfFdJ+A=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos",
"subdir": "mixin"
}
},
"version": "ba6c5c4726ff52807c7383c68f2159b1af7980bb",
"sum": "XP3uq7xcfKHsnWsz1v992csZhhZR3jQma6hFOfSViTs=",
"name": "thanos-mixin"
},
{
"source": {
"git": {
"remote": "https://github.com/tobru/kube-prometheus-pushgateway",
"subdir": "prometheus-pushgateway"
}
},
"version": "7bb93ca3ddf3b83f1fdfe95d9bad415b57d0fe4b",
"sum": "6nOJeHJExjYyTSovZvU6xbGjWS88oUfGnF1DAo+Q6tg="
}
],
"legacyImports": false
}

@@ -1,87 +0,0 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local endpoints = k.core.v1.endpoints;
local endpointSubset = endpoints.subsetsType;
local endpointPort = endpointSubset.portsType;
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local masterIP = '185.95.218.11';
{
prometheus+:: {
kubeSchedulerPrometheusDiscoveryService:
local p = servicePort.newNamed('http-metrics', 10251, 10251);
service.new('kube-scheduler', { 'k8s-app': 'kube-scheduler' }, p) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None') +
service.mixin.spec.withSelector({}),
kubeSchedulerPrometheusDiscoveryEndpoints:
local port = endpointPort.new() +
endpointPort.withName('http-metrics') +
endpointPort.withPort(10251) +
endpointPort.withProtocol('TCP');
local subset = endpointSubset.new() +
endpointSubset.withAddresses([
{ ip: masterIP },
]) +
endpointSubset.withPorts(port);
endpoints.new() +
endpoints.mixin.metadata.withName('kube-scheduler') +
endpoints.mixin.metadata.withNamespace('kube-system') +
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
endpoints.withSubsets(subset),
kubeControllerManagerPrometheusDiscoveryService:
local p = servicePort.newNamed('http-metrics', 10252, 10252);
service.new('kube-controller-manager', { 'k8s-app': 'kube-controller-manager' }, p) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None') +
service.mixin.spec.withSelector({}),
kubeControllerManagerPrometheusDiscoveryEndpoints:
local port = endpointPort.new() +
endpointPort.withName('http-metrics') +
endpointPort.withPort(10252) +
endpointPort.withProtocol('TCP');
local subset = endpointSubset.new() +
endpointSubset.withAddresses([
{ ip: masterIP },
]) +
endpointSubset.withPorts(port);
endpoints.new() +
endpoints.mixin.metadata.withName('kube-controller-manager') +
endpoints.mixin.metadata.withNamespace('kube-system') +
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
endpoints.withSubsets(subset),
serviceMonitorKubeScheduler+:
{
spec+: {
endpoints: [
{
port: 'http-metrics',
interval: '30s',
},
],
},
},
serviceMonitorKubeControllerManager+:
{
spec+: {
endpoints: [
{
port: 'http-metrics',
interval: '30s',
metricRelabelings: (import 'kube-prometheus/addons/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
},
],
},
],
},
},
},
}
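This jsonnet hand-builds headless Services plus static Endpoints pointing at the single master IP so Prometheus can scrape kube-scheduler and kube-controller-manager; the endpoints lists in the new Helm values replace exactly this. To confirm the generated objects on a live cluster, something like the following works (a sketch, assumes kubectl access):

```
kubectl -n kube-system get service,endpoints -l k8s-app=kube-scheduler
kubectl -n kube-system get endpoints kube-controller-manager -o yaml
```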

@@ -1,40 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: k8up
labels:
prometheus: k8s
role: alert-rules
spec:
groups:
- name: k8up.rules
rules:
- alert: baas_last_errors
expr: baas_backup_restic_last_errors > 0
for: 1m
labels:
severity: critical
annotations:
summary: Amount of errors of last restic backup
description: This alert is fired when error number is > 0
- alert: K8upBackupFailed
expr: rate(k8up_jobs_failed_counter[1d]) > 0
for: 1m
labels:
severity: critical
annotations:
summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
- alert: K8upBackupNotRunning
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
for: 1m
labels:
severity: critical
annotations:
summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
- alert: K8upJobStuck
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
for: 24h
labels:
severity: critical
annotations:
summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."
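The expressions can be spot-checked against a running Prometheus before trusting the alerts. A sketch using the HTTP query API; the service name prometheus-k8s and the default namespace are assumptions taken from the other manifests in this commit:

```
kubectl -n default port-forward svc/prometheus-k8s 9090 &
curl -s 'http://localhost:9090/api/v1/query?query=k8up_jobs_failed_counter' | jq .
```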

@@ -1,36 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
labels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: main
namespace: default
spec:
configSecret: alertmanager-tbrnt-config
image: quay.io/prometheus/alertmanager:v0.21.0
nodeSelector:
kubernetes.io/os: linux
podMetadata:
labels:
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
replicas: 3
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 4m
memory: 100Mi
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
version: 0.21.0

@@ -1,18 +0,0 @@
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
labels:
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: alertmanager-main
namespace: default
spec:
maxUnavailable: 1
selector:
matchLabels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus

@@ -1,156 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
prometheus: k8s
role: alert-rules
name: alertmanager-main-rules
namespace: default
spec:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerFailedReload
annotations:
description: Configuration has failed to load for {{ $labels.namespace }}/{{
$labels.pod}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedreload
summary: Reloading an Alertmanager configuration has failed.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="default"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: AlertmanagerMembersInconsistent
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
found {{ $value }} members of the {{$labels.job}} cluster.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagermembersinconsistent
summary: A member of an Alertmanager cluster has not found all other cluster
members.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m])
< on (namespace,service) group_left
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="default"}[5m]))
for: 10m
labels:
severity: critical
- alert: AlertmanagerFailedToSendAlerts
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerfailedtosendalerts
summary: An Alertmanager instance failed to send notifications.
expr: |
(
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default"}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications
to a critical integration.
expr: |
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration=~`.*`}[5m])
)
> 0.01
for: 5m
labels:
severity: critical
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications
to a non-critical integration.
expr: |
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="default", integration!~`.*`}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
- alert: AlertmanagerConfigInconsistent
annotations:
description: Alertmanager instances within the {{$labels.job}} cluster have
different configurations.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerconfiginconsistent
summary: Alertmanager instances within the same cluster have different configurations.
expr: |
count by (namespace,service) (
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="default"})
)
!= 1
for: 20m
labels:
severity: critical
- alert: AlertmanagerClusterDown
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have been up for less than half of the
last 5m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclusterdown
summary: Half or more of the Alertmanager instances within the same cluster
are down.
expr: |
(
count by (namespace,service) (
avg_over_time(up{job="alertmanager-main",namespace="default"}[5m]) < 0.5
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="default"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
- alert: AlertmanagerClusterCrashlooping
annotations:
description: '{{ $value | humanizePercentage }} of Alertmanager instances
within the {{$labels.job}} cluster have restarted at least 5 times in the
last 10m.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/alertmanagerclustercrashlooping
summary: Half or more of the Alertmanager instances within the same cluster
are crashlooping.
expr: |
(
count by (namespace,service) (
changes(process_start_time_seconds{job="alertmanager-main",namespace="default"}[10m]) > 4
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="default"}
)
)
>= 0.5
for: 5m
labels:
severity: critical

@@ -1,49 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
labels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: alertmanager-main
namespace: default
stringData:
alertmanager.yaml: |-
"global":
"resolve_timeout": "5m"
"inhibit_rules":
- "equal":
- "namespace"
- "alertname"
"source_match":
"severity": "critical"
"target_match_re":
"severity": "warning|info"
- "equal":
- "namespace"
- "alertname"
"source_match":
"severity": "warning"
"target_match_re":
"severity": "info"
"receivers":
- "name": "Default"
- "name": "Watchdog"
- "name": "Critical"
"route":
"group_by":
- "namespace"
"group_interval": "5m"
"group_wait": "30s"
"receiver": "Default"
"repeat_interval": "12h"
"routes":
- "match":
"alertname": "Watchdog"
"receiver": "Watchdog"
- "match":
"severity": "critical"
"receiver": "Critical"
type: Opaque
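The inlined alertmanager.yaml routes everything to the Default receiver and splits out Watchdog and critical alerts. It can be validated offline with amtool, which ships with Alertmanager (a sketch, assuming the config is saved locally as alertmanager.yaml):

```
amtool check-config alertmanager.yaml
```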

@@ -1,23 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: alertmanager-main
namespace: default
spec:
ports:
- name: web
port: 9093
targetPort: web
selector:
alertmanager: main
app: alertmanager
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: ClientIP

@@ -1,11 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: alertmanager-main
namespace: default

@@ -1,20 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.21.0
name: alertmanager
namespace: default
spec:
endpoints:
- interval: 30s
port: web
selector:
matchLabels:
alertmanager: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: kube-prometheus

@@ -1,17 +0,0 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
spec:
encryptedData:
alertmanager.yaml: AgBj3KqLiF7EAnGK6c4+Thferv3Sur+fhlwE4wpXD1PBtwlJQsMCqjsLRRFNAESH9/8vhI9E9D8wLJiauNS7CGw5jd5KU1cvxo5EyGeFoVyAB4bHSy/pxptSFq+rn00E99/Tqkbdsgduwusfpi0I9F1+zNucyyamJsEzIcsyHMlBbACz+9KQV9SdbgVEmIeqabrAP9VQaQ+i69yurhPdV7VkZzr0WKcGg3x27+slmtjlJz5fwtv1qmbYt/MQnijF2tc6tJeq19Cm0O4zuQ09meW6DwAZ9SOIFU6LxrqJlKbuleaWmfIE3AQYA6Z+qXyBjT1ILW36RwGyg3YK7nm0MNDQxd6LN3zR0eifPqrPsm7O6LE+NAg4FkurV3lJlrBoU3lSSc+sQZZr00ct9Gp57EEvg9T2TaM1B/KHQNmIhpDGntD4+yTcvK3nU7+sxqG/c4Wk5xiUQyLYnigNy5qYCcsM+t9iCoGxP7uU8GrsvIkojTxzhdc6e5LduThKdGE9jI3R6nCP6kmsU6XyUzgKmxYJVVzhSrm9yxFVDPHriNaEM2hgEd3wStwmRjGjwAPUjQZfSJtmxY6+RQ/77TYGjiskDm6gAZuzkGjdptL2t5F+54y3uePaLHNspMgtZsTARCo3kAhgf61Gk2nvnEY/ws5qFjAnsUEXs86wAk2S+w401QKPKTcDr6e/rnve8IrXW0FPvzR3rzdWOcU8v0Z0sSFijIfXdx+A9WGCJuHNo65FbKSgWhlHBfvWB1qWBnDd/VVHIA2wR8gevAPJHSc0f1WdUDc4w2w8tc/qum1SZo2lWkMopvLaiVHU5dCGtG6+4qsC1DmFzIRGZN4AbdVd5k+OY15Fp+ysjuTpA/HuZ/N0kz/5BXzNbY3u7YKi3EV3Up+eZIi2jlG0XBXWdCouuxRW40qHuShivtbrBgey32kFo85dsrDqN7F2kBVnumOB7kvFOaCkL2AtsakVjUzGoh5eXCSHl0ZcqmW2UjInzZIirBChMW/G4yL/TwpVYbBLqWPfdVMFmq7I4srY2+hUP/5UBt/DKZi5zPlLR8H3q4i02zsNpqdhSa9o6ThhFtVX9/te/DMpyN1fJ1Hn2p3cDhoTsiTLPkvflVOx70flap0v2zzPoDm+yXhFllpWp/5avHy9pKf/RzpAodbNr/EydkC+KDKI88MhVUtxS27WbKFsq+vUkmHQj+KtGyRFjg2/CnmM8YbdRsMe8p39PVGLxj1RTnyYzlMltOTbJo3rhDzjmpzGVUpWokwTMGC1WgTenrS4IcCK61ri9bsBIL9n9sMLF1lT8NVKnQfluDTaHNzsQgJ1HTSwQOcAfugqlUrSeTLt3q6U4pSjjlF8P7wYpqzWc+bhOaHed9NxrGXFBC5Wh6+BULuCaCA6TtkLpUfABYHVUa4OS3huNsOeBhZ3aCCQXrc0jOOq2DQzxvdGu4YAQnvMHwJRVyKVcw0pOS5RjIqJW6IOn0MGHzAo7qNv6LUyJ9a7huT2W4ibrHFkMck1zKxbBekPQ9FxpufSXrEqEqNuB3j7Gi7lVDVbPySr1rr2KXLzOLsnZhpTpMq2RejglIAMF7WfIMfvHQ2mnjNuYNNQnXx8hPLm88GSxFYKHpUnAswgYuo4XX2drYMzzq3GWDMIHZ/kpLySU+eJGo6VGeFUV1DgaGksLXE3oCfrA1OCUyZ/qke3tzj8ixjwuprCmFPWsg==
template:
metadata:
creationTimestamp: null
name: alertmanager-tbrnt-config
namespace: monitoring
type: Opaque
status: {}

@@ -1,13 +0,0 @@
apiVersion: v1
data:
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLmRlZmF1bHQuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
kind: Secret
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
name: grafana-datasources
namespace: default
type: Opaque
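The datasources.yaml value is base64-encoded JSON pointing Grafana at http://prometheus-k8s.default.svc:9090. To inspect what is actually deployed (a sketch, assumes kubectl access):

```
kubectl -n default get secret grafana-datasources \
  -o jsonpath='{.data.datasources\.yaml}' | base64 -d
```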

File diff suppressed because it is too large

@@ -1,26 +0,0 @@
apiVersion: v1
data:
dashboards.yaml: |-
{
"apiVersion": 1,
"providers": [
{
"folder": "Default",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
},
"orgId": 1,
"type": "file"
}
]
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
name: grafana-dashboards
namespace: default

@@ -1,209 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
name: grafana
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
annotations:
checksum/grafana-datasources: b822d7b1a1070f322d0773c043985b4a
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
spec:
containers:
- env: []
image: grafana/grafana:7.5.4
name: grafana
ports:
- containerPort: 3000
name: http
readinessProbe:
httpGet:
path: /api/health
port: http
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
volumeMounts:
- mountPath: /var/lib/grafana
name: grafana-storage
readOnly: false
- mountPath: /etc/grafana/provisioning/datasources
name: grafana-datasources
readOnly: false
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/cluster-total
name: grafana-dashboard-cluster-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
name: grafana-dashboard-k8s-resources-node
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload
name: grafana-dashboard-k8s-resources-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
name: grafana-dashboard-namespace-by-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
name: grafana-dashboard-node-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/nodes
name: grafana-dashboard-nodes
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus
name: grafana-dashboard-prometheus
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/proxy
name: grafana-dashboard-proxy
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:
fsGroup: 65534
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: grafana
volumes:
- emptyDir: {}
name: grafana-storage
- name: grafana-datasources
secret:
secretName: grafana-datasources
- configMap:
name: grafana-dashboards
name: grafana-dashboards
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
- configMap:
name: grafana-dashboard-cluster-total
name: grafana-dashboard-cluster-total
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
- configMap:
name: grafana-dashboard-k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
- configMap:
name: grafana-dashboard-k8s-resources-node
name: grafana-dashboard-k8s-resources-node
- configMap:
name: grafana-dashboard-k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
- configMap:
name: grafana-dashboard-k8s-resources-workload
name: grafana-dashboard-k8s-resources-workload
- configMap:
name: grafana-dashboard-k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
- configMap:
name: grafana-dashboard-namespace-by-workload
name: grafana-dashboard-namespace-by-workload
- configMap:
name: grafana-dashboard-node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
- configMap:
name: grafana-dashboard-node-rsrc-use
name: grafana-dashboard-node-rsrc-use
- configMap:
name: grafana-dashboard-nodes
name: grafana-dashboard-nodes
- configMap:
name: grafana-dashboard-persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
- configMap:
name: grafana-dashboard-prometheus
name: grafana-dashboard-prometheus
- configMap:
name: grafana-dashboard-proxy
name: grafana-dashboard-proxy
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total

@@ -1,19 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
name: grafana
namespace: default
spec:
ports:
- name: http
port: 3000
targetPort: http
selector:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus

@@ -1,5 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: grafana
namespace: default

@@ -1,17 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.5.4
name: grafana
namespace: default
spec:
endpoints:
- interval: 15s
port: http
selector:
matchLabels:
app.kubernetes.io/name: grafana

@@ -1,31 +0,0 @@
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: healthchecks-io
namespace: monitoring
spec:
schedule: "*/1 * * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
startingDeadlineSeconds: 200
jobTemplate:
spec:
template:
spec:
containers:
- name: pinghc
env:
- name: HCURL
valueFrom:
secretKeyRef:
name: healthchecks-io
key: HCURL
image: busybox
args:
- /bin/sh
- -c
- "date && echo $HCURL && /bin/wget -q -O - --no-check-certificate $HCURL"
restartPolicy: OnFailure

@@ -1,17 +0,0 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
creationTimestamp: null
name: healthchecks-io
namespace: monitoring
spec:
encryptedData:
HCURL: AgBEpwET1Qa1hQqAmwrNGBv4sL0ml8pGYPwgq9Aps3tYhBVqsXjV7U5RQa/txldg1umw2Zqx8MfvZTN2kmFk6bJTROCWqTxmxd4rHgnJYqRR0+Opn/BtDhVx4WTnehyM/il9ymddhMD+WRQDr/Wfxq/0UQdsy+IEYyVMQuOKEihZabxmXRyNeAl5ZBeQ0W1T29biJPx3rifS37RbGlJtCIYuNPh82d0KAMu1dszDnkln8k5CBv6mPD8BVHg+Z/y1v1jFhTIE3YOlGzCIjb8RrJj6MVm7zlauj8zrl30JvF2OAWDGGZDOL3b0G3IKd0Qp/eagT33Sx7vbppY/l1Vci6UQcVpde3u2+ATMbysRej04Mvcodq5OgkBFqbgCzx0UFTIq0wER/GuCoYbt+k8b3TouK5ChQet8EP0W/c7rLHcMY3c0UR00N7m5UeKZAzAkXSGV+u3M9K6PMp8pl0VuDo+IVgEIY7ku9rtzL7SPIfXS4u5w7fte13fOtKB/2sa11dNqAbHmidF+IO6ycjm8SZibC7NKyCxgIKWPfsFXhNUT2Nx7eBRrzR1QlqThIGRsDpX1RVplTwe/OLsBz0K99AyGDUkSBJdOZLaRT/b3T0nS8DE5x/e8MvFsbbDdGE2U/YhVrbfn072u/X979/RIm0oCjipvByZXhFmobRj9SP9RcK2UfjBSY7xyKnd2rjj1mnIs2S0CmwGFdJqoywHckJJOu3YP2oN2Q1U7+Fe4yciupAshgdszY2okHMtd4aDDJJKeKKFHpjpsuA==
template:
metadata:
creationTimestamp: null
name: healthchecks-io
namespace: monitoring
type: Opaque
status: {}

@@ -1,20 +0,0 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
ingress.kubernetes.io/ssl-redirect: "true"
name: grafana
namespace: monitoring
spec:
rules:
- host: grafana.knurrli.tbrnt.ch
http:
paths:
- backend:
serviceName: grafana
servicePort: http
tls:
- hosts:
- grafana.knurrli.tbrnt.ch
secretName: grafana-ingress-cert

@@ -1,40 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: k8up
labels:
prometheus: k8s
role: alert-rules
spec:
groups:
- name: k8up.rules
rules:
- alert: baas_last_errors
expr: baas_backup_restic_last_errors > 0
for: 1m
labels:
severity: critical
annotations:
summary: Amount of errors of last restic backup
description: This alert is fired when error number is > 0
- alert: K8upBackupFailed
expr: rate(k8up_jobs_failed_counter[1d]) > 0
for: 1m
labels:
severity: critical
annotations:
summary: "Job in {{ $labels.namespace }} of type {{ $labels.jobType }} failed"
- alert: K8upBackupNotRunning
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
for: 1m
labels:
severity: critical
annotations:
summary: "No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock"
- alert: K8upJobStuck
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
for: 24h
labels:
severity: critical
annotations:
summary: "K8up jobs are stuck in {{ $labels.namespace }} for the last 24 hours."

@@ -1,110 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch

@@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: default

@@ -1,89 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
annotations:
kubectl.kubernetes.io/default-container: kube-state-metrics
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
spec:
containers:
- args:
- --host=127.0.0.1
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0
name: kube-state-metrics
resources:
limits:
cpu: 100m
memory: 250Mi
requests:
cpu: 10m
memory: 190Mi
securityContext:
runAsUser: 65534
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8081/
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
resources:
limits:
cpu: 40m
memory: 40Mi
requests:
cpu: 20m
memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
- args:
- --logtostderr
- --secure-listen-address=:9443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8082/
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics

@@ -1,46 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
prometheus: k8s
role: alert-rules
name: kube-state-metrics-rules
namespace: default
spec:
groups:
- name: kube-state-metrics
rules:
- alert: KubeStateMetricsListErrors
annotations:
description: kube-state-metrics is experiencing errors at an elevated rate
in list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricslisterrors
summary: kube-state-metrics is experiencing errors in list operations.
expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
/
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
> 0.01
for: 15m
labels:
severity: critical
- alert: KubeStateMetricsWatchErrors
annotations:
description: kube-state-metrics is experiencing errors at an elevated rate
in watch operations. This is likely causing it to not be able to expose
metrics about Kubernetes objects correctly or at all.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricswatcherrors
summary: kube-state-metrics is experiencing errors in watch operations.
expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
/
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
> 0.01
for: 15m
labels:
severity: critical

@@ -1,23 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
namespace: default
spec:
clusterIP: None
ports:
- name: https-main
port: 8443
targetPort: https-main
- name: https-self
port: 9443
targetPort: https-self
selector:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus

@@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
namespace: default

@@ -1,35 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.0.0
name: kube-state-metrics
namespace: default
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
port: https-main
relabelings:
- action: labeldrop
regex: (pod|service|endpoint|namespace)
scheme: https
scrapeTimeout: 30s
tlsConfig:
insecureSkipVerify: true
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https-self
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: kube-prometheus

@@ -1,22 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
rules:
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create

@@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: node-exporter
subjects:
- kind: ServiceAccount
name: node-exporter
namespace: default

@@ -1,100 +0,0 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
namespace: default
spec:
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
spec:
containers:
- args:
- --web.listen-address=127.0.0.1:9100
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --no-collector.wifi
- --no-collector.hwmon
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.netclass.ignored-devices=^(veth.*)$
- --collector.netdev.device-exclude=^(veth.*)$
image: quay.io/prometheus/node-exporter:v1.1.2
name: node-exporter
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/sys
mountPropagation: HostToContainer
name: sys
readOnly: true
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
- args:
- --logtostderr
- --secure-listen-address=[$(IP)]:9100
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy
ports:
- containerPort: 9100
hostPort: 9100
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
hostNetwork: true
hostPID: true
nodeSelector:
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: node-exporter
tolerations:
- operator: Exists
volumes:
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
updateStrategy:
rollingUpdate:
maxUnavailable: 10%
type: RollingUpdate

@@ -1,301 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
prometheus: k8s
role: alert-rules
name: node-exporter-rules
namespace: default
spec:
groups:
- name: node-exporter
rules:
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling
up.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: warning
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left and is filling
up fast.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: critical
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: warning
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: critical
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
and
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: warning
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up fast.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
and
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: critical
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: warning
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
and
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
)
for: 1h
labels:
severity: critical
- alert: NodeNetworkReceiveErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr: |
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h
labels:
severity: warning
- alert: NodeNetworkTransmitErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr: |
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h
labels:
severity: warning
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: |
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
labels:
severity: warning
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector failed to scrape.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
expr: |
node_textfile_scrape_error{job="node-exporter"} == 1
labels:
severity: warning
- alert: NodeClockSkewDetected
annotations:
description: Clock on {{ $labels.instance }} is out of sync by more than 300s.
Ensure NTP is configured correctly on this host.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
summary: Clock skew detected.
expr: |
(
node_timex_offset_seconds > 0.05
and
deriv(node_timex_offset_seconds[5m]) >= 0
)
or
(
node_timex_offset_seconds < -0.05
and
deriv(node_timex_offset_seconds[5m]) <= 0
)
for: 10m
labels:
severity: warning
- alert: NodeClockNotSynchronising
annotations:
description: Clock on {{ $labels.instance }} is not synchronising. Ensure
NTP is configured on this host.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0
and
node_timex_maxerror_seconds >= 16
for: 10m
labels:
severity: warning
- alert: NodeRAIDDegraded
annotations:
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
summary: RAID Array is degraded
expr: |
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
for: 15m
labels:
severity: critical
- alert: NodeRAIDDiskFailure
annotations:
description: At least one device in RAID array on {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
summary: Failed device in RAID array
expr: |
node_md_disks{state="failed"} > 0
labels:
severity: warning
- name: node-exporter.rules
rules:
- expr: |
count without (cpu) (
count without (mode) (
node_cpu_seconds_total{job="node-exporter"}
)
)
record: instance:node_num_cpu:sum
- expr: |
1 - avg without (cpu, mode) (
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
)
record: instance:node_cpu_utilisation:rate1m
- expr: |
(
node_load1{job="node-exporter"}
/
instance:node_num_cpu:sum{job="node-exporter"}
)
record: instance:node_load1_per_cpu:ratio
- expr: |
1 - (
node_memory_MemAvailable_bytes{job="node-exporter"}
/
node_memory_MemTotal_bytes{job="node-exporter"}
)
record: instance:node_memory_utilisation:ratio
- expr: |
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
record: instance:node_vmstat_pgmajfault:rate1m
- expr: |
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_seconds:rate1m
- expr: |
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
- expr: |
sum without (device) (
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_receive_bytes_excluding_lo:rate1m
- expr: |
sum without (device) (
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
- expr: |
sum without (device) (
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_receive_drop_excluding_lo:rate1m
- expr: |
sum without (device) (
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate1m
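
These records exist so that dashboards and alerts can query one cheap, pre-aggregated series instead of re-running rate() over the raw node metrics. As a sketch only (alert name and threshold are illustrative, not part of the deleted manifests), an alert built on one of these records could look like:

    - alert: NodeCpuSaturated  # hypothetical example, not in this repo
      annotations:
        summary: CPU utilisation on {{ $labels.instance }} above 90% for 15m.
      expr: |
        instance:node_cpu_utilisation:rate1m{job="node-exporter"} > 0.9
      for: 15m
      labels:
        severity: warning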

View File

@ -1,20 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
namespace: default
spec:
clusterIP: None
ports:
- name: https
port: 9100
targetPort: https
selector:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus

View File

@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
namespace: default

View File

@ -1,31 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.1.2
name: node-exporter
namespace: default
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 15s
port: https
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus

View File

@ -1,18 +0,0 @@
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
groupPriorityMinimum: 100
insecureSkipTLSVerify: true
service:
name: prometheus-adapter
namespace: default
version: v1beta1
versionPriority: 100
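
This APIService hands the whole metrics.k8s.io group over to the prometheus-adapter Service, which is what lets kubectl top and the HPA read resource metrics out of Prometheus. A quick sanity check with plain kubectl (nothing specific to these manifests):

    kubectl get apiservice v1beta1.metrics.k8s.io
    kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes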

View File

@ -1,21 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
rules:
- apiGroups:
- ""
resources:
- nodes
- namespaces
- pods
- services
verbs:
- get
- list
- watch

View File

@ -1,22 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
rules:
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch

View File

@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-adapter
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: default

View File

@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: resource-metrics:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: default

View File

@ -1,16 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: resource-metrics-server-resources
rules:
- apiGroups:
- metrics.k8s.io
resources:
- '*'
verbs:
- '*'

View File

@ -1,38 +0,0 @@
apiVersion: v1
data:
config.yaml: |-
"resourceRules":
"cpu":
"containerLabel": "container"
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode=\"idle\", job=\"windows-exporter\",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)"
"resources":
"overrides":
"namespace":
"resource": "namespace"
"node":
"resource": "node"
"pod":
"resource": "pod"
"memory":
"containerLabel": "container"
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum(windows_cs_physical_memory_bytes{job=\"windows-exporter\",<<.LabelMatchers>>} - windows_memory_available_bytes{job=\"windows-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
"resources":
"overrides":
"instance":
"resource": "node"
"namespace":
"resource": "namespace"
"pod":
"resource": "pod"
"window": "5m"
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: adapter-config
namespace: default
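
The resourceRules above are templates: at query time the adapter substitutes <<.LabelMatchers>> and <<.GroupBy>> from the incoming metrics API request and sends the expanded PromQL to Prometheus. For a single-pod CPU query the containerQuery roughly expands to the following (illustrative expansion; the exact matchers depend on the request):

    sum(irate(container_cpu_usage_seconds_total{namespace="monitoring",pod="example-pod",container!="",pod!=""}[5m])) by (pod)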

View File

@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
namespace: default
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
strategy:
rollingUpdate:
maxSurge: 1
maxUnavailable: 1
template:
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
spec:
containers:
- args:
- --cert-dir=/var/run/serving-cert
- --config=/etc/adapter/config.yaml
- --logtostderr=true
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.default.svc.cluster.local:9090/
- --secure-port=6443
image: directxman12/k8s-prometheus-adapter:v0.8.4
name: prometheus-adapter
ports:
- containerPort: 6443
volumeMounts:
- mountPath: /tmp
name: tmpfs
readOnly: false
- mountPath: /var/run/serving-cert
name: volume-serving-cert
readOnly: false
- mountPath: /etc/adapter
name: config
readOnly: false
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: prometheus-adapter
volumes:
- emptyDir: {}
name: tmpfs
- emptyDir: {}
name: volume-serving-cert
- configMap:
name: adapter-config
name: config

View File

@ -1,18 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: resource-metrics-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: default

View File

@ -1,19 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
namespace: default
spec:
ports:
- name: https
port: 443
targetPort: 6443
selector:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus

View File

@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
namespace: default

View File

@ -1,23 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.8.4
name: prometheus-adapter
namespace: default
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/name: prometheus-adapter
app.kubernetes.io/part-of: kube-prometheus

View File

@ -1,20 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get

View File

@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: default

View File

@ -1,14 +0,0 @@
apiVersion: v1
kind: Endpoints
metadata:
labels:
k8s-app: kube-controller-manager
name: kube-controller-manager
namespace: kube-system
subsets:
- addresses:
- ip: 185.95.218.11
ports:
- name: http-metrics
port: 10252
protocol: TCP

View File

@ -1,14 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: kube-controller-manager
name: kube-controller-manager
namespace: kube-system
spec:
clusterIP: None
ports:
- name: http-metrics
port: 10252
targetPort: 10252
selector: {}
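
The Service above is headless and has an empty selector on purpose: the controller manager runs on the host rather than as a pod (hence the hard-coded node IP), so there is nothing to select. The hand-written Endpoints object pins the Service to that IP, and a ServiceMonitor can then scrape it like any in-cluster target; a minimal sketch, with name and namespace chosen for illustration:

    apiVersion: monitoring.coreos.com/v1
    kind: ServiceMonitor
    metadata:
      name: kube-controller-manager   # illustrative, not part of the deleted manifests
      namespace: monitoring
    spec:
      endpoints:
      - interval: 30s
        port: http-metrics
      jobLabel: k8s-app
      namespaceSelector:
        matchNames:
        - kube-system
      selector:
        matchLabels:
          k8s-app: kube-controller-manager

The same Service-plus-Endpoints pattern is repeated for the scheduler below.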

View File

@ -1,14 +0,0 @@
apiVersion: v1
kind: Endpoints
metadata:
labels:
k8s-app: kube-scheduler
name: kube-scheduler
namespace: kube-system
subsets:
- addresses:
- ip: 185.95.218.11
ports:
- name: http-metrics
port: 10251
protocol: TCP

View File

@ -1,14 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: kube-scheduler
name: kube-scheduler
namespace: kube-system
spec:
clusterIP: None
ports:
- name: http-metrics
port: 10251
targetPort: 10251
selector: {}

View File

@ -1,24 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
namespace: default
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0

View File

@ -1,18 +0,0 @@
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
spec:
minAvailable: 1
selector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
prometheus: k8s

View File

@ -1,73 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
prometheus: k8s
name: k8s
namespace: default
spec:
alerting:
alertmanagers:
- apiVersion: v2
name: alertmanager-main
namespace: default
port: web
externalLabels: {}
externalUrl: http://prometheus-k8s.monitoring:9090
image: quay.io/prometheus/prometheus:v2.26.0
nodeSelector:
kubernetes.io/os: linux
podMetadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
podMonitorNamespaceSelector:
matchExpressions:
- key: prometheus
operator: In
values:
- "yes"
- "true"
podMonitorSelector: {}
probeNamespaceSelector: {}
probeSelector: {}
replicas: 2
resources:
requests:
memory: 400Mi
retention: 7d
ruleSelector:
matchLabels:
prometheus: k8s
role: alert-rules
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector:
matchExpressions:
- key: prometheus
operator: In
values:
- "yes"
- "true"
serviceMonitorSelector: {}
storage:
volumeClaimTemplate:
apiVersion: v1
kind: PersistentVolumeClaim
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
storageClassName: local-path
version: 2.26.0
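
Both namespace selectors above use matchExpressions on a prometheus label, so this Prometheus only discovers ServiceMonitors and PodMonitors in namespaces explicitly labelled prometheus=yes or prometheus=true. A namespace opts in with an ordinary label (namespace name here is made up):

    kubectl label namespace my-app prometheus=true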

View File

@ -1,256 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
prometheus: k8s
role: alert-rules
name: prometheus-k8s-prometheus-rules
namespace: default
spec:
groups:
- name: prometheus
rules:
- alert: PrometheusBadConfig
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
reload its configuration.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusbadconfig
summary: Failed Prometheus configuration reload.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="default"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
is running full.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotificationqueuerunningfull
summary: Prometheus alert notification queue predicted to run full in less
than 30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="default"}[5m], 60 * 30)
>
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="default"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstosomealertmanagers
summary: Prometheus has encountered more than 1% errors sending alerts to
a specific Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default"}[5m])
)
* 100
> 1
for: 15m
labels:
severity: warning
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
to any Alertmanagers.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotconnectedtoalertmanagers
summary: Prometheus is not connected to any Alertmanagers.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="default"}[5m]) < 1
for: 10m
labels:
severity: warning
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} reload failures over the last 3h.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbreloadsfailing
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} compaction failures over the last 3h.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustsdbcompactionsfailing
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="default"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
samples.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusnotingestingsamples
summary: Prometheus is not ingesting samples.
expr: |
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="default"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="default"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="default"}) > 0
)
)
for: 10m
labels:
severity: warning
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{ printf "%.4g" $value }} samples/s with different values but duplicated
timestamp.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusduplicatetimestamps
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoutofordertimestamps
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
$labels.url }}
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotestoragefailures
summary: Prometheus fails to send samples to remote storage.
expr: |
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
/
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="default"}[5m]))
+
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="default"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="default"}[5m]))
)
)
* 100
> 1
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
}}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritebehind
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="default"}[5m])
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="default"}[5m])
)
> 120
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
desired shards calculation wants to run {{ $value }} shards for queue {{
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="default"}`
$labels.instance | query | first | value }}.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusremotewritedesiredshards
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="default"}[5m])
>
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="default"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusrulefailures
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
for: 15m
labels:
severity: critical
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
printf "%.0f" $value }} rule group evaluations in the last 5m.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusmissingruleevaluations
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
{{ printf "%.0f" $value }} targets because the number of targets exceeded
the configured target_limit.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheustargetlimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="default"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuserrorsendingalertstoanyalertmanager
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="default",alertmanager!~``}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical

View File

@ -1,42 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: prometheus-pushgateway
name: prometheus-pushgateway
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: prometheus-pushgateway
template:
metadata:
labels:
app: prometheus-pushgateway
spec:
containers:
- image: prom/pushgateway:v1.1.0
livenessProbe:
httpGet:
path: /#/status
port: 9091
initialDelaySeconds: 10
timeoutSeconds: 10
name: pushgateway
ports:
- containerPort: 9091
name: metrics
readinessProbe:
httpGet:
path: /#/status
port: 9091
initialDelaySeconds: 10
timeoutSeconds: 10
resources:
limits:
cpu: 50m
memory: 100Mi
requests:
cpu: 50m
memory: 100Mi

View File

@ -1,15 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app: prometheus-pushgateway
name: prometheus-pushgateway
namespace: monitoring
spec:
ports:
- name: http
port: 9091
targetPort: metrics
selector:
app: prometheus-pushgateway
type: ClusterIP

View File

@ -1,15 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
prometheus: k8s
name: prometheus-pushgateway
namespace: monitoring
spec:
endpoints:
- honorLabels: true
port: http
jobLabel: k8s-app
selector:
matchLabels:
app: prometheus-pushgateway
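
honorLabels: true matters here: pushed series carry their own job label, and without it Prometheus would overwrite that label with the scrape job's name. A minimal push from inside the cluster, using the standard Pushgateway HTTP API (job name and metric are made up for illustration):

    echo 'deploy_timestamp_seconds 1629999999' | curl --data-binary @- \
      http://prometheus-pushgateway.monitoring.svc:9091/metrics/job/demo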

View File

@ -1,18 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s-config
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s-config
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: default

View File

@ -1,57 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: default
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: default
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: default
kind: RoleBindingList

View File

@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s-config
namespace: default
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get

View File

@ -1,114 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: kube-system
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
kind: RoleList

View File

@ -1,23 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
prometheus: k8s
name: prometheus-k8s
namespace: default
spec:
ports:
- name: web
port: 9090
targetPort: web
selector:
app: prometheus
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
prometheus: k8s
sessionAffinity: ClientIP

View File

@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default

View File

@ -1,20 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
namespace: default
spec:
endpoints:
- interval: 30s
port: web
selector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
prometheus: k8s

View File

@ -1,41 +0,0 @@
spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
    regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
- action: drop
regex: etcd_(debugging|disk|request|server).*
sourceLabels:
- __name__
port: http-metrics

View File

@ -1,4 +0,0 @@
spec:
endpoints:
- interval: 30s
port: http-metrics

View File

@ -1,4 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: default

View File

@ -1,76 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-prometheus
app.kubernetes.io/part-of: kube-prometheus
prometheus: k8s
role: alert-rules
name: kube-prometheus-rules
namespace: default
spec:
groups:
- name: general.rules
rules:
- alert: TargetDown
annotations:
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
}} targets in {{ $labels.namespace }} namespace are down.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/targetdown
summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
namespace, service)) > 10
for: 10m
labels:
severity: warning
- alert: Watchdog
annotations:
description: |
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/watchdog
summary: An alert that should always be firing to certify that Alertmanager
is working properly.
expr: vector(1)
labels:
severity: none
- name: node-network
rules:
- alert: NodeNetworkInterfaceFlapping
annotations:
    message: Network interface "{{ $labels.device }}" changing its up status
      often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkinterfaceflapping
expr: |
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:
severity: warning
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
BY (instance)
record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m
  - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total)
BY (instance, cpu))
record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules
rules:
- expr: count without(instance, pod, node) (up == 1)
record: count:up1
- expr: count without(instance, pod, node) (up == 0)
record: count:up0
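
count:up1 and count:up0 give per-job counts of healthy and unhealthy scrape targets with the instance detail stripped away, which makes a fleet-wide availability ratio a one-liner (illustrative query; the series only exists where both counts are non-empty, so a guard with `or` may be needed in practice):

    count:up1 / (count:up1 + count:up0)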

View File

@ -1,452 +0,0 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
categories:
- prometheus-operator
kind: PodMonitor
listKind: PodMonitorList
plural: podmonitors
singular: podmonitor
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: PodMonitor defines monitoring for a set of pods.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired Pod selection for target discovery
by Prometheus.
properties:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
description: Selector to select which namespaces the Endpoints objects
are discovered from.
properties:
any:
description: Boolean describing whether all namespaces are selected
in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
description: PodMetricsEndpoint defines a scrapeable endpoint of
a Kubernetes Pod serving Prometheus metrics.
properties:
basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over
basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping
targets. The secret needs to be in the same namespace as the
pod monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before
ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
additionalProperties:
items:
type: string
type: array
description: Optional HTTP URL parameters
type: object
path:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the pod port this endpoint refers to. Mutually
exclusive with targetPort.
type: string
proxyUrl:
                    description: ProxyURL (e.g. http://proxyserver:2195) directs scrapes
                      to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before scraping.
Prometheus Operator automatically adds relabelings for a few
standard Kubernetes fields and replaces original scrape job
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
type: string
scrapeTimeout:
description: Timeout after which the scrape is ended
type: string
targetPort:
anyOf:
- type: integer
- type: string
description: 'Deprecated: Use ''port'' instead.'
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
cert:
description: Struct containing the client cert file for
the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file for the
targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
type: array
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
items:
type: string
type: array
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Pod objects.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required:
- podMetricsEndpoints
- selector
type: object
required:
- spec
type: object
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

View File

@ -1,428 +0,0 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: probes.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
categories:
- prometheus-operator
kind: Probe
listKind: ProbeList
plural: probes
singular: probe
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: Probe defines monitoring for a set of static targets or ingresses.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired Ingress selection for target discovery
by Prometheus.
properties:
basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over basic
authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
description: The secret in the service monitor namespace that
contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace that
contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping targets.
The secret needs to be in the same namespace as the probe and accessible
by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must be a
valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
interval:
description: Interval at which targets are probed using the configured
prober. If not specified Prometheus' global scrape interval is used.
type: string
jobName:
description: The job name assigned to scraped metrics by default.
type: string
module:
                description: 'The module to use for probing, specifying how to probe
                  the target. Example module configured in the blackbox exporter:
                  https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
type: string
prober:
description: Specification for the prober to use for probing targets.
The prober.URL parameter is required. Targets cannot be probed if
left empty.
properties:
path:
description: Path to collect metrics from. Defaults to `/probe`.
type: string
scheme:
description: HTTP scheme to use for scraping. Defaults to `http`.
type: string
url:
description: Mandatory URL of the prober.
type: string
required:
- url
type: object
scrapeTimeout:
description: Timeout for scraping metrics from the Prometheus exporter.
type: string
targets:
description: Targets defines a set of static and/or dynamically discovered
targets to be probed using the prober.
properties:
ingress:
description: Ingress defines the set of dynamically discovered
ingress objects which hosts are considered for probing.
properties:
namespaceSelector:
description: Select Ingress objects by namespace.
properties:
any:
description: Boolean describing whether all namespaces
are selected in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
relabelingConfigs:
description: 'RelabelConfigs to apply to samples before ingestion.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
selector:
description: Select Ingress objects by labels.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty.
This array is replaced during a strategic merge
patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
type: object
staticConfig:
                    description: 'StaticConfig defines static targets which are considered
                      for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
properties:
labels:
additionalProperties:
type: string
description: Labels assigned to all metrics scraped from the
targets.
type: object
relabelingConfigs:
description: 'RelabelConfigs to apply to samples before ingestion.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
static:
description: Targets is a list of URLs to probe using the
configured prober.
items:
type: string
type: array
type: object
type: object
tlsConfig:
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
description: Struct containing the CA cert to use for the targets.
properties:
configMap:
description: ConfigMap containing data to use for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or its key
must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
cert:
description: Struct containing the client cert file for the targets.
properties:
configMap:
description: ConfigMap containing data to use for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or its key
must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file for the targets.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
required:
- spec
type: object
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
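For reference, a minimal Probe that this CRD admits. This is a sketch only: it assumes a blackbox exporter reachable at blackbox-exporter.monitoring.svc:19115 and the stock http_2xx module, neither of which is defined in this repo.

```
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: example-probe
  namespace: monitoring
spec:
  # Hypothetical prober; must point at a deployed blackbox exporter.
  prober:
    url: blackbox-exporter.monitoring.svc:19115
  module: http_2xx
  targets:
    staticConfig:
      static:
      - https://example.com
```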


@@ -1,95 +0,0 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: prometheusrules.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
kind: PrometheusRule
listKind: PrometheusRuleList
plural: prometheusrules
singular: prometheusrule
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: PrometheusRule defines recording and alerting rules for a Prometheus
instance
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired alerting rule definitions for Prometheus.
properties:
groups:
description: Content of Prometheus rule file
items:
description: 'RuleGroup is a list of sequentially evaluated recording
and alerting rules. Note: PartialResponseStrategy is only used
by ThanosRuler and will be ignored by Prometheus instances. Valid
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
properties:
interval:
type: string
name:
type: string
partial_response_strategy:
type: string
rules:
items:
description: Rule describes an alerting or recording rule.
properties:
alert:
type: string
annotations:
additionalProperties:
type: string
type: object
expr:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
for:
type: string
labels:
additionalProperties:
type: string
type: object
record:
type: string
required:
- expr
type: object
type: array
required:
- name
- rules
type: object
type: array
type: object
required:
- spec
type: object
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
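For illustration, a minimal PrometheusRule that satisfies this schema; the alert name, expression, and duration are placeholders, not rules from this repo.

```
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: example-rules
  namespace: monitoring
  labels:
    prometheus: k8s
    role: alert-rules
spec:
  groups:
  - name: example
    rules:
    - alert: TargetDown    # placeholder alert name
      expr: up == 0        # `up` is a built-in Prometheus metric
      for: 5m
      labels:
        severity: warning
```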


@@ -1,475 +0,0 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
categories:
- prometheus-operator
kind: ServiceMonitor
listKind: ServiceMonitorList
plural: servicemonitors
singular: servicemonitor
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: ServiceMonitor defines monitoring for a set of services.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired Service selection for target discovery
by Prometheus.
properties:
endpoints:
description: A list of endpoints allowed as part of this ServiceMonitor.
items:
description: Endpoint defines a scrapeable endpoint serving Prometheus
metrics.
properties:
basicAuth:
                      description: 'BasicAuth allows an endpoint to authenticate over
                        basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoints'
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenFile:
description: File to read bearer token for scraping targets.
type: string
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping
targets. The secret needs to be in the same namespace as the
service monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before
ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
additionalProperties:
items:
type: string
type: array
description: Optional HTTP URL parameters
type: object
path:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the service port this endpoint refers to.
Mutually exclusive with targetPort.
type: string
proxyUrl:
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before scraping.
Prometheus Operator automatically adds relabelings for a few
standard Kubernetes fields and replaces original scrape job
name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
type: string
scrapeTimeout:
description: Timeout after which the scrape is ended
type: string
targetPort:
anyOf:
- type: integer
- type: string
description: Name or number of the target port of the Pod behind
the Service, the port must be specified with container port
property. Mutually exclusive with port.
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint
properties:
ca:
description: Struct containing the CA cert to use for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
caFile:
description: Path to the CA cert in the Prometheus container
to use for the targets.
type: string
cert:
description: Struct containing the client cert file for
the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
certFile:
description: Path to the client cert file in the Prometheus
container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
description: Path to the client key file in the Prometheus
container for the targets.
type: string
keySecret:
description: Secret containing the client key file for the
targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
type: array
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
description: Selector to select which namespaces the Endpoints objects
are discovered from.
properties:
any:
description: Boolean describing whether all namespaces are selected
in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
items:
type: string
type: array
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Endpoints objects.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
targetLabels:
description: TargetLabels transfers labels on the Kubernetes Service
onto the target.
items:
type: string
type: array
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted.
format: int64
type: integer
required:
- endpoints
- selector
type: object
required:
- spec
type: object
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
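Only spec.endpoints and spec.selector are required, so a minimal conforming object looks like the sketch below (label and port name are placeholders; a real instance, the traefik ServiceMonitor, appears further down in this diff).

```
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: example-app
  namespace: monitoring
spec:
  endpoints:
  - port: metrics        # name of a Service port, not a number
    interval: 30s
  selector:
    matchLabels:
      app: example-app   # placeholder label
```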


@@ -1,92 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
rules:
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
- alertmanagers/finalizers
- alertmanagerconfigs
- prometheuses
- prometheuses/finalizers
- thanosrulers
- thanosrulers/finalizers
- servicemonitors
- podmonitors
- probes
- prometheusrules
verbs:
- '*'
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- secrets
verbs:
- '*'
- apiGroups:
- ""
resources:
- pods
verbs:
- list
- delete
- apiGroups:
- ""
resources:
- services
- services/finalizers
- endpoints
verbs:
- get
- create
- update
- delete
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create


@@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-operator
subjects:
- kind: ServiceAccount
name: prometheus-operator
namespace: default


@@ -1,70 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.47.0
image: quay.io/prometheus-operator/prometheus-operator:v0.47.0
name: prometheus-operator
ports:
- containerPort: 8080
name: http
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
securityContext:
allowPrivilegeEscalation: false
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8080/
image: quay.io/brancz/kube-rbac-proxy:v0.8.0
name: kube-rbac-proxy
ports:
- containerPort: 8443
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
securityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
nodeSelector:
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: prometheus-operator


@@ -1,95 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
prometheus: k8s
role: alert-rules
name: prometheus-operator-rules
namespace: default
spec:
groups:
- name: prometheus-operator
rules:
- alert: PrometheusOperatorListErrors
annotations:
description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorlisterrors
summary: Errors while performing list operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorWatchErrors
annotations:
description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="default"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="default"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorSyncFailed
annotations:
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
namespace fails to reconcile {{ $value }} objects.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorsyncfailed
summary: Last controller reconciliation failed
expr: |
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="default"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorReconcileErrors
annotations:
description: '{{ $value | humanizePercentage }} of reconciling operations
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
namespace.'
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorreconcileerrors
summary: Errors while reconciling controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="default"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="default"}[5m]))) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNodeLookupErrors
annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
Namespace.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornodelookuperrors
summary: Errors while reconciling Prometheus.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="default"}[5m]) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNotReady
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
ready to reconcile {{ $labels.controller }} resources.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatornotready
summary: Prometheus operator not ready
expr: |
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="default"}[5m]) == 0)
for: 5m
labels:
severity: warning
- alert: PrometheusOperatorRejectedResources
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
}} resources.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheusoperatorrejectedresources
summary: Resources rejected by Prometheus operator
expr: |
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="default"}[5m]) > 0
for: 5m
labels:
severity: warning


@@ -1,20 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
namespace: default
spec:
clusterIP: None
ports:
- name: https
port: 8443
targetPort: https
selector:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus


@@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 0.47.0
name: prometheus-operator
namespace: default


@@ -1,17 +0,0 @@
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: traefik
namespace: monitoring
spec:
endpoints:
- interval: 30s
path: /metrics
port: metrics
namespaceSelector:
matchNames:
- kube-system
selector:
matchLabels:
app: traefik
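This ServiceMonitor only takes effect if a Service in kube-system carries the label app: traefik and exposes a port named metrics. That Service is managed by k3s and is not part of this diff; a sketch of the shape it must have (name and port number are assumptions):

```
apiVersion: v1
kind: Service
metadata:
  name: traefik            # assumed; managed by k3s, not this repo
  namespace: kube-system
  labels:
    app: traefik           # matched by spec.selector above
spec:
  selector:
    app: traefik
  ports:
  - name: metrics          # matched by spec.endpoints[0].port above
    port: 9100             # assumed metrics port
    targetPort: 9100
```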


@@ -1,109 +0,0 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local pvc = k.core.v1.persistentVolumeClaim;
local ingress = k.extensions.v1beta1.ingress;
local ingressTls = ingress.mixin.spec.tlsType;
local ingressRule = ingress.mixin.spec.rulesType;
local httpIngressPath = ingressRule.mixin.http.pathsType;
local statefulSet = k.apps.v1.statefulSet;
local selector = statefulSet.mixin.spec.selectorType;
local kp =
(import 'kube-prometheus/main.libsonnet') +
(import 'prometheus-pushgateway/pushgateway.libsonnet') +
(import 'k3s.libsonnet')
{
_config+:: {
namespace: 'monitoring',
versions+:: {
pushgateway: 'v1.1.0',
},
prometheus+:: {
        name: 'k8s',
replicas: 1,
namespaces+: ['k8up', 'owntracks'],
},
alertmanager+:: {
replicas: 1,
},
grafana+: {
plugins: ['grafana-piechart-panel'],
datasources+: [{
name: 'Loki',
type: 'loki',
access: 'proxy',
orgId: 1,
url: 'http://loki.loki:3100',
version: 1,
editable: false,
}],
},
},
alertmanager+:: {
alertmanager+: {
spec+: {
configSecret: 'alertmanager-tbrnt-config',
},
},
},
prometheus+:: {
prometheus+: {
spec+: {
retention: '7d',
externalUrl: 'http://prometheus-k8s.monitoring:9090',
serviceMonitorNamespaceSelector: selector.withMatchExpressions({ key: 'prometheus', operator: 'In', values: ['yes', 'true'] }),
podMonitorNamespaceSelector: selector.withMatchExpressions({ key: 'prometheus', operator: 'In', values: ['yes', 'true'] }),
storage: {
volumeClaimTemplate:
pvc.new() +
pvc.mixin.spec.withAccessModes('ReadWriteOnce') +
pvc.mixin.spec.resources.withRequests({ storage: '10Gi' }) +
pvc.mixin.spec.withStorageClassName('local-path'),
},
},
},
},
ingress+:: {
grafana:
ingress.new() +
ingress.mixin.metadata.withName('grafana') +
ingress.mixin.metadata.withNamespace($._config.namespace) +
ingress.mixin.metadata.withAnnotations({
'cert-manager.io/cluster-issuer': 'letsencrypt-prod',
'ingress.kubernetes.io/ssl-redirect': 'true',
}) +
ingress.mixin.spec.withRules(
ingressRule.new() +
ingressRule.withHost('grafana.knurrli.tbrnt.ch') +
ingressRule.mixin.http.withPaths(
httpIngressPath.new() +
httpIngressPath.mixin.backend.withServiceName('grafana') +
httpIngressPath.mixin.backend.withServicePort('http')
),
) +
ingress.mixin.spec.withTls(
ingressTls.new() +
ingressTls.withHosts('grafana.knurrli.tbrnt.ch') +
ingressTls.withSecretName('grafana-ingress-cert')
),
},
grafanaDashboards+:: {
'traefik.json': (import 'traefik-grafana-dashboard.json'),
},
};
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
{ ['prometheus-pushgateway-' + name]: kp.pushgateway[name] for name in std.objectFields(kp.pushgateway) } +
{ ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) }
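Note that serviceMonitorNamespaceSelector and podMonitorNamespaceSelector above limit discovery to namespaces labelled prometheus=yes or prometheus=true, so every namespace that should be scraped needs that label. A sketch:

```
apiVersion: v1
kind: Namespace
metadata:
  name: k8up              # example; one of the namespaces listed above
  labels:
    prometheus: "true"    # without this label its ServiceMonitors are ignored
```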


@@ -1,720 +0,0 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "5.2.4"
},
{
"type": "panel",
"id": "grafana-piechart-panel",
"name": "Pie Chart",
"version": "1.1.6"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": "5.0.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "5.0.0"
},
{
"type": "panel",
"id": "singlestat",
"name": "Singlestat",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Traefik dashboard prometheus",
"editable": true,
"gnetId": 4475,
"graphTooltip": 0,
"id": null,
"iteration": 1538662098977,
"links": [],
"panels": [
{
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 10,
"title": "$backend stats",
"type": "row"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"#d44a3a",
"rgba(237, 129, 40, 0.89)",
"#299c46"
],
"datasource": "prometheus",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 1
},
"id": 1,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "sum(traefik_backend_server_up{backend=\"$backend\"})/count(traefik_config_reloads_total)",
"format": "time_series",
"intervalFactor": 2,
"refId": "A"
}
],
"thresholds": "0,1",
"title": "$backend status",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "OK",
"value": "1"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"breakPoint": "50%",
"cacheTimeout": null,
"combine": {
"label": "Others",
"threshold": 0
},
"datasource": "prometheus",
"fontSize": "80%",
"format": "short",
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 1
},
"id": 2,
"interval": null,
"legend": {
"percentage": true,
"show": true,
"values": true
},
"legendType": "Right side",
"links": [],
"maxDataPoints": 3,
"nullPointMode": "connected",
"pieType": "pie",
"strokeWidth": 1,
"targets": [
{
"expr": "traefik_backend_requests_total{backend=\"$backend\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{method}} : {{code}}",
"refId": "A"
}
],
"title": "$backend return code",
"type": "grafana-piechart-panel",
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "prometheus",
"format": "ms",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 1
},
"id": 4,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "sum(traefik_backend_request_duration_seconds_sum{backend=\"$backend\"}) / sum(traefik_backend_requests_total{backend=\"$backend\"}) * 1000",
"format": "time_series",
"intervalFactor": 2,
"refId": "A"
}
],
"thresholds": "",
"title": "$backend response time",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 8
},
"id": 3,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(traefik_backend_requests_total{backend=\"$backend\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Total requests $backend",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Total requests over 5min $backend",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 12,
"panels": [],
"title": "Global stats",
"type": "row"
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 16
},
"id": 5,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{method}} : {{code}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Status code 200 over 5min",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 16
},
"id": 6,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ method }} : {{code}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Others status code over 5min",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"breakPoint": "50%",
"cacheTimeout": null,
"combine": {
"label": "Others",
"threshold": 0
},
"datasource": "prometheus",
"fontSize": "80%",
"format": "short",
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 23
},
"id": 7,
"interval": null,
"legend": {
"show": true,
"values": true
},
"legendType": "Right side",
"links": [],
"maxDataPoints": 3,
"nullPointMode": "connected",
"pieType": "pie",
"strokeWidth": 1,
"targets": [
{
"expr": "sum(rate(traefik_backend_requests_total[5m])) by (backend) ",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ backend }}",
"refId": "A"
}
],
"title": "Requests by service",
"type": "grafana-piechart-panel",
"valueName": "total"
},
{
"aliasColors": {},
"breakPoint": "50%",
"cacheTimeout": null,
"combine": {
"label": "Others",
"threshold": 0
},
"datasource": "prometheus",
"fontSize": "80%",
"format": "short",
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 23
},
"id": 8,
"interval": null,
"legend": {
"show": true,
"values": true
},
"legendType": "Right side",
"links": [],
"maxDataPoints": 3,
"nullPointMode": "connected",
"pieType": "pie",
"strokeWidth": 1,
"targets": [
{
"expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ entrypoint }}",
"refId": "A"
}
],
"title": "Requests by protocol",
"type": "grafana-piechart-panel",
"valueName": "total"
}
],
"schemaVersion": 16,
"style": "dark",
"tags": [
"traefik",
"prometheus"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "backend",
"options": [],
"query": "label_values(backend)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "prometheus",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "entrypoint",
"options": [],
"query": "label_values(entrypoint)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Traefik",
"uid": "qPdAviJmz",
"version": 5
}


@@ -1,17 +0,0 @@
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: traefik
namespace: monitoring
spec:
endpoints:
- interval: 30s
path: /metrics
port: metrics
namespaceSelector:
matchNames:
- kube-system
selector:
matchLabels:
app: traefik


@@ -1 +0,0 @@
github.com/prometheus/alertmanager/doc/alertmanager-mixin


@@ -1,349 +0,0 @@
{
_config+:: {
namespace: 'default',
versions+:: {
grafana: '7.3.4',
},
imageRepos+:: {
grafana: 'docker.io/grafana/grafana',
},
prometheus+:: {
name: 'k8s',
serviceName: 'prometheus-' + $._config.prometheus.name,
},
grafana+:: {
labels: {
'app.kubernetes.io/name': 'grafana',
'app.kubernetes.io/version': $._config.versions.grafana,
'app.kubernetes.io/component': 'grafana',
},
dashboards: {},
rawDashboards: {},
folderDashboards: {},
datasources: [{
name: 'prometheus',
type: 'prometheus',
access: 'proxy',
orgId: 1,
url: 'http://' + $._config.prometheus.serviceName + '.' + $._config.namespace + '.svc:9090',
version: 1,
editable: false,
}],
// Forces pod restarts when dashboards are changed
dashboardsChecksum: false,
config: {},
ldap: null,
plugins: [],
env: [],
port: 3000,
resources: {
requests: { cpu: '100m', memory: '100Mi' },
limits: { cpu: '200m', memory: '200Mi' },
},
containers: [],
},
},
grafanaDashboards: {},
grafana+: {
[if std.length($._config.grafana.config) > 0 then 'config']:
{
apiVersion: 'v1',
kind: 'Secret',
metadata: {
name: 'grafana-config',
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
type: 'Opaque',
data: {
'grafana.ini': std.base64(std.encodeUTF8(std.manifestIni($._config.grafana.config))),
} +
if $._config.grafana.ldap != null then { 'ldap.toml': std.base64(std.encodeUTF8($._config.grafana.ldap)) } else {},
},
dashboardDefinitions:
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
data: { [name]: std.manifestJsonEx($._config.grafana.dashboards[name], ' ') },
}
for name in std.objectFields($._config.grafana.dashboards)
] + [
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
data: { [name]: std.manifestJsonEx($._config.grafana.folderDashboards[folder][name], ' ') },
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] + (
if std.length($._config.grafana.rawDashboards) > 0 then
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
apiVersion: 'v1',
kind: 'ConfigMap',
metadata: {
name: dashboardName,
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
data: { [name]: $._config.grafana.rawDashboards[name] },
}
for name in std.objectFields($._config.grafana.rawDashboards)
]
else
[]
),
dashboardSources:
local dashboardSources = {
apiVersion: 1,
providers:
(
if std.length($._config.grafana.dashboards) +
std.length($._config.grafana.rawDashboards) > 0 then [
{
name: '0',
orgId: 1,
folder: 'Default',
type: 'file',
options: {
path: '/grafana-dashboard-definitions/0',
},
},
] else []
) +
[
{
name: folder,
orgId: 1,
folder: folder,
type: 'file',
options: {
path: '/grafana-dashboard-definitions/' + folder,
},
}
for folder in std.objectFields($._config.grafana.folderDashboards)
],
};
{
kind: 'ConfigMap',
apiVersion: 'v1',
metadata: {
name: 'grafana-dashboards',
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
data: { 'dashboards.yaml': std.manifestJsonEx(dashboardSources, ' ') },
},
dashboardDatasources:
{
apiVersion: 'v1',
kind: 'Secret',
metadata: {
name: 'grafana-datasources',
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
type: 'Opaque',
data: { 'datasources.yaml': std.base64(std.encodeUTF8(std.manifestJsonEx({
apiVersion: 1,
datasources: $._config.grafana.datasources,
}, ' '))) },
},
service:
{
apiVersion: 'v1',
kind: 'Service',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
labels: $._config.grafana.labels,
},
spec: {
selector: $.grafana.deployment.spec.selector.matchLabels,
ports: [
{ name: 'http', targetPort: 'http', port: 3000 },
],
},
},
serviceAccount:
{
apiVersion: 'v1',
kind: 'ServiceAccount',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
},
},
deployment:
local targetPort = $._config.grafana.port;
local portName = 'http';
local podLabels = $._config.grafana.labels;
local podSelectorLabels = {
[labelName]: podLabels[labelName]
for labelName in std.objectFields(podLabels)
if !std.setMember(labelName, ['app.kubernetes.io/version'])
};
local configVolumeName = 'grafana-config';
local configSecretName = 'grafana-config';
local configVolume = { name: configVolumeName, secret: { secretName: configSecretName } };
local configVolumeMount = { name: configVolumeName, mountPath: '/etc/grafana', readOnly: false };
local storageVolumeName = 'grafana-storage';
local storageVolume = { name: storageVolumeName, emptyDir: {} };
local storageVolumeMount = { name: storageVolumeName, mountPath: '/var/lib/grafana', readOnly: false };
local datasourcesVolumeName = 'grafana-datasources';
local datasourcesSecretName = 'grafana-datasources';
local datasourcesVolume = { name: datasourcesVolumeName, secret: { secretName: datasourcesSecretName } };
local datasourcesVolumeMount = { name: datasourcesVolumeName, mountPath: '/etc/grafana/provisioning/datasources', readOnly: false };
local dashboardsVolumeName = 'grafana-dashboards';
local dashboardsConfigMapName = 'grafana-dashboards';
local dashboardsVolume = { name: dashboardsVolumeName, configMap: { name: dashboardsConfigMapName } };
local dashboardsVolumeMount = { name: dashboardsVolumeName, mountPath: '/etc/grafana/provisioning/dashboards', readOnly: false };
local volumeMounts =
[
storageVolumeMount,
datasourcesVolumeMount,
dashboardsVolumeMount,
] +
[
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.dashboards)
] +
[
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/' + folder + '/' + dashboardName,
readOnly: false,
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
{
local dashboardName = std.strReplace(name, '.json', ''),
name: 'grafana-dashboard-' + dashboardName,
mountPath: '/grafana-dashboard-definitions/0/' + dashboardName,
readOnly: false,
}
for name in std.objectFields($._config.grafana.rawDashboards)
] + (
if std.length($._config.grafana.config) > 0 then [configVolumeMount] else []
);
local volumes =
[
storageVolume,
datasourcesVolume,
dashboardsVolume,
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.dashboards)
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for folder in std.objectFields($._config.grafana.folderDashboards)
for name in std.objectFields($._config.grafana.folderDashboards[folder])
] +
[
{
local dashboardName = 'grafana-dashboard-' + std.strReplace(name, '.json', ''),
name: dashboardName,
configMap: { name: dashboardName },
}
for name in std.objectFields($._config.grafana.rawDashboards)
] +
if std.length($._config.grafana.config) > 0 then [configVolume] else [];
local plugins = (
if std.length($._config.grafana.plugins) == 0 then
[]
else
[{ name: 'GF_INSTALL_PLUGINS', value: std.join(',', $._config.grafana.plugins) }]
);
local c = [{
name: 'grafana',
image: $._config.imageRepos.grafana + ':' + $._config.versions.grafana,
env: $._config.grafana.env + plugins,
volumeMounts: volumeMounts,
ports: [{ name: portName, containerPort: targetPort }],
readinessProbe: {
httpGet: { path: '/api/health', port: portName },
},
resources: $._config.grafana.resources,
}] + $._config.grafana.containers;
{
apiVersion: 'apps/v1',
kind: 'Deployment',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
labels: podLabels,
},
spec: {
replicas: 1,
selector: {
matchLabels: podSelectorLabels,
},
template: {
metadata: {
labels: podLabels,
annotations: {
[if std.length($._config.grafana.config) > 0 then 'checksum/grafana-config']: std.md5(std.toString($.grafana.config)),
'checksum/grafana-datasources': std.md5(std.toString($.grafana.dashboardDatasources)),
[if $._config.grafana.dashboardsChecksum then 'checksum/grafana-dashboards']: std.md5(std.toString($.grafana.dashboardDefinitions)),
},
},
spec: {
containers: c,
volumes: volumes,
serviceAccountName: $.grafana.serviceAccount.metadata.name,
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
securityContext: { fsGroup: 65534, runAsNonRoot: true, runAsUser: 65534 },
},
},
},
},
},
}
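For orientation: the dashboardDatasources field above renders a Secret whose datasources.yaml entry is a base64-encoded provisioning document. Decoded, and with this repo's overrides applied (namespace monitoring plus the extra Loki datasource from monitoring.jsonnet), it would come out roughly as:

```
apiVersion: 1
datasources:
- name: prometheus
  type: prometheus
  access: proxy
  orgId: 1
  url: http://prometheus-k8s.monitoring.svc:9090
  version: 1
  editable: false
- name: Loki
  type: loki
  access: proxy
  orgId: 1
  url: http://loki.loki:3100
  version: 1
  editable: false
```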


@@ -1,15 +0,0 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
}
},
"version": "master"
}
],
"legacyImports": false
}

Some files were not shown because too many files have changed in this diff.