update jsonnet deps

Tobias Brunner 2022-12-31 16:01:15 +01:00
parent 14f2e1afac
commit 3f53cd52f5
Signed by: tobru
SSH key fingerprint: SHA256:kywVhvCA+MIxL6eBgoQa+BfC/ROJqcfD2bpy1PR6Ebk
32 changed files with 693 additions and 196 deletions

View file

@ -0,0 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "main"
}
],
"legacyImports": true
}
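jsonnet-bundler resolves the "main" ref above to an exact commit in the lock file that follows. With legacyImports enabled, the library is also reachable under the short 'kube-prometheus' path; a minimal consumer sketch, with an illustrative namespace and output layout:

// Minimal consumer sketch; the namespace value and output layout are illustrative.
local kp = (import 'kube-prometheus/main.libsonnet') + {
  values+:: {
    common+: { namespace: 'monitoring' },
  },
};

{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) }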

View file

@ -0,0 +1,180 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana.git",
"subdir": "grafana"
}
},
"version": "d039275e4916aceae1c137120882e01d857787ac",
"sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
},
{
"source": {
"git": {
"remote": "https://github.com/etcd-io/etcd.git",
"subdir": "contrib/mixin"
}
},
"version": "4ae4d9fe6c98b617338ce7519253b2d145290349",
"sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "1120f9e255760a3c104b57871fcb91801e934382",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet"
}
},
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet-7.0"
}
},
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "grafana-builder"
}
},
"version": "d68f9a6e0b1af7c4c4056dc2b43fb8f3bac01f43",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
"subdir": ""
}
},
"version": "3c386687c1f8ceb6b79ff887c4a934e9cee1b90a",
"sum": "H8lcnk7gQEUoRi58/xq+JTfd2PcjJUjMQHgxGklUiFY="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "f1288f943a49344b00ed3d02cc07799da7226414",
"sum": "4PJ2ROxODsoYO/1Y70+dgLZVjW5zlfzB+TDpxJBHwaI="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "f1288f943a49344b00ed3d02cc07799da7226414",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "37d00082289c587f5a02a343ba23cfbe167000e2",
"sum": "5onAaPSrjnmgXIAsypnx0W/sIA7iTsHCeCjPrhGxj5A="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
"subdir": "jsonnet/mixin"
}
},
"version": "5485624cf90dd4de046b4f90757950691d5e71bc",
"sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
"name": "prometheus-operator-mixin"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "5485624cf90dd4de046b4f90757950691d5e71bc",
"sum": "wJ1E8XxYJ0RJrUuDNWLzE7bzo6JrH7P9q1lAu/xi4Ow="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/alertmanager.git",
"subdir": "doc/alertmanager-mixin"
}
},
"version": "87ad8437fc1e28280e8c5c5fdcb41e0a8904a855",
"sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=",
"name": "alertmanager"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter.git",
"subdir": "docs/node-mixin"
}
},
"version": "a3bd2e13052929663dbd7d680fab4a952efb1de6",
"sum": "TwdaTm0Z++diiLyaKAAimmC6hBL7XbrJc0RHhBCpAdU="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus.git",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "49f775d8a0d76532fae36c0392d929bbf110577a",
"sum": "LRx0tbMnoE1p8KEn+i81j2YsA5Sgt3itE5Y6jBf5eOQ=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/pyrra-dev/pyrra.git",
"subdir": "config/crd/bases"
}
},
"version": "1b11dfbbf5d8fc5e201d0c22df94f0858ac8f43a",
"sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs="
},
{
"source": {
"git": {
"remote": "https://github.com/thanos-io/thanos.git",
"subdir": "mixin"
}
},
"version": "b60c09b1184c37f43990ae5071daedcb72f51879",
"sum": "Io++1+lp1oQVoQiVRSCXUiGdTIRPV7aL6Ewgs3bShEs=",
"name": "thanos-mixin"
}
],
"legacyImports": false
}
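Each lock entry pins a dependency to an exact git commit (version) plus a content hash (sum) that jsonnet-bundler verifies at install time; running jb update refreshes both, which is what this commit does. A sketch of importing one of these pinned libraries from the vendor tree, with a hypothetical selector override:

// Sketch: import the pinned kubernetes-mixin and render its alerts.
// The selector override is a hypothetical example, not taken from this repository.
local kubernetesMixin = (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') + {
  _config+:: {
    kubeStateMetricsSelector: 'job="kube-state-metrics"',
  },
};

kubernetesMixin.prometheusAlerts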

View file

@ -24,70 +24,127 @@
rules: [
{
alert: 'KubeCPUOvercommit',
labels: {
severity: 'warning',
},
annotations: {
summary: 'Cluster has overcommitted CPU resource requests.',
},
'for': '10m',
} +
if $._config.showMultiCluster then {
expr: |||
sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
and
(sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0
||| % $._config,
annotations+: {
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
},
} else {
expr: |||
sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
and
(sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
||| % $._config,
labels: {
severity: 'warning',
annotations+: {
description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
},
annotations: {
description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.',
summary: 'Cluster has overcommitted CPU resource requests.',
},
'for': '10m',
},
{
alert: 'KubeMemoryOvercommit',
expr: |||
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
and
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.',
summary: 'Cluster has overcommitted memory resource requests.',
},
'for': '10m',
},
} +
if $._config.showMultiCluster then {
expr: |||
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
and
(sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0
||| % $._config,
annotations+: {
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.' % $._config,
},
} else
{
expr: |||
sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
and
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
||| % $._config,
annotations+: {
description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.',
},
},
{
alert: 'KubeCPUQuotaOvercommit',
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"}))
/
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted CPU resource requests for Namespaces.',
summary: 'Cluster has overcommitted CPU resource requests.',
},
'for': '5m',
},
{
alert: 'KubeMemoryQuotaOvercommit',
} +
if $._config.showMultiCluster then {
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"}))
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"})) by (%(clusterLabel)s)
/
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)
> %(namespaceOvercommitFactor)s
||| % $._config,
annotations+: {
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Namespaces.' % $._config,
},
} else
{
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"}))
/
sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
annotations+: {
description: 'Cluster has overcommitted CPU resource requests for Namespaces.',
},
},
{
alert: 'KubeMemoryQuotaOvercommit',
labels: {
severity: 'warning',
},
annotations: {
description: 'Cluster has overcommitted memory resource requests for Namespaces.',
summary: 'Cluster has overcommitted memory resource requests.',
},
'for': '5m',
},
} +
if $._config.showMultiCluster then {
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"})) by (%(clusterLabel)s)
/
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)
> %(namespaceOvercommitFactor)s
||| % $._config,
annotations+: {
description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Namespaces.' % $._config,
},
} else
{
expr: |||
sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"}))
/
sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})
> %(namespaceOvercommitFactor)s
||| % $._config,
annotations+: {
description: 'Cluster has overcommitted memory resource requests for Namespaces.',
},
},
{
alert: 'KubeQuotaAlmostFull',
expr: |||

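The shape of this hunk is plain Jsonnet object composition: a base alert object is merged with either a multi-cluster or a single-cluster variant depending on $._config.showMultiCluster, and annotations+: extends the base annotations instead of replacing them. A reduced sketch of the same mechanism with illustrative field values:

local showMultiCluster = true;  // stands in for $._config.showMultiCluster

local base = {
  alert: 'ExampleOvercommit',
  labels: { severity: 'warning' },
  annotations: { summary: 'Cluster has overcommitted resources.' },
  'for': '10m',
};

base + (
  if showMultiCluster then {
    expr: 'sum(example_metric) by (cluster) > 0',
    annotations+: { description: 'Cluster {{ $labels.cluster }} is overcommitted.' },
  } else {
    expr: 'sum(example_metric) > 0',
    annotations+: { description: 'Cluster is overcommitted.' },
  }
)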
View file

@ -98,7 +98,7 @@
// This list of disk device names is referenced in various expressions.
diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
diskDeviceSelector: 'device=~"(/dev.+)|%s"' % std.join('|', self.diskDevices),
// Certain workloads (e.g. KubeVirt/CDI) will fully utilise the persistent volume they claim;
// the size of the PV will never grow since they consume the entirety of the volume by design.

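The selector string is built with Jsonnet's % formatting over std.join; the change additionally accepts device names reported with a /dev prefix. A quick sketch of what the expression evaluates to, with an abbreviated device list:

local diskDevices = ['mmcblk.p.+', 'nvme.+', 'sd.+'];  // abbreviated list

'device=~"(/dev.+)|%s"' % std.join('|', diskDevices)
// evaluates to: device=~"(/dev.+)|mmcblk.p.+|nvme.+|sd.+"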
View file

@ -30,11 +30,11 @@
// This rule gives the number of CPUs per node.
record: 'node:node_num_cpu:sum',
expr: |||
count by (%(clusterLabel)s, node) (sum by (node, cpu) (
node_cpu_seconds_total{%(nodeExporterSelector)s}
* on (namespace, %(podLabel)s) group_left(node)
count by (%(clusterLabel)s, node) (
node_cpu_seconds_total{mode="idle",%(nodeExporterSelector)s}
* on (namespace, %(podLabel)s) group_left(node)
topk by(namespace, %(podLabel)s) (1, node_namespace_pod:kube_pod_info:)
))
)
||| % $._config,
},
// Add separate rules for Available memory, so we can aggregate across clusters in dashboards.
@ -52,12 +52,24 @@
) by (%(clusterLabel)s)
||| % $._config,
},
{
// This rule gives cpu utilization per node.
record: 'node:node_cpu_utilization:ratio_rate5m',
expr: |||
avg by (%(clusterLabel)s, node) (
sum without (mode) (
rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",%(nodeExporterSelector)s}[5m])
)
)
||| % $._config,
},
{
// This rule gives cpu utilization per cluster
record: 'cluster:node_cpu:ratio_rate5m',
expr: |||
sum(rate(node_cpu_seconds_total{%(nodeExporterSelector)s,mode!="idle",mode!="iowait",mode!="steal"}[5m])) /
count(sum(node_cpu_seconds_total{%(nodeExporterSelector)s}) by (%(clusterLabel)s, instance, cpu))
avg by (%(clusterLabel)s) (
node:node_cpu_utilization:ratio_rate5m
)
||| % $._config,
},
],
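After the % $._config substitution these recording rules become plain PromQL. A sketch of the rendered form of the new per-node CPU utilization rule, assuming the usual defaults clusterLabel: 'cluster' and nodeExporterSelector: 'job="node-exporter"' (both are configurable); the cluster-level rule then simply averages this series per cluster:

// Rendered rule (sketch, assuming the defaults named above).
{
  record: 'node:node_cpu_utilization:ratio_rate5m',
  expr: |||
    avg by (cluster, node) (
      sum without (mode) (
        rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m])
      )
    )
  |||,
}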

View file

@ -114,6 +114,13 @@
],
verbs: ['list', 'watch'],
},
{
apiGroups: ['discovery.k8s.io'],
resources: [
'endpointslices',
],
verbs: ['list', 'watch'],
},
{
apiGroups: ['storage.k8s.io'],
resources: [
@ -134,6 +141,7 @@
apiGroups: ['networking.k8s.io'],
resources: [
'networkpolicies',
'ingressclasses',
'ingresses',
],
verbs: ['list', 'watch'],

View file

@ -242,10 +242,10 @@
indicator: {
ratio: {
errors: {
metric: 'apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}',
metric: 'apiserver_request_total{component="apiserver",verb=~"LIST|GET",code=~"5.."}',
},
total: {
metric: 'apiserver_request_total{job="apiserver",verb=~"LIST|GET"}',
metric: 'apiserver_request_total{component="apiserver",verb=~"LIST|GET"}',
},
},
},
@ -270,10 +270,10 @@
indicator: {
ratio: {
errors: {
metric: 'apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}',
metric: 'apiserver_request_total{component="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}',
},
total: {
metric: 'apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}',
metric: 'apiserver_request_total{component="apiserver",verb=~"POST|PUT|PATCH|DELETE"}',
},
},
},
@ -298,10 +298,10 @@
indicator: {
latency: {
success: {
metric: 'apiserver_request_duration_seconds_bucket{job="apiserver",scope=~"resource|",verb=~"LIST|GET",le="0.1"}',
metric: 'apiserver_request_duration_seconds_bucket{component="apiserver",scope=~"resource|",verb=~"LIST|GET",le="0.1"}',
},
total: {
metric: 'apiserver_request_duration_seconds_count{job="apiserver",scope=~"resource|",verb=~"LIST|GET"}',
metric: 'apiserver_request_duration_seconds_count{component="apiserver",scope=~"resource|",verb=~"LIST|GET"}',
},
},
},
@ -326,10 +326,10 @@
indicator: {
latency: {
success: {
metric: 'apiserver_request_duration_seconds_bucket{job="apiserver",scope=~"namespace|",verb=~"LIST|GET",le="5"}',
metric: 'apiserver_request_duration_seconds_bucket{component="apiserver",scope=~"namespace|",verb=~"LIST|GET",le="5"}',
},
total: {
metric: 'apiserver_request_duration_seconds_count{job="apiserver",scope=~"namespace|",verb=~"LIST|GET"}',
metric: 'apiserver_request_duration_seconds_count{component="apiserver",scope=~"namespace|",verb=~"LIST|GET"}',
},
},
},
@ -354,10 +354,10 @@
indicator: {
latency: {
success: {
metric: 'apiserver_request_duration_seconds_bucket{job="apiserver",scope=~"cluster|",verb=~"LIST|GET",le="5"}',
metric: 'apiserver_request_duration_seconds_bucket{component="apiserver",scope=~"cluster|",verb=~"LIST|GET",le="5"}',
},
total: {
metric: 'apiserver_request_duration_seconds_count{job="apiserver",scope=~"cluster|",verb=~"LIST|GET"}',
metric: 'apiserver_request_duration_seconds_count{component="apiserver",scope=~"cluster|",verb=~"LIST|GET"}',
},
},
},
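These Pyrra objectives now select API server metrics by component="apiserver" rather than job="apiserver". A ratio indicator of this shape is, conceptually, turned into an error-rate query; a sketch of one indicator with the idea spelled out in a comment (not Pyrra's exact generated expression):

{
  indicator: {
    ratio: {
      errors: { metric: 'apiserver_request_total{component="apiserver",verb=~"LIST|GET",code=~"5.."}' },
      total: { metric: 'apiserver_request_total{component="apiserver",verb=~"LIST|GET"}' },
    },
  },
  // Conceptually: sum(rate(errors[window])) / sum(rate(total[window])),
  // evaluated against the objective's error budget.
}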

View file

@ -20,7 +20,7 @@ local defaults = {
kubeApiserverSelector: 'job="apiserver"',
podLabel: 'pod',
runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/kubernetes/%s',
diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"',
diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"',
hostNetworkInterfaceSelector: 'device!~"veth.+"',
},
},

View file

@ -15,6 +15,10 @@ local defaults = {
},
listenAddress:: '127.0.0.1',
filesystemMountPointsExclude:: '^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
// NOTE: ignore veth network interface associated with containers.
// OVN renames veth.* to <rand-hex>@if<X> where X is /sys/class/net/<if>/ifindex
// thus the [a-f0-9]{15} regex below
ignoredNetworkDevices:: '^(veth.*|[a-f0-9]{15})$',
port:: 9100,
commonLabels:: {
'app.kubernetes.io/name': defaults.name,
@ -41,7 +45,7 @@ local defaults = {
fsSpaceFillingUpWarningThreshold: 15,
// Send critical alert only after (imageGCHighThresholdPercent + 5) is hit, but filesystem is not freed up for a prolonged duration.
fsSpaceFillingUpCriticalThreshold: 10,
diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"',
diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"',
runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/node/%s',
},
},
@ -197,14 +201,12 @@ function(params) {
'--web.listen-address=' + std.join(':', [ne._config.listenAddress, std.toString(ne._config.port)]),
'--path.sysfs=/host/sys',
'--path.rootfs=/host/root',
'--path.udev.data=/host/root/run/udev/data',
'--no-collector.wifi',
'--no-collector.hwmon',
'--collector.filesystem.mount-points-exclude=' + ne._config.filesystemMountPointsExclude,
// NOTE: ignore veth network interface associated with containers.
// OVN renames veth.* to <rand-hex>@if<X> where X is /sys/class/net/<if>/ifindex
// thus the [a-f0-9]{15} regex below
'--collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$',
'--collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$',
'--collector.netclass.ignored-devices=' + ne._config.ignoredNetworkDevices,
'--collector.netdev.device-exclude=' + ne._config.ignoredNetworkDevices,
],
volumeMounts: [
{ name: 'sys', mountPath: '/host/sys', mountPropagation: 'HostToContainer', readOnly: true },

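The netclass/netdev exclude pattern is now a configurable field rather than a hard-coded flag value, so clusters with differently named virtual interfaces can override it. A sketch of such an override, assuming the component is vendored under the usual kube-prometheus path; the extra bridge pattern is purely illustrative:

// Hypothetical override of the new ignoredNetworkDevices parameter.
local nodeExporter = import 'kube-prometheus/components/node-exporter.libsonnet';

nodeExporter({
  namespace: 'monitoring',
  version: '1.5.0',
  image: 'quay.io/prometheus/node-exporter:v1.5.0',
  // Also skip br-* bridge interfaces in addition to the defaults (illustrative).
  ignoredNetworkDevices: '^(veth.*|br-.*|[a-f0-9]{15})$',
})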
View file

@ -64,7 +64,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "master"
"version": "main"
},
{
"source": {
@ -73,7 +73,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "master"
"version": "main"
},
{
"source": {

View file

@ -40,7 +40,7 @@ local utils = import './lib/utils.libsonnet';
alertmanager: 'quay.io/prometheus/alertmanager:v' + $.values.common.versions.alertmanager,
blackboxExporter: 'quay.io/prometheus/blackbox-exporter:v' + $.values.common.versions.blackboxExporter,
grafana: 'grafana/grafana:' + $.values.common.versions.grafana,
kubeStateMetrics: 'k8s.gcr.io/kube-state-metrics/kube-state-metrics:v' + $.values.common.versions.kubeStateMetrics,
kubeStateMetrics: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v' + $.values.common.versions.kubeStateMetrics,
nodeExporter: 'quay.io/prometheus/node-exporter:v' + $.values.common.versions.nodeExporter,
prometheus: 'quay.io/prometheus/prometheus:v' + $.values.common.versions.prometheus,
prometheusAdapter: 'registry.k8s.io/prometheus-adapter/prometheus-adapter:v' + $.values.common.versions.prometheusAdapter,

View file

@ -1,13 +1,13 @@
{
"alertmanager": "0.24.0",
"blackboxExporter": "0.22.0",
"grafana": "9.1.6",
"kubeStateMetrics": "2.6.0",
"nodeExporter": "1.4.0",
"prometheus": "2.38.0",
"alertmanager": "0.25.0",
"blackboxExporter": "0.23.0",
"grafana": "9.3.2",
"kubeStateMetrics": "2.7.0",
"nodeExporter": "1.5.0",
"prometheus": "2.41.0",
"prometheusAdapter": "0.10.0",
"prometheusOperator": "0.59.2",
"kubeRbacProxy": "0.13.0",
"prometheusOperator": "0.61.1",
"kubeRbacProxy": "0.14.0",
"configmapReload": "0.5.0",
"pyrra": "0.4.4"
"pyrra": "0.5.2"
}
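These version strings are concatenated onto the registry paths shown in the previous hunk to form the final image references. A minimal sketch of that composition:

local versions = { kubeStateMetrics: '2.7.0', nodeExporter: '1.5.0' };  // from the file above

{
  kubeStateMetrics: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v' + versions.kubeStateMetrics,
  // -> registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
  nodeExporter: 'quay.io/prometheus/node-exporter:v' + versions.nodeExporter,
  // -> quay.io/prometheus/node-exporter:v1.5.0
}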

View file

@ -4,7 +4,13 @@ local defaults = {
namespace: error 'must provide namespace',
version: error 'must provide version',
image: error 'must provide admission webhook image',
port: 8443,
// The name of the Secret containing the TLS certificate and key of the admission webhook service.
tlsSecretName: error 'must provide tlsSecretName',
// The Secret's key containing the TLS certificate.
tlsCertRef: 'tls.crt',
// The Secret's key containing the TLS private key.
tlsPrivateKeyRef: 'tls.key',
port: 443,
replicas: 2,
resources: {
limits: { cpu: '200m', memory: '200Mi' },
@ -54,15 +60,28 @@ function(params) {
name: aw._config.name,
image: aw._config.image,
ports: [{
containerPort: aw._config.port,
containerPort: 8443,
name: 'https',
}],
args: [
'--web.enable-tls=true',
'--web.cert-file=/etc/tls/private/tls.crt',
'--web.key-file=/etc/tls/private/tls.key',
],
resources: aw._config.resources,
terminationMessagePolicy: 'FallbackToLogsOnError',
securityContext: {
allowPrivilegeEscalation: false,
readOnlyRootFilesystem: true,
capabilities: { drop: ['ALL'] },
},
volumeMounts: [
{
mountPath: '/etc/tls/private',
name: 'tls-certificates',
readOnly: true,
},
],
};
{
apiVersion: 'apps/v1',
@ -86,6 +105,42 @@ function(params) {
},
serviceAccountName: aw._config.name,
automountServiceAccountToken: false,
volumes: [{
name: 'tls-certificates',
secret: {
secretName: aw._config.tlsSecretName,
items: [{
key: aw._config.tlsCertRef,
path: 'tls.crt',
}, {
key: aw._config.tlsPrivateKeyRef,
path: 'tls.key',
}],
},
}],
},
},
} + if aw._config.replicas > 1 then {
// configure hard anti-affinity + rolling update for proper HA.
template+: {
spec+: {
affinity: {
podAntiAffinity: {
requiredDuringSchedulingIgnoredDuringExecution: [{
namespaces: [aw._config.namespace],
topologyKey: 'kubernetes.io/hostname',
labelSelector: {
matchLabels: aw._config.selectorLabels,
},
}],
},
},
},
},
strategy: {
rollingUpdate: {
maxUnavailable: 1,
},
},
},
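The admission webhook component now requires a tlsSecretName, listens on container port 8443 behind a Service port of 443, and mounts the certificate and key from that Secret; with more than one replica it also gains hard anti-affinity and a rolling-update strategy. A sketch of instantiating it, where the import path, image and Secret name are illustrative assumptions, not taken from this repository:

// Hypothetical instantiation of the admission-webhook component shown above.
local admissionWebhook = import 'admission-webhook.libsonnet';  // path depends on how it is vendored

admissionWebhook({
  name: 'prometheus-operator-admission-webhook',
  namespace: 'monitoring',
  version: '0.61.1',
  image: 'quay.io/prometheus-operator/admission-webhook:v0.61.1',  // illustrative
  tlsSecretName: 'admission-webhook-tls',  // Secret holding tls.crt / tls.key, e.g. issued by cert-manager
  replicas: 2,
})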

View file

@ -341,7 +341,7 @@
"description": "TLS configuration",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -391,7 +391,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -772,7 +772,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -822,7 +822,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1249,7 +1249,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1299,7 +1299,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1717,7 +1717,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1767,7 +1767,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2275,7 +2275,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2325,7 +2325,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2694,7 +2694,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2744,7 +2744,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -3179,7 +3179,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -3229,7 +3229,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -3613,7 +3613,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -3663,7 +3663,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -3998,7 +3998,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4048,7 +4048,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4430,7 +4430,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4480,7 +4480,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4601,6 +4601,13 @@
"route": {
"description": "The Alertmanager route definition for alerts matching the resource's namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route.",
"properties": {
"activeTimeIntervals": {
"description": "ActiveTimeIntervals is a list of MuteTimeInterval names when this route should be active.",
"items": {
"type": "string"
},
"type": "array"
},
"continue": {
"description": "Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator.",
"type": "boolean"

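activeTimeIntervals is the counterpart of muteTimeIntervals: the route only matches while one of the referenced time intervals is active. A sketch of a route fragment using both fields against the schema above, with illustrative receiver and interval names:

{
  "route": {
    "receiver": "on-call",
    "activeTimeIntervals": ["business-hours"],
    "muteTimeIntervals": ["maintenance-window"],
    "continue": false
  }
}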
View file

@ -214,7 +214,7 @@
description: 'TLS configuration',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -264,7 +264,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -643,7 +643,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -693,7 +693,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -1115,7 +1115,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -1165,7 +1165,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -1580,7 +1580,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -1630,7 +1630,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -2134,7 +2134,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -2184,7 +2184,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -2552,7 +2552,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -2602,7 +2602,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3035,7 +3035,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3085,7 +3085,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3467,7 +3467,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3517,7 +3517,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3851,7 +3851,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -3901,7 +3901,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -4280,7 +4280,7 @@
description: 'TLS configuration for the client.',
properties: {
ca: {
description: 'Struct containing the CA cert to use for the targets.',
description: 'Certificate authority used when verifying server certificates.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -4330,7 +4330,7 @@
type: 'object',
},
cert: {
description: 'Struct containing the client cert file for the targets.',
description: 'Client certificate to present when doing client-authentication.',
properties: {
configMap: {
description: 'ConfigMap containing data to use for the targets.',
@ -4451,6 +4451,13 @@
route: {
description: "The Alertmanager route definition for alerts matching the resource's namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route.",
properties: {
activeTimeIntervals: {
description: 'ActiveTimeIntervals is a list of TimeInterval names when this route should be active.',
items: {
type: 'string',
},
type: 'array',
},
continue: {
description: 'Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator.',
type: 'boolean',
@ -4503,7 +4510,7 @@
type: 'array',
},
muteTimeIntervals: {
description: "Note: this comment applies to the field definition above but appears below otherwise it gets included in the generated manifest. CRD schema doesn't support self-referential types for now (see https://github.com/kubernetes/kubernetes/issues/62872). We have to use an alternative type to circumvent the limitation. The downside is that the Kube API can't validate the data beyond the fact that it is a valid JSON representation. MuteTimeIntervals is a list of MuteTimeInterval names that will mute this route when matched,",
description: "Note: this comment applies to the field definition above but appears below otherwise it gets included in the generated manifest. CRD schema doesn't support self-referential types for now (see https://github.com/kubernetes/kubernetes/issues/62872). We have to use an alternative type to circumvent the limitation. The downside is that the Kube API can't validate the data beyond the fact that it is a valid JSON representation. MuteTimeIntervals is a list of TimeInterval names that will mute this route when matched.",
items: {
type: 'string',
},

View file

@ -734,6 +734,21 @@
},
"type": "object"
},
"alertmanagerConfigMatcherStrategy": {
"description": "The AlertmanagerConfigMatcherStrategy defines how AlertmanagerConfig objects match the alerts. In the future more options may be added.",
"properties": {
"type": {
"default": "OnNamespace",
"description": "If set to `OnNamespace`, the operator injects a label matcher matching the namespace of the AlertmanagerConfig object for all its routes and inhibition rules. `None` will not add any additional matchers other than the ones specified in the AlertmanagerConfig. Default is `OnNamespace`.",
"enum": [
"OnNamespace",
"None"
],
"type": "string"
}
},
"type": "object"
},
"alertmanagerConfigNamespaceSelector": {
"description": "Namespaces to be selected for AlertmanagerConfig discovery. If nil, only check own namespace.",
"properties": {
@ -1047,7 +1062,7 @@
"description": "TLS configuration for the client.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1097,7 +1112,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1283,7 +1298,7 @@
"type": "array"
},
"configSecret": {
"description": "ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains the configuration for this Alertmanager instance. If empty, it defaults to `alertmanager-<alertmanager-name>`. \n The Alertmanager configuration should be available under the `alertmanager.yaml` key. Additional keys from the original secret are copied to the generated secret. \n If either the secret or the `alertmanager.yaml` key is missing, the operator provisions an Alertmanager configuration with one empty receiver (effectively dropping alert notifications).",
"description": "ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains the configuration for this Alertmanager instance. If empty, it defaults to `alertmanager-<alertmanager-name>`. \n The Alertmanager configuration should be available under the `alertmanager.yaml` key. Additional keys from the original secret are copied to the generated secret and mounted into the `/etc/alertmanager/config` directory in the `alertmanager` container. \n If either the secret or the `alertmanager.yaml` key is missing, the operator provisions a minimal Alertmanager configuration with one empty receiver (effectively dropping alert notifications).",
"type": "string"
},
"containers": {
@ -2457,6 +2472,16 @@
"description": "Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure the Prometheus Operator knows what version of Alertmanager is being configured.",
"type": "string"
},
"imagePullPolicy": {
"description": "Image pull policy for the 'alertmanager', 'init-config-reloader' and 'config-reloader' containers. See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy for more details.",
"enum": [
"",
"Always",
"Never",
"IfNotPresent"
],
"type": "string"
},
"imagePullSecrets": {
"description": "An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod",
"items": {
@ -3627,7 +3652,7 @@
"type": "string"
},
"minReadySeconds": {
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field and requires enabling StatefulSetMinReadySeconds feature gate.",
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate.",
"format": "int32",
"type": "integer"
},
@ -3870,7 +3895,7 @@
"type": "boolean"
},
"emptyDir": {
"description": "EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"description": "EmptyDirVolumeSource to be used by the StatefulSet. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"properties": {
"medium": {
"description": "medium represents what type of storage medium should back this directory. The default is \"\" which means to use the node's default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir",
@ -3893,7 +3918,7 @@
"type": "object"
},
"ephemeral": {
"description": "EphemeralVolumeSource to be used by the Prometheus StatefulSets. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"description": "EphemeralVolumeSource to be used by the StatefulSet. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"properties": {
"volumeClaimTemplate": {
"description": "Will be used to create a stand-alone PVC to provision the volume. The pod in which this EphemeralVolumeSource is embedded will be the owner of the PVC, i.e. the PVC will be deleted together with the pod. The name of the PVC will be `<pod name>-<volume name>` where `<volume name>` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). \n An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until the unrelated PVC is removed. If such a pre-created PVC is meant to be used by the pod, the PVC has to updated with an owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. \n This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. \n Required, must not be nil.",
@ -4064,7 +4089,7 @@
"type": "object"
},
"volumeClaimTemplate": {
"description": "A PVC spec to be used by the Prometheus StatefulSets.",
"description": "A PVC spec to be used by the StatefulSet. The easiest way to use a volume that cannot be automatically provisioned (for whatever reason) is to use a label selector alongside manually created PersistentVolumes.",
"properties": {
"apiVersion": {
"description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",

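Two new Alertmanager spec fields show up in this hunk: alertmanagerConfigMatcherStrategy (OnNamespace by default, None to skip the injected namespace matcher) and imagePullPolicy for the alertmanager and config-reloader containers. A sketch of a spec fragment using both, with illustrative values:

{
  "spec": {
    "replicas": 3,
    "imagePullPolicy": "IfNotPresent",
    "alertmanagerConfigMatcherStrategy": { "type": "None" }
  }
}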
View file

@ -45,7 +45,7 @@
"description": "Specification of desired Pod selection for target discovery by Prometheus.",
"properties": {
"attachMetadata": {
"description": "Attaches node metadata to discovered targets. Only valid for role: pod. Only valid in Prometheus versions 2.35.0 and newer.",
"description": "Attaches node metadata to discovered targets. Requires Prometheus v2.35.0 and above.",
"properties": {
"node": {
"description": "When set to true, Prometheus must have permissions to get Nodes.",
@ -503,7 +503,7 @@
"description": "TLS configuration to use when scraping the endpoint.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -553,7 +553,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",

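attachMetadata lets Prometheus attach node metadata to targets discovered through this PodMonitor (the ServiceMonitor CRD at the end of this commit gains the same field, requiring v2.37.0 there). A minimal spec sketch with an illustrative selector:

{
  "spec": {
    "attachMetadata": { "node": true },
    "selector": { "matchLabels": { "app": "example" } },
    "podMetricsEndpoints": [ { "port": "metrics" } ]
  }
}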
View file

@ -610,7 +610,7 @@
"description": "TLS configuration to use when scraping the endpoint.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -660,7 +660,7 @@
"type": "object"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",

View file

@ -875,10 +875,64 @@
},
"type": "object"
},
"basicAuth": {
"description": "BasicAuth allow an endpoint to authenticate over basic authentication",
"properties": {
"password": {
"description": "The secret in the service monitor namespace that contains the password for authentication.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object",
"x-kubernetes-map-type": "atomic"
},
"username": {
"description": "The secret in the service monitor namespace that contains the username for authentication.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object",
"x-kubernetes-map-type": "atomic"
}
},
"type": "object"
},
"bearerTokenFile": {
"description": "BearerTokenFile to read from filesystem to use when authenticating to Alertmanager.",
"type": "string"
},
"enableHttp2": {
"description": "Whether to enable HTTP2.",
"type": "boolean"
},
"name": {
"description": "Name of Endpoints object in Namespace.",
"type": "string"
@ -916,7 +970,7 @@
"description": "TLS Config to use for alertmanager connection.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -970,7 +1024,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1185,7 +1239,7 @@
"description": "TLS Config to use for accessing apiserver.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -1239,7 +1293,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2648,6 +2702,16 @@
"description": "Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure the Prometheus Operator knows what version of Prometheus is being configured.",
"type": "string"
},
"imagePullPolicy": {
"description": "Image pull policy for the 'prometheus', 'init-config-reloader' and 'config-reloader' containers. See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy for more details.",
"enum": [
"",
"Always",
"Never",
"IfNotPresent"
],
"type": "string"
},
"imagePullSecrets": {
"description": "An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod",
"items": {
@ -3818,7 +3882,7 @@
"type": "string"
},
"minReadySeconds": {
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field and requires enabling StatefulSetMinReadySeconds feature gate.",
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate.",
"format": "int32",
"type": "integer"
},
@ -4199,6 +4263,10 @@
"description": "File to read bearer token for remote read.",
"type": "string"
},
"filterExternalLabels": {
"description": "Whether to use the external labels as selectors for the remote read endpoint. Requires Prometheus v2.34.0 and above.",
"type": "boolean"
},
"headers": {
"additionalProperties": {
"type": "string"
@ -4336,7 +4404,7 @@
"description": "TLS Config to use for remote read.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4390,7 +4458,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4840,7 +4908,7 @@
"description": "TLS Config to use for remote write.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -4894,7 +4962,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -5477,7 +5545,7 @@
"type": "boolean"
},
"emptyDir": {
"description": "EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"description": "EmptyDirVolumeSource to be used by the StatefulSet. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"properties": {
"medium": {
"description": "medium represents what type of storage medium should back this directory. The default is \"\" which means to use the node's default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir",
@ -5500,7 +5568,7 @@
"type": "object"
},
"ephemeral": {
"description": "EphemeralVolumeSource to be used by the Prometheus StatefulSets. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"description": "EphemeralVolumeSource to be used by the StatefulSet. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"properties": {
"volumeClaimTemplate": {
"description": "Will be used to create a stand-alone PVC to provision the volume. The pod in which this EphemeralVolumeSource is embedded will be the owner of the PVC, i.e. the PVC will be deleted together with the pod. The name of the PVC will be `<pod name>-<volume name>` where `<volume name>` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). \n An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until the unrelated PVC is removed. If such a pre-created PVC is meant to be used by the pod, the PVC has to updated with an owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. \n This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. \n Required, must not be nil.",
@ -5671,7 +5739,7 @@
"type": "object"
},
"volumeClaimTemplate": {
"description": "A PVC spec to be used by the Prometheus StatefulSets.",
"description": "A PVC spec to be used by the StatefulSet. The easiest way to use a volume that cannot be automatically provisioned (for whatever reason) is to use a label selector alongside manually created PersistentVolumes.",
"properties": {
"apiVersion": {
"description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",
@ -5996,7 +6064,7 @@
"description": "GRPCServerTLSConfig configures the TLS parameters for the gRPC server providing the StoreAPI. Note: Currently only the CAFile, CertFile, and KeyFile fields are supported. Maps to the '--grpc-server-tls-*' CLI args.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -6050,7 +6118,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",


@ -47,28 +47,37 @@
"groups": {
"description": "Content of Prometheus rule file",
"items": {
"description": "RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response",
"description": "RuleGroup is a list of sequentially evaluated recording and alerting rules.",
"properties": {
"interval": {
"description": "Interval determines how often rules in the group are evaluated.",
"pattern": "^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$",
"type": "string"
},
"name": {
"description": "Name of the rule group.",
"minLength": 1,
"type": "string"
},
"partial_response_strategy": {
"description": "PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response",
"pattern": "^(?i)(abort|warn)?$",
"type": "string"
},
"rules": {
"description": "List of alerting and recording rules.",
"items": {
"description": "Rule describes an alerting or recording rule See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule",
"properties": {
"alert": {
"description": "Name of the alert. Must be a valid label value. Only one of `record` and `alert` must be set.",
"type": "string"
},
"annotations": {
"additionalProperties": {
"type": "string"
},
"description": "Annotations to add to each alert. Only valid for alerting rules.",
"type": "object"
},
"expr": {
@ -80,18 +89,23 @@
"type": "string"
}
],
"description": "PromQL expression to evaluate.",
"x-kubernetes-int-or-string": true
},
"for": {
"description": "Alerts are considered firing once they have been returned for this long.",
"pattern": "^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$",
"type": "string"
},
"labels": {
"additionalProperties": {
"type": "string"
},
"description": "Labels to add or overwrite.",
"type": "object"
},
"record": {
"description": "Name of the time series to output to. Must be a valid metric name. Only one of `record` and `alert` must be set.",
"type": "string"
}
},
@ -109,7 +123,11 @@
],
"type": "object"
},
"type": "array"
"type": "array",
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
}
},
"type": "object"


@ -44,6 +44,16 @@
"spec": {
"description": "Specification of desired Service selection for target discovery by Prometheus.",
"properties": {
"attachMetadata": {
"description": "Attaches node metadata to discovered targets. Requires Prometheus v2.37.0 and above.",
"properties": {
"node": {
"description": "When set to true, Prometheus must have permissions to get Nodes.",
"type": "boolean"
}
},
"type": "object"
},
"endpoints": {
"description": "A list of endpoints allowed as part of this ServiceMonitor.",
"items": {
@ -161,6 +171,10 @@
"description": "Whether to enable HTTP2.",
"type": "boolean"
},
"filterRunning": {
"description": "Drop pods that are not running. (Failed, Succeeded). Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase",
"type": "boolean"
},
"followRedirects": {
"description": "FollowRedirects configures whether scrape requests follow HTTP 3xx redirects.",
"type": "boolean"
@ -457,7 +471,7 @@
"description": "TLS configuration to use when scraping the endpoint",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -511,7 +525,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",


@ -1976,7 +1976,7 @@
"description": "GRPCServerTLSConfig configures the gRPC server from which Thanos Querier reads recorded rule data. Note: Currently only the CAFile, CertFile, and KeyFile fields are supported. Maps to the '--grpc-server-tls-*' CLI args.",
"properties": {
"ca": {
"description": "Struct containing the CA cert to use for the targets.",
"description": "Certificate authority used when verifying server certificates.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2030,7 +2030,7 @@
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"description": "Client certificate to present when doing client-authentication.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
@ -2153,6 +2153,16 @@
"description": "Thanos container image URL.",
"type": "string"
},
"imagePullPolicy": {
"description": "Image pull policy for the 'thanos', 'init-config-reloader' and 'config-reloader' containers. See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy for more details.",
"enum": [
"",
"Always",
"Never",
"IfNotPresent"
],
"type": "string"
},
"imagePullSecrets": {
"description": "An optional list of references to secrets in the same namespace to use for pulling thanos images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod",
"items": {
@ -3330,7 +3340,7 @@
"type": "string"
},
"minReadySeconds": {
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field and requires enabling StatefulSetMinReadySeconds feature gate.",
"description": "Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate.",
"format": "int32",
"type": "integer"
},
@ -3725,7 +3735,7 @@
"type": "boolean"
},
"emptyDir": {
"description": "EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"description": "EmptyDirVolumeSource to be used by the StatefulSet. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"properties": {
"medium": {
"description": "medium represents what type of storage medium should back this directory. The default is \"\" which means to use the node's default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir",
@ -3748,7 +3758,7 @@
"type": "object"
},
"ephemeral": {
"description": "EphemeralVolumeSource to be used by the Prometheus StatefulSets. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"description": "EphemeralVolumeSource to be used by the StatefulSet. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes",
"properties": {
"volumeClaimTemplate": {
"description": "Will be used to create a stand-alone PVC to provision the volume. The pod in which this EphemeralVolumeSource is embedded will be the owner of the PVC, i.e. the PVC will be deleted together with the pod. The name of the PVC will be `<pod name>-<volume name>` where `<volume name>` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). \n An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until the unrelated PVC is removed. If such a pre-created PVC is meant to be used by the pod, the PVC has to updated with an owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. \n This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. \n Required, must not be nil.",
@ -3919,7 +3929,7 @@
"type": "object"
},
"volumeClaimTemplate": {
"description": "A PVC spec to be used by the Prometheus StatefulSets.",
"description": "A PVC spec to be used by the StatefulSet. The easiest way to use a volume that cannot be automatically provisioned (for whatever reason) is to use a label selector alongside manually created PersistentVolumes.",
"properties": {
"apiVersion": {
"description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",
@ -4352,6 +4362,10 @@
"description": "TracingConfig specifies the path of the tracing configuration file. When used alongside with TracingConfig, TracingConfigFile takes precedence.",
"type": "string"
},
"version": {
"description": "Version of Thanos to be deployed.",
"type": "string"
},
"volumes": {
"description": "Volumes allows configuration of additional volumes on the output StatefulSet definition. Volumes specified will be appended to other volumes that are generated as a result of StorageSpec objects.",
"items": {


@ -8,11 +8,11 @@
alert: 'NodeFilesystemSpaceFillingUp',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 24*60*60) < 0
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',
@ -28,11 +28,11 @@
alert: 'NodeFilesystemSpaceFillingUp',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',
@ -48,9 +48,9 @@
alert: 'NodeFilesystemAlmostOutOfSpace',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '30m',
@ -66,9 +66,9 @@
alert: 'NodeFilesystemAlmostOutOfSpace',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '30m',
@ -84,11 +84,11 @@
alert: 'NodeFilesystemFilesFillingUp',
expr: |||
(
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 40
and
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 24*60*60) < 0
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',
@ -104,11 +104,11 @@
alert: 'NodeFilesystemFilesFillingUp',
expr: |||
(
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 20
and
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',
@ -124,9 +124,9 @@
alert: 'NodeFilesystemAlmostOutOfFiles',
expr: |||
(
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 5
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 5
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',
@ -142,9 +142,9 @@
alert: 'NodeFilesystemAlmostOutOfFiles',
expr: |||
(
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 3
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 3
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
)
||| % $._config,
'for': '1h',


@ -16,6 +16,14 @@
// alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'.
fsSelector: 'fstype!=""',
// Select the mountpoint for filesystem-related queries. If left
// empty, all mountpoints are selected. For example if you have a
// special purpose tmpfs instance that has a fixed size and will
// always be 100% full, but you still want alerts and dashboards for
// other tmpfs instances, you can exclude those by mountpoint prefix
// like so: 'mountpoint!~"/var/lib/foo.*"'.
fsMountpointSelector: 'mountpoint!=""',
// Select the device for disk-related queries. If left empty, all
// devices are selected. If you have unusual devices you don't
// want to include in dashboards and alerting, you can exclude
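
In practice a consumer of the node-mixin could set the new selector like this; the import path is an assumption that depends on how the mixin is vendored, while the example selector value is taken from the comment above.

// Hypothetical vendoring path; adjust to your jsonnet-bundler layout.
local nodeMixin = import 'node-mixin/mixin.libsonnet';

nodeMixin {
  _config+:: {
    // Ignore a fixed-size tmpfs instance in space/inode alerts and dashboards.
    fsMountpointSelector: 'mountpoint!~"/var/lib/foo.*"',
  },
}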


@ -307,12 +307,12 @@ local diskSpaceUtilisation =
|||
sum without (device) (
max without (fstype, mountpoint) ((
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(clusterLabel)s="$cluster"}
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}
-
node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(clusterLabel)s="$cluster"}
node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}
) != 0)
)
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(clusterLabel)s="$cluster"})))
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"})))
||| % $._config, legendFormat='{{instance}}'
))
)
@ -453,10 +453,10 @@ local diskSpaceUtilisation =
sum (
sum without (device) (
max without (fstype, mountpoint, instance, pod) ((
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}
) != 0)
)
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})))
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s})))
) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config
))


@ -232,7 +232,7 @@ local table = grafana70.panel.table;
.addThresholdStep(color='red', value=0.9)
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s})
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,
@ -240,7 +240,7 @@ local table = grafana70.panel.table;
))
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s})
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,


@ -27,7 +27,7 @@ local template = grafana.template;
instance: { alias: 'Instance' },
version: { alias: 'Version' },
'Value #A': { alias: 'Count', type: 'hidden' },
'Value #B': { alias: 'Uptime' },
'Value #B': { alias: 'Uptime', type: 'number', unit: 's' },
})
)
)


@ -144,24 +144,6 @@
severity: 'critical',
},
},
{
alert: 'ThanosReceiveTrafficBelowThreshold',
annotations: {
description: 'At Thanos Receive {{$labels.job}} in {{$labels.namespace}} , the average 1-hr avg. metrics ingestion rate is {{$value | humanize}}% of 12-hr avg. ingestion rate.',
summary: 'Thanos Receive is experiencing low avg. 1-hr ingestion rate relative to avg. 12-hr ingestion rate.',
},
expr: |||
(
avg_over_time(rate(http_requests_total{%(selector)s, code=~"2..", handler="receive"}[5m])[1h:5m])
/
avg_over_time(rate(http_requests_total{%(selector)s, code=~"2..", handler="receive"}[5m])[12h:5m])
) * 100 < %(ingestionThreshold)s
||| % thanos.receive,
'for': '1h',
labels: {
severity: 'warning',
},
},
],
},
],


@ -96,6 +96,16 @@ local utils = import '../lib/utils.libsonnet';
)
)
)
.addRow(
g.row('Query Concurrency')
.addPanel(
g.panel('Concurrent Capacity', 'Shows available capacity of processing queries in parallel.') +
g.queryPanel(
'max_over_time(thanos_query_concurrent_gate_queries_max{%s}[$__rate_interval]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight{%s}[$__rate_interval])' % [thanos.query.dashboard.selector, thanos.query.dashboard.selector],
'{{job}} - {{pod}}'
)
)
)
.addRow(
g.resourceUtilizationRow(thanos.query.dashboard.selector, thanos.query.dashboard.dimensions)
),


@ -154,7 +154,7 @@ local utils = import '../lib/utils.libsonnet';
.addPanel(
g.panel('Rate of samples received (per tenant, only 2XX)') +
g.queryPanel(
'sum(rate(thanos_receive_write_samples_bucket{%s}[$interval])) by (%s) ' % [
'sum(rate(thanos_receive_write_samples_sum{%s}[$interval])) by (%s) ' % [
utils.joinLabels([thanos.receive.dashboard.tenantSelector, 'code=~"2.."']),
thanos.receive.dashboard.tenantDimensions,
],
@ -164,7 +164,7 @@ local utils = import '../lib/utils.libsonnet';
.addPanel(
g.panel('Rate of samples not written (per tenant and code, non 2XX)') +
g.queryPanel(
'sum(rate(thanos_receive_write_samples_bucket{%s}[$interval])) by (%s) ' % [
'sum(rate(thanos_receive_write_samples_sum{%s}[$interval])) by (%s) ' % [
utils.joinLabels([thanos.receive.dashboard.tenantSelector, 'code!~"2.."']),
tenantWithHttpCodeDimensions,
],


@ -15,6 +15,7 @@ local utils = import '../lib/utils.libsonnet';
[if thanos.store != null then 'store.json']:
local grpcUnarySelector = utils.joinLabels([thanos.store.dashboard.selector, 'grpc_type="unary"']);
local grpcServerStreamSelector = utils.joinLabels([thanos.store.dashboard.selector, 'grpc_type="server_stream"']);
local dataSizeDimensions = utils.joinLabels([thanos.store.dashboard.dimensions, 'data_type']);
g.dashboard(thanos.store.title)
.addRow(
@ -181,12 +182,27 @@ local utils = import '../lib/utils.libsonnet';
g.queryPanel(
[
'thanos_bucket_store_series_data_fetched{%s, quantile="0.99"}' % thanos.store.dashboard.selector,
'sum by (%(dimensions)s) (rate(thanos_bucket_store_series_data_fetched_sum{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_bucket_store_series_data_fetched_count{%(selector)s}[$interval]))' % thanos.store.dashboard,
'sum by (%s) (rate(thanos_bucket_store_series_data_fetched_sum{%s}[$interval])) / sum by (%s) (rate(thanos_bucket_store_series_data_fetched_count{%s}[$interval]))' % [dataSizeDimensions, thanos.store.dashboard.selector, dataSizeDimensions, thanos.store.dashboard.selector],
'thanos_bucket_store_series_data_fetched{%s, quantile="0.50"}' % thanos.store.dashboard.selector,
], [
'P99',
'mean {{job}}',
'P50',
'P99: {{data_type}} / {{job}}',
'mean: {{data_type}} / {{job}}',
'P50: {{data_type}} / {{job}}',
],
) +
{ yaxes: g.yaxes('bytes') }
)
.addPanel(
g.panel('Data Touched', 'Show the size of data touched') +
g.queryPanel(
[
'thanos_bucket_store_series_data_touched{%s, quantile="0.99"}' % thanos.store.dashboard.selector,
'sum by (%s) (rate(thanos_bucket_store_series_data_touched_sum{%s}[$interval])) / sum by (%s) (rate(thanos_bucket_store_series_data_touched_count{%s}[$interval]))' % [dataSizeDimensions, thanos.store.dashboard.selector, dataSizeDimensions, thanos.store.dashboard.selector],
'thanos_bucket_store_series_data_touched{%s, quantile="0.50"}' % thanos.store.dashboard.selector,
], [
'P99: {{data_type}} / {{job}}',
'mean: {{data_type}} / {{job}}',
'P50: {{data_type}} / {{job}}',
],
) +
{ yaxes: g.yaxes('bytes') }


@ -63,7 +63,6 @@
|ThanosReceiveHighHashringFileRefreshFailures|Thanos Receive is failing to refresh hashring file.|Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{$value humanize}} of attempts failed.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures)|
|ThanosReceiveConfigReloadFailure|Thanos Receive has not been able to reload configuration.|Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure)|
|ThanosReceiveNoUpload|Thanos Receive has not uploaded latest data to object storage.|Thanos Receive {{$labels.instance}} has not uploaded latest data to object storage.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload)|
|ThanosReceiveTrafficBelowThreshold|Thanos Receive is experiencing low avg. 1-hr ingestion rate relative to avg. 12-hr ingestion rate.|At Thanos Receive {{$labels.job}} in {{$labels.namespace}} , the average 1-hr avg. metrics ingestion rate is {{$value humanize}}% of 12-hr avg. ingestion rate.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivetrafficbelowthreshold](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivetrafficbelowthreshold)|
## thanos-rule