initial monitoring config

This commit is contained in:
Tobias Brunner 2020-02-11 22:21:17 +01:00
parent dcd94ad157
commit 2120a03aac
827 changed files with 243740 additions and 0 deletions

23
_apps/monitoring.yaml Normal file
View File

@ -0,0 +1,23 @@
# Argo CD Application "monitoring" (lives in the argocd namespace).
# Source: the monitoring/manifests directory of the gitops-tbrnt repository at
# HEAD, read recursively (directory.recurse: true).
# Destination: the "monitoring" namespace on https://kubernetes.default.svc.
# NOTE(review): the resources-finalizer presumably makes Argo CD remove the
# deployed resources when this Application is deleted - confirm against the
# Argo CD documentation before relying on it.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: monitoring
namespace: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
namespace: monitoring
server: https://kubernetes.default.svc
project: default
source:
path: monitoring/manifests
repoURL: https://git.tbrnt.ch/tobru/gitops-tbrnt.git
targetRevision: HEAD
directory:
recurse: true
---
apiVersion: v1
kind: Namespace
metadata:
name: monitoring

17
monitoring/README.md Normal file
View File

@ -0,0 +1,17 @@
# Cluster Monitoring
Source: [kube-prometheus](https://github.com/coreos/kube-prometheus).
## Build
```
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci ./build.sh monitoring.jsonnet
```
## Update libs
```
docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci jb update
```
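After updating the libraries, run the build step above again so the generated manifests are regenerated from the new library versions.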

15
monitoring/build.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Generates the manifests/*.yaml files from a jsonnet file.
#
# Usage: ./build.sh [file.jsonnet]
#   $1  jsonnet file to render; falls back to example.jsonnet when unset.
#
# Needs jsonnet, gojsontoyaml and xargs on PATH, plus the vendored libraries
# in ./vendor (passed to jsonnet via -J).
set -e
set -x
# only exit with zero if all commands of the pipeline exit successfully
set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir -p manifests/setup
# Each line jsonnet prints is treated as the path of a generated JSON file.
# For every path: convert it to <path>.yaml and remove the JSON original.
# - The path is handed to the inner shell as "$1" (the leading `--` fills $0)
#   instead of being spliced into the command string, so unusual characters
#   in a file name cannot be interpreted as shell syntax.
# - `&&` keeps the JSON file when the conversion fails and makes the inner
#   shell exit non-zero, so xargs (and, via pipefail and set -e, this script)
#   reports the failure instead of silently leaving an empty .yaml behind.
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'gojsontoyaml < "$1" > "$1.yaml" && rm -f "$1"' -- {}

View File

@ -0,0 +1,14 @@
{
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"git": {
"remote": "https://github.com/coreos/kube-prometheus",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "master"
}
]
}

View File

@ -0,0 +1,158 @@
{
"dependencies": [
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
},
"version": "c94782cd55fb44df43574505db9ac1c1b7d49c00",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "539a90dbf63c812ad0194d8078dd776868a11c81",
"sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE="
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs",
"subdir": "grafana-builder"
}
},
"version": "7ac7da1a0fe165b68cdb718b2521b560d51bd1f4",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
},
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "c459106d2d2b583dd3a83f6c75eb52abee3af764",
"sum": "CeM3LRgUCUJTolTdMnerfMPGYmhClx7gX5ajrQVEY2Y="
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
},
{
"name": "kube-prometheus",
"source": {
"git": {
"remote": "https://github.com/coreos/kube-prometheus",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "8b0b0bc51435a5f7742307c86235273ab568dffe",
"sum": "NJN0f7veWXOJyM3PNDM6vJQEzpkDxOchU9EVnoSRe6E="
},
{
"name": "kube-state-metrics",
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "30c152b805781b5571ed25b914cc66c615964ffb",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
"name": "kube-state-metrics-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "30c152b805781b5571ed25b914cc66c615964ffb",
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "b2d7f762bd22be3ba5e7d54a1fcecfe1092f214b",
"sum": "NqrJQnQnRDzkCbrHg7L1zX8XPAzfoE4DS2XBEj6WC8g="
},
{
"name": "node-mixin",
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "92ea3c6a3f0ea2d1d55de168e65037e2313f9940",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "40dd13b07420a044cc1b0ca57f639c572583d9c1",
"sum": "u1YS9CVuBTcw2vks0PZbLb1gtlI/7bVGDVBZsjWFLTw="
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "89f35ef22db0dc24c523bf8be473bcbcf9ac81f6",
"sum": "KCO153lAOWmWfoj3rQGhLB+8UmyvQ2Bghu/ewDqVum4="
},
{
"name": "promgrafonnet",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet"
}
},
"version": "b2d7f762bd22be3ba5e7d54a1fcecfe1092f214b",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
"name": "slo-libsonnet",
"source": {
"git": {
"remote": "https://github.com/metalmatze/slo-libsonnet",
"subdir": "slo-libsonnet"
}
},
"version": "437c402c5f3ad86c3c16db8471f1649284fef0ee",
"sum": "2Zcyku1f558VrUpMaJnI78fahDksPLcS1idmxxwcQ7Q="
}
]
}

View File

@ -0,0 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
labels:
alertmanager: main
name: main
namespace: monitoring
spec:
baseImage: quay.io/prometheus/alertmanager
nodeSelector:
kubernetes.io/os: linux
replicas: 3
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
version: v0.20.0

View File

@ -0,0 +1,42 @@
# Secret "alertmanager-main": carries the Alertmanager configuration as
# stringData under the key alertmanager.yaml.
# Routing: alerts are grouped by namespace and go to "Default" unless they
# match alertname=Watchdog (-> "Watchdog") or severity=critical (-> "Critical").
# Inhibition: a critical alert mutes warning/info alerts with the same
# alertname; a warning alert mutes info alerts with the same alertname.
# NOTE(review): all three receivers declare only a name - no notification
# integration is configured in this file, so routed alerts presumably are not
# delivered anywhere until integrations are added. Confirm this is intended.
apiVersion: v1
data: {}
kind: Secret
metadata:
name: alertmanager-main
namespace: monitoring
stringData:
alertmanager.yaml: |-
"global":
"resolve_timeout": "5m"
"inhibit_rules":
- "equal":
- "alertname"
"source_match":
"severity": "critical"
"target_match_re":
"severity": "warning|info"
- "equal":
- "alertname"
"source_match":
"severity": "warning"
"target_match_re":
"severity": "info"
"receivers":
- "name": "Default"
- "name": "Watchdog"
- "name": "Critical"
"route":
"group_by":
- "namespace"
"group_interval": "5m"
"group_wait": "30s"
"receiver": "Default"
"repeat_interval": "12h"
"routes":
- "match":
"alertname": "Watchdog"
"receiver": "Watchdog"
- "match":
"severity": "critical"
"receiver": "Critical"
type: Opaque

View File

@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
labels:
alertmanager: main
name: alertmanager-main
namespace: monitoring
spec:
ports:
- name: web
port: 9093
targetPort: web
selector:
alertmanager: main
app: alertmanager
sessionAffinity: ClientIP

View File

@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: alertmanager-main
namespace: monitoring

View File

@ -0,0 +1,14 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
k8s-app: alertmanager
name: alertmanager
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
selector:
matchLabels:
alertmanager: main

View File

@ -0,0 +1,8 @@
apiVersion: v1
data:
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
kind: Secret
metadata:
name: grafana-datasources
namespace: monitoring
type: Opaque

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
apiVersion: v1
data:
dashboards.yaml: |-
{
"apiVersion": 1,
"providers": [
{
"folder": "",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
},
"orgId": 1,
"type": "file"
}
]
}
kind: ConfigMap
metadata:
name: grafana-dashboards
namespace: monitoring

View File

@ -0,0 +1,203 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: grafana
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: grafana
template:
metadata:
labels:
app: grafana
spec:
containers:
- image: grafana/grafana:6.6.0
name: grafana
ports:
- containerPort: 3000
name: http
readinessProbe:
httpGet:
path: /api/health
port: http
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
volumeMounts:
- mountPath: /var/lib/grafana
name: grafana-storage
readOnly: false
- mountPath: /etc/grafana/provisioning/datasources
name: grafana-datasources
readOnly: false
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/cluster-total
name: grafana-dashboard-cluster-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
name: grafana-dashboard-k8s-resources-node
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload
name: grafana-dashboard-k8s-resources-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
name: grafana-dashboard-namespace-by-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
name: grafana-dashboard-node-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/nodes
name: grafana-dashboard-nodes
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pods
name: grafana-dashboard-pods
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus
name: grafana-dashboard-prometheus
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/proxy
name: grafana-dashboard-proxy
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
# Schedule only onto Linux nodes. Uses the stable kubernetes.io/os label (the
# beta.kubernetes.io/os variant is deprecated) so this Deployment matches the
# node selectors of the other workloads in this stack.
nodeSelector:
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: grafana
volumes:
- emptyDir: {}
name: grafana-storage
- name: grafana-datasources
secret:
secretName: grafana-datasources
- configMap:
name: grafana-dashboards
name: grafana-dashboards
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
- configMap:
name: grafana-dashboard-cluster-total
name: grafana-dashboard-cluster-total
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
- configMap:
name: grafana-dashboard-k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
- configMap:
name: grafana-dashboard-k8s-resources-node
name: grafana-dashboard-k8s-resources-node
- configMap:
name: grafana-dashboard-k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
- configMap:
name: grafana-dashboard-k8s-resources-workload
name: grafana-dashboard-k8s-resources-workload
- configMap:
name: grafana-dashboard-k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
- configMap:
name: grafana-dashboard-namespace-by-workload
name: grafana-dashboard-namespace-by-workload
- configMap:
name: grafana-dashboard-node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
- configMap:
name: grafana-dashboard-node-rsrc-use
name: grafana-dashboard-node-rsrc-use
- configMap:
name: grafana-dashboard-nodes
name: grafana-dashboard-nodes
- configMap:
name: grafana-dashboard-persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- configMap:
name: grafana-dashboard-pods
name: grafana-dashboard-pods
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
- configMap:
name: grafana-dashboard-prometheus
name: grafana-dashboard-prometheus
- configMap:
name: grafana-dashboard-proxy
name: grafana-dashboard-proxy
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: Service
metadata:
labels:
app: grafana
name: grafana
namespace: monitoring
spec:
ports:
- name: http
port: 3000
targetPort: http
selector:
app: grafana

View File

@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: grafana
namespace: monitoring

View File

@ -0,0 +1,12 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: grafana
namespace: monitoring
spec:
endpoints:
- interval: 15s
port: http
selector:
matchLabels:
app: grafana

View File

@ -0,0 +1,117 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs:
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
- ingresses
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch

View File

@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: monitoring

View File

@ -0,0 +1,44 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
template:
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
spec:
containers:
- image: quay.io/coreos/kube-state-metrics:v1.9.4
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
name: kube-state-metrics
ports:
- containerPort: 8080
name: http-metrics
- containerPort: 8081
name: telemetry
readinessProbe:
httpGet:
path: /
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
securityContext:
runAsUser: 65534
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics

View File

@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
namespace: monitoring
spec:
clusterIP: None
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
- name: telemetry
port: 8081
targetPort: telemetry
selector:
app.kubernetes.io/name: kube-state-metrics

View File

@ -0,0 +1,8 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
namespace: monitoring

View File

@ -0,0 +1,24 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4
name: kube-state-metrics
namespace: monitoring
spec:
endpoints:
- honorLabels: true
interval: 30s
port: http-metrics
relabelings:
- action: labeldrop
regex: (pod|service|endpoint|namespace)
scrapeTimeout: 30s
- interval: 30s
port: telemetry
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 1.9.4

View File

@ -0,0 +1,17 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: node-exporter
rules:
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: node-exporter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: node-exporter
subjects:
- kind: ServiceAccount
name: node-exporter
namespace: monitoring

View File

@ -0,0 +1,89 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: node-exporter
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
app: node-exporter
template:
metadata:
labels:
app: node-exporter
spec:
containers:
- args:
- --web.listen-address=127.0.0.1:9100
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --no-collector.wifi
- --no-collector.hwmon
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
image: quay.io/prometheus/node-exporter:v0.18.1
name: node-exporter
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: false
- mountPath: /host/sys
name: sys
readOnly: false
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
- args:
- --logtostderr
- --secure-listen-address=[$(IP)]:9100
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: quay.io/coreos/kube-rbac-proxy:v0.4.1
name: kube-rbac-proxy
ports:
- containerPort: 9100
hostPort: 9100
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
hostNetwork: true
hostPID: true
nodeSelector:
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: node-exporter
tolerations:
- operator: Exists
volumes:
- hostPath:
path: /proc
name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root

View File

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
clusterIP: None
ports:
- name: https
port: 9100
targetPort: https
selector:
app: node-exporter

View File

@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: node-exporter
namespace: monitoring

View File

@ -0,0 +1,26 @@
# ServiceMonitor "node-exporter": selects Services labelled
# k8s-app=node-exporter and scrapes their "https" port every 15s.
# The scrape authenticates with the service account token file and uses the
# https scheme; the job label is taken from the Service's k8s-app label.
# The relabeling copies the pod's node name into the "instance" label.
# NOTE(review): insecureSkipVerify is true, so the target's TLS certificate is
# not verified on scrape - confirm this is acceptable for this cluster.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 15s
port: https
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: k8s-app
selector:
matchLabels:
k8s-app: node-exporter

View File

@ -0,0 +1,13 @@
# Registers the aggregated API metrics.k8s.io/v1beta1 and points it at the
# prometheus-adapter Service in the monitoring namespace.
# NOTE(review): insecureSkipTLSVerify is true, i.e. the serving certificate of
# the backing service is not verified - confirm this is acceptable, or supply
# a caBundle instead.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
groupPriorityMinimum: 100
insecureSkipTLSVerify: true
service:
name: prometheus-adapter
namespace: monitoring
version: v1beta1
versionPriority: 100

View File

@ -0,0 +1,16 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-adapter
rules:
- apiGroups:
- ""
resources:
- nodes
- namespaces
- pods
- services
verbs:
- get
- list
- watch

View File

@ -0,0 +1,18 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
rules:
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-adapter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-adapter
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: monitoring

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: resource-metrics:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: monitoring

View File

@ -0,0 +1,11 @@
# ClusterRole "resource-metrics-server-resources": grants every verb on every
# resource in the metrics.k8s.io API group (both lists are the '*' wildcard).
# NOTE(review): no binding for this role is visible in this part of the
# commit - check which subject it is bound to before widening or removing it.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: resource-metrics-server-resources
rules:
- apiGroups:
- metrics.k8s.io
resources:
- '*'
verbs:
- '*'

View File

@ -0,0 +1,33 @@
apiVersion: v1
data:
config.yaml: |
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
instance:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
window: 5m
kind: ConfigMap
metadata:
name: adapter-config
namespace: monitoring

View File

@ -0,0 +1,52 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-adapter
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
name: prometheus-adapter
strategy:
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
template:
metadata:
labels:
name: prometheus-adapter
spec:
containers:
- args:
- --cert-dir=/var/run/serving-cert
- --config=/etc/adapter/config.yaml
- --logtostderr=true
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.5.0
name: prometheus-adapter
ports:
- containerPort: 6443
volumeMounts:
- mountPath: /tmp
name: tmpfs
readOnly: false
- mountPath: /var/run/serving-cert
name: volume-serving-cert
readOnly: false
- mountPath: /etc/adapter
name: config
readOnly: false
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: prometheus-adapter
volumes:
- emptyDir: {}
name: tmpfs
- emptyDir: {}
name: volume-serving-cert
- configMap:
name: adapter-config
name: config

View File

@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: resource-metrics-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: monitoring

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: Service
metadata:
labels:
name: prometheus-adapter
name: prometheus-adapter
namespace: monitoring
spec:
ports:
- name: https
port: 443
targetPort: 6443
selector:
name: prometheus-adapter

View File

@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-adapter
namespace: monitoring

View File

@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-k8s
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring

View File

@ -0,0 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
namespace: monitoring
spec:
endpoints:
- honorLabels: true
port: http
selector:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0

View File

@ -0,0 +1,34 @@
# Prometheus custom resource "k8s": two replicas of
# quay.io/prometheus/prometheus v2.15.2 on Linux nodes, running as non-root
# user 1000 under the prometheus-k8s service account, with a 400Mi memory
# request. Alerts are sent to the "web" port of alertmanager-main in the
# monitoring namespace.
# Rules are limited to objects labelled prometheus=k8s and role=alert-rules.
# The ServiceMonitor/PodMonitor selectors and their namespace selectors are
# empty ({}) - presumably "match everything"; confirm against the
# prometheus-operator API documentation.
# NOTE(review): no storage or retention settings are set in this resource, so
# whatever the operator defaults to applies - confirm that metric data does
# not need to survive pod restarts.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
prometheus: k8s
name: k8s
namespace: monitoring
spec:
alerting:
alertmanagers:
- name: alertmanager-main
namespace: monitoring
port: web
baseImage: quay.io/prometheus/prometheus
nodeSelector:
kubernetes.io/os: linux
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
replicas: 2
resources:
requests:
memory: 400Mi
ruleSelector:
matchLabels:
prometheus: k8s
role: alert-rules
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
version: v2.15.2

View File

@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s-config
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s-config
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring

View File

@ -0,0 +1,42 @@
apiVersion: rbac.authorization.k8s.io/v1
items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
kind: RoleBindingList

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s-config
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get

View File

@ -0,0 +1,51 @@
apiVersion: rbac.authorization.k8s.io/v1
items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: default
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: kube-system
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
kind: RoleList

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
labels:
prometheus: k8s
name: prometheus-k8s
namespace: monitoring
spec:
ports:
- name: web
port: 9090
targetPort: web
selector:
app: prometheus
prometheus: k8s
sessionAffinity: ClientIP

View File

@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-k8s
namespace: monitoring

View File

@ -0,0 +1,14 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
k8s-app: prometheus
name: prometheus
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
selector:
matchLabels:
prometheus: k8s

View File

@ -0,0 +1,74 @@
# ServiceMonitor for the Kubernetes API server: scrapes the `https` port of
# Services labelled component=apiserver, provider=kubernetes in the `default`
# namespace every 30 seconds, authenticating with the mounted service account
# token and verifying TLS against the service account CA.
#
# Every metricRelabelings entry uses `action: drop`, i.e. series whose
# concatenated source labels match the regex are discarded before ingestion.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: apiserver
  name: kube-apiserver
  namespace: monitoring
spec:
  endpoints:
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    interval: 30s
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    # The regex below must stay on ONE line: a plain YAML scalar that wraps is
    # folded with a space, which would corrupt the alternative at the wrap point.
    - action: drop
      regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(debugging|disk|request|server).*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_controller_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_step_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    # Two source labels: the matched value is "<__name__>;<le>" (default
    # separator ';'), so this drops only the listed histogram buckets.
    - action: drop
      regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
      sourceLabels:
      - __name__
      - le
    port: https
    scheme: https
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      serverName: kubernetes
  jobLabel: component
  namespaceSelector:
    matchNames:
    - default
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes

View File

@ -0,0 +1,19 @@
# ServiceMonitor for CoreDNS: scrapes the `metrics` port of Services labelled
# k8s-app=kube-dns in `kube-system` every 15 seconds, sending the mounted
# service account token as bearer token.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: coredns
  name: coredns
  namespace: monitoring
spec:
  endpoints:
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    interval: 15s
    port: metrics
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      k8s-app: kube-dns

View File

@ -0,0 +1,55 @@
# ServiceMonitor for kube-controller-manager: scrapes the `http-metrics` port
# of Services labelled k8s-app=kube-controller-manager in `kube-system` every
# 30 seconds.
#
# Every metricRelabelings entry uses `action: drop`, i.e. series whose metric
# name matches the regex are discarded before ingestion.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: kube-controller-manager
  name: kube-controller-manager
  namespace: monitoring
spec:
  endpoints:
  - interval: 30s
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    # The regex below must stay on ONE line: a plain YAML scalar that wraps is
    # folded with a space, which would corrupt the alternative at the wrap point.
    - action: drop
      regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(debugging|disk|request|server).*
      sourceLabels:
      - __name__
    port: http-metrics
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      k8s-app: kube-controller-manager

View File

@ -0,0 +1,18 @@
# ServiceMonitor for kube-scheduler: scrapes the `http-metrics` port of
# Services labelled k8s-app=kube-scheduler in `kube-system` every 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: kube-scheduler
  name: kube-scheduler
  namespace: monitoring
spec:
  endpoints:
  - interval: 30s
    port: http-metrics
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      k8s-app: kube-scheduler

View File

@ -0,0 +1,77 @@
# ServiceMonitor for the kubelet: defines two scrape endpoints on the
# `https-metrics` port of Services labelled k8s-app=kubelet in `kube-system`,
# both every 30 seconds with the mounted service account token:
#   1. the default metrics path (kubelet's own metrics), and
#   2. /metrics/cadvisor (container metrics).
# Both endpoints copy __metrics_path__ into a `metrics_path` target label so
# the two series sets can be told apart, and both keep labels supplied by the
# target on collision (honorLabels).
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: kubelet
  name: kubelet
  namespace: monitoring
spec:
  endpoints:
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: true
    interval: 30s
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    # The regex below must stay on ONE line: a plain YAML scalar that wraps is
    # folded with a space, which would corrupt the alternative at the wrap point.
    - action: drop
      regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
      sourceLabels:
      - __name__
    port: https-metrics
    relabelings:
    - sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    tlsConfig:
      # The kubelet serving certificate is NOT verified.
      # NOTE(review): confirm this is acceptable for this cluster; a caFile
      # would be preferable if kubelet certs are signed by a known CA.
      insecureSkipVerify: true
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: true
    interval: 30s
    metricRelabelings:
    - action: drop
      regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
      sourceLabels:
      - __name__
    path: /metrics/cadvisor
    port: https-metrics
    relabelings:
    - sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    tlsConfig:
      # Same as above: certificate verification is disabled for cAdvisor scrapes.
      insecureSkipVerify: true
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      k8s-app: kubelet

View File

@ -0,0 +1,4 @@
# Namespace `monitoring`.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring

View File

@ -0,0 +1,265 @@
# CustomResourceDefinition registering the namespaced `PodMonitor` kind in
# API group monitoring.coreos.com (version v1), together with its OpenAPI v3
# validation schema.
# NOTE(review): apiextensions.k8s.io/v1beta1 is no longer served as of
# Kubernetes 1.22; confirm the target cluster version before applying.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.2.4
  creationTimestamp: null
  name: podmonitors.monitoring.coreos.com
spec:
  group: monitoring.coreos.com
  names:
    kind: PodMonitor
    listKind: PodMonitorList
    plural: podmonitors
    singular: podmonitor
  scope: Namespaced
  validation:
    openAPIV3Schema:
      description: PodMonitor defines monitoring for a set of pods.
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          description: Specification of desired Pod selection for target discovery
            by Prometheus.
          properties:
            jobLabel:
              description: The label to use to retrieve the job name from.
              type: string
            namespaceSelector:
              description: Selector to select which namespaces the Endpoints objects
                are discovered from.
              properties:
                any:
                  description: Boolean describing whether all namespaces are selected
                    in contrast to a list restricting them.
                  type: boolean
                matchNames:
                  description: List of namespace names.
                  items:
                    type: string
                  type: array
              type: object
            podMetricsEndpoints:
              description: A list of endpoints allowed as part of this PodMonitor.
              items:
                description: PodMetricsEndpoint defines a scrapeable endpoint of a
                  Kubernetes Pod serving Prometheus metrics.
                properties:
                  honorLabels:
                    description: HonorLabels chooses the metric's labels on collisions
                      with target labels.
                    type: boolean
                  honorTimestamps:
                    description: HonorTimestamps controls whether Prometheus respects
                      the timestamps present in scraped data.
                    type: boolean
                  interval:
                    description: Interval at which metrics should be scraped
                    type: string
                  metricRelabelings:
                    description: MetricRelabelConfigs to apply to samples before ingestion.
                    items:
                      description: 'RelabelConfig allows dynamic rewriting of the
                        label set, being applied to samples before ingestion. It defines
                        `<metric_relabel_configs>`-section of Prometheus configuration.
                        More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
                      properties:
                        action:
                          description: Action to perform based on regex matching.
                            Default is 'replace'
                          type: string
                        modulus:
                          description: Modulus to take of the hash of the source label
                            values.
                          format: int64
                          type: integer
                        regex:
                          description: Regular expression against which the extracted
                            value is matched. Default is '(.*)'
                          type: string
                        replacement:
                          description: Replacement value against which a regex replace
                            is performed if the regular expression matches. Regex
                            capture groups are available. Default is '$1'
                          type: string
                        separator:
                          description: Separator placed between concatenated source
                            label values. default is ';'.
                          type: string
                        sourceLabels:
                          description: The source labels select values from existing
                            labels. Their content is concatenated using the configured
                            separator and matched against the configured regular expression
                            for the replace, keep, and drop actions.
                          items:
                            type: string
                          type: array
                        targetLabel:
                          description: Label to which the resulting value is written
                            in a replace action. It is mandatory for replace actions.
                            Regex capture groups are available.
                          type: string
                      type: object
                    type: array
                  params:
                    additionalProperties:
                      items:
                        type: string
                      type: array
                    description: Optional HTTP URL parameters
                    type: object
                  path:
                    description: HTTP path to scrape for metrics.
                    type: string
                  port:
                    description: Name of the port this endpoint refers to. Mutually
                      exclusive with targetPort.
                    type: string
                  proxyUrl:
                    description: ProxyURL eg http://proxyserver:2195 Directs scrapes
                      to proxy through this endpoint.
                    type: string
                  relabelings:
                    description: 'RelabelConfigs to apply to samples before ingestion.
                      More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
                    items:
                      description: 'RelabelConfig allows dynamic rewriting of the
                        label set, being applied to samples before ingestion. It defines
                        `<metric_relabel_configs>`-section of Prometheus configuration.
                        More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
                      properties:
                        action:
                          description: Action to perform based on regex matching.
                            Default is 'replace'
                          type: string
                        modulus:
                          description: Modulus to take of the hash of the source label
                            values.
                          format: int64
                          type: integer
                        regex:
                          description: Regular expression against which the extracted
                            value is matched. Default is '(.*)'
                          type: string
                        replacement:
                          description: Replacement value against which a regex replace
                            is performed if the regular expression matches. Regex
                            capture groups are available. Default is '$1'
                          type: string
                        separator:
                          description: Separator placed between concatenated source
                            label values. default is ';'.
                          type: string
                        sourceLabels:
                          description: The source labels select values from existing
                            labels. Their content is concatenated using the configured
                            separator and matched against the configured regular expression
                            for the replace, keep, and drop actions.
                          items:
                            type: string
                          type: array
                        targetLabel:
                          description: Label to which the resulting value is written
                            in a replace action. It is mandatory for replace actions.
                            Regex capture groups are available.
                          type: string
                      type: object
                    type: array
                  scheme:
                    description: HTTP scheme to use for scraping.
                    type: string
                  scrapeTimeout:
                    description: Timeout after which the scrape is ended
                    type: string
                  targetPort:
                    anyOf:
                    - type: integer
                    - type: string
                    description: Name or number of the target port of the endpoint.
                      Mutually exclusive with port.
                    x-kubernetes-int-or-string: true
                type: object
              type: array
            podTargetLabels:
              description: PodTargetLabels transfers labels on the Kubernetes Pod
                onto the target.
              items:
                type: string
              type: array
            sampleLimit:
              description: SampleLimit defines per-scrape limit on number of scraped
                samples that will be accepted.
              format: int64
              type: integer
            selector:
              description: Selector to select Pod objects.
              properties:
                matchExpressions:
                  description: matchExpressions is a list of label selector requirements.
                    The requirements are ANDed.
                  items:
                    description: A label selector requirement is a selector that contains
                      values, a key, and an operator that relates the key and values.
                    properties:
                      key:
                        description: key is the label key that the selector applies
                          to.
                        type: string
                      operator:
                        description: operator represents a key's relationship to a
                          set of values. Valid operators are In, NotIn, Exists and
                          DoesNotExist.
                        type: string
                      values:
                        description: values is an array of string values. If the operator
                          is In or NotIn, the values array must be non-empty. If the
                          operator is Exists or DoesNotExist, the values array must
                          be empty. This array is replaced during a strategic merge
                          patch.
                        items:
                          type: string
                        type: array
                    required:
                    - key
                    - operator
                    type: object
                  type: array
                matchLabels:
                  additionalProperties:
                    type: string
                  description: matchLabels is a map of {key,value} pairs. A single
                    {key,value} in the matchLabels map is equivalent to an element
                    of matchExpressions, whose key field is "key", the operator is
                    "In", and the values array contains only "value". The requirements
                    are ANDed.
                  type: object
              type: object
          required:
          - podMetricsEndpoints
          - selector
          type: object
      required:
      - spec
      type: object
  version: v1
  versions:
  - name: v1
    served: true
    storage: true
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []

View File

@ -0,0 +1,95 @@
# CustomResourceDefinition registering the namespaced `PrometheusRule` kind in
# API group monitoring.coreos.com (version v1), together with its OpenAPI v3
# validation schema (rule groups, each with a name and a list of rules; every
# rule requires `expr`).
# NOTE(review): apiextensions.k8s.io/v1beta1 is no longer served as of
# Kubernetes 1.22; confirm the target cluster version before applying.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.2.4
  creationTimestamp: null
  name: prometheusrules.monitoring.coreos.com
spec:
  group: monitoring.coreos.com
  names:
    kind: PrometheusRule
    listKind: PrometheusRuleList
    plural: prometheusrules
    singular: prometheusrule
  scope: Namespaced
  validation:
    openAPIV3Schema:
      description: PrometheusRule defines alerting rules for a Prometheus instance
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          description: Specification of desired alerting rule definitions for Prometheus.
          properties:
            groups:
              description: Content of Prometheus rule file
              items:
                description: 'RuleGroup is a list of sequentially evaluated recording
                  and alerting rules. Note: PartialResponseStrategy is only used by
                  ThanosRuler and will be ignored by Prometheus instances. Valid
                  values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
                properties:
                  interval:
                    type: string
                  name:
                    type: string
                  partial_response_strategy:
                    type: string
                  rules:
                    items:
                      description: Rule describes an alerting or recording rule.
                      properties:
                        alert:
                          type: string
                        annotations:
                          additionalProperties:
                            type: string
                          type: object
                        expr:
                          anyOf:
                          - type: integer
                          - type: string
                          x-kubernetes-int-or-string: true
                        for:
                          type: string
                        labels:
                          additionalProperties:
                            type: string
                          type: object
                        record:
                          type: string
                      required:
                      - expr
                      type: object
                    type: array
                required:
                - name
                - rules
                type: object
              type: array
          type: object
      required:
      - spec
      type: object
  version: v1
  versions:
  - name: v1
    served: true
    storage: true
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []

View File

@ -0,0 +1,463 @@
# CustomResourceDefinition that registers the ServiceMonitor kind
# (servicemonitors.monitoring.coreos.com) together with its OpenAPI v3
# validation schema.
# NOTE(review): this manifest uses the apiextensions.k8s.io/v1beta1 CRD API,
# which upstream Kubernetes deprecated in favour of apiextensions.k8s.io/v1 —
# confirm the target cluster still serves v1beta1 before applying.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
annotations:
# Version of controller-gen recorded in the generated manifest.
controller-gen.kubebuilder.io/version: v0.2.4
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
kind: ServiceMonitor
listKind: ServiceMonitorList
plural: servicemonitors
singular: servicemonitor
scope: Namespaced
validation:
openAPIV3Schema:
description: ServiceMonitor defines monitoring for a set of services.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired Service selection for target discovery
by Prometheus.
properties:
endpoints:
description: A list of endpoints allowed as part of this ServiceMonitor.
items:
description: Endpoint defines a scrapeable endpoint serving Prometheus
metrics.
properties:
basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
properties:
password:
description: The secret in the service monitor namespace that
contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
username:
description: The secret in the service monitor namespace that
contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenFile:
description: File to read bearer token for scraping targets.
type: string
bearerTokenSecret:
description: Secret to mount to read bearer token for scraping
targets. The secret needs to be in the same namespace as the
service monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
additionalProperties:
items:
type: string
type: array
description: Optional HTTP URL parameters
type: object
path:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the service port this endpoint refers to.
Mutually exclusive with targetPort.
type: string
proxyUrl:
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before scraping.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
type: string
scrapeTimeout:
description: Timeout after which the scrape is ended
type: string
targetPort:
anyOf:
- type: integer
- type: string
description: Name or number of the target port of the endpoint.
Mutually exclusive with port.
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint
properties:
ca:
description: Struct containing the CA cert to use for the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
caFile:
description: Path to the CA cert in the Prometheus container
to use for the targets.
type: string
cert:
description: Struct containing the client cert file for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the ConfigMap or its
key must be defined
type: boolean
required:
- key
type: object
secret:
description: Secret containing data to use for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
type: object
certFile:
description: Path to the client cert file in the Prometheus
container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
description: Path to the client key file in the Prometheus
container for the targets.
type: string
keySecret:
description: Secret containing the client key file for the
targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
type: array
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
description: Selector to select which namespaces the Endpoints objects
are discovered from.
properties:
any:
description: Boolean describing whether all namespaces are selected
in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
items:
type: string
type: array
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Endpoints objects.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to a
set of values. Valid operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string values. If the operator
is In or NotIn, the values array must be non-empty. If the
operator is Exists or DoesNotExist, the values array must
be empty. This array is replaced during a strategic merge
patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator is
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
targetLabels:
description: TargetLabels transfers labels on the Kubernetes Service
onto the target.
items:
type: string
type: array
required:
- endpoints
- selector
type: object
required:
- spec
type: object
version: v1
versions:
- name: v1
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

View File

@ -0,0 +1,89 @@
# Cluster-wide permissions for the prometheus-operator controller.
# Bound to the prometheus-operator ServiceAccount by the ClusterRoleBinding
# of the same name.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
rules:
# Create CustomResourceDefinitions (this rule is not restricted by name).
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
# Read and update only the six monitoring.coreos.com CRDs listed below.
- apiGroups:
- apiextensions.k8s.io
resourceNames:
- alertmanagers.monitoring.coreos.com
- podmonitors.monitoring.coreos.com
- prometheuses.monitoring.coreos.com
- prometheusrules.monitoring.coreos.com
- servicemonitors.monitoring.coreos.com
- thanosrulers.monitoring.coreos.com
resources:
- customresourcedefinitions
verbs:
- get
- update
# Full access to the monitoring.coreos.com custom resources (and the
# finalizers subresource of alertmanagers, prometheuses and thanosrulers).
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
- alertmanagers/finalizers
- prometheuses
- prometheuses/finalizers
- thanosrulers
- thanosrulers/finalizers
- servicemonitors
- podmonitors
- prometheusrules
verbs:
- '*'
# Full access to StatefulSets in the apps group.
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- '*'
# Full access to ConfigMaps and Secrets in the core API group.
# NOTE(review): as a ClusterRole with a wildcard verb this covers Secrets in
# every namespace — confirm this breadth is acceptable for the cluster.
- apiGroups:
- ""
resources:
- configmaps
- secrets
verbs:
- '*'
# List and delete Pods.
- apiGroups:
- ""
resources:
- pods
verbs:
- list
- delete
# Manage Services (including their finalizers) and Endpoints.
- apiGroups:
- ""
resources:
- services
- services/finalizers
- endpoints
verbs:
- get
- create
- update
- delete
# Read-only discovery of Nodes.
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
# Read-only discovery of Namespaces.
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- list
- watch

View File

@ -0,0 +1,16 @@
# Grants the prometheus-operator ClusterRole to the prometheus-operator
# ServiceAccount in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
# Role being granted.
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-operator
# Identity receiving the role.
subjects:
- kind: ServiceAccount
name: prometheus-operator
namespace: monitoring

View File

@ -0,0 +1,48 @@
# Runs a single replica of the prometheus-operator controller (v0.36.0) in
# the monitoring namespace under the prometheus-operator ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
namespace: monitoring
spec:
replicas: 1
# The selector matches on component and name only; the version label is
# present on the pod template but is not part of the selector.
selector:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
template:
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
spec:
containers:
# Operator flags: the kubelet Service reference, logging to stderr, and the
# two reloader images passed to the operator.
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.36.0
image: quay.io/coreos/prometheus-operator:v0.36.0
name: prometheus-operator
ports:
# Named "http"; the prometheus-operator Service targets this port by name.
- containerPort: 8080
name: http
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
# Container-level hardening.
securityContext:
allowPrivilegeEscalation: false
# NOTE(review): the beta.kubernetes.io/os node label is deprecated upstream in
# favour of kubernetes.io/os — consider switching when the vendored
# kube-prometheus library is updated.
nodeSelector:
beta.kubernetes.io/os: linux
# Pod-level security context: run as a non-root user with UID 65534.
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: prometheus-operator

View File

@ -0,0 +1,18 @@
# Service in front of the prometheus-operator pods, exposing the operator's
# "http" port (8080).
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
namespace: monitoring
spec:
# clusterIP None makes this a headless Service (no virtual IP is allocated).
clusterIP: None
ports:
- name: http
port: 8080
# Refers to the container port by its name rather than by number.
targetPort: http
# Same labels as the Deployment's selector.
selector:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator

View File

@ -0,0 +1,9 @@
# Identity the prometheus-operator Deployment runs as; the ClusterRoleBinding
# named prometheus-operator lists this account as its subject.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.36.0
name: prometheus-operator
namespace: monitoring

View File

@ -0,0 +1,28 @@
// Manifest entry point: instantiates kube-prometheus for the 'monitoring'
// namespace and returns one object whose keys are the output names of the
// rendered manifests and whose values are the manifests themselves.
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  // Uncomment the following imports to enable its patches
  // (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
  // (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
  // (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
  // (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
  // (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') +
  {
    _config+:: {
      namespace: 'monitoring',
    },
  };

// Maps every visible field of `component` to the key `prefix + <field name>`.
local withPrefix(prefix, component) = {
  [prefix + field]: component[field]
  for field in std.objectFields(component)
};

withPrefix('setup/0namespace-', kp.kubePrometheus) +
{
  // Everything from the operator except its ServiceMonitor goes under setup/.
  ['setup/prometheus-operator-' + field]: kp.prometheusOperator[field]
  for field in std.objectFields(kp.prometheusOperator)
  if field != 'serviceMonitor'
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
withPrefix('node-exporter-', kp.nodeExporter) +
withPrefix('kube-state-metrics-', kp.kubeStateMetrics) +
withPrefix('alertmanager-', kp.alertmanager) +
withPrefix('prometheus-', kp.prometheus) +
withPrefix('prometheus-adapter-', kp.prometheusAdapter) +
withPrefix('grafana-', kp.grafana)

25
monitoring/vendor/etcd-mixin/README.md vendored Normal file
View File

@ -0,0 +1,25 @@
# Prometheus Monitoring Mixin for etcd
> NOTE: This project is in the *alpha* stage. Flags, configuration, behaviour and design may change significantly in future releases.
A set of customisable Prometheus alerts for etcd.
Instructions for use are the same as the [kubernetes-mixin](https://github.com/kubernetes-monitoring/kubernetes-mixin).
## Background
* For more information about monitoring mixins, see this [design doc](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/edit#).
## Testing alerts
Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed.
First, compile the mixin to a YAML file that `promtool` will read:
```
jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > mixin.yaml
```
Then run the unit test:
```
promtool test rules test.yaml
```

File diff suppressed because it is too large Load Diff

115
monitoring/vendor/etcd-mixin/test.yaml vendored Normal file
View File

@ -0,0 +1,115 @@
# promtool unit tests for the etcd mixin alerts. The rules are loaded from
# mixin.yaml; an alert_rule_test entry without exp_alerts (or with an empty
# one) expects no alert to be firing at that eval_time.
rule_files:
- mixin.yaml
evaluation_interval: 1m
tests:
# Group 1: three members go down one after another (first zero sample at 4m,
# 8m and 12m respectively). Checks when etcdMembersDown and
# etcdInsufficientMembers start firing and the member counts in the messages.
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.2"}'
values: '1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 3m
alertname: etcdInsufficientMembers
- eval_time: 5m
alertname: etcdInsufficientMembers
- eval_time: 5m
alertname: etcdMembersDown
- eval_time: 7m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
message: 'etcd cluster "etcd": members are down (1).'
- eval_time: 7m
alertname: etcdInsufficientMembers
- eval_time: 11m
alertname: etcdInsufficientMembers
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
message: 'etcd cluster "etcd": insufficient members (1).'
- eval_time: 15m
alertname: etcdInsufficientMembers
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
message: 'etcd cluster "etcd": insufficient members (0).'
# Group 2: same as group 1, except the series for instance 10.10.10.2 has only
# eight samples. Expects etcdMembersDown to report two members down at 10m.
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.2"}'
values: '1 1 1 1 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
message: 'etcd cluster "etcd": members are down (2).'
# Group 3: both "up" series are still 1 at the evaluated times, while the
# peer send-failure counter towards member-1 keeps increasing. Expects
# etcdMembersDown to stay silent at 4m and to report one member down at 6m.
- interval: 1m
input_series:
- series: 'up{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0'
- series: 'up{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0'
- series: 'etcd_network_peer_sent_failures_total{To="member-1",job="etcd",endpoint="test"}'
values: '0 0 1 2 3 4 5 6 7 8 9 10'
alert_rule_test:
- eval_time: 4m
alertname: etcdMembersDown
- eval_time: 6m
alertname: etcdMembersDown
exp_alerts:
- exp_labels:
job: etcd
severity: critical
exp_annotations:
message: 'etcd cluster "etcd": members are down (1).'
# Group 4: leader-change counters; expects etcdHighNumberOfLeaderChanges to
# fire at 10m and report 3 leader changes.
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
values: '0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
exp_alerts:
- exp_labels:
job: etcd
severity: warning
exp_annotations:
message: 'etcd cluster "etcd": 3 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
# Group 5: like group 4, but instance 10.10.10.0 lacks the second counter
# bump. exp_alerts is left empty, so no alert is expected at 10m.
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
values: '0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
exp_alerts:

View File

@ -0,0 +1,418 @@
{
dashboard(title, uid=''):: {
  // Builds a Grafana dashboard object titled `title` with optional `uid`.
  // Hidden members (`_nextPanel`, `addRow`, `addTemplate`,
  // `addMultiTemplate`) are builder helpers and are not rendered.

  // Fields common to both kinds of query-backed template variable; the
  // callers below add `current`, `includeAll` and `multi`.
  local queryVariable(name, metric_name, label_name, hide) = {
    allValue: null,
    datasource: '$datasource',
    hide: hide,
    label: name,
    name: name,
    options: [],
    query: 'label_values(%s, %s)' % [metric_name, label_name],
    refresh: 1,
    regex: '',
    sort: 2,
    tagValuesQuery: '',
    tags: [],
    tagsQuery: '',
    type: 'query',
    useTags: false,
  },

  // ---- Not materialised ----

  // Id that the next panel added through addRow will receive.
  _nextPanel:: 1,

  // Returns the dashboard with `row` appended; the row's panels get
  // consecutive ids starting at the current `_nextPanel`.
  addRow(row):: self {
    local count = std.length(row.panels),
    local firstId = super._nextPanel,
    local numbered = std.makeArray(
      count,
      function(offset) row.panels[offset] { id: firstId + offset }
    ),
    _nextPanel: firstId + count,
    rows+: [row { panels: numbered }],
  },

  // Returns the dashboard with a single-select variable `name`, populated
  // from label_values(metric_name, label_name).
  addTemplate(name, metric_name, label_name, hide=0):: self {
    templating+: {
      list+: [
        queryVariable(name, metric_name, label_name, hide) + {
          current: {
            text: 'prod',
            value: 'prod',
          },
          includeAll: false,
          multi: false,
        },
      ],
    },
  },

  // Same as addTemplate, but the variable is multi-select, offers "All" and
  // has "All" preselected.
  addMultiTemplate(name, metric_name, label_name, hide=0):: self {
    templating+: {
      list+: [
        queryVariable(name, metric_name, label_name, hide) + {
          current: {
            selected: true,
            text: 'All',
            value: '$__all',
          },
          includeAll: true,
          multi: true,
        },
      ],
    },
  },

  // ---- Materialised ----

  title: title,
  uid: uid,
  version: 0,
  schemaVersion: 14,
  gnetId: null,
  editable: true,
  hideControls: false,
  style: 'dark',
  graphTooltip: 0,
  timezone: 'utc',
  refresh: '10s',
  tags: [],
  links: [],
  rows: [],
  annotations: {
    list: [],
  },

  // Every dashboard starts with a `datasource` variable.
  templating: {
    list: [
      {
        current: {
          text: 'default',
          value: 'default',
        },
        hide: 0,
        label: null,
        name: 'datasource',
        options: [],
        query: 'prometheus',
        refresh: 1,
        regex: '',
        type: 'datasource',
      },
    ],
  },

  time: {
    from: 'now-1h',
    to: 'now',
  },
  timepicker: {
    refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  },
},
row(title):: {
  // A dashboard row titled `title`. Panels are collected in the hidden
  // `_panels` list via addPanel and rendered through `panels`.
  _panels:: [],

  // Returns the row with `panel` appended.
  addPanel(panel):: self {
    _panels+: [panel],
  },

  // The 12-column row width is split evenly (rounded down) between the
  // panels, overriding each panel's own `span`.
  panels:
    local members = self._panels;
    local width = std.floor(12 / std.length(members));
    std.map(function(member) member { span: width }, members),

  title: title,
  titleSize: 'h6',
  showTitle: true,
  height: '250px',
  collapse: false,
  repeat: null,
  repeatIteration: null,
  repeatRowId: null,
},
panel(title):: {
  // Base graph panel titled `title`; other helpers in this library are
  // meant to be added on top of it.
  title: title,
  type: 'graph',
  datasource: '$datasource',
  renderer: 'flot',
  span: 6,
  links: [],
  targets: [],
  thresholds: [],
  seriesOverrides: [],
  aliasColors: {},
  timeFrom: null,
  timeShift: null,

  // Drawing style.
  lines: true,
  linewidth: 1,
  fill: 1,
  bars: false,
  points: false,
  pointradius: 5,
  dashes: false,
  dashLength: 10,
  spaceLength: 10,
  stack: false,
  steppedLine: false,
  percentage: false,
  nullPointMode: 'null as zero',

  // Legend is shown, with every aggregate column switched off.
  legend: {
    [column]: false
    for column in ['avg', 'current', 'max', 'min', 'total', 'values']
  } + { show: true },

  tooltip: {
    shared: true,
    sort: 0,
    value_type: 'individual',
  },

  xaxis: {
    buckets: null,
    mode: 'time',
    name: null,
    show: true,
    values: [],
  },
  yaxes: $.yaxes('short'),
},
queryPanel(queries, legends, legendLink=null):: {
  // Appends one time-series target per query. `queries` and `legends` may
  // each be a single string or an array; they must have the same length,
  // otherwise evaluating `targets` raises an error.

  // Wraps a lone string into a one-element array.
  local asList(value) = if std.type(value) == 'string' then [value] else value,
  local exprs = asList(queries),
  local captions = asList(legends),

  targets+:
    if std.length(captions) != std.length(exprs)
    then error 'length of queries is not equal to length of legends'
    else [
      {
        legendLink: legendLink,
        expr: exprs[idx],
        format: 'time_series',
        intervalFactor: 2,
        legendFormat: captions[idx],
        step: 10,
      }
      for idx in std.range(0, std.length(exprs) - 1)
    ],
},
statPanel(query, format='percentunit')::
  // Settings for a singlestat panel showing the instant value of `query`,
  // displayed using the unit given by `format`.
  local instantTarget = {
    refId: 'A',
    expr: query,
    instant: true,
    format: 'time_series',
    intervalFactor: 2,
  };
  {
    type: 'singlestat',
    format: format,
    thresholds: '70,80',
    targets: [instantTarget],
  },
tablePanel(queries, labelStyles):: {
  // Settings for a table panel. `queries` is one query string or an array
  // of them; `labelStyles` maps a column name to either an alias string or
  // an object with `alias` and optional `decimals`, `type`, `unit`, `link`
  // and `linkTooltip`.
  local exprs = if std.type(queries) == 'string' then [queries] else queries,

  // Column-style fields that do not depend on the label style.
  local styleDefaults = {
    colorMode: null,
    colors: [],
    dateFormat: 'YYYY-MM-DD HH:mm:ss',
    thresholds: [],
  },

  // obj[key] when the (visible) field exists, otherwise `fallback`.
  local pick(obj, key, fallback) = if std.objectHas(obj, key) then obj[key] else fallback,

  // Expands a label style into a full column style.
  local style(labelStyle) =
    styleDefaults + (
      if std.type(labelStyle) == 'string'
      then {
        alias: labelStyle,
        decimals: 2,
        type: 'string',
        unit: 'short',
      }
      else {
        alias: labelStyle.alias,
        decimals: pick(labelStyle, 'decimals', 2),
        type: pick(labelStyle, 'type', 'number'),
        unit: pick(labelStyle, 'unit', 'short'),
        link: std.objectHas(labelStyle, 'link'),
        linkTooltip: pick(labelStyle, 'linkTooltip', 'Drill down'),
        linkUrl: pick(labelStyle, 'link', ''),
      }
    ),

  // Styles keyed by column name; the Time column is hidden unless
  // `labelStyles` supplies its own 'Time' entry.
  _styles:: {
    // By default hide time.
    Time: {
      alias: 'Time',
      dateFormat: 'YYYY-MM-DD HH:mm:ss',
      type: 'hidden',
    },
  } + {
    [column]: style(labelStyles[column])
    for column in std.objectFields(labelStyles)
  },

  // One entry per named column, followed by a catch-all pattern.
  styles:
    local byColumn = self._styles;
    std.map(
      function(column) byColumn[column] { pattern: column },
      std.objectFields(byColumn)
    ) + [style('') + { pattern: '/.*/' }],

  type: 'table',
  transform: 'table',

  // Instant table queries, with refIds 'A', 'B', ... in query order.
  targets: std.makeArray(
    std.length(exprs),
    function(idx) {
      expr: exprs[idx],
      format: 'table',
      instant: true,
      intervalFactor: 2,
      legendFormat: '',
      step: 10,
      refId: std.char(65 + idx),
    }
  ),
},
stack:: {
  // Panel overrides for stacked rendering: stacking on, fill set to 10,
  // and a line width of 0.
  linewidth: 0,
  fill: 10,
  stack: true,
},
yaxes(args)::
  // Returns the two y-axis definitions of a graph panel. `args` is either a
  // format string for the first axis, or an object of overrides merged on
  // top of the first axis' defaults. The second axis is hidden.
  local kind = std.type(args);
  local axisBase = {
    label: null,
    logBase: 1,
    max: null,
  };
  local primary = axisBase {
    format: if kind == 'string' then args else null,
    min: 0,
    show: true,
  };
  local overrides = if kind == 'object' then args else {};
  [
    primary + overrides,
    axisBase {
      format: 'short',
      min: null,
      show: false,
    },
  ],
qpsPanel(selector):: {
  // Stacked request-rate panel settings for the series matched by
  // `selector`, grouped by a derived `status` label.

  // The inner label_replace turns a status_code matching "([0-9]).." into
  // "<first digit>xx"; the outer one copies a status_code matching
  // "([a-z]+)" verbatim.
  local statusRateQuery =
    'sum by (status) (label_replace(label_replace(rate(%s[$__interval]), "status", "${1}xx", "status_code", "([0-9]).."), "status", "${1}", "status_code", "([a-z]+)"))' % [selector],

  // Fixed colour per status value.
  aliasColors: {
    success: '#7EB26D',
    'error': '#E24D42',
    '1xx': '#EAB839',
    '2xx': '#7EB26D',
    '3xx': '#6ED0E0',
    '4xx': '#EF843C',
    '5xx': '#E24D42',
  },
  targets: [
    {
      refId: 'A',
      expr: statusRateQuery,
      legendFormat: '{{status}}',
      format: 'time_series',
      intervalFactor: 2,
      step: 10,
    },
  ],
} + $.stack,
latencyPanel(metricName, selector, multiplier='1e3'):: {
  // Latency panel settings for the histogram `metricName` restricted by
  // `selector`: 99th percentile, 50th percentile and average, each scaled
  // by `multiplier`.

  // One time-series target.
  local series(refId, legend, expr) = {
    refId: refId,
    legendFormat: legend,
    expr: expr,
    format: 'time_series',
    intervalFactor: 2,
    step: 10,
  },

  // Quantile query; `q` is passed as a string so it is rendered verbatim.
  local quantile(q) =
    'histogram_quantile(%s, sum(rate(%s_bucket%s[$__interval])) by (le)) * %s'
    % [q, metricName, selector, multiplier],

  // Average: rate of the _sum series divided by rate of the _count series.
  local mean =
    'sum(rate(%s_sum%s[$__interval])) * %s / sum(rate(%s_count%s[$__interval]))'
    % [metricName, selector, multiplier, metricName, selector],

  nullPointMode: 'null as zero',
  targets: [
    series('A', '99th Percentile', quantile('0.99')),
    series('B', '50th Percentile', quantile('0.50')),
    series('C', 'Average', mean),
  ],
  yaxes: $.yaxes('ms'),
},
selector:: {
  // Constructors for label matchers: each returns
  // { label, op, value } with `op` set to the PromQL matching operator.
  local matcher(op) = function(label, value) { label: label, op: op, value: value },

  eq:: matcher('='),  // equal
  neq:: matcher('!='),  // not equal
  re:: matcher('=~'),  // regex match
  nre:: matcher('!~'),  // regex non-match
},
toPrometheusSelector(selector)::
  // Renders an array of { label, op, value } matchers as a selector string,
  // e.g. {job="etcd", ns=~"a.*"}.
  local render(matcher) = '%(label)s%(op)s"%(value)s"' % matcher;
  local body = std.join(', ', std.map(render, selector));
  '{' + body + '}',
}

View File

@ -0,0 +1,14 @@
// Dashboard provisioning config: a single file-based provider named '0'
// that reads from /grafana-dashboard-definitions/0.
local fileProvider = {
  name: '0',
  type: 'file',
  orgId: 1,
  folder: '',
  options: {
    path: '/grafana-dashboard-definitions/0',
  },
};

{
  apiVersion: 1,
  providers: [fileProvider],
}

View File

@ -0,0 +1,182 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
// Default configuration for the Grafana manifests below. Every entry is
// merged with `+::`, so an importing file can override individual keys.
_config+:: {
// Namespace applied to every generated object.
namespace: 'default',
versions+:: {
// Image tag; combined with imageRepos.grafana to form the container image.
grafana: '6.4.3',
},
imageRepos+:: {
grafana: 'grafana/grafana',
},
prometheus+:: {
name: 'k8s',
// Service name used to build the default datasource URL below.
serviceName: 'prometheus-' + $._config.prometheus.name,
},
grafana+:: {
// Map of file name -> dashboard object; serialized to JSON by dashboardDefinitions.
dashboards: {},
// Map of file name -> dashboard content that is stored as given, without serialization.
rawDashboards: {},
// Datasource list written into the 'grafana-datasources' secret.
datasources: [{
name: 'prometheus',
type: 'prometheus',
access: 'proxy',
orgId: 1,
url: 'http://' + $._config.prometheus.serviceName + '.' + $._config.namespace + '.svc:9090',
version: 1,
editable: false,
}],
// grafana.ini content as an object; when non-empty it is rendered with std.manifestIni into the 'grafana-config' secret.
config: {},
// Optional ldap.toml content (string); only stored when `config` is non-empty as well.
ldap: null,
// Plugin ids joined with ',' into the GF_INSTALL_PLUGINS environment variable.
plugins: [],
// Resource requests and limits of the grafana container.
container: {
requests: { cpu: '100m', memory: '100Mi' },
limits: { cpu: '200m', memory: '200Mi' },
},
},
},
// NOTE(review): not referenced by the code visible in this file; presumably an extension point filled in by importers — confirm.
grafanaDashboards: {},
grafana+: {
// Secret 'grafana-config', generated only when $._config.grafana.config is
// non-empty. Holds the base64-encoded grafana.ini and, when an LDAP
// configuration is set, a base64-encoded ldap.toml next to it.
[if std.length($._config.grafana.config) > 0 then 'config']:
  local settings = $._config.grafana;
  local encode(text) = std.base64(std.encodeUTF8(text));
  local iniEntry = { 'grafana.ini': encode(std.manifestIni(settings.config)) };
  local ldapEntry = if settings.ldap == null then {} else { 'ldap.toml': encode(settings.ldap) };
  k.core.v1.secret.new('grafana-config', iniEntry + ldapEntry) +
  k.core.v1.secret.mixin.metadata.withNamespace($._config.namespace),
// One ConfigMap per dashboard, named 'grafana-dashboard-<file name without
// .json>'. Entries of `dashboards` are serialized to JSON here; entries of
// `rawDashboards` are stored exactly as provided.
dashboardDefinitions:
  local configMap = k.core.v1.configMap;
  local grafanaCfg = $._config.grafana;
  // Shared ConfigMap builder: `file` is the data key, `content` its value.
  local definition(file, content) =
    configMap.new('grafana-dashboard-' + std.strReplace(file, '.json', ''), { [file]: content }) +
    configMap.mixin.metadata.withNamespace($._config.namespace);
  local generated = [
    definition(file, std.manifestJsonEx(grafanaCfg.dashboards[file], ' '))
    for file in std.objectFields(grafanaCfg.dashboards)
  ];
  local raw =
    if std.length(grafanaCfg.rawDashboards) > 0 then
      [
        definition(file, grafanaCfg.rawDashboards[file])
        for file in std.objectFields(grafanaCfg.rawDashboards)
      ]
    else [];
  generated + raw,
// ConfigMap 'grafana-dashboards' carrying the dashboard provider definition
// under the key 'dashboards.yaml'. The content is emitted as JSON text.
dashboardSources:
  local providers = import 'configs/dashboard-sources/dashboards.libsonnet';
  local cm = k.core.v1.configMap;
  local payload = { 'dashboards.yaml': std.manifestJsonEx(providers, ' ') };
  cm.new('grafana-dashboards', payload) +
  cm.mixin.metadata.withNamespace($._config.namespace),
// Secret 'grafana-datasources' whose 'datasources.yaml' key holds the
// base64-encoded datasource provisioning document (emitted as JSON text).
dashboardDatasources:
  local secret = k.core.v1.secret;
  local provisioning = {
    apiVersion: 1,
    datasources: $._config.grafana.datasources,
  };
  local encoded = std.base64(std.encodeUTF8(std.manifestJsonEx(provisioning, ' ')));
  secret.new('grafana-datasources', { 'datasources.yaml': encoded }) +
  secret.mixin.metadata.withNamespace($._config.namespace),
// Service 'grafana' exposing the named port 'http' (3000). Its selector is
// taken from the deployment's matchLabels so the two cannot drift apart.
service:
  local svc = k.core.v1.service;
  local httpPort = svc.mixin.spec.portsType.newNamed('http', 3000, 'http');
  local podSelectorLabels = $.grafana.deployment.spec.selector.matchLabels;
  svc.new('grafana', podSelectorLabels, httpPort) +
  svc.mixin.metadata.withLabels({ app: 'grafana' }) +
  svc.mixin.metadata.withNamespace($._config.namespace),
// ServiceAccount 'grafana' in the configured namespace; the deployment
// refers to it by this name.
serviceAccount:
  local sa = k.core.v1.serviceAccount;
  local account = sa.new('grafana');
  account + sa.mixin.metadata.withNamespace($._config.namespace),
// Grafana Deployment (one replica).
//
// Volumes and mounts:
//   grafana-storage      emptyDir                         -> /var/lib/grafana
//   grafana-datasources  secret                           -> /etc/grafana/provisioning/datasources
//   grafana-dashboards   configMap                        -> /etc/grafana/provisioning/dashboards
//   grafana-dashboard-*  one configMap per dashboard      -> /grafana-dashboard-definitions/0/<name>
//   grafana-config       secret, only if config is set    -> /etc/grafana
//
// The generated manifest is identical to the previous implementation; the
// unused `podSelector` local was dropped and the duplicated comprehensions
// for `dashboards` and `rawDashboards` were merged into one list.
deployment:
  local deployment = k.apps.v1.deployment;
  local podSpec = k.apps.v1.deployment.mixin.spec.template.spec;
  local container = podSpec.containersType;
  local volume = podSpec.volumesType;
  local containerPort = container.portsType;
  local containerVolumeMount = container.volumeMountsType;
  local env = container.envType;
  local grafanaCfg = $._config.grafana;

  local targetPort = 3000;
  local portName = 'http';
  local podLabels = { app: 'grafana' };
  local hasConfig = std.length(grafanaCfg.config) > 0;

  // Generated dashboards first, raw dashboards second (same order as before),
  // each reduced to its file name without the '.json' suffix.
  local dashboardNames = [
    std.strReplace(file, '.json', '')
    for file in std.objectFields(grafanaCfg.dashboards) + std.objectFields(grafanaCfg.rawDashboards)
  ];

  // Volume backed by a Secret / ConfigMap that has the same name as the volume.
  local secretVolume(name) = volume.withName(name) + volume.mixin.secret.withSecretName(name);
  local configMapVolume(name) = volume.withName(name) + volume.mixin.configMap.withName(name);

  local volumeMounts =
    [
      containerVolumeMount.new('grafana-storage', '/var/lib/grafana'),
      containerVolumeMount.new('grafana-datasources', '/etc/grafana/provisioning/datasources'),
      containerVolumeMount.new('grafana-dashboards', '/etc/grafana/provisioning/dashboards'),
    ] +
    [
      containerVolumeMount.new('grafana-dashboard-' + name, '/grafana-dashboard-definitions/0/' + name)
      for name in dashboardNames
    ] +
    (if hasConfig then [containerVolumeMount.new('grafana-config', '/etc/grafana')] else []);

  local volumes =
    [
      volume.fromEmptyDir('grafana-storage'),
      secretVolume('grafana-datasources'),
      configMapVolume('grafana-dashboards'),
    ] +
    [
      configMapVolume('grafana-dashboard-' + name)
      for name in dashboardNames
    ] +
    (if hasConfig then [secretVolume('grafana-config')] else []);

  // GF_INSTALL_PLUGINS is only set when at least one plugin is requested.
  local pluginEnv =
    if std.length(grafanaCfg.plugins) == 0 then {}
    else container.withEnv([env.new('GF_INSTALL_PLUGINS', std.join(',', grafanaCfg.plugins))]);

  local c =
    container.new('grafana', $._config.imageRepos.grafana + ':' + $._config.versions.grafana) +
    pluginEnv +
    container.withVolumeMounts(volumeMounts) +
    container.withPorts(containerPort.newNamed(targetPort, portName)) +
    container.mixin.readinessProbe.httpGet.withPath('/api/health') +
    container.mixin.readinessProbe.httpGet.withPort(portName) +
    container.mixin.resources.withRequests(grafanaCfg.container.requests) +
    container.mixin.resources.withLimits(grafanaCfg.container.limits);

  deployment.new('grafana', 1, c, podLabels) +
  deployment.mixin.metadata.withNamespace($._config.namespace) +
  deployment.mixin.metadata.withLabels(podLabels) +
  deployment.mixin.spec.selector.withMatchLabels(podLabels) +
  deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
  deployment.mixin.spec.template.spec.withVolumes(volumes) +
  deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
  deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
  deployment.mixin.spec.template.spec.withServiceAccountName('grafana'),
},
}

View File

@ -0,0 +1,24 @@
{
"dependencies": [
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "master"
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master"
}
]
}

View File

@ -0,0 +1,44 @@
{
  /**
   * Returns a new alert condition for a graph panel.
   * The only condition type produced is 'query': it names a query, a time
   * range and an aggregation (reducer) whose result is compared against a
   * threshold (evaluator).
   *
   * @param evaluatorParams Threshold value(s); a single value is wrapped in an array
   * @param evaluatorType Type of threshold comparison
   * @param operatorType Operator combining this condition with the others
   * @param queryRefId Letter of the query (from the Metrics tab) to evaluate
   * @param queryTimeEnd End of the time range
   * @param queryTimeStart Beginning of the time range
   * @param reducerParams Parameters of the aggregation function; a single value is wrapped in an array
   * @param reducerType Name of the aggregation function
   * @return A json that represents a condition of alert
   */
  new(
    evaluatorParams=[],
    evaluatorType='gt',
    operatorType='and',
    queryRefId='A',
    queryTimeEnd='now',
    queryTimeStart='5m',
    reducerParams=[],
    reducerType='avg',
  )::
    // Arrays pass through untouched, anything else becomes a one-element array.
    local asArray(value) = if std.type(value) != 'array' then [value] else value;
    {
      type: 'query',
      query: {
        params: [queryRefId, queryTimeStart, queryTimeEnd],
      },
      reducer: {
        type: reducerType,
        params: asArray(reducerParams),
      },
      evaluator: {
        type: evaluatorType,
        params: asArray(evaluatorParams),
      },
      operator: {
        type: operatorType,
      },
    },
}

View File

@ -0,0 +1,35 @@
{
  // The annotation entry named 'Annotations & Alerts', bound to the
  // '-- Grafana --' datasource, enabled and hidden.
  default:: {
    name: 'Annotations & Alerts',
    type: 'dashboard',
    datasource: '-- Grafana --',
    builtIn: 1,
    enable: true,
    hide: true,
    iconColor: 'rgba(0, 211, 255, 1)',
  },

  /**
   * Returns an annotation query bound to a datasource.
   *
   * @param name Name of the annotation
   * @param datasource Datasource to query
   * @param expr Query expression; omitted from the output when null
   * @param enable Whether the annotation is enabled
   * @param hide Whether the annotation toggle is hidden
   * @param iconColor Colour of the annotation marker
   * @param tags Tags of the annotation
   * @param type Annotation type
   * @param builtIn Built-in marker; omitted from the output when null
   * @return A json that represents an annotation
   */
  datasource(
    name,
    datasource,
    expr=null,
    enable=true,
    hide=false,
    iconColor='rgba(255, 96, 96, 1)',
    tags=[],
    type='tags',
    builtIn=null,
  )::
    local always = {
      name: name,
      datasource: datasource,
      type: type,
      tags: tags,
      enable: enable,
      hide: hide,
      iconColor: iconColor,
      showIn: 0,
    };
    local exprPart = if expr == null then {} else { expr: expr };
    local builtInPart = if builtIn == null then {} else { builtIn: builtIn };
    always + exprPart + builtInPart,
}

View File

@ -0,0 +1,39 @@
{
  /**
   * Return a CloudWatch Target
   *
   * @param region Value of the target's `region` field
   * @param namespace Value of the target's `namespace` field
   * @param metric Value of the target's `metricName` field
   * @param datasource Datasource; omitted from the output when null
   * @param statistic Single statistic, emitted as a one-element `statistics` array
   * @param alias Alias; omitted from the output when null
   * @param highResolution Value of the target's `highResolution` field
   * @param period Value of the target's `period` field
   * @param dimensions Value of the target's `dimensions` field
   * @return Panel target
   */
  target(
    region,
    namespace,
    metric,
    datasource=null,
    statistic='Average',
    alias=null,
    highResolution=false,
    period='1m',
    dimensions={}
  )::
    local always = {
      region: region,
      namespace: namespace,
      metricName: metric,
      dimensions: dimensions,
      statistics: [statistic],
      period: period,
      highResolution: highResolution,
    };
    local datasourcePart = if datasource == null then {} else { datasource: datasource };
    local aliasPart = if alias == null then {} else { alias: alias };
    always + datasourcePart + aliasPart,
}

View File

@ -0,0 +1,147 @@
local timepickerlib = import 'timepicker.libsonnet';
{
/**
* Returns a new dashboard object together with the builder methods used to
* populate it (annotations, templates, rows, panels, links, requirements
* and inputs). Every builder returns an extended copy of the dashboard.
*
* @param title Title of the dashboard
* @param editable Value of the `editable` field
* @param style Value of the `style` field
* @param tags Value of the `tags` field
* @param time_from Start of the default time range (`time.from`)
* @param time_to End of the default time range (`time.to`)
* @param timezone Value of the `timezone` field
* @param refresh Value of the `refresh` field
* @param timepicker Timepicker object; defaults to timepickerlib.new()
* @param graphTooltip 'default', 'shared_crosshair' or 'shared_tooltip' (mapped to 0, 1, 2); any other value is passed through unchanged
* @param hideControls Value of the `hideControls` field
* @param schemaVersion Value of the `schemaVersion` field
* @param uid Dashboard uid; omitted from the output when it is the empty string
* @param description Description; omitted from the output when null
* @return A json that represents a dashboard
*/
new(
title,
editable=false,
style='dark',
tags=[],
time_from='now-6h',
time_to='now',
timezone='browser',
refresh='',
timepicker=timepickerlib.new(),
graphTooltip='default',
hideControls=false,
schemaVersion=14,
uid='',
description=null,
):: {
// `it` keeps a handle on the dashboard so nested objects below can read its fields.
local it = self,
// Hidden accumulator behind `annotations.list`.
_annotations:: [],
[if uid != '' then 'uid']: uid,
editable: editable,
[if description != null then 'description']: description,
gnetId: null,
// Symbolic names are translated to numeric codes; other values pass through.
graphTooltip:
if graphTooltip == 'shared_tooltip' then 2
else if graphTooltip == 'shared_crosshair' then 1
else if graphTooltip == 'default' then 0
else graphTooltip,
hideControls: hideControls,
id: null,
links: [],
// Hidden while empty; addPanels makes the field visible.
panels:: [],
refresh: refresh,
rows: [],
schemaVersion: schemaVersion,
style: style,
tags: tags,
time: {
from: time_from,
to: time_to,
},
timezone: timezone,
timepicker: timepicker,
title: title,
version: 0,
// Appends an array of annotations.
addAnnotations(annotations):: self {
_annotations+:: annotations,
},
// Appends a single annotation.
addAnnotation(a):: self.addAnnotations([a]),
// Appends an array of template variables.
addTemplates(templates):: self {
templates+: templates,
},
// Appends a single template variable.
addTemplate(t):: self.addTemplates([t]),
// Hidden accumulator behind `templating.list`.
templates:: [],
annotations: { list: it._annotations },
templating: { list: it.templates },
// Id handed to the next panel that gets added; numbering starts at 2.
_nextPanel:: 2,
// Appends a row and gives its panels consecutive ids starting at _nextPanel.
addRow(row)::
self {
// automatically number panels in added rows.
// https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
local n = std.length(row.panels),
local nextPanel = super._nextPanel,
local panels = std.makeArray(n, function(i)
row.panels[i] { id: nextPanel + i }),
_nextPanel: nextPanel + n,
rows+: [row { panels: panels }],
},
// Appends panels to the top-level `panels` list and assigns ids to them and
// to any panels nested inside them (a `panels` field on the panel itself).
addPanels(newpanels)::
self {
// automatically number the added panels, including nested ones.
// https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
// n = number of ids consumed: one per panel plus one per nested panel.
local n = std.foldl(function(numOfPanels, p)
(if 'panels' in p then
numOfPanels + 1 + std.length(p.panels)
else
numOfPanels + 1), newpanels, 0),
local nextPanel = super._nextPanel,
// _panels refers to itself: the id of panel i is derived from the id of
// panel i-1 plus the number of panels nested inside panel i-1.
local _panels = std.makeArray(
std.length(newpanels), function(i)
newpanels[i] {
id: nextPanel + (
if i == 0 then
0
else
if 'panels' in _panels[i - 1] then
(_panels[i - 1].id - nextPanel) + 1 + std.length(_panels[i - 1].panels)
else
(_panels[i - 1].id - nextPanel) + 1
),
// Nested panels are numbered right after their parent: parent id + 1 + j.
[if 'panels' in newpanels[i] then 'panels']: std.makeArray(
std.length(newpanels[i].panels), function(j)
newpanels[i].panels[j] {
id: 1 + j +
nextPanel + (
if i == 0 then
0
else
if 'panels' in _panels[i - 1] then
(_panels[i - 1].id - nextPanel) + 1 + std.length(_panels[i - 1].panels)
else
(_panels[i - 1].id - nextPanel) + 1
),
}
),
}
),
_nextPanel: nextPanel + n,
// `+:::` forces the otherwise hidden `panels` field to be visible.
panels+::: _panels,
},
// Appends one panel placed at the given grid position.
addPanel(panel, gridPos):: self.addPanels([panel { gridPos: gridPos }]),
// Appends several rows in order.
addRows(rows):: std.foldl(function(d, row) d.addRow(row), rows, self),
// Appends a dashboard link.
addLink(link):: self {
links+: [link],
},
// Hidden accumulator behind `__requires`.
required:: [],
__requires: it.required,
// Records an entry in `__requires`.
addRequired(type, name, id, version):: self {
required+: [{ type: type, name: name, id: id, version: version }],
},
// Hidden accumulator behind `__inputs`.
inputs:: [],
__inputs: it.inputs,
// Records an entry in `__inputs`.
addInput(
name,
label,
type,
pluginId,
pluginName,
description='',
):: self {
inputs+: [{
name: name,
label: label,
type: type,
pluginId: pluginId,
pluginName: pluginName,
description: description,
}],
},
},
}

View File

@ -0,0 +1,38 @@
{
  /**
   * Return an Elasticsearch Target
   *
   * @param query Query string
   * @param timeField Name of the time field
   * @param id Target id (emitted even when null)
   * @param datasource Datasource; omitted from the output when null
   * @param metrics Metric aggregations; defaults to the 90th percentile of `value`
   * @param bucketAggs Bucket aggregations; defaults to a 1s date histogram on `timestamp`
   * @param alias Alias (emitted even when null)
   * @return Panel target
   */
  target(
    query,
    timeField,
    id=null,
    datasource=null,
    metrics=[{
      field: 'value',
      id: null,
      type: 'percentiles',
      settings: {
        percents: [
          '90',
        ],
      },
    }],
    bucketAggs=[{
      field: 'timestamp',
      id: null,
      type: 'date_histogram',
      settings: {
        interval: '1s',
        min_doc_count: 0,
        trimEdges: 0,
      },
    }],
    alias=null,
  )::
    local datasourcePart = if datasource == null then {} else { datasource: datasource };
    datasourcePart + {
      id: id,
      alias: alias,
      query: query,
      timeField: timeField,
      metrics: metrics,
      bucketAggs: bucketAggs,
      // TODO: generate bucket ids
    },
}

View File

@ -0,0 +1,32 @@
{
  /**
   * Returns a new gauge panel.
   *
   * @param title The title of the gauge panel
   * @param datasource Datasource
   * @param calc Calculation placed in options.fieldOptions.calcs
   * @param description Description; omitted from the output when empty
   * @param height Height; omitted from the output when null
   * @param transparent Transparency flag; omitted from the output when null
   * @return A json that represents a gauge panel
   */
  new(
    title,
    datasource=null,
    calc='mean',
    description='',
    height=null,
    transparent=null,
  )::
    {
      type: 'gauge',
      title: title,
      datasource: datasource,
      [if description != '' then 'description']: description,
      [if height != null then 'height']: height,
      [if transparent != null then 'transparent']: transparent,
      options: {
        fieldOptions: {
          calcs: [calc],
        },
      },
      // Number of targets added so far; selects the next refId letter.
      _nextTarget:: 0,
      // Appends a target and labels it 'A', 'B', ... in insertion order.
      addTarget(target):: self {
        local index = super._nextTarget,
        local letter = std.char(std.codepoint('A') + index),
        _nextTarget: index + 1,
        targets+: [target { refId: letter }],
      },
    },
}

View File

@ -0,0 +1,22 @@
// Entry point of the library: exposes each component file as a hidden field,
// so a single import gives access to all builders (e.g. `.dashboard`,
// `.graphPanel`, `.prometheus`).
{
// Dashboard-level building blocks.
dashboard:: import 'dashboard.libsonnet',
template:: import 'template.libsonnet',
text:: import 'text.libsonnet',
timepicker:: import 'timepicker.libsonnet',
row:: import 'row.libsonnet',
link:: import 'link.libsonnet',
annotation:: import 'annotation.libsonnet',
// Panels.
graphPanel:: import 'graph_panel.libsonnet',
tablePanel:: import 'table_panel.libsonnet',
singlestat:: import 'singlestat.libsonnet',
pieChartPanel:: import 'pie_chart_panel.libsonnet',
// Datasource-specific query targets.
influxdb:: import 'influxdb.libsonnet',
prometheus:: import 'prometheus.libsonnet',
sql:: import 'sql.libsonnet',
graphite:: import 'graphite.libsonnet',
alertCondition:: import 'alert_condition.libsonnet',
cloudwatch:: import 'cloudwatch.libsonnet',
elasticsearch:: import 'elasticsearch.libsonnet',
heatmapPanel:: import 'heatmap_panel.libsonnet',
gauge:: import 'gauge.libsonnet',
}

View File

@ -0,0 +1,248 @@
{
/**
* Returns a new graph panel that can be added in a row.
* It requires the graph panel plugin in grafana, which is built-in.
*
* @param title The title of the graph panel.
* @param span Width of the panel
* @param datasource Datasource
* @param fill Fill, integer from 0 to 10
* @param linewidth Line Width, integer from 0 to 10
* @param decimals Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
* @param description Description of the panel. If null, not added to the json output.
* @param min_span Min span
* @param format Unit of the Y axes
* @param formatY1 Unit of the first Y axis (falls back to format when null)
* @param formatY2 Unit of the second Y axis (falls back to format when null)
* @param min Min of the Y axes
* @param max Max of the Y axes
* @param labelY1 Label of the first Y axis
* @param labelY2 Label of the second Y axis
* @param x_axis_mode X axis mode, one of [time, series, histogram]
* @param x_axis_values Chosen value of series, one of [avg, min, max, total, count]
* @param x_axis_buckets restricts the x axis to this amount of buckets
* @param x_axis_min restricts the x axis to display from this value if supplied
* @param x_axis_max restricts the x axis to display up to this value if supplied
* @param lines Display lines, boolean
* @param points Display points, boolean
* @param pointradius Radius of the points, allowed values are 0.5 or [1 ... 10] with step 1
* @param bars Display bars, boolean
* @param height Height of the panel. If null, not added to the json output.
* @param nullPointMode Value of the panel's nullPointMode field
* @param dashes Display line as dashes
* @param stack Stack values
* @param repeat Variable used to repeat the graph panel
* @param repeatDirection Direction of the repetition. If null, not added to the json output.
* @param sort Tooltip sort order: 'increasing' (1), 'decreasing' (2) or a raw value passed through
* @param show_xaxis Show the X axis
* @param legend_show Show legend
* @param legend_values Show values in legend
* @param legend_min Show min in legend
* @param legend_max Show max in legend
* @param legend_current Show current in legend
* @param legend_total Show total in legend
* @param legend_avg Show average in legend
* @param legend_alignAsTable Show legend as table
* @param legend_rightSide Show legend to the right
* @param legend_hideEmpty Value of the legend's hideEmpty field. If null, not added to the json output.
* @param legend_hideZero Value of the legend's hideZero field. If null, not added to the json output.
* @param legend_sort Sort order of legend
* @param legend_sortDesc Sort legend descending
* @param aliasColors Define color mappings for graphs
* @param thresholds Configuration of graph thresholds
* @param logBase1Y Value of logarithm base of the first Y axis
* @param logBase2Y Value of logarithm base of the second Y axis
* @param transparent Boolean (default: false) If set to true the panel will be transparent
* @param value_type Type of tooltip value
* @param shared_tooltip Boolean Allow to group or split tooltips on mouseover within a chart
* @param percentage Boolean (default: false) show as percentages
* @param time_from Value of the panel's timeFrom field
* @param time_shift Value of the panel's timeShift field
* @return A json that represents a graph panel
*/
new(
title,
span=null,
fill=1,
linewidth=1,
decimals=null,
description=null,
min_span=null,
format='short',
formatY1=null,
formatY2=null,
min=null,
max=null,
labelY1=null,
labelY2=null,
x_axis_mode='time',
x_axis_values='total',
x_axis_buckets=null,
x_axis_min=null,
x_axis_max=null,
lines=true,
datasource=null,
points=false,
pointradius=5,
bars=false,
height=null,
nullPointMode='null',
dashes=false,
stack=false,
repeat=null,
repeatDirection=null,
sort=0,
show_xaxis=true,
legend_show=true,
legend_values=false,
legend_min=false,
legend_max=false,
legend_current=false,
legend_total=false,
legend_avg=false,
legend_alignAsTable=false,
legend_rightSide=false,
legend_hideEmpty=null,
legend_hideZero=null,
legend_sort=null,
legend_sortDesc=null,
aliasColors={},
thresholds=[],
logBase1Y=1,
logBase2Y=1,
transparent=false,
value_type='individual',
shared_tooltip=true,
percentage=false,
time_from=null,
time_shift=null,
):: {
title: title,
[if span != null then 'span']: span,
[if min_span != null then 'minSpan']: min_span,
[if decimals != null then 'decimals']: decimals,
type: 'graph',
datasource: datasource,
targets: [
],
[if description != null then 'description']: description,
[if height != null then 'height']: height,
renderer: 'flot',
// Two Y axes; each uses its own format when given and the shared format otherwise.
yaxes: [
self.yaxe(if formatY1 != null then formatY1 else format, min, max, decimals=decimals, logBase=logBase1Y, label=labelY1),
self.yaxe(if formatY2 != null then formatY2 else format, min, max, decimals=decimals, logBase=logBase2Y, label=labelY2),
],
xaxis: {
show: show_xaxis,
mode: x_axis_mode,
name: null,
// `values` is only filled in series mode, `buckets` only in histogram mode.
values: if x_axis_mode == 'series' then [x_axis_values] else [],
buckets: if x_axis_mode == 'histogram' then [x_axis_buckets] else null,
[if x_axis_min != null then 'min']: x_axis_min,
[if x_axis_max != null then 'max']: x_axis_max,
},
lines: lines,
fill: fill,
linewidth: linewidth,
dashes: dashes,
dashLength: 10,
spaceLength: 10,
points: points,
pointradius: pointradius,
bars: bars,
stack: stack,
percentage: percentage,
legend: {
show: legend_show,
values: legend_values,
min: legend_min,
max: legend_max,
current: legend_current,
total: legend_total,
alignAsTable: legend_alignAsTable,
rightSide: legend_rightSide,
avg: legend_avg,
[if legend_hideEmpty != null then 'hideEmpty']: legend_hideEmpty,
[if legend_hideZero != null then 'hideZero']: legend_hideZero,
[if legend_sort != null then 'sort']: legend_sort,
[if legend_sortDesc != null then 'sortDesc']: legend_sortDesc,
},
nullPointMode: nullPointMode,
steppedLine: false,
tooltip: {
value_type: value_type,
shared: shared_tooltip,
// Symbolic sort names are translated to numeric codes; other values pass through.
sort: if sort == 'decreasing' then 2 else if sort == 'increasing' then 1 else sort,
},
timeFrom: time_from,
timeShift: time_shift,
// Only emitted when the panel is actually transparent.
[if transparent == true then 'transparent']: transparent,
aliasColors: aliasColors,
repeat: repeat,
[if repeatDirection != null then 'repeatDirection']: repeatDirection,
seriesOverrides: [],
thresholds: thresholds,
links: [],
// Builds a single Y axis definition; `decimals` is omitted when null.
yaxe(
format='short',
min=null,
max=null,
label=null,
show=true,
logBase=1,
decimals=null,
):: {
label: label,
show: show,
logBase: logBase,
min: min,
max: max,
format: format,
[if decimals != null then 'decimals']: decimals,
},
// Number of targets added so far; selects the next refId letter.
_nextTarget:: 0,
// Appends a target and labels it 'A', 'B', ... in insertion order.
addTarget(target):: self {
// automatically ref id in added targets.
// https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
local nextTarget = super._nextTarget,
_nextTarget: nextTarget + 1,
targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
},
// Appends several targets in order.
addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
// Appends a series override.
addSeriesOverride(override):: self {
seriesOverrides+: [override],
},
// Removes the default Y axes so that custom ones can be added with addYaxis.
resetYaxes():: self {
yaxes: [],
},
// Appends a Y axis built by yaxe().
addYaxis(
format='short',
min=null,
max=null,
label=null,
show=true,
logBase=1,
decimals=null,
):: self {
yaxes+: [self.yaxe(format, min, max, label, show, logBase, decimals)],
},
// Attaches an alert to the panel. The returned panel additionally offers
// addCondition/addConditions to fill the alert's condition list.
addAlert(
name,
executionErrorState='alerting',
forDuration='5m',
frequency='60s',
handler=1,
message='',
noDataState='no_data',
notifications=[],
):: self {
// `it` refers to the panel carrying the alert, so `alert.conditions` reads the hidden accumulator below.
local it = self,
_conditions:: [],
alert: {
name: name,
conditions: it._conditions,
executionErrorState: executionErrorState,
'for': forDuration,
frequency: frequency,
handler: handler,
noDataState: noDataState,
notifications: notifications,
message: message,
},
// Appends one alert condition.
addCondition(condition):: self {
_conditions+: [condition],
},
// Appends several alert conditions in order.
addConditions(conditions):: std.foldl(function(p, c) p.addCondition(c), conditions, it),
},
},
}

View File

@ -0,0 +1,27 @@
{
  /**
   * Return a Graphite Target
   *
   * @param target Graphite Query. Nested queries are possible by adding the query reference (refId).
   * @param targetFull Expanded form of @target, used in nested queries; omitted from the output when null.
   * @param hide Disable query on graph.
   * @param textEditor Enable raw query mode.
   * @param datasource Datasource; omitted from the output when null.
   * @return Panel target
   */
  target(
    target,
    targetFull=null,
    hide=false,
    textEditor=false,
    datasource=null,
  )::
    local always = { target: target, hide: hide, textEditor: textEditor };
    local targetFullPart = if targetFull == null then {} else { targetFull: targetFull };
    local datasourcePart = if datasource == null then {} else { datasource: datasource };
    always + targetFullPart + datasourcePart,
}

View File

@ -0,0 +1,137 @@
{
  /**
   * Returns a heatmap panel.
   * Requires the heatmap panel plugin in Grafana, which is built-in.
   *
   * @param title The title of the heatmap panel
   * @param datasource Datasource
   * @param description Description of the panel. If null, not added to the json output.
   * @param cards_cardPadding How much padding to put between bucket cards
   * @param cards_cardRound How much rounding should be applied to the bucket card shape
   * @param color_cardColor Hex value of color used when color_colorScheme is 'opacity'
   * @param color_colorScale How to scale the color range, 'linear' or 'sqrt'
   * @param color_colorScheme TODO: document (only emitted when color_mode is 'spectrum')
   * @param color_exponent TODO: document
   * @param color_max The value for the end of the color range
   * @param color_min The value for the beginning of the color range
   * @param color_mode How to display difference in frequency with color, default 'spectrum'
   * @param dataFormat How to format the data, default is 'timeseries'
   * @param highlightCards TODO: document
   * @param legend_show Show legend
   * @param minSpan Minimum span of the panel when repeated on a template variable
   * @param repeat Variable used to repeat the heatmap panel
   * @param repeatDirection Which direction to repeat the panel, 'h' for horizontal and 'v' for vertically
   * @param tooltipDecimals The number of decimal places to display in the tooltip
   * @param tooltip_show Whether or not to display a tooltip when hovering over the heatmap
   * @param tooltip_showHistogram Whether or not to display a histogram in the tooltip
   * @param xAxis_show Whether or not to show the X axis, default true
   * @param xBucketNumber Only if dataFormat is 'timeseries', number of buckets for the X axis
   * @param xBucketSize Only if dataFormat is 'timeseries', size of X axis buckets. Number or interval(10s, 15h, etc.) Has priority over xBucketNumber
   * @param yAxis_decimals Override automatic decimal precision for the Y axis
   * @param yAxis_format Unit of the Y axis
   * @param yAxis_logBase Only if dataFormat is 'timeseries'
   * @param yAxis_min Only if dataFormat is 'timeseries', min of the Y axis
   * @param yAxis_max Only if dataFormat is 'timeseries', max of the Y axis
   * @param yAxis_show Whether or not to show the Y axis
   * @param yAxis_splitFactor TODO: document
   * @param yBucketBound Which bound ('lower' or 'upper') of the bucket to use, default 'auto'
   * @param yBucketNumber Only if dataFormat is 'timeseries', number of buckets for the Y axis
   * @param yBucketSize Only if dataFormat is 'timeseries', size of Y axis buckets. Has priority over yBucketNumber
   * @return A json that represents a heatmap panel
   */
  new(
    title,
    datasource=null,
    description=null,
    cards_cardPadding=null,
    cards_cardRound=null,
    color_cardColor='#b4ff00',
    color_colorScale='sqrt',
    color_colorScheme='interpolateOranges',
    color_exponent=0.5,
    color_max=null,
    color_min=null,
    color_mode='spectrum',
    dataFormat='timeseries',
    highlightCards=true,
    legend_show=false,
    minSpan=null,
    repeat=null,
    repeatDirection=null,
    tooltipDecimals=null,
    tooltip_show=true,
    tooltip_showHistogram=false,
    xAxis_show=true,
    xBucketNumber=null,
    xBucketSize=null,
    yAxis_decimals=null,
    yAxis_format='short',
    yAxis_logBase=1,
    yAxis_min=null,
    yAxis_max=null,
    yAxis_show=true,
    yAxis_splitFactor=null,
    yBucketBound='auto',
    yBucketNumber=null,
    yBucketSize=null,
  ):: {
    // Bucket and axis-range settings are only emitted for raw time series data.
    local isTimeseries = dataFormat == 'timeseries',

    title: title,
    type: 'heatmap',
    [if description != null then 'description']: description,
    datasource: datasource,
    cards: {
      cardPadding: cards_cardPadding,
      cardRound: cards_cardRound,
    },
    color: {
      mode: color_mode,
      cardColor: color_cardColor,
      colorScale: color_colorScale,
      exponent: color_exponent,
      [if color_mode == 'spectrum' then 'colorScheme']: color_colorScheme,
      [if color_max != null then 'max']: color_max,
      [if color_min != null then 'min']: color_min,
    },
    [if dataFormat != null then 'dataFormat']: dataFormat,
    heatmap: {},
    highlightCards: highlightCards,
    legend: {
      show: legend_show,
    },
    [if minSpan != null then 'minSpan']: minSpan,
    [if repeat != null then 'repeat']: repeat,
    [if repeatDirection != null then 'repeatDirection']: repeatDirection,
    tooltip: {
      show: tooltip_show,
      showHistogram: tooltip_showHistogram,
    },
    [if tooltipDecimals != null then 'tooltipDecimals']: tooltipDecimals,
    xAxis: {
      show: xAxis_show,
    },
    // Each X bucket setting depends only on its own argument. Previously
    // xBucketNumber was emitted only when xBucketSize was set as well, so a
    // bucket count given on its own was silently dropped.
    xBucketNumber: if isTimeseries then xBucketNumber else null,
    xBucketSize: if isTimeseries then xBucketSize else null,
    yAxis: {
      decimals: yAxis_decimals,
      [if isTimeseries then 'logBase']: yAxis_logBase,
      format: yAxis_format,
      [if isTimeseries then 'max']: yAxis_max,
      [if isTimeseries then 'min']: yAxis_min,
      show: yAxis_show,
      splitFactor: yAxis_splitFactor,
    },
    yBucketBound: yBucketBound,
    [if isTimeseries then 'yBucketNumber']: yBucketNumber,
    [if isTimeseries then 'yBucketSize']: yBucketSize,
    // Number of targets added so far; selects the next refId letter.
    _nextTarget:: 0,
    // Appends a target and labels it 'A', 'B', ... in insertion order.
    addTarget(target):: self {
      local nextTarget = super._nextTarget,
      _nextTarget: nextTarget + 1,
      targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
    },
    // Appends several targets in order.
    addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
  },
}

View File

@ -0,0 +1,27 @@
{
  /**
   * Return an InfluxDB Target
   *
   * @param query Raw InfluxQL statement
   * @param alias Alias By pattern; omitted from the output when null
   * @param datasource Datasource; omitted from the output when null
   * @param rawQuery En/Disable raw query mode
   * @param resultFormat Format results as 'Time series' or 'Table'
   * @return Panel target
   */
  target(
    query,
    alias=null,
    datasource=null,
    rawQuery=true,
    resultFormat='time_series',
  )::
    local always = { query: query, rawQuery: rawQuery, resultFormat: resultFormat };
    local aliasPart = if alias == null then {} else { alias: alias };
    local datasourcePart = if datasource == null then {} else { datasource: datasource };
    always + aliasPart + datasourcePart,
}

View File

@ -0,0 +1,24 @@
{
  /**
   * Returns a dashboard link entry.
   *
   * @param title Title of the link
   * @param tags Value of the link's `tags` field
   * @param asDropdown Value of the link's `asDropdown` field
   * @param includeVars Value of the link's `includeVars` field
   * @param keepTime Value of the link's `keepTime` field
   * @param icon Icon name
   * @param url Value of the link's `url` field
   * @param targetBlank Value of the link's `targetBlank` field
   * @param type Link type
   * @return A json that represents a dashboard link
   */
  dashboards(
    title,
    tags,
    asDropdown=true,
    includeVars=false,
    keepTime=false,
    icon='external link',
    url='',
    targetBlank=false,
    type='dashboards',
  )::
    local identity = { title: title, type: type, tags: tags, url: url };
    local presentation = { icon: icon, asDropdown: asDropdown, targetBlank: targetBlank };
    local carryOver = { includeVars: includeVars, keepTime: keepTime };
    identity + presentation + carryOver,
}

View File

@ -0,0 +1,54 @@
{
  /**
   * Returns a new pie chart panel that can be added in a row.
   * It requires the pie chart panel plugin in grafana, which needs to be explicitly installed.
   *
   * @param title The title of the pie chart panel.
   * @param description Description of the panel (omitted from the output only when null)
   * @param span Width of the panel
   * @param min_span Min span
   * @param datasource Datasource
   * @param height Height of the panel; omitted from the output when null
   * @param aliasColors Define color mappings
   * @param pieType Type of pie chart (one of pie or donut)
   * @param valueName Value of the panel's `valueName` field
   * @param showLegend Show the legend
   * @param showLegendPercentage Show percentages in the legend
   * @param legendType Value of the panel's `legendType` field
   * @return A json that represents a pie chart panel
   */
  new(
    title,
    description='',
    span=null,
    min_span=null,
    datasource=null,
    height=null,
    aliasColors={},
    pieType='pie',
    valueName='current',
    showLegend=true,
    showLegendPercentage=true,
    legendType='Right side',
  ):: {
    type: 'grafana-piechart-panel',
    title: title,
    datasource: datasource,
    pieType: pieType,
    valueName: valueName,
    aliasColors: aliasColors,
    [if description != null then 'description']: description,
    [if span != null then 'span']: span,
    [if min_span != null then 'minSpan']: min_span,
    [if height != null then 'height']: height,
    legendType: legendType,
    legend: {
      show: showLegend,
      values: true,
      percentage: showLegendPercentage,
    },
    targets: [],
    // Number of targets added so far; selects the next refId letter.
    _nextTarget:: 0,
    // Appends a target and labels it 'A', 'B', ... in insertion order.
    addTarget(target):: self {
      local index = super._nextTarget,
      local letter = std.char(std.codepoint('A') + index),
      _nextTarget: index + 1,
      targets+: [target { refId: letter }],
    },
  },
}

View File

@ -0,0 +1,21 @@
{
  /**
   * Return a Prometheus Target
   *
   * @param expr PromQL expression
   * @param format Result format
   * @param intervalFactor Value of the target's `intervalFactor` field
   * @param legendFormat Legend template
   * @param datasource Datasource; omitted from the output when null
   * @param interval Value of the target's `interval` field; omitted from the output when null
   * @param instant Value of the target's `instant` field; omitted from the output when null
   * @param hide Value of the target's `hide` field; omitted from the output when null
   * @return Panel target
   */
  target(
    expr,
    format='time_series',
    intervalFactor=2,
    legendFormat='',
    datasource=null,
    interval=null,
    instant=null,
    hide=null,
  )::
    // Emits { [key]: value } unless the value is null.
    local optional(key, value) = if value == null then {} else { [key]: value };
    {
      expr: expr,
      format: format,
      intervalFactor: intervalFactor,
      legendFormat: legendFormat,
    }
    + optional('hide', hide)
    + optional('datasource', datasource)
    + optional('interval', interval)
    + optional('instant', instant),
}

View File

@ -0,0 +1,32 @@
{
  /**
   * Returns a new dashboard row.
   *
   * @param title Title of the row
   * @param height Height of the row; omitted from the output when null
   * @param collapse Whether the row is collapsed (written to both `collapse` and `collapsed`)
   * @param repeat Value of the row's `repeat` field
   * @param showTitle Show the title; when null the title is shown only if it differs from the default 'Dashboard Row'
   * @param titleSize Size of the title
   * @return A json that represents a row
   */
  new(
    title='Dashboard Row',
    height=null,
    collapse=false,
    repeat=null,
    showTitle=null,
    titleSize='h6'
  ):: {
    local titleVisible = if showTitle == null then title != 'Dashboard Row' else showTitle,

    type: 'row',
    title: title,
    titleSize: titleSize,
    showTitle: titleVisible,
    [if height != null then 'height']: height,
    collapse: collapse,
    collapsed: collapse,
    repeat: repeat,
    repeatIteration: null,
    repeatRowId: null,
    panels: [],
    // Appends an array of panels to the row.
    addPanels(panels):: self {
      panels+: panels,
    },
    // Appends one panel, attaching the given grid position to it.
    addPanel(panel, gridPos={}):: self {
      panels+: [panel { gridPos: gridPos }],
    },
  },
}

View File

@ -0,0 +1,133 @@
{
  /**
   * Returns a new singlestat panel object.
   *
   * Arguments that default to null are omitted from the output when left
   * null, except `datasource` and `interval`, which are always emitted.
   * `description` is omitted when it is the empty string.
   *
   * @param title Panel title
   * @param format Emitted as `format` (default 'none')
   * @param description Emitted as `description` unless ''
   * @param interval Emitted as `interval`
   * @param height Emitted as `height` when not null
   * @param datasource Emitted as `datasource`
   * @param span Emitted as `span` when not null
   * @param min_span Emitted as `minSpan` when not null
   * @param decimals Emitted as `decimals` when not null
   * @param valueName Emitted as `valueName` (default 'avg')
   * @param valueFontSize, prefixFontSize, postfixFontSize Font size fields
   * @param mappingType 'value' is emitted as 1, 'range' as 2, anything else as given
   * @param repeat, repeatDirection Emitted when not null
   * @param prefix, postfix Emitted as `prefix` / `postfix`
   * @param colors, colorBackground, colorValue, thresholds Colouring fields
   * @param valueMaps, rangeMaps Emitted as `valueMaps` / `rangeMaps`
   * @param transparent Emitted as `transparent` when not null
   * @param sparklineFillColor, sparklineFull, sparklineLineColor, sparklineShow
   *        Emitted under `sparkline`
   * @param gaugeShow, gaugeMinValue, gaugeMaxValue, gaugeThresholdMarkers,
   *        gaugeThresholdLabels Emitted under `gauge`
   * @param timeFrom Emitted as `timeFrom` when not null
   * @param links Emitted as `links`
   * @param tableColumn Emitted as `tableColumn`
   * @param maxPerRow Emitted as `maxPerRow` when not null
   * @return The panel object, with an `addTarget` helper
   */
  new(
    title,
    format='none',
    description='',
    interval=null,
    height=null,
    datasource=null,
    span=null,
    min_span=null,
    decimals=null,
    valueName='avg',
    valueFontSize='80%',
    prefixFontSize='50%',
    postfixFontSize='50%',
    mappingType=1,
    repeat=null,
    repeatDirection=null,
    prefix='',
    postfix='',
    colors=[
      '#299c46',
      'rgba(237, 129, 40, 0.89)',
      '#d44a3a',
    ],
    colorBackground=false,
    colorValue=false,
    thresholds='',
    valueMaps=[
      {
        value: 'null',
        op: '=',
        text: 'N/A',
      },
    ],
    rangeMaps=[
      {
        from: 'null',
        to: 'null',
        text: 'N/A',
      },
    ],
    transparent=null,
    sparklineFillColor='rgba(31, 118, 189, 0.18)',
    sparklineFull=false,
    sparklineLineColor='rgb(31, 120, 193)',
    sparklineShow=false,
    gaugeShow=false,
    gaugeMinValue=0,
    gaugeMaxValue=100,
    gaugeThresholdMarkers=true,
    gaugeThresholdLabels=false,
    timeFrom=null,
    links=[],
    tableColumn='',
    maxPerRow=null,
  )::
    {
      // Identity and data source.
      type: 'singlestat',
      title: title,
      datasource: datasource,
      links: links,
      targets: [],

      // Layout fields, emitted only when supplied.
      [if span != null then 'span']: span,
      [if min_span != null then 'minSpan']: min_span,
      [if height != null then 'height']: height,
      [if maxPerRow != null then 'maxPerRow']: maxPerRow,
      [if repeat != null then 'repeat']: repeat,
      [if repeatDirection != null then 'repeatDirection']: repeatDirection,
      [if transparent != null then 'transparent']: transparent,
      [if description != '' then 'description']: description,

      // Query and time settings.
      interval: interval,
      maxDataPoints: 100,
      cacheTimeout: null,
      [if timeFrom != null then 'timeFrom']: timeFrom,

      // Value formatting.
      format: format,
      [if decimals != null then 'decimals']: decimals,
      valueName: valueName,
      tableColumn: tableColumn,
      nullText: null,
      nullPointMode: 'connected',
      prefix: prefix,
      postfix: postfix,
      valueFontSize: valueFontSize,
      prefixFontSize: prefixFontSize,
      postfixFontSize: postfixFontSize,

      // Value/range mappings. The symbolic names 'value' and 'range' are
      // translated to 1 and 2; any other mappingType is passed through.
      mappingTypes: [
        { name: 'value to text', value: 1 },
        { name: 'range to text', value: 2 },
      ],
      mappingType:
        if mappingType == 'range' then 2
        else if mappingType == 'value' then 1
        else mappingType,
      valueMaps: valueMaps,
      rangeMaps: rangeMaps,

      // Colouring.
      colors: colors,
      colorValue: colorValue,
      colorBackground: colorBackground,
      thresholds: thresholds,

      // Gauge and sparkline sub-objects.
      gauge: {
        show: gaugeShow,
        minValue: gaugeMinValue,
        maxValue: gaugeMaxValue,
        thresholdLabels: gaugeThresholdLabels,
        thresholdMarkers: gaugeThresholdMarkers,
      },
      sparkline: {
        show: sparklineShow,
        full: sparklineFull,
        lineColor: sparklineLineColor,
        fillColor: sparklineFillColor,
      },

      // Hidden counter holding the index of the next refId to hand out.
      _nextTarget:: 0,

      // Returns a copy of the panel with `target` appended. The appended
      // target gets refId 'A' for the first call, 'B' for the second, etc.
      addTarget(target):: self {
        local index = super._nextTarget,
        targets+: [target { refId: std.char(index + std.codepoint('A')) }],
        _nextTarget: index + 1,
      },
    },
}

View File

@ -0,0 +1,11 @@
{
  /**
   * Builds a raw-SQL query target object.
   *
   * @param rawSql SQL text, emitted as `rawSql`
   * @param datasource Emitted as `datasource` only when not null
   * @param format Emitted as `format` (default 'time_series')
   * @return The target object
   */
  target(
    rawSql,
    datasource=null,
    format='time_series',
  ):: {
    rawSql: rawSql,
    format: format,
    // Omitted entirely unless a datasource was given.
    [if datasource != null then 'datasource']: datasource,
  },
}

View File

@ -0,0 +1,108 @@
{
  /**
   * Returns a new table panel that can be added in a row.
   * It requires the table panel plugin in grafana, which is built-in.
   *
   * @param title The title of the table panel.
   * @param description Description of the panel
   * @param span Width of the panel
   * @param min_span Min span
   * @param height Height of the panel
   * @param datasource Datasource
   * @param styles Styles for the panel
   * @param transform allow table manipulation to present data as desired
   * @param transparent Boolean (default: false) If set to true the panel will be transparent
   * @param columns Columns for the panel
   * @param sort Sorting instruction for the panel
   * @param time_from Emitted as the panel's `timeFrom` field
   * @param time_shift Emitted as the panel's `timeShift` field
   * @return A json that represents a table panel
   */
  new(
    title,
    description=null,
    span=null,
    min_span=null,
    height=null,
    datasource=null,
    styles=[],
    transform=null,
    transparent=false,
    columns=[],
    sort=null,
    time_from=null,
    time_shift=null,
  ):: {
    type: 'table',
    title: title,
    [if span != null then 'span']: span,
    [if min_span != null then 'minSpan']: min_span,
    [if height != null then 'height']: height,
    datasource: datasource,
    targets: [
    ],
    styles: styles,
    columns: columns,
    timeFrom: time_from,
    timeShift: time_shift,
    [if sort != null then 'sort']: sort,
    [if description != null then 'description']: description,
    [if transform != null then 'transform']: transform,
    [if transparent == true then 'transparent']: transparent,

    // Hidden counter: how many refIds have been handed out so far.
    _nextTarget:: 0,

    // Appends a single target; shorthand for addTargets([target]).
    addTarget(target):: self.addTargets([target]),

    // Appends several targets, assigning consecutive refIds ('A', 'B', ...)
    // that continue from the targets already on the panel. A target that
    // carries its own nested `targets` array takes one refId for itself and
    // then one for each nested target, in order.
    addTargets(newtargets)::
      self {
        local base = super._nextTarget,
        // Number of refIds one target consumes: itself plus nested targets.
        local width(t) = 1 + (if 'targets' in t then std.length(t.targets) else 0),
        // offsets[i] is the number of refIds consumed by newtargets[0..i-1];
        // the final element is the total consumed by the whole batch.
        local offsets = std.foldl(
          function(acc, t) acc + [acc[std.length(acc) - 1] + width(t)],
          newtargets,
          [0]
        ),
        // refId for the k-th slot of this batch, relative to 'A' + base.
        local refIdAt(k) = std.char(std.codepoint('A') + base + k),
        local _targets = std.makeArray(
          std.length(newtargets), function(i)
            newtargets[i] {
              refId: refIdAt(offsets[i]),
              [if 'targets' in newtargets[i] then 'targets']: std.makeArray(
                std.length(newtargets[i].targets), function(j)
                  newtargets[i].targets[j] { refId: refIdAt(offsets[i] + 1 + j) }
              ),
            }
        ),
        _nextTarget: base + offsets[std.length(newtargets)],
        targets+::: _targets,
      },

    // Adds a column for `field`: appends `style` (with its `pattern` set to
    // the field name) to `styles` and a {text, value} entry to `columns`.
    addColumn(field, style):: self {
      local style_ = style { pattern: field },
      local column_ = { text: field, value: field },
      styles+: [style_],
      columns+: [column_],
    },

    // Appends a style of type 'hidden' whose alias and pattern are `field`.
    hideColumn(field):: self {
      styles+: [{
        alias: field,
        pattern: field,
        type: 'hidden',
      }],
    },
  },
}

View File

@ -0,0 +1,134 @@
{
  /**
   * Returns a new template variable of type 'query'.
   *
   * @param name Variable name
   * @param datasource Emitted as `datasource`
   * @param query Emitted as `query`
   * @param label Emitted as `label`
   * @param allValues Emitted as `allValue`
   * @param tagValuesQuery Emitted as `tagValuesQuery`
   * @param current Passed through `current()`
   * @param hide '' | 'label' | anything else, see `hide()`
   * @param regex Emitted as `regex`
   * @param refresh 'never' | 'load' | 'time' or a raw value, see `refresh()`
   * @param includeAll Emitted as `includeAll`
   * @param multi Emitted as `multi`
   * @param sort Emitted as `sort`
   */
  new(
    name,
    datasource,
    query,
    label=null,
    allValues=null,
    tagValuesQuery='',
    current=null,
    hide='',
    regex='',
    refresh='never',
    includeAll=false,
    multi=false,
    sort=0,
  )::
    {
      allValue: allValues,
      current: $.current(current),
      datasource: datasource,
      includeAll: includeAll,
      hide: $.hide(hide),
      label: label,
      multi: multi,
      name: name,
      options: [],
      query: query,
      refresh: $.refresh(refresh),
      regex: regex,
      sort: sort,
      tagValuesQuery: tagValuesQuery,
      tags: [],
      tagsQuery: '',
      type: 'query',
      useTags: false,
    },

  /**
   * Returns a new template variable of type 'interval'.
   *
   * `query` is a comma-separated list. Entries equal to 'auto' are removed
   * from the emitted `query`, and `auto` is true when at least one such
   * entry was present.
   *
   * @param name Variable name
   * @param query Comma-separated list of intervals, optionally with 'auto'
   * @param current Passed through `current()`
   * @param hide '' | 'label' | anything else, see `hide()`
   * @param label Emitted as `label`
   * @param auto_count Emitted as `auto_count`
   * @param auto_min Emitted as `auto_min`
   */
  interval(
    name,
    query,
    current,
    hide='',
    label=null,
    auto_count=300,
    auto_min='10s',
  )::
    {
      current: $.current(current),
      // Use the shared helper, as the other constructors do, instead of
      // repeating the mapping inline.
      hide: $.hide(hide),
      label: label,
      name: name,
      query: std.join(',', std.filter($.filterAuto, std.split(query, ','))),
      refresh: 2,
      type: 'interval',
      auto: std.count(std.split(query, ','), 'auto') > 0,
      auto_count: auto_count,
      auto_min: auto_min,
    },

  // Maps the symbolic hide setting to a number: '' gives 0, 'label' gives 1,
  // and any other value gives 2.
  hide(hide)::
    if hide == '' then 0 else if hide == 'label' then 1 else 2,

  // Builds the `current` object. A null input yields an empty object.
  // Otherwise `text` is the input and `value` is the input with 'auto'
  // replaced by '$__auto_interval' and 'all' replaced by '$__all'.
  current(current):: {
    [if current != null then 'text']: current,
    [if current != null then 'value']: if current == 'auto' then
      '$__auto_interval'
    else if current == 'all' then
      '$__all'
    else
      current,
  },

  /**
   * Returns a new template variable of type 'datasource'.
   *
   * @param name Variable name
   * @param query Emitted as `query`
   * @param current Passed through `current()`
   * @param hide '' | 'label' | anything else, see `hide()`
   * @param label Emitted as `label`
   * @param regex Emitted as `regex`
   * @param refresh 'never' | 'load' | 'time' or a raw value, see `refresh()`
   */
  datasource(
    name,
    query,
    current,
    hide='',
    label=null,
    regex='',
    refresh='load',
  ):: {
    current: $.current(current),
    hide: $.hide(hide),
    label: label,
    name: name,
    options: [],
    query: query,
    refresh: $.refresh(refresh),
    regex: regex,
    type: 'datasource',
  },

  // Maps the symbolic refresh setting to a number: 'never' gives 0, 'load'
  // gives 1 and 'time' gives 2. Any other value is returned unchanged.
  refresh(refresh):: if refresh == 'never'
  then
    0
  else if refresh == 'load'
  then
    1
  else if refresh == 'time'
  then
    2
  else
    refresh,

  // Predicate used by interval() to drop 'auto' entries from the query list.
  filterAuto(str):: str != 'auto',

  /**
   * Returns a new template variable of type 'custom'.
   *
   * `query` is a comma-separated list of values; one option is emitted per
   * entry. `valuelabels` maps a value to its display text; values without
   * an entry use the value itself as text.
   *
   * @param name Variable name
   * @param query Comma-separated list of option values
   * @param current Currently selected value
   * @param refresh 'never' | 'load' | 'time' or a raw value, see `refresh()`
   * @param label Emitted as `label`
   * @param valuelabels Object mapping option value to display text
   * @param multi Emitted as `multi`
   * @param allValues Emitted as `allValue`
   * @param includeAll Emitted as `includeAll`
   * @param hide '' | 'label' | anything else, see `hide()`
   */
  custom(
    name,
    query,
    current,
    refresh='never',
    label='',
    valuelabels={},
    multi=false,
    allValues=null,
    includeAll=false,
    hide='',
  )::
    {
      allValue: allValues,
      current: {
        value: current,
        text: if current in valuelabels then valuelabels[current] else current,
      },
      options: std.map(
        function(i)
          {
            text: if i in valuelabels then valuelabels[i] else i,
            value: i,
          }, std.split(query, ',')
      ),
      hide: $.hide(hide),
      includeAll: includeAll,
      label: label,
      refresh: $.refresh(refresh),
      multi: multi,
      name: name,
      query: query,
      type: 'custom',
    },
}

View File

@ -0,0 +1,21 @@
{
  /**
   * Returns a new text panel object.
   *
   * @param title Panel title (default '')
   * @param span Emitted as `span` only when not null
   * @param mode Emitted as `mode` (default 'markdown')
   * @param content Emitted as `content` (default '')
   * @param transparent Emitted as `transparent` only when not null
   * @param description Emitted as `description` only when not null
   * @param datasource Emitted as `datasource`, including when null
   * @return The panel object
   */
  new(
    title='',
    span=null,
    mode='markdown',
    content='',
    transparent=null,
    description=null,
    datasource=null,
  )::
    {
      // Always present.
      type: 'text',
      title: title,
      mode: mode,
      content: content,
      datasource: datasource,

      // Present only when the caller supplied a value.
      [if description != null then 'description']: description,
      [if span != null then 'span']: span,
      [if transparent != null then 'transparent']: transparent,
    },
}

View File

@ -0,0 +1,30 @@
{
  /**
   * Returns a new timepicker object.
   *
   * @param refresh_intervals List emitted as `refresh_intervals`
   * @param time_options List emitted as `time_options`
   * @return An object holding exactly those two fields
   */
  new(
    refresh_intervals=['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
    time_options=['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'],
  ):: {
    time_options: time_options,
    refresh_intervals: refresh_intervals,
  },
}

View File

@ -0,0 +1,5 @@
# Marks this as the top-most EditorConfig file for the tree.
root = true
# Jsonnet sources are indented with two spaces.
[*.jsonnet]
indent_size = 2
indent_style = space

41
monitoring/vendor/ksonnet/.gitignore vendored Normal file
View File

@ -0,0 +1,41 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# External packages folder
vendor/
tmp/
# Project-specific working space
/charts/
# Stray files that are unwanted in this fork
.DS_Store
/ksonnet-gen/ksonnet-gen
.vscode

Some files were not shown because too many files have changed in this diff Show More