add missing monitoring files

Tobias Brunner 2020-08-19 20:51:06 +02:00
parent 0108ac6084
commit 71c9a980f6
56 changed files with 9672 additions and 0 deletions


@ -0,0 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
name: prometheus-adapter
name: prometheus-adapter
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
name: prometheus-adapter


@ -0,0 +1,212 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.2.4
creationTimestamp: null
name: probes.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
kind: Probe
listKind: ProbeList
plural: probes
singular: probe
scope: Namespaced
versions:
- name: v1
schema:
openAPIV3Schema:
description: Probe defines monitoring for a set of static targets or ingresses.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired Ingress selection for target discovery
by Prometheus.
properties:
interval:
description: Interval at which targets are probed using the configured
prober. If not specified Prometheus' global scrape interval is used.
type: string
jobName:
description: The job name assigned to scraped metrics by default.
type: string
module:
description: 'The module to use for probing, specifying how to probe
the target. An example of configuring modules in the blackbox exporter:
https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
type: string
prober:
description: Specification for the prober to use for probing targets.
The prober.URL parameter is required. Targets cannot be probed if
left empty.
properties:
path:
description: Path to collect metrics from. Defaults to `/probe`.
type: string
scheme:
description: HTTP scheme to use for scraping. Defaults to `http`.
type: string
url:
description: Mandatory URL of the prober.
type: string
required:
- url
type: object
scrapeTimeout:
description: Timeout for scraping metrics from the Prometheus exporter.
type: string
targets:
description: Targets defines a set of static and/or dynamically discovered
targets to be probed using the prober.
properties:
ingress:
description: Ingress defines the set of dynamically discovered
ingress objects whose hosts are considered for probing.
properties:
namespaceSelector:
description: Select Ingress objects by namespace.
properties:
any:
description: Boolean describing whether all namespaces
are selected in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
relabelingConfigs:
description: 'RelabelConfigs to apply to samples before ingestion.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
selector:
description: Select Ingress objects by labels.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty.
This array is replaced during a strategic merge
patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
type: object
staticConfig:
description: 'StaticConfig defines static targets which are considered
for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
properties:
labels:
additionalProperties:
type: string
description: Labels assigned to all metrics scraped from the
targets.
type: object
static:
description: Targets is a list of URLs to probe using the
configured prober.
items:
type: string
type: array
type: object
type: object
type: object
required:
- spec
type: object
served: true
storage: true
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []
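
For reference, a Probe object conforming to the schema above could look like the following jsonnet sketch (resource name, prober address, and target URL are placeholders, not part of this commit):

{
  probeExample: {
    apiVersion: 'monitoring.coreos.com/v1',
    kind: 'Probe',
    metadata: {
      name: 'example-static-probe',  // hypothetical name
      namespace: 'monitoring',
    },
    spec: {
      jobName: 'blackbox',
      interval: '60s',
      module: 'http_2xx',  // must exist in the blackbox exporter's config
      prober: {
        url: 'blackbox-exporter.monitoring.svc:19115',  // placeholder prober address
      },
      targets: {
        staticConfig: {
          static: ['https://example.com'],
          labels: { environment: 'example' },
        },
      },
    },
  },
}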


@ -0,0 +1,47 @@
{
/**
* Create a [bar gauge panel](https://grafana.com/docs/grafana/latest/panels/visualizations/bar-gauge-panel/),
*
* @name barGaugePanel.new
*
* @param title Panel title.
* @param description Panel description.
* @param datasource Panel datasource.
* @param unit The unit of the data.
* @param thresholds An array of threshold values.
*
* @method addTarget(target) Adds a target object.
* @method addTargets(targets) Adds an array of targets.
*/
new(
title,
description=null,
datasource=null,
unit=null,
thresholds=[],
):: {
type: 'bargauge',
title: title,
[if description != null then 'description']: description,
datasource: datasource,
targets: [
],
fieldConfig: {
defaults: {
unit: unit,
thresholds: {
mode: 'absolute',
steps: thresholds,
},
},
},
_nextTarget:: 0,
addTarget(target):: self {
// automatically assign an incrementing refId to added targets.
local nextTarget = super._nextTarget,
_nextTarget: nextTarget + 1,
targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
},
addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
},
}
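
A minimal usage sketch for this panel helper, assuming it is exposed as barGaugePanel by the surrounding Grafonnet bundle (the import path and the query are illustrative, not part of this commit):

// Build a bar gauge panel and attach one Prometheus target; addTarget
// assigns the refId ('A', 'B', ...) automatically as implemented above.
local grafana = import 'grafonnet/grafana.libsonnet';  // illustrative import path

grafana.barGaugePanel.new(
  'Request rate',
  description='Per-namespace HTTP request rate',
  datasource='prometheus',
  unit='reqps',
  thresholds=[
    { color: 'green', value: null },
    { color: 'red', value: 100 },
  ],
).addTarget({ expr: 'sum(rate(http_requests_total[5m])) by (namespace)' })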


@ -0,0 +1,2 @@
jsonnetfile.lock.json
vendor/


@ -0,0 +1,155 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
alertmanager: 'v0.21.0',
},
imageRepos+:: {
alertmanager: 'quay.io/prometheus/alertmanager',
},
alertmanager+:: {
name: 'main',
config: {
global: {
resolve_timeout: '5m',
},
inhibit_rules: [{
source_match: {
severity: 'critical',
},
target_match_re: {
severity: 'warning|info',
},
equal: ['namespace', 'alertname'],
}, {
source_match: {
severity: 'warning',
},
target_match_re: {
severity: 'info',
},
equal: ['namespace', 'alertname'],
}],
route: {
group_by: ['namespace'],
group_wait: '30s',
group_interval: '5m',
repeat_interval: '12h',
receiver: 'Default',
routes: [
{
receiver: 'Watchdog',
match: {
alertname: 'Watchdog',
},
},
{
receiver: 'Critical',
match: {
severity: 'critical',
},
},
],
},
receivers: [
{
name: 'Default',
},
{
name: 'Watchdog',
},
{
name: 'Critical',
},
],
},
replicas: 3,
},
},
alertmanager+:: {
secret:
local secret = k.core.v1.secret;
if std.type($._config.alertmanager.config) == 'object' then
secret.new('alertmanager-' + $._config.alertmanager.name, {})
.withStringData({ 'alertmanager.yaml': std.manifestYamlDoc($._config.alertmanager.config) }) +
secret.mixin.metadata.withNamespace($._config.namespace)
else
secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64($._config.alertmanager.config) }) +
secret.mixin.metadata.withNamespace($._config.namespace),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('alertmanager-' + $._config.alertmanager.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local alertmanagerPort = servicePort.newNamed('web', 9093, 'web');
service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'alertmanager',
namespace: $._config.namespace,
labels: {
'k8s-app': 'alertmanager',
},
},
spec: {
selector: {
matchLabels: {
alertmanager: $._config.alertmanager.name,
},
},
endpoints: [
{
port: 'web',
interval: '30s',
},
],
},
},
alertmanager:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Alertmanager',
metadata: {
name: $._config.alertmanager.name,
namespace: $._config.namespace,
labels: {
alertmanager: $._config.alertmanager.name,
},
},
spec: {
replicas: $._config.alertmanager.replicas,
version: $._config.versions.alertmanager,
image: $._config.imageRepos.alertmanager + ':' + $._config.versions.alertmanager,
nodeSelector: { 'kubernetes.io/os': 'linux' },
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
securityContext: {
runAsUser: 1000,
runAsNonRoot: true,
fsGroup: 2000,
},
},
},
},
}
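
Because the secret above renders $._config.alertmanager.config with std.manifestYamlDoc when it is a jsonnet object and base64-encodes it verbatim when it is a string, the configuration can also be supplied as a pre-rendered YAML file; a sketch, with illustrative import path and file name:

// Override the default jsonnet config object with a raw YAML string;
// the secret logic above then base64-encodes it as-is.
(import 'kube-prometheus/kube-prometheus.libsonnet') +  // illustrative import path
{
  _config+:: {
    alertmanager+: {
      config: importstr 'alertmanager-config.yaml',  // any external Alertmanager config file
    },
  },
}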


@ -0,0 +1,57 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'alertmanager.rules',
rules: [
{
alert: 'AlertmanagerConfigInconsistent',
annotations: {
message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
},
expr: |||
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
{
alert: 'AlertmanagerFailedReload',
annotations: {
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
},
expr: |||
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'AlertmanagerMembersInconsistent',
annotations: {
message: 'Alertmanager has not found all other members of the cluster.',
},
expr: |||
alertmanager_cluster_members{%(alertmanagerSelector)s}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
},
],
},
],
},
}


@ -0,0 +1,4 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') +
(import 'node.libsonnet') +
(import 'prometheus-operator.libsonnet')


@ -0,0 +1,38 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'general.rules',
rules: [
{
alert: 'TargetDown',
annotations: {
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.',
},
expr: '100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10',
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'Watchdog',
annotations: {
message: |||
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
|||,
},
expr: 'vector(1)',
labels: {
severity: 'none',
},
},
],
},
],
},
}


@ -0,0 +1,24 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'node-network',
rules: [
{
alert: 'NodeNetworkInterfaceFlapping',
annotations: {
message: 'Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
},
expr: |||
changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
||| % $._config,
'for': '2m',
labels: {
severity: 'warning',
},
},
],
},
],
},
}


@ -0,0 +1,63 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'prometheus-operator',
rules: [
{
alert: 'PrometheusOperatorListErrors',
expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
},
'for': '15m',
},
{
alert: 'PrometheusOperatorWatchErrors',
expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
},
'for': '15m',
},
{
alert: 'PrometheusOperatorReconcileErrors',
expr: |||
rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
},
'for': '10m',
},
{
alert: 'PrometheusOperatorNodeLookupErrors',
expr: |||
rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
},
'for': '10m',
},
],
},
],
},
}


@ -0,0 +1,157 @@
# TODO(metalmatze): This file is temporarily saved here for later reference
# until we find out how to integrate the tests into our jsonnet stack.
rule_files:
- rules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
alert_rule_test:
- eval_time: 5m
alertname: AlertmanagerMembersInconsistent
- eval_time: 11m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- interval: 1m
input_series:
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
alert_rule_test:
- eval_time: 5m
alertname: AlertmanagerMembersInconsistent
- eval_time: 11m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
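
These are promtool-style rule unit tests; assuming the alerting rules referenced as rules.yaml are rendered next to this file, they could be run with Prometheus' promtool, for example:

promtool test rules tests.yaml   # "tests.yaml" stands in for this file's name, which the diff does not show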


@ -0,0 +1,50 @@
[
// Drop all kubelet metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)',
action: 'drop',
},
// Drop all scheduler metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)',
action: 'drop',
},
// Drop all apiserver metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)',
action: 'drop',
},
// Drop all docker metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)',
action: 'drop',
},
// Drop all reflector metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)',
action: 'drop',
},
// Drop all etcd metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)',
action: 'drop',
},
// Drop all transformation metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'transformation_(transformation_latencies_microseconds|failures_total)',
action: 'drop',
},
// Drop all other metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: '(admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)',
action: 'drop',
},
]


@ -0,0 +1,89 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.41"
},
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master",
"name": "ksonnet"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "release-2.20",
"name": "prometheus"
}
],
"legacyImports": true
}


@ -0,0 +1,118 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
versions+:: {
clusterVerticalAutoscaler: "v0.8.1"
},
imageRepos+:: {
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
},
kubeStateMetrics+:: {
stepCPU: '1m',
stepMemory: '2Mi',
},
},
ksmAutoscaler+:: {
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local rulesType = clusterRole.rulesType;
local rules = [
rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'nodes',
]) +
rulesType.withVerbs(['list', 'watch']),
];
clusterRole.new() +
clusterRole.mixin.metadata.withName('ksm-autoscaler') +
clusterRole.withRules(rules),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]),
roleBinding:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
role:
local role = k.rbac.v1.role;
local rulesType = role.rulesType;
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local rules = [extensionsRule, appsRule];
role.new() +
role.mixin.metadata.withName('ksm-autoscaler') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRules(rules),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('ksm-autoscaler') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
deployment:
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local podLabels = { app: 'ksm-autoscaler' };
local kubeStateMetricsAutoscaler =
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) +
container.withArgs([
'/cpvpa',
'--target=deployment/kube-state-metrics',
'--namespace=' + $._config.namespace,
'--logtostderr=true',
'--poll-period-seconds=10',
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}'
]) +
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'});
local c = [kubeStateMetricsAutoscaler];
deployment.new('ksm-autoscaler', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'),
},
}


@ -0,0 +1,20 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
prometheus+:: {
clusterRole+: {
rules+:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local rule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
[rule]
},
}
}


@ -0,0 +1,41 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local statefulSet = k.apps.v1.statefulSet;
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{
local antiaffinity(key, values, namespace) = {
affinity: {
podAntiAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [
affinity.new() +
affinity.withWeight(100) +
affinity.mixin.podAffinityTerm.withNamespaces(namespace) +
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
matchExpression.new() +
matchExpression.withKey(key) +
matchExpression.withOperator('In') +
matchExpression.withValues(values),
]),
],
},
},
},
alertmanager+:: {
alertmanager+: {
spec+:
antiaffinity('alertmanager', [$._config.alertmanager.name], $._config.namespace),
},
},
prometheus+: {
local p = self,
prometheus+: {
spec+:
antiaffinity('prometheus', [p.name], p.namespace),
},
},
}


@ -0,0 +1,23 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
},
}


@ -0,0 +1,20 @@
local l = import 'lib/lib.libsonnet';
// withImageRepository is a mixin that replaces all image prefixes with the given repository. e.g.
// quay.io/coreos/addon-resizer -> $repository/addon-resizer
// grafana/grafana -> $repository/grafana
local withImageRepository(repository) = {
local oldRepos = super._config.imageRepos,
local substituteRepository(image, repository) =
if repository == null then image else repository + '/' + l.imageName(image),
_config+:: {
imageRepos:: {
[field]: substituteRepository(oldRepos[field], repository),
for field in std.objectFields(oldRepos)
}
},
};
{
withImageRepository:: withImageRepository,
}
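
A usage sketch for this mixin (import paths are illustrative, not part of this commit); it must be applied on top of the base stack so that super._config.imageRepos is already populated:

// Rewrite every image repository to an internal registry.
local kp = import 'kube-prometheus/kube-prometheus.libsonnet';                  // illustrative path
local mixins = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet'; // illustrative path for this file

kp + mixins.withImageRepository('internal-registry.example.com/monitoring')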


@ -0,0 +1,197 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
// Custom metrics API allows the HPA v2 to scale based on arbitrary metrics.
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
{
_config+:: {
prometheusAdapter+:: {
// Rules for custom-metrics
config+:: {
rules+: [
{
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
seriesFilters: [],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
},
},
name: {
matches: '^container_(.*)_seconds_total$',
as: ""
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
},
{
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
seriesFilters: [
{ isNot: '^container_.*_seconds_total$' },
],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
},
},
name: {
matches: '^container_(.*)_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[1m])) by (<<.GroupBy>>)'
},
{
seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}',
seriesFilters: [
{ isNot: '^container_.*_total$' },
],
resources: {
overrides: {
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
}
},
},
name: {
matches: '^container_(.*)$',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>)'
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [
{ isNot: '.*_total$' },
],
resources: {
template: '<<.Resource>>'
},
name: {
matches: '',
as: ''
},
metricsQuery: 'sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [
{ isNot: '.*_seconds_total' },
],
resources: {
template: '<<.Resource>>'
},
name: {
matches: '^(.*)_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
},
{
seriesQuery: '{namespace!="",__name__!~"^container_.*"}',
seriesFilters: [],
resources: {
template: '<<.Resource>>'
},
name: {
matches: '^(.*)_seconds_total$',
as: ''
},
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
}
],
},
},
},
prometheusAdapter+:: {
customMetricsApiService: {
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
name: 'v1beta1.custom.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
},
group: 'custom.metrics.k8s.io',
version: 'v1beta1',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 100,
},
},
customMetricsApiServiceV1Beta2: {
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
name: 'v1beta2.custom.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
},
group: 'custom.metrics.k8s.io',
version: 'v1beta2',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 200,
},
},
customMetricsClusterRoleServerResources:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['custom.metrics.k8s.io']) +
policyRule.withResources(['*']) +
policyRule.withVerbs(['*']);
clusterRole.new() +
clusterRole.mixin.metadata.withName('custom-metrics-server-resources') +
clusterRole.withRules(rules),
customMetricsClusterRoleBindingServerResources:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
customMetricsClusterRoleBindingHPA:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('hpa-controller-custom-metrics') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('custom-metrics-server-resources') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
kind: 'ServiceAccount',
name: 'horizontal-pod-autoscaler',
namespace: 'kube-system',
}]),
}
}
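
Once these APIServices are registered, an HPA can scale on the metrics exposed by the adapter rules above; a minimal sketch in the same jsonnet-object style (deployment and metric names are placeholders, not part of this commit):

{
  exampleHpa: {
    apiVersion: 'autoscaling/v2beta2',
    kind: 'HorizontalPodAutoscaler',
    metadata: { name: 'example-app', namespace: 'default' },
    spec: {
      scaleTargetRef: { apiVersion: 'apps/v1', kind: 'Deployment', name: 'example-app' },
      minReplicas: 1,
      maxReplicas: 10,
      metrics: [
        {
          type: 'Pods',
          pods: {
            metric: { name: 'http_requests' },  // hypothetical metric served via the adapter rules above
            target: { type: 'AverageValue', averageValue: '500m' },
          },
        },
      ],
    },
  },
}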


@ -0,0 +1,82 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
eks: {
minimumAvailableIPs: 10,
minimumAvailableIPsTime: '10m'
}
},
prometheus+: {
serviceMonitorCoreDNS+: {
spec+: {
endpoints: [
{
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
interval: "15s",
targetPort: 9153
}
]
},
},
AwsEksCniMetricService:
service.new('aws-node', { 'k8s-app': 'aws-node' }, servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorAwsEksCNI:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'awsekscni',
namespace: $._config.namespace,
labels: {
'k8s-app': 'eks-cni',
},
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'aws-node',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'cni-metrics-port',
interval: '30s',
path: '/metrics',
},
],
},
},
},
prometheusRules+: {
groups+: [
{
name: 'kube-prometheus-eks.rules',
rules: [
{
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $._config.eks.minimumAvailableIPs,
labels: {
severity: 'critical',
},
annotations: {
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
},
'for': $._config.eks.minimumAvailableIPsTime,
alert: 'EksAvailableIPs'
},
],
},
],
},
}


@ -0,0 +1,46 @@
{
prometheus+:: {
serviceMonitorKubelet+:
{
spec+: {
endpoints: [
{
port: 'http-metrics',
scheme: 'http',
interval: '30s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
},
{
port: 'http-metrics',
scheme: 'http',
path: '/metrics/cadvisor',
interval: '30s',
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
],
},
],
},
},
},
}


@ -0,0 +1,13 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+:: {
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
},
}


@ -0,0 +1,23 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+:: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
kubeDnsPrometheusDiscoveryService:
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
service.mixin.spec.withClusterIp('None'),
},
}


@ -0,0 +1,8 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet');
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }


@ -0,0 +1,18 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
},
}


@ -0,0 +1,18 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
},
}


@ -0,0 +1,40 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'component': 'kube-controller-manager' }, servicePort.newNamed('https-metrics', 10257, 10257)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
service.mixin.spec.withClusterIp('None'),
kubeSchedulerPrometheusDiscoveryService:
service.new('kube-scheduler-prometheus-discovery', { 'component': 'kube-scheduler' }, servicePort.newNamed('https-metrics', 10259, 10259)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorKubeScheduler+: {
spec+: {
selector+: {
matchLabels: {
'k8s-app': 'kube-scheduler',
},
},
},
},
serviceMonitorKubeControllerManager+: {
spec+: {
selector+: {
matchLabels: {
'k8s-app': 'kube-controller-manager',
},
},
},
},
},
}


@ -0,0 +1,35 @@
// On managed Kubernetes clusters some of the control plane components are not exposed to customers.
// Disable scrape jobs, service monitors, and alert groups for these components by overwriting 'kube-prometheus.libsonnet' defaults
{
_config+:: {
// This snippet walks the original object (super.jobs, set as temp var j) and creates a replacement jobs object
// excluding any members of the set specified (eg: controller and scheduler).
local j = super.jobs,
jobs: {
[k]: j[k]
for k in std.objectFields(j)
if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler'])
},
// Skip alerting rules too
prometheus+:: {
rules+:: {
local g = super.groups,
groups: [
h
for h in g
if !std.setMember(h.name, ['kubernetes-system-controller-manager', 'kubernetes-system-scheduler'])
],
},
},
},
// Same as above, but for ServiceMonitors
local p = super.prometheus,
prometheus: {
[q]: p[q]
for q in std.objectFields(p)
if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler'])
},
}
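
The field-comprehension filter used above generalizes; a tiny standalone sketch of the same pattern (names are made up for illustration):

// Rebuild an object without a given set of keys, using std.objectFields
// and std.setMember exactly as in the snippet above.
local exclude(obj, names) = {
  [k]: obj[k]
  for k in std.objectFields(obj)
  if !std.setMember(k, names)
};

exclude({ a: 1, b: 2, c: 3 }, ['b'])  // evaluates to { a: 1, c: 3 }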


@ -0,0 +1,21 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) +
service.mixin.spec.withType('NodePort'),
},
alertmanager+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) +
service.mixin.spec.withType('NodePort'),
},
grafana+: {
service+:
service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) +
service.mixin.spec.withType('NodePort'),
},
}


@ -0,0 +1,99 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
(import 'etcd-mixin/mixin.libsonnet') + {
_config+:: {
etcd: {
ips: [],
clientCA: null,
clientKey: null,
clientCert: null,
serverName: null,
insecureSkipVerify: null,
},
},
prometheus+:: {
serviceEtcd:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local etcdServicePort = servicePort.newNamed('metrics', 2379, 2379);
service.new('etcd', null, etcdServicePort) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
service.mixin.spec.withClusterIp('None'),
endpointsEtcd:
local endpoints = k.core.v1.endpoints;
local endpointSubset = endpoints.subsetsType;
local endpointPort = endpointSubset.portsType;
local etcdPort = endpointPort.new() +
endpointPort.withName('metrics') +
endpointPort.withPort(2379) +
endpointPort.withProtocol('TCP');
local subset = endpointSubset.new() +
endpointSubset.withAddresses([
{ ip: etcdIP }
for etcdIP in $._config.etcd.ips
]) +
endpointSubset.withPorts(etcdPort);
endpoints.new() +
endpoints.mixin.metadata.withName('etcd') +
endpoints.mixin.metadata.withNamespace('kube-system') +
endpoints.mixin.metadata.withLabels({ 'k8s-app': 'etcd' }) +
endpoints.withSubsets(subset),
serviceMonitorEtcd:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'etcd',
namespace: 'kube-system',
labels: {
'k8s-app': 'etcd',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'metrics',
interval: '30s',
scheme: 'https',
// Prometheus Operator (and Prometheus) allow us to specify a tlsConfig. This is required as your etcd metrics endpoints are most likely secured.
tlsConfig: {
caFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client-ca.crt',
keyFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.key',
certFile: '/etc/prometheus/secrets/kube-etcd-client-certs/etcd-client.crt',
[if $._config.etcd.serverName != null then 'serverName']: $._config.etcd.serverName,
[if $._config.etcd.insecureSkipVerify != null then 'insecureSkipVerify']: $._config.etcd.insecureSkipVerify,
},
},
],
selector: {
matchLabels: {
'k8s-app': 'etcd',
},
},
},
},
secretEtcdCerts:
// Prometheus Operator allows us to mount secrets in the pod. By loading the secrets as files, they can be made available inside the Prometheus pod.
local secret = k.core.v1.secret;
secret.new('kube-etcd-client-certs', {
'etcd-client-ca.crt': std.base64($._config.etcd.clientCA),
'etcd-client.key': std.base64($._config.etcd.clientKey),
'etcd-client.crt': std.base64($._config.etcd.clientCert),
}) +
secret.mixin.metadata.withNamespace($._config.namespace),
prometheus+:
{
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
spec+: {
secrets+: [$.prometheus.secretEtcdCerts.metadata.name],
},
},
},
}


@ -0,0 +1,35 @@
// Strips spec.containers[].limits for certain containers
// https://github.com/prometheus-operator/kube-prometheus/issues/72
{
_config+:: {
resources+:: {
'addon-resizer'+: {
limits: {},
},
'kube-rbac-proxy'+: {
limits: {},
},
'kube-state-metrics'+: {
limits: {},
},
'node-exporter'+: {
limits: {},
},
},
},
prometheusOperator+: {
deployment+: {
spec+: {
template+: {
spec+: {
local addArgs(c) =
if c.name == 'prometheus-operator'
then c + {args+: ['--config-reloader-cpu=0']}
else c,
containers: std.map(addArgs, super.containers),
},
},
},
},
},
}


@ -0,0 +1,76 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
versions+:: {
thanos: 'v0.14.0',
},
imageRepos+:: {
thanos: 'quay.io/thanos/thanos',
},
thanos+:: {
objectStorageConfig: {
key: 'thanos.yaml', // Name of the key inside the secret that holds the config file
name: 'thanos-objectstorage', // This is the name of your Kubernetes secret with the config
},
},
},
prometheus+:: {
// Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier
service+: {
spec+: {
ports+: [
servicePort.newNamed('grpc', 10901, 10901),
],
},
},
// Create a new service that exposes both sidecar's HTTP metrics port and gRPC StoreAPI
serviceThanosSidecar:
local thanosGrpcSidecarPort = servicePort.newNamed('grpc', 10901, 10901);
local thanosHttpSidecarPort = servicePort.newNamed('http', 10902, 10902);
service.new('prometheus-' + $._config.prometheus.name + '-thanos-sidecar', { app: 'prometheus', prometheus: $._config.prometheus.name }) +
service.mixin.spec.withPorts([thanosGrpcSidecarPort, thanosHttpSidecarPort]) +
service.mixin.spec.withClusterIp('None') +
service.mixin.metadata.withLabels({'prometheus': $._config.prometheus.name, 'app': 'thanos-sidecar'}) +
service.mixin.metadata.withNamespace($._config.namespace),
prometheus+: {
spec+: {
thanos+: {
version: $._config.versions.thanos,
image: $._config.imageRepos.thanos + ':' + $._config.versions.thanos,
objectStorageConfig: $._config.thanos.objectStorageConfig,
},
},
},
serviceMonitorThanosSidecar:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'thanos-sidecar',
namespace: $._config.namespace,
labels: {
'k8s-app': 'prometheus',
},
},
spec: {
// Use the service's app label (thanos-sidecar) as the value for the job label.
jobLabel: 'app',
selector: {
matchLabels: {
prometheus: $._config.prometheus.name,
app: 'thanos-sidecar',
},
},
endpoints: [
{
port: 'http',
interval: '30s',
},
],
},
},
},
}
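
With the gRPC StoreAPI published through serviceThanosSidecar above, an external Thanos Querier could discover the sidecars via a DNS SRV lookup on the headless service, roughly like this (the exact service and namespace names depend on $._config and are illustrative):

--store=dnssrv+_grpc._tcp.prometheus-k8s-thanos-sidecar.monitoring.svc.cluster.local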


@ -0,0 +1,189 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
serviceWeaveNet:
service.new('weave-net', { 'name': 'weave-net' }, servicePort.newNamed('weave-net-metrics', 6782, 6782)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'weave-net' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorWeaveNet: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'weave-net',
labels: {
'k8s-app': 'weave-net',
},
namespace: 'monitoring',
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'weave-net-metrics',
path: '/metrics',
interval: '15s',
},
],
namespaceSelector: {
matchNames: [
'kube-system',
],
},
selector: {
matchLabels: {
'k8s-app': 'weave-net',
},
},
},
},
},
prometheusRules+: {
groups+: [
{
name: 'weave-net',
rules: [
{
alert: 'WeaveNetIPAMSplitBrain',
expr: 'max(weave_ipam_unreachable_percentage) - min(weave_ipam_unreachable_percentage) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'The percentage of all IP addresses owned by unreachable peers is not the same on every node.',
description: 'actionable: The Weave Net network has a split-brain problem. Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMUnreachable',
expr: 'weave_ipam_unreachable_percentage > 25',
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Percentage of all IP addresses owned by unreachable peers is above threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMPendingAllocates',
expr: 'sum(weave_ipam_pending_allocates) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of pending allocates is above the threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetIPAMPendingClaims',
expr: 'sum(weave_ipam_pending_claims) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of pending claims is above the threshold.',
description: 'actionable: Please find the problem and fix it.',
},
},
{
alert: 'WeaveNetFastDPFlowsLow',
expr: 'sum(weave_flows) < 15000',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Number of FastDP flows is below the threshold.',
description: 'actionable: Please find the reason for FastDP flows to go below the threshold and fix it.',
},
},
{
alert: 'WeaveNetFastDPFlowsOff',
expr: 'sum(weave_flows == bool 0) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'The number of FastDP flows is zero.',
description: 'actionable: Please find the reason for FastDP flows to be off and fix it.',
},
},
{
alert: 'WeaveNetHighConnectionTerminationRate',
expr: 'rate(weave_connection_terminations_total[5m]) > 0.1',
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are getting terminated.',
description: 'actionable: Please find the reason for the high connection termination rate and fix it.',
},
},
{
alert: 'WeaveNetConnectionsConnecting',
expr: 'sum(weave_connections{state="connecting"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in connecting state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
{
alert: 'WeaveNetConnectionsRetrying',
expr: 'sum(weave_connections{state="retrying"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in retrying state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
{
alert: 'WeaveNetConnectionsPending',
expr: 'sum(weave_connections{state="pending"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in pending state.',
description: 'actionable: Please find the reason for this and fix it.',
},
},
{
alert: 'WeaveNetConnectionsFailed',
expr: 'sum(weave_connections{state="failed"}) > 0',
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'A lot of connections are in failed state.',
description: 'actionable: Please find the reason and fix it.',
},
},
],
},
],
},
grafanaDashboards+:: {
'weave-net.json': (import 'grafana-weave-net.json'),
'weave-net-cluster.json': (import 'grafana-weave-net-cluster.json'),
},
}

View file

@ -0,0 +1,196 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local configMapList = k3.core.v1.configMapList;
(import 'grafana/grafana.libsonnet') +
(import 'kube-state-metrics/kube-state-metrics.libsonnet') +
(import 'kube-state-metrics-mixin/mixin.libsonnet') +
(import 'node-exporter/node-exporter.libsonnet') +
(import 'node-mixin/mixin.libsonnet') +
(import 'alertmanager/alertmanager.libsonnet') +
(import 'prometheus-operator/prometheus-operator.libsonnet') +
(import 'prometheus/prometheus.libsonnet') +
(import 'prometheus-adapter/prometheus-adapter.libsonnet') +
(import 'kubernetes-mixin/mixin.libsonnet') +
(import 'prometheus/mixin.libsonnet') +
(import 'alerts/alerts.libsonnet') +
(import 'rules/rules.libsonnet') + {
kubePrometheus+:: {
namespace: k.core.v1.namespace.new($._config.namespace),
},
prometheusOperator+:: {
service+: {
spec+: {
ports: [
{
name: 'https',
port: 8443,
targetPort: 'https',
},
],
},
},
serviceMonitor+: {
spec+: {
endpoints: [
{
port: 'https',
scheme: 'https',
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
},
},
],
},
},
clusterRole+: {
rules+: [
{
apiGroups: ['authentication.k8s.io'],
resources: ['tokenreviews'],
verbs: ['create'],
},
{
apiGroups: ['authorization.k8s.io'],
resources: ['subjectaccessreviews'],
verbs: ['create'],
},
],
},
} +
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy',
securePortName: 'https',
securePort: 8443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8080/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin,
grafana+:: {
dashboardDefinitions: configMapList.new(super.dashboardDefinitions),
serviceMonitor: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'grafana',
namespace: $._config.namespace,
},
spec: {
selector: {
matchLabels: {
app: 'grafana',
},
},
endpoints: [
{
port: 'http',
interval: '15s',
},
],
},
},
},
} + {
_config+:: {
namespace: 'default',
versions+:: {
grafana: '7.1.0',
},
tlsCipherSuites: [
'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721
'TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256', // required by h2: http://golang.org/cl/30721
// 'TLS_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566
// 'TLS_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661
// 'TLS_RSA_WITH_AES_128_CBC_SHA', // disabled by h2
// 'TLS_RSA_WITH_AES_256_CBC_SHA', // disabled by h2
// 'TLS_RSA_WITH_AES_128_CBC_SHA256', // insecure: https://access.redhat.com/security/cve/cve-2013-0169
// 'TLS_RSA_WITH_AES_128_GCM_SHA256', // disabled by h2
// 'TLS_RSA_WITH_AES_256_GCM_SHA384', // disabled by h2
// 'TLS_ECDHE_ECDSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566
// 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA', // disabled by h2
// 'TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA', // disabled by h2
// 'TLS_ECDHE_RSA_WITH_RC4_128_SHA', // insecure: https://access.redhat.com/security/cve/cve-2013-2566
// 'TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA', // insecure: https://access.redhat.com/articles/2548661
// 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA', // disabled by h2
// 'TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA', // disabled by h2
// 'TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256', // insecure: https://access.redhat.com/security/cve/cve-2013-0169
// 'TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256', // insecure: https://access.redhat.com/security/cve/cve-2013-0169
// 'disabled by h2' refers to: https://github.com/golang/net/blob/e514e69ffb8bc3c76a71ae40de0118d794855992/http2/ciphers.go
'TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384',
'TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384',
'TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305',
'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305',
],
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
fsSpaceFillingUpCriticalThreshold: 15,
notKubeDnsSelector: 'job!="kube-dns"',
kubeSchedulerSelector: 'job="kube-scheduler"',
kubeControllerManagerSelector: 'job="kube-controller-manager"',
kubeApiserverSelector: 'job="apiserver"',
coreDNSSelector: 'job="kube-dns"',
podLabel: 'pod',
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"',
jobs: {
Kubelet: $._config.kubeletSelector,
KubeScheduler: $._config.kubeSchedulerSelector,
KubeControllerManager: $._config.kubeControllerManagerSelector,
KubeAPI: $._config.kubeApiserverSelector,
KubeStateMetrics: $._config.kubeStateMetricsSelector,
NodeExporter: $._config.nodeExporterSelector,
Alertmanager: $._config.alertmanagerSelector,
Prometheus: $._config.prometheusSelector,
PrometheusOperator: $._config.prometheusOperatorSelector,
CoreDNS: $._config.coreDNSSelector,
},
resources+:: {
'addon-resizer': {
requests: { cpu: '10m', memory: '30Mi' },
limits: { cpu: '50m', memory: '30Mi' },
},
'kube-rbac-proxy': {
requests: { cpu: '10m', memory: '20Mi' },
limits: { cpu: '20m', memory: '40Mi' },
},
'kube-state-metrics': {
requests: { cpu: '100m', memory: '150Mi' },
limits: { cpu: '100m', memory: '150Mi' },
},
'node-exporter': {
requests: { cpu: '102m', memory: '180Mi' },
limits: { cpu: '250m', memory: '180Mi' },
},
},
prometheus+:: {
rules: $.prometheusRules + $.prometheusAlerts,
},
grafana+:: {
dashboards: $.grafanaDashboards,
},
},
}
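
For reference, this composite library is usually consumed from a small build file that layers a local _config on top and turns the hidden component objects into named manifests. A minimal sketch, assuming the file above is vendored as 'kube-prometheus/kube-prometheus.libsonnet' (the build-file name and output prefixes are illustrative):

// example.jsonnet (illustrative)
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') + {
    _config+:: {
      namespace: 'monitoring',
    },
  };

// One manifest per hidden field; `jsonnet -J vendor -m manifests example.jsonnet`
// writes each key below out as a separate file.
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }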

View file

@ -0,0 +1,91 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
{
local krp = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
specMixin:: {
local sm = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
spec+: {
template+: {
spec+: {
containers+: [
container.new(krp.config.kubeRbacProxy.name, krp.config.kubeRbacProxy.image) +
container.mixin.securityContext.withRunAsUser(65534) +
container.withArgs([
'--logtostderr',
'--secure-listen-address=' + krp.config.kubeRbacProxy.secureListenAddress,
'--tls-cipher-suites=' + std.join(',', krp.config.kubeRbacProxy.tlsCipherSuites),
'--upstream=' + krp.config.kubeRbacProxy.upstream,
]) +
container.withPorts(containerPort.newNamed(krp.config.kubeRbacProxy.securePort, krp.config.kubeRbacProxy.securePortName)),
],
},
},
},
},
deploymentMixin:: {
local dm = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
deployment+: krp.specMixin {
config+:: {
kubeRbacProxy+: dm.config.kubeRbacProxy,
},
},
},
statefulSetMixin:: {
local sm = self,
config+:: {
kubeRbacProxy: {
image: error 'must provide image',
name: error 'must provide name',
securePortName: error 'must provide securePortName',
securePort: error 'must provide securePort',
secureListenAddress: error 'must provide secureListenAddress',
upstream: error 'must provide upstream',
tlsCipherSuites: error 'must provide tlsCipherSuites',
},
},
statefulSet+: krp.specMixin {
config+:: {
kubeRbacProxy+: sm.config.kubeRbacProxy,
},
},
},
}
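
The deploymentMixin flavour of this file is applied to the prometheus-operator and kube-state-metrics deployments elsewhere in this commit; statefulSetMixin works the same way for StatefulSet-based workloads. A minimal, self-contained sketch (the StatefulSet stub and all values below are illustrative, not taken from this commit):

local rbacProxy = (import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
  config+:: {
    kubeRbacProxy: {
      image: 'quay.io/coreos/kube-rbac-proxy:v0.4.1',  // illustrative tag
      name: 'kube-rbac-proxy',
      securePortName: 'https',
      securePort: 8443,
      secureListenAddress: ':%d' % self.securePort,
      upstream: 'http://127.0.0.1:8080/',
      tlsCipherSuites: [],  // a real build would pass $._config.tlsCipherSuites here
    },
  },
};

// Mixing statefulSetMixin into an object that carries a `statefulSet` field
// appends the proxy container to spec.template.spec.containers.
local example = {
  statefulSet: {
    apiVersion: 'apps/v1',
    kind: 'StatefulSet',
    metadata: { name: 'example', namespace: 'monitoring' },
    spec: { template: { spec: { containers: [] } } },
  },
} + rbacProxy.statefulSetMixin;

example.statefulSet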

View file

@ -0,0 +1,129 @@
{
_config+:: {
versions+:: {
kubeStateMetrics: '1.9.7',
},
imageRepos+:: {
kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
},
kubeStateMetrics+:: {
scrapeInterval: '30s',
scrapeTimeout: '30s',
},
},
kubeStateMetrics+:: (import 'kube-state-metrics/kube-state-metrics.libsonnet') +
{
local ksm = self,
name:: 'kube-state-metrics',
namespace:: $._config.namespace,
version:: $._config.versions.kubeStateMetrics,
image:: $._config.imageRepos.kubeStateMetrics + ':v' + $._config.versions.kubeStateMetrics,
service+: {
spec+: {
ports: [
{
name: 'https-main',
port: 8443,
targetPort: 'https-main',
},
{
name: 'https-self',
port: 9443,
targetPort: 'https-self',
},
],
},
},
deployment+: {
spec+: {
template+: {
spec+: {
containers: std.map(function(c) c {
ports:: null,
livenessProbe:: null,
readinessProbe:: null,
args: ['--host=127.0.0.1', '--port=8081', '--telemetry-host=127.0.0.1', '--telemetry-port=8082'],
}, super.containers),
},
},
},
},
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-state-metrics',
namespace: $._config.namespace,
labels: {
'app.kubernetes.io/name': 'kube-state-metrics',
'app.kubernetes.io/version': ksm.version,
},
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: {
'app.kubernetes.io/name': 'kube-state-metrics',
},
},
endpoints: [
{
port: 'https-main',
scheme: 'https',
interval: $._config.kubeStateMetrics.scrapeInterval,
scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
regex: '(pod|service|endpoint|namespace)',
action: 'labeldrop',
},
],
tlsConfig: {
insecureSkipVerify: true,
},
},
{
port: 'https-self',
scheme: 'https',
interval: $._config.kubeStateMetrics.scrapeInterval,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
tlsConfig: {
insecureSkipVerify: true,
},
},
],
},
},
} +
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-main',
securePortName: 'https-main',
securePort: 8443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8081/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin +
((import 'kube-prometheus/kube-rbac-proxy/container.libsonnet') {
config+:: {
kubeRbacProxy: {
local cfg = self,
image: $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy,
name: 'kube-rbac-proxy-self',
securePortName: 'https-self',
securePort: 9443,
secureListenAddress: ':%d' % self.securePort,
upstream: 'http://127.0.0.1:8082/',
tlsCipherSuites: $._config.tlsCipherSuites,
},
},
}).deploymentMixin,
}

View file

@ -0,0 +1,21 @@
// imageName extracts the image name from a fully qualified image string, e.g.
// quay.io/coreos/addon-resizer -> addon-resizer
// grafana/grafana -> grafana
local imageName(image) =
local parts = std.split(image, '/');
local len = std.length(parts);
if len == 3 then
// registry.com/org/image
parts[2]
else if len == 2 then
// org/image
parts[1]
else if len == 1 then
// image, e.g. busybox
parts[0]
else
error 'unknown image format: ' + image;
{
imageName:: imageName,
}
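
A few illustrative evaluations of the helper, assuming it is saved as image.libsonnet as the accompanying (import 'image.libsonnet') suggests; note that a reference carrying a tag keeps its tag, since only '/' is split on:

local lib = import 'image.libsonnet';

// Each field below evaluates to true.
{
  threeParts: lib.imageName('quay.io/coreos/addon-resizer') == 'addon-resizer',
  twoParts: lib.imageName('grafana/grafana') == 'grafana',
  onePart: lib.imageName('busybox') == 'busybox',
}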

View file

@ -0,0 +1 @@
(import 'image.libsonnet')

View file

@ -0,0 +1,205 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
nodeExporter: 'v0.18.1',
kubeRbacProxy: 'v0.4.1',
},
imageRepos+:: {
nodeExporter: 'quay.io/prometheus/node-exporter',
kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
},
nodeExporter+:: {
listenAddress: '127.0.0.1',
port: 9100,
labels: {
'app.kubernetes.io/name': 'node-exporter',
'app.kubernetes.io/version': $._config.versions.nodeExporter,
},
selectorLabels: {
[labelName]: $._config.nodeExporter.labels[labelName]
for labelName in std.objectFields($._config.nodeExporter.labels)
if !std.setMember(labelName, ['app.kubernetes.io/version'])
},
},
},
nodeExporter+:: {
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('node-exporter') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('node-exporter') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'node-exporter', namespace: $._config.namespace }]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local authenticationRole = policyRule.new() +
policyRule.withApiGroups(['authentication.k8s.io']) +
policyRule.withResources([
'tokenreviews',
]) +
policyRule.withVerbs(['create']);
local authorizationRole = policyRule.new() +
policyRule.withApiGroups(['authorization.k8s.io']) +
policyRule.withResources([
'subjectaccessreviews',
]) +
policyRule.withVerbs(['create']);
local rules = [authenticationRole, authorizationRole];
clusterRole.new() +
clusterRole.mixin.metadata.withName('node-exporter') +
clusterRole.withRules(rules),
daemonset:
local daemonset = k.apps.v1.daemonSet;
local container = daemonset.mixin.spec.template.spec.containersType;
local volume = daemonset.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
local containerVolumeMount = container.volumeMountsType;
local podSelector = daemonset.mixin.spec.template.spec.selectorType;
local toleration = daemonset.mixin.spec.template.spec.tolerationsType;
local containerEnv = container.envType;
local podLabels = $._config.nodeExporter.labels;
local selectorLabels = $._config.nodeExporter.selectorLabels;
local existsToleration = toleration.new() +
toleration.withOperator('Exists');
local procVolumeName = 'proc';
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc');
local sysVolumeName = 'sys';
local sysVolume = volume.fromHostPath(sysVolumeName, '/sys');
local sysVolumeMount = containerVolumeMount.new(sysVolumeName, '/host/sys');
local rootVolumeName = 'root';
local rootVolume = volume.fromHostPath(rootVolumeName, '/');
local rootVolumeMount = containerVolumeMount.new(rootVolumeName, '/host/root').
withMountPropagation('HostToContainer').
withReadOnly(true);
local nodeExporter =
container.new('node-exporter', $._config.imageRepos.nodeExporter + ':' + $._config.versions.nodeExporter) +
container.withArgs([
'--web.listen-address=' + std.join(':', [$._config.nodeExporter.listenAddress, std.toString($._config.nodeExporter.port)]),
'--path.procfs=/host/proc',
'--path.sysfs=/host/sys',
'--path.rootfs=/host/root',
'--no-collector.wifi',
'--no-collector.hwmon',
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
]) +
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
container.mixin.resources.withRequests($._config.resources['node-exporter'].requests) +
container.mixin.resources.withLimits($._config.resources['node-exporter'].limits);
local ip = containerEnv.fromFieldPath('IP', 'status.podIP');
local proxy =
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
container.withArgs([
'--logtostderr',
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
]) +
// Keep `hostPort` here, rather than in the node-exporter container,
// because Kubernetes mandates that if you define a `hostPort` then
// `containerPort` must match. In our case, we are splitting the
// host port and container port between the two containers.
// We keep the port specification here so that the named port
// used by the service is tied to the proxy container. We *could*
// forgo declaring the host port; however, it is important to declare
// it so that the scheduler can decide whether the pod is schedulable.
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) +
container.withEnv([ip]);
local c = [nodeExporter, proxy];
daemonset.new() +
daemonset.mixin.metadata.withName('node-exporter') +
daemonset.mixin.metadata.withNamespace($._config.namespace) +
daemonset.mixin.metadata.withLabels(podLabels) +
daemonset.mixin.spec.selector.withMatchLabels(selectorLabels) +
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withContainers(c) +
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
daemonset.mixin.spec.template.spec.withServiceAccountName('node-exporter') +
daemonset.mixin.spec.template.spec.withHostPid(true) +
daemonset.mixin.spec.template.spec.withHostNetwork(true),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('node-exporter') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'node-exporter',
namespace: $._config.namespace,
labels: $._config.nodeExporter.labels,
},
spec: {
jobLabel: 'app.kubernetes.io/name',
selector: {
matchLabels: $._config.nodeExporter.selectorLabels,
},
endpoints: [
{
port: 'https',
scheme: 'https',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
action: 'replace',
regex: '(.*)',
replacement: '$1',
sourceLabels: ['__meta_kubernetes_pod_node_name'],
targetLabel: 'instance',
},
],
tlsConfig: {
insecureSkipVerify: true,
},
},
],
},
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local nodeExporterPort = servicePort.newNamed('https', $._config.nodeExporter.port, 'https');
service.new('node-exporter', $._config.nodeExporter.selectorLabels, nodeExporterPort) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels($._config.nodeExporter.labels) +
service.mixin.spec.withClusterIp('None'),
},
}

View file

@ -0,0 +1,261 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheusAdapter: 'v0.7.0',
},
imageRepos+:: {
prometheusAdapter: 'directxman12/k8s-prometheus-adapter',
},
prometheusAdapter+:: {
name: 'prometheus-adapter',
labels: { name: $._config.prometheusAdapter.name },
prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/',
config: {
resourceRules: {
cpu: {
containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)',
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: {
overrides: {
node: {
resource: 'node'
},
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
},
},
},
containerLabel: 'container'
},
memory: {
containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)',
nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)',
resources: {
overrides: {
instance: {
resource: 'node'
},
namespace: {
resource: 'namespace'
},
pod: {
resource: 'pod'
},
},
},
containerLabel: 'container'
},
window: '5m',
},
}
},
},
prometheusAdapter+:: {
apiService:
{
apiVersion: 'apiregistration.k8s.io/v1',
kind: 'APIService',
metadata: {
name: 'v1beta1.metrics.k8s.io',
},
spec: {
service: {
name: $.prometheusAdapter.service.metadata.name,
namespace: $._config.namespace,
},
group: 'metrics.k8s.io',
version: 'v1beta1',
insecureSkipTLSVerify: true,
groupPriorityMinimum: 100,
versionPriority: 100,
},
},
configMap:
local configmap = k.core.v1.configMap;
configmap.new('adapter-config', { 'config.yaml': std.manifestYamlDoc($._config.prometheusAdapter.config) }) +
configmap.mixin.metadata.withNamespace($._config.namespace),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: $._config.prometheusAdapter.name,
namespace: $._config.namespace,
labels: $._config.prometheusAdapter.labels,
},
spec: {
selector: {
matchLabels: $._config.prometheusAdapter.labels,
},
endpoints: [
{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
},
],
},
},
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
service.new(
$._config.prometheusAdapter.name,
$._config.prometheusAdapter.labels,
servicePort.newNamed('https', 443, 6443),
) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels($._config.prometheusAdapter.labels),
deployment:
local deployment = k.apps.v1.deployment;
local volume = deployment.mixin.spec.template.spec.volumesType;
local container = deployment.mixin.spec.template.spec.containersType;
local containerVolumeMount = container.volumeMountsType;
local c =
container.new($._config.prometheusAdapter.name, $._config.imageRepos.prometheusAdapter + ':' + $._config.versions.prometheusAdapter) +
container.withArgs([
'--cert-dir=/var/run/serving-cert',
'--config=/etc/adapter/config.yaml',
'--logtostderr=true',
'--metrics-relist-interval=1m',
'--prometheus-url=' + $._config.prometheusAdapter.prometheusURL,
'--secure-port=6443',
]) +
container.withPorts([{ containerPort: 6443 }]) +
container.withVolumeMounts([
containerVolumeMount.new('tmpfs', '/tmp'),
containerVolumeMount.new('volume-serving-cert', '/var/run/serving-cert'),
containerVolumeMount.new('config', '/etc/adapter'),
],);
deployment.new($._config.prometheusAdapter.name, 1, c, $._config.prometheusAdapter.labels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) +
deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) +
deployment.mixin.spec.template.spec.withVolumes([
volume.fromEmptyDir(name='tmpfs'),
volume.fromEmptyDir(name='volume-serving-cert'),
{ name: 'config', configMap: { name: 'adapter-config' } },
]),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new($._config.prometheusAdapter.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources(['nodes', 'namespaces', 'pods', 'services']) +
policyRule.withVerbs(['get', 'list', 'watch']);
clusterRole.new() +
clusterRole.mixin.metadata.withName($._config.prometheusAdapter.name) +
clusterRole.withRules(rules),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName($._config.prometheusAdapter.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName($.prometheusAdapter.clusterRole.metadata.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
clusterRoleBindingDelegator:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('resource-metrics:system:auth-delegator') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('system:auth-delegator') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
clusterRoleServerResources:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['*']) +
policyRule.withVerbs(['*']);
clusterRole.new() +
clusterRole.mixin.metadata.withName('resource-metrics-server-resources') +
clusterRole.withRules(rules),
clusterRoleAggregatedMetricsReader:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['pods', 'nodes']) +
policyRule.withVerbs(['get', 'list', 'watch']);
clusterRole.new() +
clusterRole.mixin.metadata.withName('system:aggregated-metrics-reader') +
clusterRole.mixin.metadata.withLabels({
'rbac.authorization.k8s.io/aggregate-to-admin': 'true',
'rbac.authorization.k8s.io/aggregate-to-edit': 'true',
'rbac.authorization.k8s.io/aggregate-to-view': 'true',
}) +
clusterRole.withRules(rules),
roleBindingAuthReader:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('resource-metrics-auth-reader') +
roleBinding.mixin.metadata.withNamespace('kube-system') +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('extension-apiserver-authentication-reader') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{
kind: 'ServiceAccount',
name: $.prometheusAdapter.serviceAccount.metadata.name,
namespace: $._config.namespace,
}]),
},
}
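
Because the adapter's rules live entirely under $._config.prometheusAdapter.config, a consuming file can adjust single fields without copying the whole block; the adapter-config ConfigMap is then regenerated from the merged object. A short sketch under that assumption (URL and window values are placeholders):

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') + {
    _config+:: {
      namespace: 'monitoring',
      prometheusAdapter+:: {
        // Point the adapter at a different Prometheus and widen the rate window.
        prometheusURL: 'http://prometheus-k8s.monitoring.svc.cluster.local:9090/',  // placeholder
        config+: {
          window: '10m',
        },
      },
    },
  };

// The adapter-config ConfigMap now carries the merged configuration.
kp.prometheusAdapter.configMap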

View file

@ -0,0 +1,493 @@
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheus: 'v2.20.0',
},
imageRepos+:: {
prometheus: 'quay.io/prometheus/prometheus',
},
alertmanager+:: {
name: 'main',
},
prometheus+:: {
name: 'k8s',
replicas: 2,
rules: {},
namespaces: ['default', 'kube-system', $._config.namespace],
},
},
prometheus+:: {
local p = self,
name:: $._config.prometheus.name,
namespace:: $._config.namespace,
roleBindingNamespaces:: $._config.prometheus.namespaces,
replicas:: $._config.prometheus.replicas,
prometheusRules:: $._config.prometheus.rules,
alertmanagerName:: $.alertmanager.service.metadata.name,
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('prometheus-' + p.name) +
serviceAccount.mixin.metadata.withNamespace(p.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local prometheusPort = servicePort.newNamed('web', 9090, 'web');
service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace(p.namespace) +
service.mixin.metadata.withLabels({ prometheus: p.name }),
rules:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: {
prometheus: p.name,
role: 'alert-rules',
},
name: 'prometheus-' + p.name + '-rules',
namespace: p.namespace,
},
spec: {
groups: p.prometheusRules.groups,
},
},
roleBindingSpecificNamespaces:
local roleBinding = k.rbac.v1.roleBinding;
local newSpecificRoleBinding(namespace) =
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
roleBinding.mixin.metadata.withNamespace(namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
local roleBindingList = k3.rbac.v1.roleBindingList;
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local nodeMetricsRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources(['nodes/metrics']) +
policyRule.withVerbs(['get']);
local metricsRule = policyRule.new() +
policyRule.withNonResourceUrls('/metrics') +
policyRule.withVerbs(['get']);
local rules = [nodeMetricsRule, metricsRule];
clusterRole.new() +
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
clusterRole.withRules(rules),
roleConfig:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local configmapRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'configmaps',
]) +
policyRule.withVerbs(['get']);
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
role.mixin.metadata.withNamespace(p.namespace) +
role.withRules(configmapRule),
roleBindingConfig:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.metadata.withNamespace(p.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
roleSpecificNamespaces:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local coreRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local newSpecificRole(namespace) =
role.new() +
role.mixin.metadata.withName('prometheus-' + p.name) +
role.mixin.metadata.withNamespace(namespace) +
role.withRules(coreRule);
local roleList = k3.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
prometheus:
local statefulSet = k.apps.v1.statefulSet;
local container = statefulSet.mixin.spec.template.spec.containersType;
local resourceRequirements = container.mixin.resourcesType;
local selector = statefulSet.mixin.spec.selectorType;
local resources =
resourceRequirements.new() +
resourceRequirements.withRequests({ memory: '400Mi' });
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
metadata: {
name: p.name,
namespace: p.namespace,
labels: {
prometheus: p.name,
},
},
spec: {
replicas: p.replicas,
version: $._config.versions.prometheus,
image: $._config.imageRepos.prometheus + ':' + $._config.versions.prometheus,
serviceAccountName: 'prometheus-' + p.name,
serviceMonitorSelector: {},
podMonitorSelector: {},
serviceMonitorNamespaceSelector: {},
podMonitorNamespaceSelector: {},
nodeSelector: { 'kubernetes.io/os': 'linux' },
ruleSelector: selector.withMatchLabels({
role: 'alert-rules',
prometheus: p.name,
}),
resources: resources,
alerting: {
alertmanagers: [
{
namespace: p.namespace,
name: p.alertmanagerName,
port: 'web',
},
],
},
securityContext: {
runAsUser: 1000,
runAsNonRoot: true,
fsGroup: 2000,
},
},
},
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus',
namespace: p.namespace,
labels: {
'k8s-app': 'prometheus',
},
},
spec: {
selector: {
matchLabels: {
prometheus: p.name,
},
},
endpoints: [
{
port: 'web',
interval: '30s',
},
],
},
},
serviceMonitorKubeScheduler:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: p.namespace,
labels: {
'k8s-app': 'kube-scheduler',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'https-metrics',
interval: '30s',
scheme: "https",
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
tlsConfig: {
insecureSkipVerify: true
}
},
],
selector: {
matchLabels: {
'k8s-app': 'kube-scheduler',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
},
serviceMonitorKubelet:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: p.namespace,
labels: {
'k8s-app': 'kubelet',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'https-metrics',
scheme: 'https',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/cadvisor',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
],
},
{
port: 'https-metrics',
scheme: 'https',
path: '/metrics/probes',
interval: '30s',
honorLabels: true,
tlsConfig: {
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
],
selector: {
matchLabels: {
'k8s-app': 'kubelet',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
},
serviceMonitorKubeControllerManager:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-controller-manager',
namespace: p.namespace,
labels: {
'k8s-app': 'kube-controller-manager',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'https-metrics',
interval: '30s',
scheme: "https",
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
tlsConfig: {
insecureSkipVerify: true
},
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
action: 'drop',
},
],
},
],
selector: {
matchLabels: {
'k8s-app': 'kube-controller-manager',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
},
},
serviceMonitorApiserver:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: p.namespace,
labels: {
'k8s-app': 'apiserver',
},
},
spec: {
jobLabel: 'component',
selector: {
matchLabels: {
component: 'apiserver',
provider: 'kubernetes',
},
},
namespaceSelector: {
matchNames: [
'default',
],
},
endpoints: [
{
port: 'https',
interval: '30s',
scheme: 'https',
tlsConfig: {
caFile: '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|server).*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_controller_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__'],
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
},
],
},
],
},
},
serviceMonitorCoreDNS:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: p.namespace,
labels: {
'k8s-app': 'coredns',
},
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'kube-dns',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'metrics',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
},
],
},
},
},
}
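
One knob from the block above worth calling out: $._config.prometheus.namespaces feeds both roleSpecificNamespaces and roleBindingSpecificNamespaces, so extending that list is what grants Prometheus the RBAC to discover ServiceMonitors outside the defaults. A brief sketch (the extra namespace is illustrative):

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') + {
    _config+:: {
      namespace: 'monitoring',
      prometheus+:: {
        // Extend, rather than replace, the default list of watched namespaces.
        namespaces+: ['my-app'],  // illustrative namespace
      },
    },
  };

// Renders one Role/RoleBinding pair per namespace in the list.
{
  roles: kp.prometheus.roleSpecificNamespaces,
  roleBindings: kp.prometheus.roleBindingSpecificNamespaces,
}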

View file

@ -0,0 +1,19 @@
{
prometheusRules+:: {
groups+: [
{
name: 'kube-prometheus-general.rules',
rules: [
{
expr: 'count without(instance, pod, node) (up == 1)',
record: 'count:up1',
},
{
expr: 'count without(instance, pod, node) (up == 0)',
record: 'count:up0',
},
],
},
],
},
}

View file

@ -0,0 +1,35 @@
{
prometheusRules+:: {
groups+: [
{
name: 'kube-prometheus-node-recording.rules',
rules: [
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)',
record: 'instance:node_cpu:rate:sum',
},
{
expr: 'sum(rate(node_network_receive_bytes_total[3m])) BY (instance)',
record: 'instance:node_network_receive_bytes:rate:sum',
},
{
expr: 'sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)',
record: 'instance:node_network_transmit_bytes:rate:sum',
},
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
record: 'instance:node_cpu:ratio',
},
{
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))',
record: 'cluster:node_cpu:sum_rate5m',
},
{
expr: 'cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))',
record: 'cluster:node_cpu:ratio',
},
],
},
],
},
}

View file

@ -0,0 +1,2 @@
(import 'node-rules.libsonnet') +
(import 'general.libsonnet')

View file

@ -0,0 +1,2 @@
vendor/
jsonnetfile.lock.json

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,14 @@
{
"dependencies": [
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master"
}
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,206 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
prometheusOperator+:: {
deploymentSelectorLabels: {
'app.kubernetes.io/name': 'prometheus-operator',
'app.kubernetes.io/component': 'controller',
},
commonLabels:
$._config.prometheusOperator.deploymentSelectorLabels
{ 'app.kubernetes.io/version': $._config.versions.prometheusOperator },
},
versions+:: {
prometheusOperator: 'v0.41.1',
prometheusConfigReloader: self.prometheusOperator,
configmapReloader: 'v0.4.0',
},
imageRepos+:: {
prometheusOperator: 'quay.io/coreos/prometheus-operator',
configmapReloader: 'jimmidyson/configmap-reload',
prometheusConfigReloader: 'quay.io/coreos/prometheus-config-reloader',
},
},
prometheusOperator+:: {
local po = self,
namespace:: $._config.namespace,
commonLabels:: $._config.prometheusOperator.commonLabels,
deploymentSelectorLabels:: $._config.prometheusOperator.deploymentSelectorLabels,
image:: $._config.imageRepos.prometheusOperator,
version:: $._config.versions.prometheusOperator,
configReloaderImage:: $._config.imageRepos.configmapReloader,
configReloaderVersion:: $._config.versions.configmapReloader,
prometheusConfigReloaderImage:: $._config.imageRepos.prometheusConfigReloader,
prometheusConfigReloaderVersion:: $._config.versions.prometheusConfigReloader,
// Prefixing with 0 to ensure these manifests are listed and therefore created first.
'0alertmanagerCustomResourceDefinition': import 'alertmanager-crd.libsonnet',
'0prometheusCustomResourceDefinition': import 'prometheus-crd.libsonnet',
'0servicemonitorCustomResourceDefinition': import 'servicemonitor-crd.libsonnet',
'0podmonitorCustomResourceDefinition': import 'podmonitor-crd.libsonnet',
'0probeCustomResourceDefinition': import 'probe-crd.libsonnet',
'0prometheusruleCustomResourceDefinition': import 'prometheusrule-crd.libsonnet',
'0thanosrulerCustomResourceDefinition': import 'thanosruler-crd.libsonnet',
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withLabels(po.commonLabels) +
clusterRoleBinding.mixin.metadata.withName('prometheus-operator') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-operator') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-operator', namespace: po.namespace }]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
local monitoringRule = policyRule.new() +
policyRule.withApiGroups(['monitoring.coreos.com']) +
policyRule.withResources([
'alertmanagers',
'alertmanagers/finalizers',
'prometheuses',
'prometheuses/finalizers',
'thanosrulers',
'thanosrulers/finalizers',
'servicemonitors',
'podmonitors',
'probes',
'prometheusrules',
]) +
policyRule.withVerbs(['*']);
local appsRule = policyRule.new() +
policyRule.withApiGroups(['apps']) +
policyRule.withResources([
'statefulsets',
]) +
policyRule.withVerbs(['*']);
local coreRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'configmaps',
'secrets',
]) +
policyRule.withVerbs(['*']);
local podRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'pods',
]) +
policyRule.withVerbs(['list', 'delete']);
local routingRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'services/finalizers',
'endpoints',
]) +
policyRule.withVerbs(['get', 'create', 'update', 'delete']);
local nodeRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'nodes',
]) +
policyRule.withVerbs(['list', 'watch']);
local namespaceRule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'namespaces',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
local rules = [monitoringRule, appsRule, coreRule, podRule, routingRule, nodeRule, namespaceRule];
clusterRole.new() +
clusterRole.mixin.metadata.withLabels(po.commonLabels) +
clusterRole.mixin.metadata.withName('prometheus-operator') +
clusterRole.withRules(rules),
deployment:
local deployment = k.apps.v1.deployment;
local container = k.apps.v1.deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
local targetPort = 8080;
local operatorContainer =
container.new('prometheus-operator', po.image + ':' + po.version) +
container.withPorts(containerPort.newNamed(targetPort, 'http')) +
container.withArgs([
'--kubelet-service=kube-system/kubelet',
// Prometheus Operator is run with a read-only root file system. By
// default glog saves logfiles to /tmp. Make it log to stderr instead.
'--logtostderr=true',
'--config-reloader-image=' + po.configReloaderImage + ':' + po.configReloaderVersion,
'--prometheus-config-reloader=' + po.prometheusConfigReloaderImage + ':' + po.prometheusConfigReloaderVersion,
]) +
container.mixin.securityContext.withAllowPrivilegeEscalation(false) +
container.mixin.resources.withRequests({ cpu: '100m', memory: '100Mi' }) +
container.mixin.resources.withLimits({ cpu: '200m', memory: '200Mi' });
deployment.new('prometheus-operator', 1, operatorContainer, po.commonLabels) +
deployment.mixin.metadata.withNamespace(po.namespace) +
deployment.mixin.metadata.withLabels(po.commonLabels) +
deployment.mixin.spec.selector.withMatchLabels(po.deploymentSelectorLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('prometheus-operator'),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('prometheus-operator') +
serviceAccount.mixin.metadata.withLabels(po.commonLabels) +
serviceAccount.mixin.metadata.withNamespace(po.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local poServicePort = servicePort.newNamed('http', 8080, 'http');
service.new('prometheus-operator', po.deployment.spec.selector.matchLabels, [poServicePort]) +
service.mixin.metadata.withLabels(po.commonLabels) +
service.mixin.metadata.withNamespace(po.namespace) +
service.mixin.spec.withClusterIp('None'),
serviceMonitor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus-operator',
namespace: po.namespace,
labels: po.commonLabels,
},
spec: {
endpoints: [
{
port: 'http',
honorLabels: true,
},
],
selector: {
matchLabels: po.commonLabels,
},
},
},
},
}

View file

@ -0,0 +1 @@
{"apiVersion":"apiextensions.k8s.io/v1","kind":"CustomResourceDefinition","metadata":{"annotations":{"controller-gen.kubebuilder.io/version":"v0.2.4"},"creationTimestamp":null,"name":"prometheusrules.monitoring.coreos.com"},"spec":{"group":"monitoring.coreos.com","names":{"kind":"PrometheusRule","listKind":"PrometheusRuleList","plural":"prometheusrules","singular":"prometheusrule"},"scope":"Namespaced","versions":[{"name":"v1","schema":{"openAPIV3Schema":{"description":"PrometheusRule defines alerting rules for a Prometheus instance","properties":{"apiVersion":{"description":"APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources","type":"string"},"kind":{"description":"Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds","type":"string"},"metadata":{"type":"object"},"spec":{"description":"Specification of desired alerting rule definitions for Prometheus.","properties":{"groups":{"description":"Content of Prometheus rule file","items":{"description":"RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are 'warn' or 'abort'. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response","properties":{"interval":{"type":"string"},"name":{"type":"string"},"partial_response_strategy":{"type":"string"},"rules":{"items":{"description":"Rule describes an alerting or recording rule.","properties":{"alert":{"type":"string"},"annotations":{"additionalProperties":{"type":"string"},"type":"object"},"expr":{"anyOf":[{"type":"integer"},{"type":"string"}],"x-kubernetes-int-or-string":true},"for":{"type":"string"},"labels":{"additionalProperties":{"type":"string"},"type":"object"},"record":{"type":"string"}},"required":["expr"],"type":"object"},"type":"array"}},"required":["name","rules"],"type":"object"},"type":"array"}},"type":"object"}},"required":["spec"],"type":"object"}},"served":true,"storage":true}]},"status":{"acceptedNames":{"kind":"","plural":""},"conditions":[],"storedVersions":[]}}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long