// Prometheus recording rules for Windows nodes and for Windows containers/pods.
//
// Every expression that contains a `%(...)s` placeholder is formatted with
// `$._config`, which must provide:
//   - wmiExporterSelector:      label matchers selecting the Windows exporter series
//   - kubeStateMetricsSelector: label matchers selecting the kube-state-metrics series
//
// Naming convention used by the records below: a leading ':' marks a value
// aggregated over all matching series, a leading 'node:' marks a value kept
// per `instance` (or per series where no aggregation is applied).
{
  prometheusRules+:: {
    groups+: [
      {
        name: 'windows.node.rules',
        rules: [
          {
            // Number of Windows nodes: counts the uptime series matched by the selector.
            record: 'node:windows_node:sum',
            expr: |||
              count (
                windows_system_system_up_time{%(wmiExporterSelector)s}
              )
            ||| % $._config,
          },
          {
            // Number of CPUs per node: the inner sum collapses the series to one
            // per (instance, core); the outer count then counts cores per instance.
            record: 'node:windows_node_num_cpu:sum',
            expr: |||
              count by (instance) (sum by (instance, core) (
                windows_cpu_time_total{%(wmiExporterSelector)s}
              ))
            ||| % $._config,
          },
          {
            // CPU utilisation over all nodes: the fraction of CPU time that is
            // not idle, i.e. 1 minus the average idle rate over the last minute.
            record: ':windows_node_cpu_utilisation:avg1m',
            expr: |||
              1 - avg(rate(windows_cpu_time_total{%(wmiExporterSelector)s,mode="idle"}[1m]))
            ||| % $._config,
          },
          {
            // CPU utilisation per node: same computation as above, averaged per instance.
            record: 'node:windows_node_cpu_utilisation:avg1m',
            expr: |||
              1 - avg by (instance) (
                rate(windows_cpu_time_total{%(wmiExporterSelector)s,mode="idle"}[1m])
              )
            ||| % $._config,
          },
          {
            // Memory utilisation over all nodes: 1 - (available bytes / visible bytes),
            // with both sides summed over all matching series first.
            record: ':windows_node_memory_utilisation:',
            expr: |||
              1 -
              sum(windows_memory_available_bytes{%(wmiExporterSelector)s})
              /
              sum(windows_os_visible_memory_bytes{%(wmiExporterSelector)s})
            ||| % $._config,
          },
          // Add separate rules for Free & Total, so we can aggregate across clusters
          // in dashboards.
          {
            // Available plus cache bytes, summed over all matching series.
            record: ':windows_node_memory_MemFreeCached_bytes:sum',
            expr: |||
              sum(windows_memory_available_bytes{%(wmiExporterSelector)s} + windows_memory_cache_bytes{%(wmiExporterSelector)s})
            ||| % $._config,
          },
          {
            // Cached bytes per series: cache + modified page list + the three
            // standby cache lists. No aggregation is applied here.
            record: 'node:windows_node_memory_totalCached_bytes:sum',
            expr: |||
              (windows_memory_cache_bytes{%(wmiExporterSelector)s}
                + windows_memory_modified_page_list_bytes{%(wmiExporterSelector)s}
                + windows_memory_standby_cache_core_bytes{%(wmiExporterSelector)s}
                + windows_memory_standby_cache_normal_priority_bytes{%(wmiExporterSelector)s}
                + windows_memory_standby_cache_reserve_bytes{%(wmiExporterSelector)s})
            ||| % $._config,
          },
          {
            // Visible memory bytes, summed over all matching series.
            record: ':windows_node_memory_MemTotal_bytes:sum',
            expr: |||
              sum(windows_os_visible_memory_bytes{%(wmiExporterSelector)s})
            ||| % $._config,
          },
          {
            // Available memory per node.
            // SINCE 2018-02-08 (note carried over unchanged from the original file).
            record: 'node:windows_node_memory_bytes_available:sum',
            expr: |||
              sum by (instance) (
                (windows_memory_available_bytes{%(wmiExporterSelector)s})
              )
            ||| % $._config,
          },
          {
            // Total memory per node.
            record: 'node:windows_node_memory_bytes_total:sum',
            expr: |||
              sum by (instance) (
                windows_os_visible_memory_bytes{%(wmiExporterSelector)s}
              )
            ||| % $._config,
          },
          {
            // Used memory per node (total - available), divided by the total
            // memory summed over ALL nodes (scalar(sum(...))). Each node's value
            // is therefore its share of the overall memory, not its own
            // utilisation; the per-node utilisation is the
            // 'node:windows_node_memory_utilisation:' record below.
            record: 'node:windows_node_memory_utilisation:ratio',
            expr: |||
              (node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)
              /
              scalar(sum(node:windows_node_memory_bytes_total:sum))
            |||,
          },
          {
            // Memory utilisation per node: 1 - (available / total) for that node.
            // The expression has no placeholders, so the formatting with
            // $._config leaves the text unchanged.
            record: 'node:windows_node_memory_utilisation:',
            expr: |||
              1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
            ||| % $._config,
          },
          {
            // Instantaneous rate of swap page operations, per series (5m lookback).
            record: 'node:windows_node_memory_swap_io_pages:irate',
            expr: |||
              irate(windows_memory_swap_page_operations_total{%(wmiExporterSelector)s}[5m])
            ||| % $._config,
          },
          {
            // Disk utilisation over all nodes: irate() of the read plus write
            // seconds counters (seconds of I/O per second, 1m lookback),
            // averaged over all matching series.
            record: ':windows_node_disk_utilisation:avg_irate',
            expr: |||
              avg(irate(windows_logical_disk_read_seconds_total{%(wmiExporterSelector)s}[1m]) +
                  irate(windows_logical_disk_write_seconds_total{%(wmiExporterSelector)s}[1m])
                )
            ||| % $._config,
          },
          {
            // Disk utilisation per node: same computation as above, averaged per instance.
            record: 'node:windows_node_disk_utilisation:avg_irate',
            expr: |||
              avg by (instance) (
                (irate(windows_logical_disk_read_seconds_total{%(wmiExporterSelector)s}[1m]) +
                 irate(windows_logical_disk_write_seconds_total{%(wmiExporterSelector)s}[1m]))
              )
            ||| % $._config,
          },
          {
            // Used fraction of each volume: (size - free) / size, per (instance, volume).
            record: 'node:windows_node_filesystem_usage:',
            expr: |||
              max by (instance,volume)(
                (windows_logical_disk_size_bytes{%(wmiExporterSelector)s}
                - windows_logical_disk_free_bytes{%(wmiExporterSelector)s})
                / windows_logical_disk_size_bytes{%(wmiExporterSelector)s}
              )
            ||| % $._config,
          },
          {
            // Free fraction of each volume: free / size, per (instance, volume).
            record: 'node:windows_node_filesystem_avail:',
            expr: |||
              max by (instance, volume) (windows_logical_disk_free_bytes{%(wmiExporterSelector)s} / windows_logical_disk_size_bytes{%(wmiExporterSelector)s})
            ||| % $._config,
          },
          {
            // Network throughput over all nodes: summed irate() of the bytes counter.
            record: ':windows_node_net_utilisation:sum_irate',
            expr: |||
              sum(irate(windows_net_bytes_total{%(wmiExporterSelector)s}[1m]))
            ||| % $._config,
          },
          {
            // Network throughput per node.
            record: 'node:windows_node_net_utilisation:sum_irate',
            expr: |||
              sum by (instance) (
                (irate(windows_net_bytes_total{%(wmiExporterSelector)s}[1m]))
              )
            ||| % $._config,
          },
          {
            // Network saturation over all nodes: summed irate() of discarded
            // received packets plus discarded outbound packets.
            record: ':windows_node_net_saturation:sum_irate',
            expr: |||
              sum(irate(windows_net_packets_received_discarded{%(wmiExporterSelector)s}[1m])) +
              sum(irate(windows_net_packets_outbound_discarded{%(wmiExporterSelector)s}[1m]))
            ||| % $._config,
          },
          {
            // Network saturation per node.
            record: 'node:windows_node_net_saturation:sum_irate',
            expr: |||
              sum by (instance) (
                (irate(windows_net_packets_received_discarded{%(wmiExporterSelector)s}[1m]) +
                irate(windows_net_packets_outbound_discarded{%(wmiExporterSelector)s}[1m]))
              )
            ||| % $._config,
          },
        ],
      },
      {
        name: 'windows.pod.rules',
        // The first six rules share one shape: a Windows container metric is
        // joined on `container_id` with kube_pod_container_info (reduced with
        // max by (container, container_id, pod, namespace)), and group_left
        // copies the `container`, `pod` and `namespace` labels onto the result.
        rules: [
          {
            // NOTE(review): this record has the same name as the exporter metric
            // it reads, and the kube_pod_windows_* rules below select it by bare
            // name - confirm that matching both the raw and the recorded series
            // is intended.
            record: 'windows_container_available',
            expr: |||
              windows_container_available{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          {
            // Container CPU usage seconds counter with pod labels attached.
            record: 'windows_container_total_runtime',
            expr: |||
              windows_container_cpu_usage_seconds_total{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          {
            // Container commit bytes with pod labels attached.
            record: 'windows_container_memory_usage',
            expr: |||
              windows_container_memory_usage_commit_bytes{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          {
            // Container private working set bytes with pod labels attached.
            record: 'windows_container_private_working_set_usage',
            expr: |||
              windows_container_memory_usage_private_working_set_bytes{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          {
            // Container received-bytes counter with pod labels attached.
            record: 'windows_container_network_received_bytes_total',
            expr: |||
              windows_container_network_receive_bytes_total{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          {
            // Container transmitted-bytes counter with pod labels attached.
            record: 'windows_container_network_transmitted_bytes_total',
            expr: |||
              windows_container_network_transmit_bytes_total{%(wmiExporterSelector)s}
              * on(container_id) group_left(container, pod, namespace)
              max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace)
            ||| % $._config,
          },
          // The four kube_pod_windows_* rules below multiply a kube-state-metrics
          // resource metric by windows_container_available, matched on
          // (container, pod, namespace). Only series with a match on both sides
          // are kept, and the result is scaled by the value of
          // windows_container_available (presumably 1 - TODO confirm).
          {
            record: 'kube_pod_windows_container_resource_memory_request',
            expr: |||
              kube_pod_container_resource_requests_memory_bytes {%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_container_available)
            ||| % $._config,
          },
          {
            record: 'kube_pod_windows_container_resource_memory_limit',
            expr: |||
              kube_pod_container_resource_limits_memory_bytes {%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_container_available)
            ||| % $._config,
          },
          {
            record: 'kube_pod_windows_container_resource_cpu_cores_request',
            expr: |||
              kube_pod_container_resource_requests_cpu_cores {%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_container_available)
            ||| % $._config,
          },
          {
            record: 'kube_pod_windows_container_resource_cpu_cores_limit',
            expr: |||
              kube_pod_container_resource_limits_cpu_cores {%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_container_available)
            ||| % $._config,
          },
          {
            // CPU usage rate per (namespace, pod, container), computed over 5m
            // from the windows_container_total_runtime record defined above.
            // The expression has no placeholders, so the formatting with
            // $._config leaves the text unchanged.
            record: 'namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate',
            expr: |||
              sum by (namespace, pod, container) (
                rate(windows_container_total_runtime{}[5m])
              )
            ||| % $._config,
          },
        ],
      },
    ],
  },
}