This repository has been archived on 2023-04-02. You can view files and clone it, but cannot push or open issues or pull requests.
gitops-tbrnt/monitoring/vendor/github.com/etcd-io/etcd/contrib/mixin/mixin.libsonnet

1317 lines
41 KiB
Plaintext

{
_config+:: {
etcd_selector: 'job=~".*etcd.*"',
// etcd_instance_labels are the label names that are uniquely
// identifying an instance and need to be aggreated away for alerts
// that are about an etcd cluster as a whole. For example, if etcd
// instances are deployed on K8s, you will likely want to change
// this to 'instance, pod'.
etcd_instance_labels: 'instance',
// scrape_interval_seconds is the global scrape interval which can be
// used to dynamically adjust rate windows as a function of the interval.
scrape_interval_seconds: 30,
// Dashboard variable refresh option on Grafana (https://grafana.com/docs/grafana/latest/datasources/prometheus/).
// 0 : Never (Will never refresh the Dashboard variables values)
// 1 : On Dashboard Load (Will refresh Dashboards variables when dashboard are loaded)
// 2 : On Time Range Change (Will refresh Dashboards variables when time range will be changed)
dashboard_var_refresh: 2,
},
prometheusAlerts+:: {
groups+: [
{
name: 'etcd',
rules: [
{
alert: 'etcdMembersDown',
expr: |||
max without (endpoint) (
sum without (%(etcd_instance_labels)s) (up{%(etcd_selector)s} == bool 0)
or
count without (To) (
sum without (%(etcd_instance_labels)s) (rate(etcd_network_peer_sent_failures_total{%(etcd_selector)s}[%(network_failure_range)ss])) > 0.01
)
)
> 0
||| % {etcd_instance_labels: $._config.etcd_instance_labels, etcd_selector: $._config.etcd_selector, network_failure_range: $._config.scrape_interval_seconds*4},
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).',
summary: 'etcd cluster members are down.',
},
},
{
alert: 'etcdInsufficientMembers',
expr: |||
sum(up{%(etcd_selector)s} == bool 1) without (%(etcd_instance_labels)s) < ((count(up{%(etcd_selector)s}) without (%(etcd_instance_labels)s) + 1) / 2)
||| % $._config,
'for': '3m',
labels: {
severity: 'critical',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).',
summary: 'etcd cluster has insufficient number of members.',
},
},
{
alert: 'etcdNoLeader',
expr: |||
etcd_server_has_leader{%(etcd_selector)s} == 0
||| % $._config,
'for': '1m',
labels: {
severity: 'critical',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.',
summary: 'etcd cluster has no leader.',
},
},
{
alert: 'etcdHighNumberOfLeaderChanges',
expr: |||
increase((max without (%(etcd_instance_labels)s) (etcd_server_leader_changes_seen_total{%(etcd_selector)s}) or 0*absent(etcd_server_leader_changes_seen_total{%(etcd_selector)s}))[15m:1m]) >= 4
||| % $._config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.',
summary: 'etcd cluster has high number of leader changes.',
},
},
{
alert: 'etcdHighNumberOfFailedGRPCRequests',
expr: |||
100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code)
> 1
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster has high number of failed grpc requests.',
},
},
{
alert: 'etcdHighNumberOfFailedGRPCRequests',
expr: |||
100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code)
> 5
||| % $._config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster has high number of failed grpc requests.',
},
},
{
alert: 'etcdGRPCRequestsSlow',
expr: |||
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{%(etcd_selector)s, grpc_type="unary"}[5m])) without(grpc_type))
> 0.15
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
summary: 'etcd grpc requests are slow',
},
},
{
alert: 'etcdMemberCommunicationSlow',
expr: |||
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{%(etcd_selector)s}[5m]))
> 0.15
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster member communication is slow.',
},
},
{
alert: 'etcdHighNumberOfFailedProposals',
expr: |||
rate(etcd_server_proposals_failed_total{%(etcd_selector)s}[15m]) > 5
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster has high number of proposal failures.',
},
},
{
alert: 'etcdHighFsyncDurations',
expr: |||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
> 0.5
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster 99th percentile fsync durations are too high.',
},
},
{
alert: 'etcdHighFsyncDurations',
expr: |||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{%(etcd_selector)s}[5m]))
> 1
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.',
},
},
{
alert: 'etcdHighCommitDurations',
expr: |||
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{%(etcd_selector)s}[5m]))
> 0.25
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.',
summary: 'etcd cluster 99th percentile commit durations are too high.',
},
},
{
alert: 'etcdBackendQuotaLowSpace',
expr: |||
(etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100 > 95
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
annotations: {
message: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.',
},
},
{
alert: 'etcdExcessiveDatabaseGrowth',
expr: |||
increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
annotations: {
message: 'etcd cluster "{{ $labels.job }}": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.',
},
},
],
},
],
},
grafanaDashboards+:: {
'etcd.json': {
uid: std.md5('etcd.json'),
title: 'etcd',
description: 'etcd sample Grafana dashboard with Prometheus',
tags: [ 'etcd-mixin' ],
style: 'dark',
timezone: 'browser',
editable: true,
hideControls: false,
sharedCrosshair: false,
rows: [
{
collapse: false,
editable: true,
height: '250px',
panels: [
{
cacheTimeout: null,
colorBackground: false,
colorValue: false,
colors: [
'rgba(245, 54, 54, 0.9)',
'rgba(237, 129, 40, 0.89)',
'rgba(50, 172, 45, 0.97)',
],
datasource: '$datasource',
editable: true,
'error': false,
format: 'none',
gauge: {
maxValue: 100,
minValue: 0,
show: false,
thresholdLabels: false,
thresholdMarkers: true,
},
id: 28,
interval: null,
isNew: true,
links: [],
mappingType: 1,
mappingTypes: [
{
name: 'value to text',
value: 1,
},
{
name: 'range to text',
value: 2,
},
],
maxDataPoints: 100,
nullPointMode: 'connected',
nullText: null,
postfix: '',
postfixFontSize: '50%',
prefix: '',
prefixFontSize: '50%',
rangeMaps: [{
from: 'null',
text: 'N/A',
to: 'null',
}],
span: 3,
sparkline: {
fillColor: 'rgba(31, 118, 189, 0.18)',
full: false,
lineColor: 'rgb(31, 120, 193)',
show: false,
},
targets: [{
expr: 'sum(etcd_server_has_leader{job="$cluster"})',
intervalFactor: 2,
legendFormat: '',
metric: 'etcd_server_has_leader',
refId: 'A',
step: 20,
}],
thresholds: '',
title: 'Up',
type: 'singlestat',
valueFontSize: '200%',
valueMaps: [{
op: '=',
text: 'N/A',
value: 'null',
}],
valueName: 'avg',
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
id: 23,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 5,
stack: false,
steppedLine: false,
targets: [
{
expr: 'sum(rate(grpc_server_started_total{job="$cluster",grpc_type="unary"}[5m]))',
format: 'time_series',
intervalFactor: 2,
legendFormat: 'RPC Rate',
metric: 'grpc_server_started_total',
refId: 'A',
step: 2,
},
{
expr: 'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))',
format: 'time_series',
intervalFactor: 2,
legendFormat: 'RPC Failed Rate',
metric: 'grpc_server_handled_total',
refId: 'B',
step: 2,
},
],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'RPC Rate',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'ops',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
id: 41,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 4,
stack: true,
steppedLine: false,
targets: [
{
expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})',
intervalFactor: 2,
legendFormat: 'Watch Streams',
metric: 'grpc_server_handled_total',
refId: 'A',
step: 4,
},
{
expr: 'sum(grpc_server_started_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})',
intervalFactor: 2,
legendFormat: 'Lease Streams',
metric: 'grpc_server_handled_total',
refId: 'B',
step: 4,
},
],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Active Streams',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'short',
label: '',
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
],
showTitle: false,
title: 'Row',
},
{
collapse: false,
editable: true,
height: '250px',
panels: [
{
aliasColors: {},
bars: false,
datasource: '$datasource',
decimals: null,
editable: true,
'error': false,
fill: 0,
grid: {},
id: 1,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 4,
stack: false,
steppedLine: false,
targets: [{
expr: 'etcd_mvcc_db_total_size_in_bytes{job="$cluster"}',
hide: false,
interval: '',
intervalFactor: 2,
legendFormat: '{{instance}} DB Size',
metric: '',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'DB Size',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'cumulative',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'bytes',
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
logBase: 1,
max: null,
min: null,
show: false,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
grid: {},
id: 3,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 1,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 4,
stack: false,
steppedLine: true,
targets: [
{
expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
hide: false,
intervalFactor: 2,
legendFormat: '{{instance}} WAL fsync',
metric: 'etcd_disk_wal_fsync_duration_seconds_bucket',
refId: 'A',
step: 4,
},
{
expr: 'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job="$cluster"}[5m])) by (instance, le))',
intervalFactor: 2,
legendFormat: '{{instance}} DB fsync',
metric: 'etcd_disk_backend_commit_duration_seconds_bucket',
refId: 'B',
step: 4,
},
],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Disk Sync Duration',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'cumulative',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 's',
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
logBase: 1,
max: null,
min: null,
show: false,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
id: 29,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 4,
stack: false,
steppedLine: false,
targets: [{
expr: 'process_resident_memory_bytes{job="$cluster"}',
intervalFactor: 2,
legendFormat: '{{instance}} Resident Memory',
metric: 'process_resident_memory_bytes',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Memory',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'bytes',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
],
title: 'New row',
},
{
collapse: false,
editable: true,
height: '250px',
panels: [
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 5,
id: 22,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 3,
stack: true,
steppedLine: false,
targets: [{
expr: 'rate(etcd_network_client_grpc_received_bytes_total{job="$cluster"}[5m])',
intervalFactor: 2,
legendFormat: '{{instance}} Client Traffic In',
metric: 'etcd_network_client_grpc_received_bytes_total',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Client Traffic In',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'Bps',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 5,
id: 21,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 3,
stack: true,
steppedLine: false,
targets: [{
expr: 'rate(etcd_network_client_grpc_sent_bytes_total{job="$cluster"}[5m])',
intervalFactor: 2,
legendFormat: '{{instance}} Client Traffic Out',
metric: 'etcd_network_client_grpc_sent_bytes_total',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Client Traffic Out',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'Bps',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
id: 20,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 3,
stack: false,
steppedLine: false,
targets: [{
expr: 'sum(rate(etcd_network_peer_received_bytes_total{job="$cluster"}[5m])) by (instance)',
intervalFactor: 2,
legendFormat: '{{instance}} Peer Traffic In',
metric: 'etcd_network_peer_received_bytes_total',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Peer Traffic In',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'Bps',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
decimals: null,
editable: true,
'error': false,
fill: 0,
grid: {},
id: 16,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 3,
stack: false,
steppedLine: false,
targets: [{
expr: 'sum(rate(etcd_network_peer_sent_bytes_total{job="$cluster"}[5m])) by (instance)',
hide: false,
interval: '',
intervalFactor: 2,
legendFormat: '{{instance}} Peer Traffic Out',
metric: 'etcd_network_peer_sent_bytes_total',
refId: 'A',
step: 4,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Peer Traffic Out',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'cumulative',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'Bps',
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
],
title: 'New row',
},
{
collapse: false,
editable: true,
height: '250px',
panels: [
{
aliasColors: {},
bars: false,
datasource: '$datasource',
editable: true,
'error': false,
fill: 0,
id: 40,
isNew: true,
legend: {
avg: false,
current: false,
max: false,
min: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 6,
stack: false,
steppedLine: false,
targets: [
{
expr: 'sum(rate(etcd_server_proposals_failed_total{job="$cluster"}[5m]))',
intervalFactor: 2,
legendFormat: 'Proposal Failure Rate',
metric: 'etcd_server_proposals_failed_total',
refId: 'A',
step: 2,
},
{
expr: 'sum(etcd_server_proposals_pending{job="$cluster"})',
intervalFactor: 2,
legendFormat: 'Proposal Pending Total',
metric: 'etcd_server_proposals_pending',
refId: 'B',
step: 2,
},
{
expr: 'sum(rate(etcd_server_proposals_committed_total{job="$cluster"}[5m]))',
intervalFactor: 2,
legendFormat: 'Proposal Commit Rate',
metric: 'etcd_server_proposals_committed_total',
refId: 'C',
step: 2,
},
{
expr: 'sum(rate(etcd_server_proposals_applied_total{job="$cluster"}[5m]))',
intervalFactor: 2,
legendFormat: 'Proposal Apply Rate',
refId: 'D',
step: 2,
},
],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Raft Proposals',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'short',
label: '',
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
{
aliasColors: {},
bars: false,
datasource: '$datasource',
decimals: 0,
editable: true,
'error': false,
fill: 0,
id: 19,
isNew: true,
legend: {
alignAsTable: false,
avg: false,
current: false,
max: false,
min: false,
rightSide: false,
show: false,
total: false,
values: false,
},
lines: true,
linewidth: 2,
links: [],
nullPointMode: 'connected',
percentage: false,
pointradius: 5,
points: false,
renderer: 'flot',
seriesOverrides: [],
span: 6,
stack: false,
steppedLine: false,
targets: [{
expr: 'changes(etcd_server_leader_changes_seen_total{job="$cluster"}[1d])',
intervalFactor: 2,
legendFormat: '{{instance}} Total Leader Elections Per Day',
metric: 'etcd_server_leader_changes_seen_total',
refId: 'A',
step: 2,
}],
thresholds: [],
timeFrom: null,
timeShift: null,
title: 'Total Leader Elections Per Day',
tooltip: {
msResolution: false,
shared: true,
sort: 0,
value_type: 'individual',
},
type: 'graph',
xaxis: {
mode: 'time',
name: null,
show: true,
values: [],
},
yaxes: [
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
{
format: 'short',
label: null,
logBase: 1,
max: null,
min: null,
show: true,
},
],
},
],
title: 'New row',
},
],
time: {
from: 'now-15m',
to: 'now',
},
timepicker: {
now: true,
refresh_intervals: [
'5s',
'10s',
'30s',
'1m',
'5m',
'15m',
'30m',
'1h',
'2h',
'1d',
],
time_options: [
'5m',
'15m',
'1h',
'6h',
'12h',
'24h',
'2d',
'7d',
'30d',
],
},
templating: {
list: [
{
current: {
text: 'Prometheus',
value: 'Prometheus',
},
hide: 0,
label: null,
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
{
allValue: null,
current: {
text: 'prod',
value: 'prod',
},
datasource: '$datasource',
hide: 0,
includeAll: false,
label: 'cluster',
multi: false,
name: 'cluster',
options: [],
query: 'label_values(etcd_server_has_leader, job)',
refresh: $._config.dashboard_var_refresh,
regex: '',
sort: 2,
tagValuesQuery: '',
tags: [],
tagsQuery: '',
type: 'query',
useTags: false,
},
],
},
annotations: {
list: [],
},
refresh: '10s',
schemaVersion: 13,
version: 215,
links: [],
gnetId: null,
},
},
}