Skip to content

Commit 0726b5d

Browse files
adberger authored and gitperr committed
Add multi-cluster support for Nodes dashboard (prometheus#2945)
Signed-off-by: Adrian Berger <[email protected]>
1 parent a112c58 commit 0726b5d

File tree

1 file changed

+56
-39
lines changed

1 file changed

+56
-39
lines changed

docs/node-mixin/lib/prom-mixin.libsonnet

Lines changed: 56 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,23 @@ local table = grafana70.panel.table;
2727
type: 'datasource',
2828
},
2929

30+
local clusterTemplatePrototype =
31+
template.new(
32+
'cluster',
33+
'$datasource',
34+
'',
35+
hide=if config.showMultiCluster then '' else '2',
36+
refresh='time',
37+
label='Cluster',
38+
),
39+
local clusterTemplate =
40+
if platform == 'Darwin' then
41+
clusterTemplatePrototype
42+
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config }
43+
else
44+
clusterTemplatePrototype
45+
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config },
46+
3047
local instanceTemplatePrototype =
3148
template.new(
3249
'instance',
@@ -38,11 +55,10 @@ local table = grafana70.panel.table;
3855
local instanceTemplate =
3956
if platform == 'Darwin' then
4057
instanceTemplatePrototype
41-
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, instance)' % config }
58+
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config }
4259
else
4360
instanceTemplatePrototype
44-
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, instance)' % config },
45-
61+
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config },
4662

4763
local idleCPU =
4864
graphPanel.new(
@@ -57,9 +73,9 @@ local table = grafana70.panel.table;
5773
.addTarget(prometheus.target(
5874
|||
5975
(
60-
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
76+
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
6177
/ ignoring(cpu) group_left
62-
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
78+
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
6379
)
6480
||| % config,
6581
legendFormat='{{cpu}}',
@@ -75,10 +91,10 @@ local table = grafana70.panel.table;
7591
min=0,
7692
fill=0,
7793
)
78-
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='1m load average'))
79-
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='5m load average'))
80-
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='15m load average'))
81-
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % config, legendFormat='logical cores')),
94+
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average'))
95+
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average'))
96+
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average'))
97+
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')),
8298

8399
local memoryGraphPanelPrototype =
84100
graphPanel.new(
@@ -94,44 +110,44 @@ local table = grafana70.panel.table;
94110
.addTarget(prometheus.target(
95111
|||
96112
(
97-
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
113+
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
98114
-
99-
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
115+
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
100116
-
101-
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
117+
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
102118
-
103-
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
119+
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
104120
)
105121
||| % config,
106122
legendFormat='memory used'
107123
))
108-
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory buffers'))
109-
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory cached'))
110-
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory free'))
124+
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers'))
125+
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached'))
126+
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free'))
111127
else if platform == 'Darwin' then
112128
// not useful to stack
113129
memoryGraphPanelPrototype { stack: false }
114-
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Physical Memory'))
130+
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory'))
115131
.addTarget(prometheus.target(
116132
|||
117133
(
118-
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
119-
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} +
120-
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"} +
121-
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}
134+
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
135+
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
136+
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
137+
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
122138
)
123139
||| % config, legendFormat='Memory Used'
124140
))
125141
.addTarget(prometheus.target(
126142
|||
127143
(
128-
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
129-
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}
144+
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
145+
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
130146
)
131147
||| % config, legendFormat='App Memory'
132148
))
133-
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Wired Memory'))
134-
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Compressed')),
149+
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory'))
150+
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')),
135151

136152
// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
137153
local memoryGaugePanelPrototype =
@@ -155,8 +171,8 @@ local table = grafana70.panel.table;
155171
|||
156172
100 -
157173
(
158-
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) /
159-
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
174+
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) /
175+
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
160176
* 100
161177
)
162178
||| % config,
@@ -168,12 +184,12 @@ local table = grafana70.panel.table;
168184
|||
169185
(
170186
(
171-
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"}) -
172-
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
173-
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
174-
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"})
187+
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) -
188+
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
189+
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
190+
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
175191
) /
176-
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
192+
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
177193
)
178194
*
179195
100
@@ -190,17 +206,17 @@ local table = grafana70.panel.table;
190206
)
191207
// TODO: Does it make sense to have those three in the same panel?
192208
.addTarget(prometheus.target(
193-
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
209+
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
194210
legendFormat='{{device}} read',
195211
intervalFactor=1,
196212
))
197213
.addTarget(prometheus.target(
198-
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
214+
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
199215
legendFormat='{{device}} written',
200216
intervalFactor=1,
201217
))
202218
.addTarget(prometheus.target(
203-
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
219+
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
204220
legendFormat='{{device}} io time',
205221
intervalFactor=1,
206222
)) +
@@ -232,15 +248,15 @@ local table = grafana70.panel.table;
232248
.addThresholdStep(color='red', value=0.9)
233249
.addTarget(prometheus.target(
234250
|||
235-
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
251+
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
236252
||| % config,
237253
legendFormat='',
238254
instant=true,
239255
format='table'
240256
))
241257
.addTarget(prometheus.target(
242258
|||
243-
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s})
259+
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
244260
||| % config,
245261
legendFormat='',
246262
instant=true,
@@ -421,7 +437,7 @@ local table = grafana70.panel.table;
421437
fill=0,
422438
)
423439
.addTarget(prometheus.target(
424-
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
440+
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
425441
legendFormat='{{device}}',
426442
intervalFactor=1,
427443
)),
@@ -437,7 +453,7 @@ local table = grafana70.panel.table;
437453
fill=0,
438454
)
439455
.addTarget(prometheus.target(
440-
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
456+
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
441457
legendFormat='{{device}}',
442458
intervalFactor=1,
443459
)),
@@ -473,6 +489,7 @@ local table = grafana70.panel.table;
473489
local templates =
474490
[
475491
prometheusDatasourceTemplate,
492+
clusterTemplate,
476493
instanceTemplate,
477494
],
478495

0 commit comments

Comments
 (0)