Skip to content

Commit b44f157

Browse files
committed
update mixin for darwin
Signed-off-by: Robbie Lankford <[email protected]>
1 parent 3136901 commit b44f157

File tree

3 files changed

+299
-0
lines changed

3 files changed

+299
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
(import 'config.libsonnet') +
2+
(import 'alerts/alerts.libsonnet') +
3+
(import 'dashboards/darwin-dashboards.libsonnet') +
4+
(import 'rules/rules.libsonnet')
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(import 'darwin-node.libsonnet') +
2+
(import 'use.libsonnet')
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
2+
local dashboard = grafana.dashboard;
3+
local row = grafana.row;
4+
local prometheus = grafana.prometheus;
5+
local template = grafana.template;
6+
local graphPanel = grafana.graphPanel;
7+
local promgrafonnet = import 'github.com/kubernetes-monitoring/kubernetes-mixin/lib/promgrafonnet/promgrafonnet.libsonnet';
8+
local gauge = promgrafonnet.gauge;
9+
10+
{
11+
grafanaDashboards+:: {
12+
'nodes.json':
13+
// "CPU Usage" graph panel for the selected $instance.
// Despite the binding's name, the query plots busy time: for each CPU it takes
// 1 minus the summed rate of the idle/iowait/steal modes, then divides by the
// number of CPUs (the count of mode="idle" series).
local idleCPU =
14+
graphPanel.new(
15+
'CPU Usage',
16+
datasource='$datasource',
17+
span=6,
18+
format='percentunit',
19+
max=1,
20+
min=0,
21+
// Each series is already divided by the CPU count, so the stacked total stays within the 0-1 axis.
stack=true,
22+
)
23+
.addTarget(prometheus.target(
24+
|||
25+
(
26+
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
27+
/ ignoring(cpu) group_left
28+
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
29+
)
30+
||| % $._config,
31+
// One legend entry per CPU.
legendFormat='{{cpu}}',
32+
intervalFactor=5,
33+
));
34+
35+
// "Load Average" graph panel for the selected $instance: the 1m, 5m and 15m
// load averages, plus the count of per-CPU idle series as a "logical cores"
// reference line.
local systemLoad =
  (
    // Label matchers shared by every query in this panel.
    local instanceMatchers = '%(nodeExporterSelector)s, instance="$instance"' % $._config;

    // Builds the target for one load-average window ('1', '5' or '15').
    local loadTarget(window) = prometheus.target(
      'node_load%s{%s}' % [window, instanceMatchers],
      legendFormat='%sm load average' % window,
    );

    // Counting the mode="idle" series gives one entry per CPU.
    local coreCountTarget = prometheus.target(
      'count(node_cpu_seconds_total{%s, mode="idle"})' % instanceMatchers,
      legendFormat='logical cores',
    );

    graphPanel.new(
      'Load Average',
      datasource='$datasource',
      span=6,
      format='short',
      min=0,
      fill=0,
    )
    .addTarget(loadTarget('1'))
    .addTarget(loadTarget('5'))
    .addTarget(loadTarget('15'))
    .addTarget(coreCountTarget)
  );
48+
49+
// "Memory Usage" graph panel (bytes, unstacked) for the selected $instance.
// Plots five series: Physical Memory, Memory Used, App Memory, Wired Memory
// and Compressed.
local memoryGraph =
50+
graphPanel.new(
51+
'Memory Usage',
52+
datasource='$datasource',
53+
span=9,
54+
format='bytes',
55+
stack=false,
56+
min=0,
57+
)
58+
// Physical Memory: the raw node_memory_total_bytes value.
.addTarget(prometheus.target(
59+
|||
60+
(
61+
node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}
62+
)
63+
||| % $._config, legendFormat='Physical Memory'
64+
))
65+
// Memory Used: total - (free + inactive) + wired + compressed.
// NOTE(review): if total - (free + inactive) already contains wired and
// compressed pages, this series counts them twice and can exceed Physical
// Memory -- confirm against the node_exporter Darwin memory collector.
.addTarget(prometheus.target(
66+
|||
67+
(
68+
node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}
69+
-
70+
(
71+
node_memory_free_bytes{%(nodeExporterSelector)s, instance="$instance"}
72+
+
73+
node_memory_inactive_bytes{%(nodeExporterSelector)s, instance="$instance"}
74+
)
75+
+
76+
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}
77+
+
78+
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}
79+
)
80+
||| % $._config, legendFormat='Memory Used'
81+
))
82+
// App Memory: total - (free + inactive).
.addTarget(prometheus.target(
83+
|||
84+
(
85+
node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}
86+
-
87+
(
88+
node_memory_free_bytes{%(nodeExporterSelector)s, instance="$instance"}
89+
+
90+
node_memory_inactive_bytes{%(nodeExporterSelector)s, instance="$instance"}
91+
)
92+
)
93+
||| % $._config, legendFormat='App Memory'
94+
))
95+
// Wired and compressed memory are also shown as their own raw series.
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='Wired Memory'))
96+
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='Compressed'));
97+
98+
// TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%.
99+
// This needs to be added upstream in the promgrafonnet library and then changed here.
100+
// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
101+
// "Memory Usage" gauge for the selected $instance.
// The expression is (total - (free + inactive) + wired + compressed) / total,
// with every metric wrapped in avg(), multiplied by 100 so the gauge receives
// a 0-100 percentage rather than a 0-1 ratio.
local memoryGauge = gauge.new(
102+
'Memory Usage',
103+
|||
104+
(
105+
(
106+
(
107+
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
108+
-
109+
(
110+
avg(node_memory_free_bytes{%(nodeExporterSelector)s, instance="$instance"})
111+
+
112+
avg(node_memory_inactive_bytes{%(nodeExporterSelector)s, instance="$instance"})
113+
)
114+
)
115+
+
116+
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"})
117+
+
118+
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"})
119+
)
120+
/
121+
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
122+
)
123+
*
124+
100
125+
||| % $._config,
126+
// NOTE(review): withLowerBeingBetter() comes from promgrafonnet; presumably it
// orients the gauge thresholds so low usage reads as healthy -- confirm there.
).withLowerBeingBetter();
127+
128+
// "Disk I/O" graph panel for the selected $instance: per-device read and
// write byte rates on the first y-axis (bytes) and per-device I/O time rate
// on the second y-axis (seconds).
local diskIO =
  (
    // Label matchers shared by all three disk queries.
    local deviceMatchers = '%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s' % $._config;

    // Builds a rate() target over one disk counter; `what` is the legend suffix.
    local perDeviceRate(metric, what) = prometheus.target(
      'rate(%s{%s}[$__rate_interval])' % [metric, deviceMatchers],
      legendFormat='{{device}} ' + what,
    );

    // Overlay that routes series to an axis by legend suffix and sets the
    // unit of each axis. `self` resolves against the panel once mixed in.
    local dualAxes = {
      seriesOverrides: [
        { alias: '/ read| written/', yaxis: 1 },
        { alias: '/ io time/', yaxis: 2 },
      ],
      yaxes: [
        self.yaxe(format='bytes'),
        self.yaxe(format='s'),
      ],
    };

    graphPanel.new('Disk I/O', datasource='$datasource', span=6, min=0, fill=0)
    // TODO: Does it make sense to have those three in the same panel?
    .addTarget(perDeviceRate('node_disk_read_bytes_total', 'read'))
    .addTarget(perDeviceRate('node_disk_written_bytes_total', 'written'))
    .addTarget(perDeviceRate('node_disk_io_time_seconds_total', 'io time'))
    + dualAxes
  );
165+
166+
// TODO: Somehow partition this by device while excluding read-only devices.
167+
// "Disk Space Usage" graph panel (bytes, stacked) for the selected $instance.
// Two series, each summed across devices after taking the per-device maximum:
// "used" (size - avail) and "available" (avail).
local diskSpaceUsage =
168+
graphPanel.new(
169+
'Disk Space Usage',
170+
datasource='$datasource',
171+
span=6,
172+
format='bytes',
173+
min=0,
174+
fill=1,
175+
stack=true,
176+
)
177+
// "used": filesystem size minus available bytes.
.addTarget(prometheus.target(
178+
|||
179+
sum(
180+
max by (device) (
181+
node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
182+
-
183+
node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
184+
)
185+
)
186+
||| % $._config,
187+
legendFormat='used',
188+
))
189+
// "available": available bytes only.
.addTarget(prometheus.target(
190+
|||
191+
sum(
192+
max by (device) (
193+
node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}
194+
)
195+
)
196+
||| % $._config,
197+
legendFormat='available',
198+
)) +
199+
// Pin a fixed colour to each of the two series, matched by legend name.
{
200+
seriesOverrides: [
201+
{
202+
alias: 'used',
203+
color: '#E0B400',
204+
},
205+
{
206+
alias: 'available',
207+
color: '#73BF69',
208+
},
209+
],
210+
};
211+
212+
// "Network Received" graph panel for the selected $instance: per-device rate
// of node_network_receive_bytes_total, with the loopback interface excluded.
local networkReceived =
  graphPanel.new(
    'Network Received',
    datasource='$datasource',
    span=6,
    format='bytes',
    min=0,
    fill=0,
  )
  .addTarget(prometheus.target(
    // Darwin names its loopback interface "lo0", so the former device!="lo"
    // matcher never excluded it. Prometheus regex matchers are fully
    // anchored, so "lo0?" drops exactly "lo" and "lo0" and nothing else.
    'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!~"lo0?"}[$__rate_interval])' % $._config,
    legendFormat='{{device}}',
  ));
225+
226+
// "Network Transmitted" graph panel for the selected $instance: per-device
// rate of node_network_transmit_bytes_total, with the loopback interface
// excluded.
local networkTransmitted =
  graphPanel.new(
    'Network Transmitted',
    datasource='$datasource',
    span=6,
    format='bytes',
    min=0,
    fill=0,
  )
  .addTarget(prometheus.target(
    // Darwin names its loopback interface "lo0", so the former device!="lo"
    // matcher never excluded it. Prometheus regex matchers are fully
    // anchored, so "lo0?" drops exactly "lo" and "lo0" and nothing else.
    'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!~"lo0?"}[$__rate_interval])' % $._config,
    legendFormat='{{device}}',
  ));
239+
240+
// Assemble the "Nodes" dashboard: a datasource picker, an instance picker
// filled from node_exporter_build_info, and four rows of two panels each
// (CPU/load, memory, disk, network).

// Hand-written template variable that lets the user choose the datasource.
local datasourceVariable = {
  current: { text: 'Prometheus', value: 'Prometheus' },
  hide: 0,
  label: 'Data Source',
  name: 'datasource',
  options: [],
  query: 'prometheus',
  refresh: 1,
  regex: '',
  type: 'datasource',
};

// Template variable listing the instances reported by the selected datasource.
local instanceVariable = template.new(
  'instance',
  '$datasource',
  'label_values(node_exporter_build_info{%(nodeExporterSelector)s}, instance)' % $._config,
  refresh='time',
);

// Builds one dashboard row holding two panels, in the order given.
local panelRow(first, second) = row.new().addPanel(first).addPanel(second);

dashboard.new(
  '%sNodes' % $._config.dashboardNamePrefix,
  time_from='now-1h',
  tags=$._config.dashboardTags,
  timezone='utc',
  refresh='30s',
  graphTooltip='shared_crosshair',
)
.addTemplate(datasourceVariable)
.addTemplate(instanceVariable)
.addRow(panelRow(idleCPU, systemLoad))
.addRow(panelRow(memoryGraph, memoryGauge))
.addRow(panelRow(diskIO, diskSpaceUsage))
.addRow(panelRow(networkReceived, networkTransmitted)),
292+
},
293+
}

0 commit comments

Comments
 (0)