Skip to content

Commit 93a7f70

Browse files
authored
Merge pull request #2318 from alexjx/topic/pick-up-new-procfs
fix cpustat when some cpus are offline
2 parents a3bd2e1 + 39b4556 commit 93a7f70

File tree

4 files changed, with 109 additions (+109) and 97 deletions (-97).

4 files changed, with 109 additions (+109) and 97 deletions (-97).

collector/cpu_linux.go

Lines changed: 41 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ type cpuCollector struct {
4343
cpuPackageThrottle *prometheus.Desc
4444
cpuIsolated *prometheus.Desc
4545
logger log.Logger
46-
cpuStats []procfs.CPUStat
46+
cpuStats map[int64]procfs.CPUStat
4747
cpuStatsMutex sync.Mutex
4848
isolatedCpus []uint16
4949

@@ -126,6 +126,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
126126
),
127127
logger: logger,
128128
isolatedCpus: isolcpus,
129+
cpuStats: make(map[int64]procfs.CPUStat),
129130
}
130131
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
131132
if err != nil {
@@ -324,7 +325,7 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
324325
c.cpuStatsMutex.Lock()
325326
defer c.cpuStatsMutex.Unlock()
326327
for cpuID, cpuStat := range c.cpuStats {
327-
cpuNum := strconv.Itoa(cpuID)
328+
cpuNum := strconv.Itoa(int(cpuID))
328329
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
329330
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
330331
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system")
@@ -345,82 +346,82 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
345346
}
346347

347348
// updateCPUStats updates the internal cache of CPU stats.
348-
func (c *cpuCollector) updateCPUStats(newStats []procfs.CPUStat) {
349+
func (c *cpuCollector) updateCPUStats(newStats map[int64]procfs.CPUStat) {
349350

350351
// Acquire a lock to update the stats.
351352
c.cpuStatsMutex.Lock()
352353
defer c.cpuStatsMutex.Unlock()
353354

354355
// Reset the cache if the list of CPUs has changed.
355-
if len(c.cpuStats) != len(newStats) {
356-
c.cpuStats = make([]procfs.CPUStat, len(newStats))
357-
}
358-
359356
for i, n := range newStats {
357+
cpuStats := c.cpuStats[i]
358+
360359
// If idle jumps backwards by more than X seconds, assume we had a hotplug event and reset the stats for this CPU.
361-
if (c.cpuStats[i].Idle - n.Idle) >= jumpBackSeconds {
362-
level.Debug(c.logger).Log("msg", jumpBackDebugMessage, "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
363-
c.cpuStats[i] = procfs.CPUStat{}
360+
if (cpuStats.Idle - n.Idle) >= jumpBackSeconds {
361+
level.Debug(c.logger).Log("msg", jumpBackDebugMessage, "cpu", i, "old_value", cpuStats.Idle, "new_value", n.Idle)
362+
cpuStats = procfs.CPUStat{}
364363
}
365364

366-
if n.Idle >= c.cpuStats[i].Idle {
367-
c.cpuStats[i].Idle = n.Idle
365+
if n.Idle >= cpuStats.Idle {
366+
cpuStats.Idle = n.Idle
368367
} else {
369-
level.Debug(c.logger).Log("msg", "CPU Idle counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
368+
level.Debug(c.logger).Log("msg", "CPU Idle counter jumped backwards", "cpu", i, "old_value", cpuStats.Idle, "new_value", n.Idle)
370369
}
371370

372-
if n.User >= c.cpuStats[i].User {
373-
c.cpuStats[i].User = n.User
371+
if n.User >= cpuStats.User {
372+
cpuStats.User = n.User
374373
} else {
375-
level.Debug(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].User, "new_value", n.User)
374+
level.Debug(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", cpuStats.User, "new_value", n.User)
376375
}
377376

378-
if n.Nice >= c.cpuStats[i].Nice {
379-
c.cpuStats[i].Nice = n.Nice
377+
if n.Nice >= cpuStats.Nice {
378+
cpuStats.Nice = n.Nice
380379
} else {
381-
level.Debug(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Nice, "new_value", n.Nice)
380+
level.Debug(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", cpuStats.Nice, "new_value", n.Nice)
382381
}
383382

384-
if n.System >= c.cpuStats[i].System {
385-
c.cpuStats[i].System = n.System
383+
if n.System >= cpuStats.System {
384+
cpuStats.System = n.System
386385
} else {
387-
level.Debug(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].System, "new_value", n.System)
386+
level.Debug(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", cpuStats.System, "new_value", n.System)
388387
}
389388

390-
if n.Iowait >= c.cpuStats[i].Iowait {
391-
c.cpuStats[i].Iowait = n.Iowait
389+
if n.Iowait >= cpuStats.Iowait {
390+
cpuStats.Iowait = n.Iowait
392391
} else {
393-
level.Debug(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Iowait, "new_value", n.Iowait)
392+
level.Debug(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", cpuStats.Iowait, "new_value", n.Iowait)
394393
}
395394

396-
if n.IRQ >= c.cpuStats[i].IRQ {
397-
c.cpuStats[i].IRQ = n.IRQ
395+
if n.IRQ >= cpuStats.IRQ {
396+
cpuStats.IRQ = n.IRQ
398397
} else {
399-
level.Debug(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].IRQ, "new_value", n.IRQ)
398+
level.Debug(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", cpuStats.IRQ, "new_value", n.IRQ)
400399
}
401400

402-
if n.SoftIRQ >= c.cpuStats[i].SoftIRQ {
403-
c.cpuStats[i].SoftIRQ = n.SoftIRQ
401+
if n.SoftIRQ >= cpuStats.SoftIRQ {
402+
cpuStats.SoftIRQ = n.SoftIRQ
404403
} else {
405-
level.Debug(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].SoftIRQ, "new_value", n.SoftIRQ)
404+
level.Debug(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", cpuStats.SoftIRQ, "new_value", n.SoftIRQ)
406405
}
407406

408-
if n.Steal >= c.cpuStats[i].Steal {
409-
c.cpuStats[i].Steal = n.Steal
407+
if n.Steal >= cpuStats.Steal {
408+
cpuStats.Steal = n.Steal
410409
} else {
411-
level.Debug(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Steal, "new_value", n.Steal)
410+
level.Debug(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", cpuStats.Steal, "new_value", n.Steal)
412411
}
413412

414-
if n.Guest >= c.cpuStats[i].Guest {
415-
c.cpuStats[i].Guest = n.Guest
413+
if n.Guest >= cpuStats.Guest {
414+
cpuStats.Guest = n.Guest
416415
} else {
417-
level.Debug(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Guest, "new_value", n.Guest)
416+
level.Debug(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", cpuStats.Guest, "new_value", n.Guest)
418417
}
419418

420-
if n.GuestNice >= c.cpuStats[i].GuestNice {
421-
c.cpuStats[i].GuestNice = n.GuestNice
419+
if n.GuestNice >= cpuStats.GuestNice {
420+
cpuStats.GuestNice = n.GuestNice
422421
} else {
423-
level.Debug(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].GuestNice, "new_value", n.GuestNice)
422+
level.Debug(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", cpuStats.GuestNice, "new_value", n.GuestNice)
424423
}
424+
425+
c.cpuStats[i] = cpuStats
425426
}
426427
}

collector/cpu_linux_test.go

Lines changed: 62 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -24,80 +24,91 @@ import (
2424
"github.com/prometheus/procfs"
2525
)
2626

27-
func makeTestCPUCollector(s []procfs.CPUStat) *cpuCollector {
28-
dup := make([]procfs.CPUStat, len(s))
29-
copy(dup, s)
27+
func copyStats(d, s map[int64]procfs.CPUStat) {
28+
for k := range s {
29+
v := s[k]
30+
d[k] = v
31+
}
32+
}
33+
34+
func makeTestCPUCollector(s map[int64]procfs.CPUStat) *cpuCollector {
35+
dup := make(map[int64]procfs.CPUStat, len(s))
36+
copyStats(dup, s)
3037
return &cpuCollector{
3138
logger: log.NewNopLogger(),
3239
cpuStats: dup,
3340
}
3441
}
3542

3643
func TestCPU(t *testing.T) {
37-
firstCPUStat := []procfs.CPUStat{{
38-
User: 100.0,
39-
Nice: 100.0,
40-
System: 100.0,
41-
Idle: 100.0,
42-
Iowait: 100.0,
43-
IRQ: 100.0,
44-
SoftIRQ: 100.0,
45-
Steal: 100.0,
46-
Guest: 100.0,
47-
GuestNice: 100.0,
48-
}}
44+
firstCPUStat := map[int64]procfs.CPUStat{
45+
0: {
46+
User: 100.0,
47+
Nice: 100.0,
48+
System: 100.0,
49+
Idle: 100.0,
50+
Iowait: 100.0,
51+
IRQ: 100.0,
52+
SoftIRQ: 100.0,
53+
Steal: 100.0,
54+
Guest: 100.0,
55+
GuestNice: 100.0,
56+
}}
4957

5058
c := makeTestCPUCollector(firstCPUStat)
51-
want := []procfs.CPUStat{{
52-
User: 101.0,
53-
Nice: 101.0,
54-
System: 101.0,
55-
Idle: 101.0,
56-
Iowait: 101.0,
57-
IRQ: 101.0,
58-
SoftIRQ: 101.0,
59-
Steal: 101.0,
60-
Guest: 101.0,
61-
GuestNice: 101.0,
62-
}}
59+
want := map[int64]procfs.CPUStat{
60+
0: {
61+
User: 101.0,
62+
Nice: 101.0,
63+
System: 101.0,
64+
Idle: 101.0,
65+
Iowait: 101.0,
66+
IRQ: 101.0,
67+
SoftIRQ: 101.0,
68+
Steal: 101.0,
69+
Guest: 101.0,
70+
GuestNice: 101.0,
71+
}}
6372
c.updateCPUStats(want)
6473
got := c.cpuStats
6574
if !reflect.DeepEqual(want, got) {
6675
t.Fatalf("should have %v CPU Stat: got %v", want, got)
6776
}
6877

6978
c = makeTestCPUCollector(firstCPUStat)
70-
jumpBack := []procfs.CPUStat{{
71-
User: 99.9,
72-
Nice: 99.9,
73-
System: 99.9,
74-
Idle: 99.9,
75-
Iowait: 99.9,
76-
IRQ: 99.9,
77-
SoftIRQ: 99.9,
78-
Steal: 99.9,
79-
Guest: 99.9,
80-
GuestNice: 99.9,
81-
}}
79+
jumpBack := map[int64]procfs.CPUStat{
80+
0: {
81+
User: 99.9,
82+
Nice: 99.9,
83+
System: 99.9,
84+
Idle: 99.9,
85+
Iowait: 99.9,
86+
IRQ: 99.9,
87+
SoftIRQ: 99.9,
88+
Steal: 99.9,
89+
Guest: 99.9,
90+
GuestNice: 99.9,
91+
}}
8292
c.updateCPUStats(jumpBack)
8393
got = c.cpuStats
8494
if reflect.DeepEqual(jumpBack, got) {
8595
t.Fatalf("should have %v CPU Stat: got %v", firstCPUStat, got)
8696
}
8797

8898
c = makeTestCPUCollector(firstCPUStat)
89-
resetIdle := []procfs.CPUStat{{
90-
User: 102.0,
91-
Nice: 102.0,
92-
System: 102.0,
93-
Idle: 1.0,
94-
Iowait: 102.0,
95-
IRQ: 102.0,
96-
SoftIRQ: 102.0,
97-
Steal: 102.0,
98-
Guest: 102.0,
99-
GuestNice: 102.0,
100-
}}
99+
resetIdle := map[int64]procfs.CPUStat{
100+
0: {
101+
User: 102.0,
102+
Nice: 102.0,
103+
System: 102.0,
104+
Idle: 1.0,
105+
Iowait: 102.0,
106+
IRQ: 102.0,
107+
SoftIRQ: 102.0,
108+
Steal: 102.0,
109+
Guest: 102.0,
110+
GuestNice: 102.0,
111+
}}
101112
c.updateCPUStats(resetIdle)
102113
got = c.cpuStats
103114
if !reflect.DeepEqual(resetIdle, got) {

go.mod

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@ require (
2424
github.com/prometheus/client_model v0.3.0
2525
github.com/prometheus/common v0.37.0
2626
github.com/prometheus/exporter-toolkit v0.8.2
27-
github.com/prometheus/procfs v0.8.0
27+
github.com/prometheus/procfs v0.9.0
2828
github.com/safchain/ethtool v0.2.0
2929
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a
30-
golang.org/x/sys v0.2.0
30+
golang.org/x/sys v0.4.0
3131
gopkg.in/alecthomas/kingpin.v2 v2.2.6
3232
)
3333

go.sum

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -246,8 +246,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
246246
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
247247
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
248248
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
249-
github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo=
250-
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
249+
github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI=
250+
github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY=
251251
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
252252
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
253253
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
@@ -418,8 +418,8 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc
418418
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
419419
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
420420
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
421-
golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
422-
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
421+
golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=
422+
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
423423
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
424424
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
425425
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

0 commit comments

Comments
 (0)