17
17
package collector
18
18
19
19
import (
20
+ "errors"
20
21
"fmt"
21
22
"os"
22
23
"path/filepath"
@@ -25,17 +26,19 @@ import (
25
26
"strconv"
26
27
"sync"
27
28
29
+ "golang.org/x/exp/maps"
30
+
28
31
"github.com/alecthomas/kingpin/v2"
29
32
"github.com/go-kit/log"
30
33
"github.com/go-kit/log/level"
31
34
"github.com/prometheus/client_golang/prometheus"
32
35
"github.com/prometheus/procfs"
33
36
"github.com/prometheus/procfs/sysfs"
34
- "golang.org/x/exp/maps"
35
37
)
36
38
37
39
type cpuCollector struct {
38
- fs procfs.FS
40
+ procfs procfs.FS
41
+ sysfs sysfs.FS
39
42
cpu * prometheus.Desc
40
43
cpuInfo * prometheus.Desc
41
44
cpuFrequencyHz * prometheus.Desc
@@ -45,6 +48,7 @@ type cpuCollector struct {
45
48
cpuCoreThrottle * prometheus.Desc
46
49
cpuPackageThrottle * prometheus.Desc
47
50
cpuIsolated * prometheus.Desc
51
+ cpuOnline * prometheus.Desc
48
52
logger log.Logger
49
53
cpuStats map [int64 ]procfs.CPUStat
50
54
cpuStatsMutex sync.Mutex
@@ -71,17 +75,17 @@ func init() {
71
75
72
76
// NewCPUCollector returns a new Collector exposing kernel/system statistics.
73
77
func NewCPUCollector (logger log.Logger ) (Collector , error ) {
74
- fs , err := procfs .NewFS (* procPath )
78
+ pfs , err := procfs .NewFS (* procPath )
75
79
if err != nil {
76
80
return nil , fmt .Errorf ("failed to open procfs: %w" , err )
77
81
}
78
82
79
- sysfs , err := sysfs .NewFS (* sysPath )
83
+ sfs , err := sysfs .NewFS (* sysPath )
80
84
if err != nil {
81
85
return nil , fmt .Errorf ("failed to open sysfs: %w" , err )
82
86
}
83
87
84
- isolcpus , err := sysfs .IsolatedCPUs ()
88
+ isolcpus , err := sfs .IsolatedCPUs ()
85
89
if err != nil {
86
90
if ! os .IsNotExist (err ) {
87
91
return nil , fmt .Errorf ("Unable to get isolated cpus: %w" , err )
@@ -90,8 +94,9 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
90
94
}
91
95
92
96
c := & cpuCollector {
93
- fs : fs ,
94
- cpu : nodeCPUSecondsDesc ,
97
+ procfs : pfs ,
98
+ sysfs : sfs ,
99
+ cpu : nodeCPUSecondsDesc ,
95
100
cpuInfo : prometheus .NewDesc (
96
101
prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "info" ),
97
102
"CPU information from /proc/cpuinfo." ,
@@ -132,6 +137,11 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
132
137
"Whether each core is isolated, information from /sys/devices/system/cpu/isolated." ,
133
138
[]string {"cpu" }, nil ,
134
139
),
140
+ cpuOnline : prometheus .NewDesc (
141
+ prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "online" ),
142
+ "CPUs that are online and being scheduled." ,
143
+ []string {"cpu" }, nil ,
144
+ ),
135
145
logger : logger ,
136
146
isolatedCpus : isolcpus ,
137
147
cpuStats : make (map [int64 ]procfs.CPUStat ),
@@ -178,12 +188,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
178
188
if c .isolatedCpus != nil {
179
189
c .updateIsolated (ch )
180
190
}
181
- return c .updateThermalThrottle (ch )
191
+ err := c .updateThermalThrottle (ch )
192
+ if err != nil {
193
+ return err
194
+ }
195
+ err = c .updateOnline (ch )
196
+ if err != nil {
197
+ return err
198
+ }
199
+
200
+ return nil
182
201
}
183
202
184
203
// updateInfo reads /proc/cpuinfo
185
204
func (c * cpuCollector ) updateInfo (ch chan <- prometheus.Metric ) error {
186
- info , err := c .fs .CPUInfo ()
205
+ info , err := c .procfs .CPUInfo ()
187
206
if err != nil {
188
207
return err
189
208
}
@@ -334,9 +353,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) {
334
353
}
335
354
}
336
355
356
+ // updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics.
357
+ func (c * cpuCollector ) updateOnline (ch chan <- prometheus.Metric ) error {
358
+ cpus , err := c .sysfs .CPUs ()
359
+ if err != nil {
360
+ return err
361
+ }
362
+ // No-op if the system does not support CPU online stats.
363
+ cpu0 := cpus [0 ]
364
+ if _ , err := cpu0 .Online (); err != nil && errors .Is (err , os .ErrNotExist ) {
365
+ return nil
366
+ }
367
+ for _ , cpu := range cpus {
368
+ setOnline := float64 (0 )
369
+ if online , _ := cpu .Online (); online {
370
+ setOnline = 1
371
+ }
372
+ ch <- prometheus .MustNewConstMetric (c .cpuOnline , prometheus .GaugeValue , setOnline , cpu .Number ())
373
+ }
374
+
375
+ return nil
376
+ }
377
+
337
378
// updateStat reads /proc/stat through procfs and exports CPU-related metrics.
338
379
func (c * cpuCollector ) updateStat (ch chan <- prometheus.Metric ) error {
339
- stats , err := c .fs .Stat ()
380
+ stats , err := c .procfs .Stat ()
340
381
if err != nil {
341
382
return err
342
383
}
0 commit comments