Skip to content

Commit 503e4fc

Browse files
authored
Expose cpu bugs and flags as info metrics. (#1788)
* Expose cpu bugs and flags as info metrics with a regexp filter. * Automatically enable CPU info metrics when using flags or bugs feature. Signed-off-by: domgoer <[email protected]>
1 parent f4b89c7 commit 503e4fc

File tree

4 files changed

+98
-2
lines changed

4 files changed

+98
-2
lines changed

collector/cpu_linux.go

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package collector
1818
import (
1919
"fmt"
2020
"path/filepath"
21+
"regexp"
2122
"strconv"
2223
"sync"
2324

@@ -32,16 +33,23 @@ type cpuCollector struct {
3233
fs procfs.FS
3334
cpu *prometheus.Desc
3435
cpuInfo *prometheus.Desc
36+
cpuFlagsInfo *prometheus.Desc
37+
cpuBugsInfo *prometheus.Desc
3538
cpuGuest *prometheus.Desc
3639
cpuCoreThrottle *prometheus.Desc
3740
cpuPackageThrottle *prometheus.Desc
3841
logger log.Logger
3942
cpuStats []procfs.CPUStat
4043
cpuStatsMutex sync.Mutex
44+
45+
cpuFlagsIncludeRegexp *regexp.Regexp
46+
cpuBugsIncludeRegexp *regexp.Regexp
4147
}
4248

4349
// Command-line flags controlling the CPU info metrics.
var (
4450
// enableCPUInfo is the boolean --collector.cpu.info flag.
enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
51+
// flagsInclude holds the raw --collector.cpu.info.flags-include value; when
// non-empty it is compiled as a regular expression by compileIncludeFlags.
flagsInclude = kingpin.Flag("collector.cpu.info.flags-include", "Filter the `flags` field in cpuInfo with a value that must be a regular expression").String()
52+
// bugsInclude holds the raw --collector.cpu.info.bugs-include value; when
// non-empty it is compiled as a regular expression by compileIncludeFlags.
bugsInclude = kingpin.Flag("collector.cpu.info.bugs-include", "Filter the `bugs` field in cpuInfo with a value that must be a regular expression").String()
4553
)
4654

4755
func init() {
@@ -54,14 +62,24 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
5462
if err != nil {
5563
return nil, fmt.Errorf("failed to open procfs: %w", err)
5664
}
57-
return &cpuCollector{
65+
c := &cpuCollector{
5866
fs: fs,
5967
cpu: nodeCPUSecondsDesc,
6068
cpuInfo: prometheus.NewDesc(
6169
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"),
6270
"CPU information from /proc/cpuinfo.",
6371
[]string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil,
6472
),
73+
cpuFlagsInfo: prometheus.NewDesc(
74+
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "flag_info"),
75+
"The `flags` field of CPU information from /proc/cpuinfo.",
76+
[]string{"flag"}, nil,
77+
),
78+
cpuBugsInfo: prometheus.NewDesc(
79+
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "bug_info"),
80+
"The `bugs` field of CPU information from /proc/cpuinfo.",
81+
[]string{"bug"}, nil,
82+
),
6583
cpuGuest: prometheus.NewDesc(
6684
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
6785
"Seconds the cpus spent in guests (VMs) for each mode.",
@@ -78,7 +96,34 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
7896
[]string{"package"}, nil,
7997
),
8098
logger: logger,
81-
}, nil
99+
}
100+
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
101+
if err != nil {
102+
return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
103+
}
104+
return c, nil
105+
}
106+
107+
func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error {
108+
if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo {
109+
*enableCPUInfo = true
110+
level.Info(c.logger).Log("msg", "--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include")
111+
}
112+
113+
var err error
114+
if *flagsIncludeFlag != "" {
115+
c.cpuFlagsIncludeRegexp, err = regexp.Compile(*flagsIncludeFlag)
116+
if err != nil {
117+
return err
118+
}
119+
}
120+
if *bugsIncludeFlag != "" {
121+
c.cpuBugsIncludeRegexp, err = regexp.Compile(*bugsIncludeFlag)
122+
if err != nil {
123+
return err
124+
}
125+
}
126+
return nil
82127
}
83128

84129
// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
@@ -117,6 +162,31 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
117162
cpu.Microcode,
118163
cpu.Stepping,
119164
cpu.CacheSize)
165+
166+
if err := updateFieldInfo(cpu.Flags, c.cpuFlagsIncludeRegexp, c.cpuFlagsInfo, ch); err != nil {
167+
return err
168+
}
169+
if err := updateFieldInfo(cpu.Bugs, c.cpuBugsIncludeRegexp, c.cpuBugsInfo, ch); err != nil {
170+
return err
171+
}
172+
}
173+
return nil
174+
}
175+
176+
func updateFieldInfo(valueList []string, filter *regexp.Regexp, desc *prometheus.Desc, ch chan<- prometheus.Metric) error {
177+
if filter == nil {
178+
return nil
179+
}
180+
181+
for _, val := range valueList {
182+
if !filter.MatchString(val) {
183+
continue
184+
}
185+
ch <- prometheus.MustNewConstMetric(desc,
186+
prometheus.GaugeValue,
187+
1,
188+
val,
189+
)
120190
}
121191
return nil
122192
}

collector/fixtures/e2e-64k-page-output.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
184184
# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
185185
# TYPE node_cooling_device_max_state gauge
186186
node_cooling_device_max_state{name="0",type="Processor"} 3
187+
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
188+
# TYPE node_cpu_bug_info gauge
189+
node_cpu_bug_info{bug="cpu_meltdown"} 1
190+
node_cpu_bug_info{bug="mds"} 1
191+
node_cpu_bug_info{bug="spectre_v1"} 1
192+
node_cpu_bug_info{bug="spectre_v2"} 1
187193
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
188194
# TYPE node_cpu_core_throttles_total counter
189195
node_cpu_core_throttles_total{core="0",package="0"} 5
190196
node_cpu_core_throttles_total{core="0",package="1"} 0
191197
node_cpu_core_throttles_total{core="1",package="0"} 0
192198
node_cpu_core_throttles_total{core="1",package="1"} 9
199+
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
200+
# TYPE node_cpu_flag_info gauge
201+
node_cpu_flag_info{flag="aes"} 1
202+
node_cpu_flag_info{flag="avx"} 1
203+
node_cpu_flag_info{flag="avx2"} 1
204+
node_cpu_flag_info{flag="constant_tsc"} 1
193205
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
194206
# TYPE node_cpu_guest_seconds_total counter
195207
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01

collector/fixtures/e2e-output.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,12 +232,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
232232
# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
233233
# TYPE node_cooling_device_max_state gauge
234234
node_cooling_device_max_state{name="0",type="Processor"} 3
235+
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
236+
# TYPE node_cpu_bug_info gauge
237+
node_cpu_bug_info{bug="cpu_meltdown"} 1
238+
node_cpu_bug_info{bug="mds"} 1
239+
node_cpu_bug_info{bug="spectre_v1"} 1
240+
node_cpu_bug_info{bug="spectre_v2"} 1
235241
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
236242
# TYPE node_cpu_core_throttles_total counter
237243
node_cpu_core_throttles_total{core="0",package="0"} 5
238244
node_cpu_core_throttles_total{core="0",package="1"} 0
239245
node_cpu_core_throttles_total{core="1",package="0"} 0
240246
node_cpu_core_throttles_total{core="1",package="1"} 9
247+
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
248+
# TYPE node_cpu_flag_info gauge
249+
node_cpu_flag_info{flag="aes"} 1
250+
node_cpu_flag_info{flag="avx"} 1
251+
node_cpu_flag_info{flag="avx2"} 1
252+
node_cpu_flag_info{flag="constant_tsc"} 1
241253
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
242254
# TYPE node_cpu_guest_seconds_total counter
243255
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01

end-to-end-test.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ fi
107107
--collector.qdisc.fixtures="collector/fixtures/qdisc/" \
108108
--collector.netclass.ignored-devices="(bond0|dmz|int)" \
109109
--collector.cpu.info \
110+
--collector.cpu.info.flags-include="^(aes|avx.?|constant_tsc)$" \
111+
--collector.cpu.info.bugs-include="^(cpu_meltdown|spectre_.*|mds)$" \
110112
--web.listen-address "127.0.0.1:${port}" \
111113
--log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 &
112114

0 commit comments

Comments
 (0)