|
| 1 | +// Copyright 2023 The Prometheus Authors |
| 2 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 3 | +// you may not use this file except in compliance with the License. |
| 4 | +// You may obtain a copy of the License at |
| 5 | +// |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | +// |
| 8 | +// Unless required by applicable law or agreed to in writing, software |
| 9 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 10 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 11 | +// See the License for the specific language governing permissions and |
| 12 | +// limitations under the License. |
| 13 | + |
| 14 | +//go:build !nocpu |
| 15 | +// +build !nocpu |
| 16 | + |
| 17 | +package collector |
| 18 | + |
| 19 | +import ( |
| 20 | + "errors" |
| 21 | + "math" |
| 22 | + "regexp" |
| 23 | + "sort" |
| 24 | + "strconv" |
| 25 | + "strings" |
| 26 | + "unsafe" |
| 27 | + |
| 28 | + "github.com/go-kit/log" |
| 29 | + "github.com/go-kit/log/level" |
| 30 | + "github.com/prometheus/client_golang/prometheus" |
| 31 | + "golang.org/x/sys/unix" |
| 32 | + |
| 33 | + "howett.net/plist" |
| 34 | +) |
| 35 | + |
// clockinfo is the record that getCPUTimes overlays on the raw bytes of the
// kern.clockrate sysctl, so the field order and widths must not change.
//
//	hz     - clock frequency
//	tick   - micro-seconds per hz tick
//	spare  - unused
//	stathz - statistics clock frequency
//	profhz - profiling clock frequency
type clockinfo struct {
	hz, tick, spare, stathz, profhz int32
}
| 43 | + |
// cputime holds the time one CPU has spent in each scheduler state. The
// values are tick counters already divided by the clock frequency, i.e.
// seconds.
type cputime struct {
	user, nice, sys, intr, idle float64
}
| 51 | + |
// plistref is the in/out argument of the sysmon ioctl issued by
// readSysmonProperties. After a successful call it describes the buffer
// that holds the serialized property list.
type plistref struct {
	pref_plist unsafe.Pointer // start of the serialized property list
	pref_len   uint64         // size of that buffer in bytes
}
| 56 | + |
// Shapes used to decode the property list read from /dev/sysmon.
type (
	// sysmonValues is a single entry of a sysmon property; only the keys
	// this collector reads are declared.
	sysmonValues struct {
		CurValue    int    `plist:"cur-value"`
		Description string `plist:"description"`
		State       string `plist:"state"`
		Type        string `plist:"type"`
	}

	// sysmonProperty is the list of entries stored under one property key.
	sysmonProperty []sysmonValues

	// sysmonProperties maps a property key to its entries.
	sysmonProperties map[string]sysmonProperty
)
| 67 | + |
// readBytes copies the first length-1 bytes found at ptr into a newly
// allocated slice; the last byte of the region is deliberately left out
// (presumably a terminating NUL - confirm against the producer of the
// buffer).
//
// A nil ptr, or a length of 0 or 1, yields a nil slice. Without that guard
// length-1 wraps around for length == 0 and the allocation panics.
func readBytes(ptr unsafe.Pointer, length uint64) []byte {
	if ptr == nil || length <= 1 {
		return nil
	}

	buf := make([]byte, length-1)
	for i := range buf {
		// Pointer arithmetic is kept inside a single expression so the
		// conversion back to unsafe.Pointer stays valid.
		buf[i] = *(*byte)(unsafe.Pointer(uintptr(ptr) + uintptr(i)))
	}
	return buf
}
| 76 | + |
| 77 | +func ioctl(fd int, nr int64, typ byte, size uintptr, retptr unsafe.Pointer) error { |
| 78 | + _, _, errno := unix.Syscall( |
| 79 | + unix.SYS_IOCTL, |
| 80 | + uintptr(fd), |
| 81 | + // Some magicks derived from sys/ioccom.h. |
| 82 | + uintptr((0x40000000|0x80000000)| |
| 83 | + ((int64(size)&(1<<13-1))<<16)| |
| 84 | + (int64(typ)<<8)| |
| 85 | + nr, |
| 86 | + ), |
| 87 | + uintptr(retptr), |
| 88 | + ) |
| 89 | + if errno != 0 { |
| 90 | + return errno |
| 91 | + } |
| 92 | + return nil |
| 93 | +} |
| 94 | + |
| 95 | +func readSysmonProperties() (sysmonProperties, error) { |
| 96 | + fd, err := unix.Open(rootfsFilePath("/dev/sysmon"), unix.O_RDONLY, 0777) |
| 97 | + if err != nil { |
| 98 | + return nil, err |
| 99 | + } |
| 100 | + defer unix.Close(fd) |
| 101 | + |
| 102 | + var retptr plistref |
| 103 | + |
| 104 | + if err = ioctl(fd, 0, 'E', unsafe.Sizeof(retptr), unsafe.Pointer(&retptr)); err != nil { |
| 105 | + return nil, err |
| 106 | + } |
| 107 | + |
| 108 | + bytes := readBytes(retptr.pref_plist, retptr.pref_len) |
| 109 | + |
| 110 | + var props sysmonProperties |
| 111 | + if _, err = plist.Unmarshal(bytes, &props); err != nil { |
| 112 | + return nil, err |
| 113 | + } |
| 114 | + return props, nil |
| 115 | +} |
| 116 | + |
| 117 | +func sortFilterSysmonProperties(props sysmonProperties, prefix string) []string { |
| 118 | + var keys []string |
| 119 | + for key := range props { |
| 120 | + if !strings.HasPrefix(key, prefix) { |
| 121 | + continue |
| 122 | + } |
| 123 | + keys = append(keys, key) |
| 124 | + } |
| 125 | + sort.Strings(keys) |
| 126 | + return keys |
| 127 | +} |
| 128 | + |
| 129 | +func convertTemperatures(prop sysmonProperty, res map[int]float64) error { |
| 130 | + |
| 131 | + for _, val := range prop { |
| 132 | + if val.State == "invalid" || val.State == "unknown" || val.State == "" { |
| 133 | + continue |
| 134 | + } |
| 135 | + |
| 136 | + re := regexp.MustCompile("^cpu([0-9]+) temperature$") |
| 137 | + core := re.FindStringSubmatch(val.Description)[1] |
| 138 | + ncore, _ := strconv.Atoi(core) |
| 139 | + temperature := ((float64(uint64(val.CurValue))) / 1000000) - 273.15 |
| 140 | + res[ncore] = temperature |
| 141 | + } |
| 142 | + return nil |
| 143 | +} |
| 144 | + |
| 145 | +func getCPUTemperatures() (map[int]float64, error) { |
| 146 | + |
| 147 | + res := make(map[int]float64) |
| 148 | + |
| 149 | + // Read all properties |
| 150 | + props, err := readSysmonProperties() |
| 151 | + if err != nil { |
| 152 | + return res, err |
| 153 | + } |
| 154 | + |
| 155 | + keys := sortFilterSysmonProperties(props, "coretemp") |
| 156 | + for idx, _ := range keys { |
| 157 | + convertTemperatures(props[keys[idx]], res) |
| 158 | + } |
| 159 | + |
| 160 | + return res, nil |
| 161 | +} |
| 162 | + |
| 163 | +func getCPUTimes() ([]cputime, error) { |
| 164 | + const states = 5 |
| 165 | + |
| 166 | + clockb, err := unix.SysctlRaw("kern.clockrate") |
| 167 | + if err != nil { |
| 168 | + return nil, err |
| 169 | + } |
| 170 | + clock := *(*clockinfo)(unsafe.Pointer(&clockb[0])) |
| 171 | + |
| 172 | + var cpufreq float64 |
| 173 | + if clock.stathz > 0 { |
| 174 | + cpufreq = float64(clock.stathz) |
| 175 | + } else { |
| 176 | + cpufreq = float64(clock.hz) |
| 177 | + } |
| 178 | + |
| 179 | + ncpusb, err := unix.SysctlRaw("hw.ncpu") |
| 180 | + if err != nil { |
| 181 | + return nil, err |
| 182 | + } |
| 183 | + ncpus := *(*int)(unsafe.Pointer(&ncpusb[0])) |
| 184 | + |
| 185 | + if ncpus < 1 { |
| 186 | + return nil, errors.New("Invalid cpu number") |
| 187 | + } |
| 188 | + |
| 189 | + var times []float64 |
| 190 | + for ncpu := 0; ncpu < ncpus; ncpu++ { |
| 191 | + cpb, err := unix.SysctlRaw("kern.cp_time", ncpu) |
| 192 | + if err != nil { |
| 193 | + return nil, err |
| 194 | + } |
| 195 | + for len(cpb) >= int(unsafe.Sizeof(int(0))) { |
| 196 | + t := *(*int)(unsafe.Pointer(&cpb[0])) |
| 197 | + times = append(times, float64(t)/cpufreq) |
| 198 | + cpb = cpb[unsafe.Sizeof(int(0)):] |
| 199 | + } |
| 200 | + } |
| 201 | + |
| 202 | + cpus := make([]cputime, len(times)/states) |
| 203 | + for i := 0; i < len(times); i += states { |
| 204 | + cpu := &cpus[i/states] |
| 205 | + cpu.user = times[i] |
| 206 | + cpu.nice = times[i+1] |
| 207 | + cpu.sys = times[i+2] |
| 208 | + cpu.intr = times[i+3] |
| 209 | + cpu.idle = times[i+4] |
| 210 | + } |
| 211 | + return cpus, nil |
| 212 | +} |
| 213 | + |
| 214 | +type statCollector struct { |
| 215 | + cpu typedDesc |
| 216 | + temp typedDesc |
| 217 | + logger log.Logger |
| 218 | +} |
| 219 | + |
| 220 | +func init() { |
| 221 | + registerCollector("cpu", defaultEnabled, NewStatCollector) |
| 222 | +} |
| 223 | + |
| 224 | +// NewStatCollector returns a new Collector exposing CPU stats. |
| 225 | +func NewStatCollector(logger log.Logger) (Collector, error) { |
| 226 | + return &statCollector{ |
| 227 | + cpu: typedDesc{nodeCPUSecondsDesc, prometheus.CounterValue}, |
| 228 | + temp: typedDesc{prometheus.NewDesc( |
| 229 | + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "temperature_celsius"), |
| 230 | + "CPU temperature", |
| 231 | + []string{"cpu"}, nil, |
| 232 | + ), prometheus.GaugeValue}, |
| 233 | + logger: logger, |
| 234 | + }, nil |
| 235 | +} |
| 236 | + |
| 237 | +// Expose CPU stats using sysctl. |
| 238 | +func (c *statCollector) Update(ch chan<- prometheus.Metric) error { |
| 239 | + // We want time spent per-cpu per CPUSTATE. |
| 240 | + // CPUSTATES (number of CPUSTATES) is defined as 5U. |
| 241 | + // Order: CP_USER | CP_NICE | CP_SYS | CP_IDLE | CP_INTR |
| 242 | + // sysctl kern.cp_time.x provides CPUSTATES long integers: |
| 243 | + // (space-separated list of the above variables, where |
| 244 | + // x stands for the number of the CPU core) |
| 245 | + // |
| 246 | + // Each value is a counter incremented at frequency |
| 247 | + // kern.clockrate.(stathz | hz) |
| 248 | + // |
| 249 | + // Look into sys/kern/kern_clock.c for details. |
| 250 | + |
| 251 | + cpuTimes, err := getCPUTimes() |
| 252 | + if err != nil { |
| 253 | + return err |
| 254 | + } |
| 255 | + |
| 256 | + cpuTemperatures, err := getCPUTemperatures() |
| 257 | + if err != nil { |
| 258 | + return err |
| 259 | + } |
| 260 | + |
| 261 | + for cpu, t := range cpuTimes { |
| 262 | + lcpu := strconv.Itoa(cpu) |
| 263 | + ch <- c.cpu.mustNewConstMetric(float64(t.user), lcpu, "user") |
| 264 | + ch <- c.cpu.mustNewConstMetric(float64(t.nice), lcpu, "nice") |
| 265 | + ch <- c.cpu.mustNewConstMetric(float64(t.sys), lcpu, "system") |
| 266 | + ch <- c.cpu.mustNewConstMetric(float64(t.intr), lcpu, "interrupt") |
| 267 | + ch <- c.cpu.mustNewConstMetric(float64(t.idle), lcpu, "idle") |
| 268 | + |
| 269 | + if temp, ok := cpuTemperatures[cpu]; ok { |
| 270 | + ch <- c.temp.mustNewConstMetric(temp, lcpu) |
| 271 | + } else { |
| 272 | + level.Debug(c.logger).Log("msg", "no temperature information for CPU", "cpu", cpu) |
| 273 | + ch <- c.temp.mustNewConstMetric(math.NaN(), lcpu) |
| 274 | + } |
| 275 | + } |
| 276 | + return err |
| 277 | +} |
0 commit comments