Skip to content

Commit 3770e43

Browse files
committed
netlink: read missing attributes from sysfs
Read missing dev_id, name_assign_type, and addr_assign_type from sysfs, since they only take a device-specific lock and not the whole RTNL lock. This means reading them is much less impactful on other system processes than many of the other attributes in sysfs that do take the RTNL lock. Signed-off-by: Dan Williams <[email protected]>
1 parent b87c6a8 commit 3770e43

File tree

1 file changed

+84
-2
lines changed

1 file changed

+84
-2
lines changed

collector/netclass_rtnl_linux.go

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,20 @@
1717
package collector
1818

1919
import (
	"bytes"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"strconv"
	"syscall"

	"github.com/alecthomas/kingpin/v2"
	"github.com/go-kit/log/level"
	"github.com/jsimonetti/rtnetlink"
	"github.com/mdlayher/ethtool"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/procfs/sysfs"
)
3035

3136
var (
@@ -57,14 +62,27 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro
5762
}
5863
}
5964

65+
// Get most attributes from Netlink
6066
lMsgs, err := c.getNetClassInfoRTNL()
6167
if err != nil {
6268
return fmt.Errorf("could not get net class info: %w", err)
6369
}
70+
71+
relevantLinks := make([]rtnetlink.LinkMessage, 0, len(lMsgs))
6472
for _, msg := range lMsgs {
65-
if c.ignoredDevicesPattern.MatchString(msg.Attributes.Name) {
66-
continue
73+
if !c.ignoredDevicesPattern.MatchString(msg.Attributes.Name) {
74+
relevantLinks = append(relevantLinks, msg)
6775
}
76+
}
77+
78+
// Read sysfs for attributes that Netlink doesn't expose
79+
sysfsAttrs, err := getSysfsAttributes(relevantLinks)
80+
if err != nil {
81+
return fmt.Errorf("could not get sysfs device info: %w", err)
82+
}
83+
84+
// Parse all the info and update metrics
85+
for _, msg := range relevantLinks {
6886
upDesc := prometheus.NewDesc(
6987
prometheus.BuildFQName(namespace, c.subsystem, "up"),
7088
"Value is 1 if operstate is 'up', 0 otherwise.",
@@ -96,12 +114,16 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro
96114
duplex = lm.Duplex.String()
97115
}
98116

117+
ifaceInfo := sysfsAttrs[msg.Attributes.Name]
118+
99119
ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, msg.Attributes.Name, msg.Attributes.Address.String(), msg.Attributes.Broadcast.String(), duplex, operstateStr[int(msg.Attributes.OperationalState)], ifalias)
100120

121+
pushMetric(ch, c.getFieldDesc("address_assign_type"), "address_assign_type", ifaceInfo.AddrAssignType, prometheus.GaugeValue, msg.Attributes.Name)
101122
pushMetric(ch, c.getFieldDesc("carrier"), "carrier", msg.Attributes.Carrier, prometheus.GaugeValue, msg.Attributes.Name)
102123
pushMetric(ch, c.getFieldDesc("carrier_changes_total"), "carrier_changes_total", msg.Attributes.CarrierChanges, prometheus.CounterValue, msg.Attributes.Name)
103124
pushMetric(ch, c.getFieldDesc("carrier_up_changes_total"), "carrier_up_changes_total", msg.Attributes.CarrierUpCount, prometheus.CounterValue, msg.Attributes.Name)
104125
pushMetric(ch, c.getFieldDesc("carrier_down_changes_total"), "carrier_down_changes_total", msg.Attributes.CarrierDownCount, prometheus.CounterValue, msg.Attributes.Name)
126+
pushMetric(ch, c.getFieldDesc("device_id"), "device_id", ifaceInfo.DevID, prometheus.GaugeValue, msg.Attributes.Name)
105127
pushMetric(ch, c.getFieldDesc("flags"), "flags", msg.Flags, prometheus.GaugeValue, msg.Attributes.Name)
106128
pushMetric(ch, c.getFieldDesc("iface_id"), "iface_id", msg.Index, prometheus.GaugeValue, msg.Attributes.Name)
107129
pushMetric(ch, c.getFieldDesc("iface_link_mode"), "iface_link_mode", msg.Attributes.LinkMode, prometheus.GaugeValue, msg.Attributes.Name)
@@ -117,6 +139,7 @@ func (c *netClassCollector) netClassRTNLUpdate(ch chan<- prometheus.Metric) erro
117139
}
118140

119141
pushMetric(ch, c.getFieldDesc("mtu_bytes"), "mtu_bytes", msg.Attributes.MTU, prometheus.GaugeValue, msg.Attributes.Name)
142+
pushMetric(ch, c.getFieldDesc("name_assign_type"), "name_assign_type", ifaceInfo.NameAssignType, prometheus.GaugeValue, msg.Attributes.Name)
120143
pushMetric(ch, c.getFieldDesc("net_dev_group"), "net_dev_group", msg.Attributes.NetDevGroup, prometheus.GaugeValue, msg.Attributes.Name)
121144
pushMetric(ch, c.getFieldDesc("transmit_queue_length"), "transmit_queue_length", msg.Attributes.TxQueueLen, prometheus.GaugeValue, msg.Attributes.Name)
122145
pushMetric(ch, c.getFieldDesc("protocol_type"), "protocol_type", msg.Type, prometheus.GaugeValue, msg.Attributes.Name)
@@ -186,3 +209,62 @@ func (c *netClassCollector) getLinkModes() ([]*ethtool.LinkMode, error) {
186209

187210
return lms, err
188211
}
212+
213+
// getSysfsAttributes reads attributes that are absent from netlink but provided
214+
// by sysfs.
215+
func getSysfsAttributes(links []rtnetlink.LinkMessage) (sysfs.NetClass, error) {
216+
netClass := sysfs.NetClass{}
217+
for _, msg := range links {
218+
var err error
219+
interfaceClass := sysfs.NetClassIface{}
220+
ifName := msg.Attributes.Name
221+
222+
// These three attributes hold a device-specific lock when
223+
// accessed, not the RTNL lock, so they are much less impactful
224+
// than reading most of the other attributes from sysfs.
225+
interfaceClass.AddrAssignType, err = getSysfsAttrAsInt64(ifName, "addr_assign_type")
226+
if err != nil {
227+
return nil, err
228+
}
229+
interfaceClass.DevID, err = getSysfsAttrAsInt64(ifName, "dev_id")
230+
if err != nil {
231+
return nil, err
232+
}
233+
interfaceClass.NameAssignType, err = getSysfsAttrAsInt64(ifName, "name_assign_type")
234+
if err != nil {
235+
return nil, err
236+
}
237+
238+
netClass[ifName] = interfaceClass
239+
}
240+
return netClass, nil
241+
}
242+
243+
// getSysfsAttrAsInt64 reads /sys/class/net/<linkName>/<attr> and parses its
// contents as a signed 64-bit integer.
//
// It returns (nil, nil) when the attribute has no usable value: the file does
// not exist, access is denied, the read fails with EOPNOTSUPP or EINVAL, or
// the contents are not a valid integer. Any other read failure is returned as
// an error wrapping the underlying cause.
func getSysfsAttrAsInt64(linkName, attr string) (*int64, error) {
	name := filepath.Join("/sys", "class", "net", linkName, attr)

	data, err := os.ReadFile(name)
	switch {
	case err == nil:
		// Read succeeded; fall through to parsing below.
	case errors.Is(err, fs.ErrNotExist),
		errors.Is(err, fs.ErrPermission),
		errors.Is(err, syscall.EOPNOTSUPP),
		errors.Is(err, syscall.EINVAL):
		// Ignore certain errors we know aren't fatal; same as prometheus's
		// sysfs class does. Matching on the errno values instead of on the
		// message text keeps this independent of how the error is rendered.
		return nil, nil
	default:
		return nil, fmt.Errorf("failed to read file %q: %w", name, err)
	}

	// Base 0 automatically handles number type prefixes (hex, octal, etc).
	const base = 0
	val, err := strconv.ParseInt(string(bytes.TrimSpace(data)), base, 64)
	if err != nil {
		// Deliberately best-effort: there is nothing useful to do with a
		// malformed value, and an error parsing a single attribute
		// shouldn't stop parsing others.
		return nil, nil
	}
	return &val, nil
}

0 commit comments

Comments
 (0)