Skip to content

Commit 9f1f791

Browse files
authored
filesystem: fix mountTimeout not working issue (prometheus#2903)
Signed-off-by: DongWei <[email protected]>
1 parent 6d18ce7 commit 9f1f791

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

collector/filesystem_linux.go

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -122,16 +122,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta
122122

123123
buf := new(unix.Statfs_t)
124124
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf)
125-
stuckMountsMtx.Lock()
126125
close(success)
127126

128-
// If the mount has been marked as stuck, unmark it and log its recovery.
129-
if _, ok := stuckMounts[labels.mountPoint]; ok {
130-
level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint)
131-
delete(stuckMounts, labels.mountPoint)
132-
}
133-
stuckMountsMtx.Unlock()
134-
135127
if err != nil {
136128
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
137129
return filesystemStats{
@@ -161,17 +153,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg
161153
select {
162154
case <-success:
163155
// Success
156+
// If the mount has been marked as stuck, unmark it and log its recovery.
157+
stuckMountsMtx.Lock()
158+
defer stuckMountsMtx.Unlock()
159+
if _, ok := stuckMounts[mountPoint]; ok {
160+
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
161+
delete(stuckMounts, mountPoint)
162+
}
164163
case <-mountCheckTimer.C:
165164
// Timed out, mark mount as stuck
166165
stuckMountsMtx.Lock()
166+
defer stuckMountsMtx.Unlock()
167167
select {
168168
case <-success:
169169
// Success came in just after the timeout was reached, don't label the mount as stuck
170+
// If the mount has been marked as stuck, unmark it and log its recovery.
171+
if _, ok := stuckMounts[mountPoint]; ok {
172+
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
173+
delete(stuckMounts, mountPoint)
174+
}
170175
default:
171176
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
172177
stuckMounts[mountPoint] = struct{}{}
173178
}
174-
stuckMountsMtx.Unlock()
175179
}
176180
}
177181

0 commit comments

Comments
 (0)