Skip to content

Commit cddef07

Browse files
committed
Clarify nomenclature, new boot status file format
Old format Boot status (WDIOF) : 0x0010 Timeout (sec) : 20 Kick interval : 10 Reset counter : 163 Reset date : 2020-01-09T10:11:04Z PID : 0 Watchdog ID : 0 Label : Reset cause : 7 Reset reason : Unknown failure New format Timeout (sec) : 20 Kick interval : 10 Boot status (WDIOF) : 0x0020 Reset cause : WDIOF_CARDRESET Reset counter : 163 Reset date : 2020-01-09T10:11:04Z Reset reason : 7 - Unknown failure If reset reason code is one of: 2, 3, 4, or 5, information about the failing monitored process is also included. Timeout (sec) : 20 Kick interval : 10 Boot status (WDIOF) : 0x0020 Reset cause : WDIOF_CARDRESET Reset counter : 163 Reset date : 2020-01-09T10:11:04Z Reset reason : 3 - Failed kick Failed PID : 1234 Failed Watchdog ID : 1 Failed Label : dpaad Signed-off-by: Joachim Nilsson <[email protected]>
1 parent d7e48e5 commit cddef07

File tree

2 files changed

+74
-49
lines changed

2 files changed

+74
-49
lines changed

src/wdt.c

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ static uev_t period_watcher;
2727
static uev_t timeout_watcher;
2828
static struct watchdog_info info;
2929

30-
/* Actual reset reason as read at boot, reported by supervisor API */
30+
/* Watchdogd reset reason as read at boot */
3131
wdog_reason_t reset_reason;
3232
wdog_code_t reset_code = WDOG_SYSTEM_OK;
3333
unsigned int reset_counter = 0;
@@ -38,10 +38,9 @@ unsigned int reset_counter = 0;
3838
int wdt_open(const char *dev)
3939
{
4040
static int once = 0;
41-
static int cause = 0;
4241

4342
if (fd >= 0)
44-
return cause;
43+
return 0;
4544

4645
if (dev) {
4746
if (!strncmp(dev, "/dev", 4))
@@ -93,8 +92,7 @@ int wdt_open(const char *dev)
9392
if (!wdt_capability(WDIOF_POWERUNDER))
9493
WARN("WDT does not support PWR fail condition, treating as card reset.");
9594

96-
/* Read boot cause from watchdog ... */
97-
return cause = wdt_get_bootstatus();
95+
return 0;
9896
}
9997

10098
static void period_cb(uev_t *w, void *arg, int event)
@@ -108,13 +106,13 @@ static void period_cb(uev_t *w, void *arg, int event)
108106
*/
109107
int wdt_init(uev_ctx_t *ctx, const char *dev)
110108
{
111-
int T, cause;
109+
int T, err;
112110

113111
if (wdt_testmode())
114112
return 0;
115113

116-
cause = wdt_open(dev);
117-
if (cause < 0)
114+
err = wdt_open(dev);
115+
if (err)
118116
return 1;
119117

120118
/* Set requested WDT timeout right before we enter the event loop. */
@@ -145,7 +143,7 @@ int wdt_init(uev_ctx_t *ctx, const char *dev)
145143
return 0;
146144

147145
/* Save/update /run/watchdogd.status */
148-
wdt_set_bootstatus(cause, timeout, period);
146+
wdt_set_bootstatus(timeout, period);
149147

150148
/* Calculate period (T) in milliseconds for libuEv */
151149
T = period * 1000;
@@ -246,13 +244,13 @@ int wdt_fload_reason(FILE *fp, wdog_reason_t *r, pid_t *pid)
246244
pid = &dummy;
247245

248246
while ((ptr = fgets(buf, sizeof(buf), fp))) {
249-
if (sscanf(buf, WDT_REASON_CNT ": %u\n", &r->counter) == 1)
247+
if (sscanf(buf, WDT_RESETCOUNT ": %u\n", &r->counter) == 1)
250248
continue;
251249
if (sscanf(buf, WDT_REASON_PID ": %d\n", pid) == 1)
252250
continue;
253251
if (sscanf(buf, WDT_REASON_WID ": %d\n", &r->wid) == 1)
254252
continue;
255-
if (sscanf(buf, WDT_REASON_CSE ": %d\n", (int *)&r->code) == 1)
253+
if (sscanf(buf, WDT_REASON_STR ": %d\n", (int *)&r->code) == 1)
256254
continue;
257255

258256
if (string_match(buf, WDT_REASON_LBL ": ")) {
@@ -261,8 +259,8 @@ int wdt_fload_reason(FILE *fp, wdog_reason_t *r, pid_t *pid)
261259
continue;
262260
}
263261

264-
if (string_match(buf, WDT_REASON_TME ": ")) {
265-
ptr += strlen(WDT_REASON_TME) + 2;
262+
if (string_match(buf, WDT_RESET_DATE ": ")) {
263+
ptr += strlen(WDT_RESET_DATE) + 2;
266264
strptime(chomp(ptr), "%FT%TZ", &r->date);
267265
continue;
268266
}
@@ -275,19 +273,27 @@ int wdt_fstore_reason(FILE *fp, wdog_reason_t *r, pid_t pid)
275273
{
276274
time_t now;
277275

278-
fprintf(fp, WDT_REASON_CNT ": %u\n", r->counter);
276+
fprintf(fp, WDT_RESETCOUNT ": %u\n", r->counter);
279277
now = time(NULL);
280278
if (now != (time_t)-1) {
281279
char buf[25];
282280

283281
strftime(buf, sizeof buf, "%FT%TZ", gmtime(&now));
284-
fprintf(fp, WDT_REASON_TME ": %s\n", buf);
282+
fprintf(fp, WDT_RESET_DATE ": %s\n", buf);
283+
}
284+
fprintf(fp, WDT_REASON_STR ": %d - %s\n", r->code, wdog_reset_reason_str(r));
285+
switch (r->code) {
286+
case WDOG_FAILED_SUBSCRIPTION:
287+
case WDOG_FAILED_KICK:
288+
case WDOG_FAILED_UNSUBSCRIPTION:
289+
case WDOG_FAILED_TO_MEET_DEADLINE:
290+
fprintf(fp, WDT_REASON_PID ": %d\n", pid);
291+
fprintf(fp, WDT_REASON_WID ": %d\n", r->wid);
292+
fprintf(fp, WDT_REASON_LBL ": %s\n", r->label);
293+
break;
294+
default:
295+
break;
285296
}
286-
fprintf(fp, WDT_REASON_PID ": %d\n", pid);
287-
fprintf(fp, WDT_REASON_WID ": %d\n", r->wid);
288-
fprintf(fp, WDT_REASON_LBL ": %s\n", r->label);
289-
fprintf(fp, WDT_REASON_CSE ": %d\n", r->code);
290-
fprintf(fp, WDT_REASON_STR ": %s\n", wdog_reset_reason_str(r));
291297

292298
return fclose(fp);
293299
}
@@ -326,6 +332,31 @@ static int compat_supervisor(wdog_reason_t *r)
326332
#define compat_supervisor(r) 0
327333
#endif /* COMPAT_SUPERVISOR */
328334

335+
const char *bootstatus_string(int cause)
336+
{
337+
const char *str = NULL;
338+
339+
if (cause & WDIOF_CARDRESET)
340+
str = "WDIOF_CARDRESET";
341+
if (cause & WDIOF_EXTERN1)
342+
str = "WDIOF_EXTERN1";
343+
if (cause & WDIOF_EXTERN2)
344+
str = "WDIOF_EXTERN2";
345+
if (cause & WDIOF_POWERUNDER)
346+
str = "WDIOF_POWERUNDER";
347+
if (cause & WDIOF_POWEROVER)
348+
str = "WDIOF_POWEROVER";
349+
if (cause & WDIOF_FANFAULT)
350+
str = "WDIOF_FANFAULT";
351+
if (cause & WDIOF_OVERHEAT)
352+
str = "WDIOF_OVERHEAT";
353+
354+
if (!str)
355+
str = "WDIOF_UNKNOWN";
356+
357+
return str;
358+
}
359+
329360
static int create_bootstatus(char *fn, wdog_reason_t *r, int cause, int timeout, int interval, pid_t pid)
330361
{
331362
FILE *fp;
@@ -336,24 +367,29 @@ static int create_bootstatus(char *fn, wdog_reason_t *r, int cause, int timeout,
336367
return -1;
337368
}
338369

339-
fprintf(fp, WDT_REASON_WDT ": 0x%04x\n", cause >= 0 ? cause : 0);
340-
fprintf(fp, WDT_REASON_TMO ": %d\n", timeout);
341-
fprintf(fp, WDT_REASON_INT ": %d\n", interval);
370+
fprintf(fp, WDT_TMOSEC_OPT ": %d\n", timeout);
371+
fprintf(fp, WDT_INTSEC_OPT ": %d\n", interval);
372+
fprintf(fp, WDT_BOOTSTATUS ": 0x%04x\n", cause >= 0 ? cause : 0);
373+
fprintf(fp, WDT_RESETCAUSE ": %s\n", bootstatus_string(cause));
342374

343375
return wdt_fstore_reason(fp, r, pid);
344376
}
345377

346-
int wdt_set_bootstatus(int cause, int timeout, int interval)
378+
int wdt_set_bootstatus(int timeout, int interval)
347379
{
380+
wdog_reason_t reason;
348381
pid_t pid = 0;
349382
char *status;
350-
wdog_reason_t reason;
383+
int cause;
351384

352385
if (wdt_testmode())
353386
status = WDOG_STATUS_TEST;
354387
else
355388
status = WDOG_STATUS;
356389

390+
cause = wdt_get_bootstatus();
391+
LOG("Reset cause: 0x%04x - %s", cause, bootstatus_string(cause));
392+
357393
/*
358394
* In case we're restarted at runtime this prevents us from
359395
* recreating the status file(s).
@@ -403,32 +439,22 @@ int wdt_set_bootstatus(int cause, int timeout, int interval)
403439

404440
int wdt_get_bootstatus(void)
405441
{
406-
int status = 0;
442+
int cause;
407443
int err;
408444

409445
if (wdt_testmode())
410-
return status;
446+
return 0;
411447

412448
if (fd == -1) {
413449
DEBUG("Cannot get boot status, currently disabled.");
414450
return 0;
415451
}
416452

417-
if ((err = ioctl(fd, WDIOC_GETBOOTSTATUS, &status)))
418-
status += err;
419-
420-
if (!err && status) {
421-
if (status & WDIOF_POWERUNDER)
422-
LOG("Reset cause: POWER-ON");
423-
if (status & WDIOF_FANFAULT)
424-
LOG("Reset cause: FAN-FAULT");
425-
if (status & WDIOF_OVERHEAT)
426-
LOG("Reset cause: CPU-OVERHEAT");
427-
if (status & WDIOF_CARDRESET)
428-
LOG("Reset cause: WATCHDOG");
429-
}
453+
err = ioctl(fd, WDIOC_GETBOOTSTATUS, &cause);
454+
if (err)
455+
return err;
430456

431-
return status;
457+
return cause;
432458
}
433459

434460
int wdt_enable(int enable)

src/wdt.h

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@
5151
#define WDT_REASON_PID "PID "
5252
#define WDT_REASON_WID "Watchdog ID "
5353
#define WDT_REASON_LBL "Label "
54-
#define WDT_REASON_TME "Reset date "
55-
#define WDT_REASON_CSE "Reset cause "
54+
#define WDT_RESET_DATE "Reset date "
55+
#define WDT_RESETCAUSE "Reset cause "
5656
#define WDT_REASON_STR "Reset reason "
57-
#define WDT_REASON_CNT "Reset counter "
58-
#define WDT_REASON_WDT "Boot status (WDIOF) "
59-
#define WDT_REASON_TMO "Timeout (sec) "
60-
#define WDT_REASON_INT "Kick interval "
57+
#define WDT_RESETCOUNT "Reset counter "
58+
#define WDT_BOOTSTATUS "Boot status (WDIOF) "
59+
#define WDT_TMOSEC_OPT "Timeout (sec) "
60+
#define WDT_INTSEC_OPT "Kick interval "
6161

6262
#define EMERG(fmt, args...) syslog(LOG_EMERG, fmt, ##args)
6363
#define ERROR(fmt, args...) syslog(LOG_ERR, fmt, ##args)
@@ -112,10 +112,9 @@ int wdt_forced_reset (uev_ctx_t *ctx, pid_t pid, char *label, int timeout);
112112
int wdt_fload_reason (FILE *fp, wdog_reason_t *r, pid_t *pid);
113113
int wdt_fstore_reason (FILE *fp, wdog_reason_t *r, pid_t pid);
114114

115-
int wdt_set_bootstatus (int cause, int timeout, int interval);
115+
int wdt_set_bootstatus (int timeout, int interval);
116116
int wdt_get_bootstatus (void);
117117

118-
119118
static inline unsigned int wdt_reset_counter(void)
120119
{
121120
return reset_counter;

0 commit comments

Comments
 (0)