Skip to content

Commit 2b36eeb

Browse files
authored
Eliminate TODO stored to error trace from job rescuer (#1010)
Here, eliminate a TODO that's stored to an error's `trace` when a job is rescued by `JobRescuer`. I've changed this to an empty string instead and documented that it'll be an empty string.
1 parent 7be36dd commit 2b36eeb

File tree

4 files changed

+41
-32
lines changed

4 files changed

+41
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3636
### Fixed
3737

3838
- Cleanly error on invalid schema names in `Config.Schema`. [PR #952](https://github.com/riverqueue/river/pull/952).
39+
- Jobs rescued by `JobRescuer` no longer have their trace set to "TODO". This becomes an empty string instead. [PR #1010](https://github.com/riverqueue/river/pull/1010).
3940

4041
## [0.23.1] - 2025-06-04
4142

internal/maintenance/job_rescuer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,8 @@ func (s *JobRescuer) runOnce(ctx context.Context) (*rescuerRunOnceResult, error)
188188
errorData, err := json.Marshal(rivertype.AttemptError{
189189
At: now,
190190
Attempt: max(job.Attempt, 0),
191-
Error: "Stuck job rescued by Rescuer",
192-
Trace: "TODO",
191+
Error: "Stuck job rescued by JobRescuer",
192+
Trace: "",
193193
})
194194
if err != nil {
195195
return nil, fmt.Errorf("error marshaling error JSON: %w", err)

internal/maintenance/job_rescuer_test.go

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ func TestJobRescuer(t *testing.T) {
134134

135135
t.Run("RescuesStuckJobs", func(t *testing.T) {
136136
t.Parallel()
137-
require := require.New(t)
138137

139138
rescuer, bundle := setup(t)
140139

@@ -163,63 +162,69 @@ func TestJobRescuer(t *testing.T) {
163162
longTimeOutJob1 := testfactory.Job(ctx, t, bundle.exec, &testfactory.JobOpts{Kind: ptrutil.Ptr(rescuerJobKindLongTimeout), State: ptrutil.Ptr(rivertype.JobStateRunning), AttemptedAt: ptrutil.Ptr(bundle.rescueHorizon.Add(-1 * time.Minute)), MaxAttempts: ptrutil.Ptr(5)})
164163
longTimeOutJob2 := testfactory.Job(ctx, t, bundle.exec, &testfactory.JobOpts{Kind: ptrutil.Ptr(rescuerJobKindLongTimeout), State: ptrutil.Ptr(rivertype.JobStateRunning), AttemptedAt: ptrutil.Ptr(bundle.rescueHorizon.Add(-6 * time.Minute)), MaxAttempts: ptrutil.Ptr(5)})
165164

166-
require.NoError(rescuer.Start(ctx))
165+
require.NoError(t, rescuer.Start(ctx))
167166

168167
rescuer.TestSignals.FetchedBatch.WaitOrTimeout()
169168
rescuer.TestSignals.UpdatedBatch.WaitOrTimeout()
170169

171170
confirmRetried := func(jobBefore *rivertype.JobRow) {
172171
jobAfter, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: jobBefore.ID, Schema: rescuer.Config.Schema})
173-
require.NoError(err)
174-
require.Equal(rivertype.JobStateRetryable, jobAfter.State)
172+
require.NoError(t, err)
173+
require.Equal(t, rivertype.JobStateRetryable, jobAfter.State)
174+
175+
require.Len(t, jobAfter.Errors, 1)
176+
attemptError := jobAfter.Errors[0]
177+
require.Zero(t, attemptError.Attempt)
178+
require.Equal(t, "Stuck job rescued by JobRescuer", attemptError.Error)
179+
require.Empty(t, attemptError.Trace)
175180
}
176181

177182
var err error
178183
confirmRetried(stuckToRetryJob1)
179184
confirmRetried(stuckToRetryJob2)
180185

181186
job3After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: stuckToRetryJob3.ID, Schema: rescuer.Config.Schema})
182-
require.NoError(err)
183-
require.Equal(stuckToRetryJob3.State, job3After.State) // not rescued
187+
require.NoError(t, err)
188+
require.Equal(t, stuckToRetryJob3.State, job3After.State) // not rescued
184189

185190
discardJob1After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: stuckToDiscardJob1.ID, Schema: rescuer.Config.Schema})
186-
require.NoError(err)
187-
require.Equal(rivertype.JobStateDiscarded, discardJob1After.State)
188-
require.WithinDuration(time.Now(), *discardJob1After.FinalizedAt, 5*time.Second)
189-
require.Len(discardJob1After.Errors, 1)
191+
require.NoError(t, err)
192+
require.Equal(t, rivertype.JobStateDiscarded, discardJob1After.State)
193+
require.WithinDuration(t, time.Now(), *discardJob1After.FinalizedAt, 5*time.Second)
194+
require.Len(t, discardJob1After.Errors, 1)
190195

191196
discardJob2After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: stuckToDiscardJob2.ID, Schema: rescuer.Config.Schema})
192-
require.NoError(err)
193-
require.Equal(rivertype.JobStateRunning, discardJob2After.State)
194-
require.Nil(discardJob2After.FinalizedAt)
197+
require.NoError(t, err)
198+
require.Equal(t, rivertype.JobStateRunning, discardJob2After.State)
199+
require.Nil(t, discardJob2After.FinalizedAt)
195200

196201
cancelJob1After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: stuckToCancelJob1.ID, Schema: rescuer.Config.Schema})
197-
require.NoError(err)
198-
require.Equal(rivertype.JobStateCancelled, cancelJob1After.State)
199-
require.WithinDuration(time.Now(), *cancelJob1After.FinalizedAt, 5*time.Second)
200-
require.Len(cancelJob1After.Errors, 1)
202+
require.NoError(t, err)
203+
require.Equal(t, rivertype.JobStateCancelled, cancelJob1After.State)
204+
require.WithinDuration(t, time.Now(), *cancelJob1After.FinalizedAt, 5*time.Second)
205+
require.Len(t, cancelJob1After.Errors, 1)
201206

202207
cancelJob2After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: stuckToCancelJob2.ID, Schema: rescuer.Config.Schema})
203-
require.NoError(err)
204-
require.Equal(rivertype.JobStateRunning, cancelJob2After.State)
205-
require.Nil(cancelJob2After.FinalizedAt)
208+
require.NoError(t, err)
209+
require.Equal(t, rivertype.JobStateRunning, cancelJob2After.State)
210+
require.Nil(t, cancelJob2After.FinalizedAt)
206211

207212
notRunningJob1After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: notRunningJob1.ID, Schema: rescuer.Config.Schema})
208-
require.NoError(err)
209-
require.Equal(notRunningJob1.State, notRunningJob1After.State)
213+
require.NoError(t, err)
214+
require.Equal(t, notRunningJob1.State, notRunningJob1After.State)
210215
notRunningJob2After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: notRunningJob2.ID, Schema: rescuer.Config.Schema})
211-
require.NoError(err)
212-
require.Equal(notRunningJob2.State, notRunningJob2After.State)
216+
require.NoError(t, err)
217+
require.Equal(t, notRunningJob2.State, notRunningJob2After.State)
213218
notRunningJob3After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: notRunningJob3.ID, Schema: rescuer.Config.Schema})
214-
require.NoError(err)
215-
require.Equal(notRunningJob3.State, notRunningJob3After.State)
219+
require.NoError(t, err)
220+
require.Equal(t, notRunningJob3.State, notRunningJob3After.State)
216221

217222
notTimedOutJob1After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: longTimeOutJob1.ID, Schema: rescuer.Config.Schema})
218-
require.NoError(err)
219-
require.Equal(rivertype.JobStateRunning, notTimedOutJob1After.State)
223+
require.NoError(t, err)
224+
require.Equal(t, rivertype.JobStateRunning, notTimedOutJob1After.State)
220225
notTimedOutJob2After, err := bundle.exec.JobGetByID(ctx, &riverdriver.JobGetByIDParams{ID: longTimeOutJob2.ID, Schema: rescuer.Config.Schema})
221-
require.NoError(err)
222-
require.Equal(rivertype.JobStateRetryable, notTimedOutJob2After.State)
226+
require.NoError(t, err)
227+
require.Equal(t, rivertype.JobStateRetryable, notTimedOutJob2After.State)
223228
})
224229

225230
t.Run("RescuesInBatches", func(t *testing.T) {

rivertype/river_type.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ type AttemptError struct {
251251

252252
// Trace contains a stack trace from a job that panicked. The trace is
253253
// produced by invoking `debug.Trace()`.
254+
//
255+
// In the case of a non-panic or an error produced as a stuck job was
256+
// rescued, this value will be an empty string.
254257
Trace string `json:"trace"`
255258
}
256259

0 commit comments

Comments
 (0)