Skip to content

Commit b77b00a

Browse files
committed
fix: do not fail PipelineRun when TaskRef reconciles with retryable err
With this change, no `ErrCouldntValidateObjectRetryable` returned by DryRunValidate results in a PipelineRun being marked as failed. Previously, the only errors which were actually retried were naming conflicts, server-timeouts, timeouts, and too-many-requests. Unknown errors were marked as "Retryable", however they still caused the PipelineRun to fail since `resolutioncommon.IsErrTransient` did not check if the err was of type `ErrCouldntValidateObjectRetryable`
1 parent d8f7b93 commit b77b00a

File tree

4 files changed

+125
-14
lines changed

4 files changed

+125
-14
lines changed

pkg/reconciler/pipelinerun/pipelinerun_test.go

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8513,7 +8513,7 @@ func (prt PipelineRunTest) reconcileRun(namespace, pipelineRunName string, wantE
85138513
prt.Test.Fatalf("Expected an error to be returned by Reconcile, got nil instead")
85148514
}
85158515
if controller.IsPermanentError(reconcileError) != permanentError {
8516-
prt.Test.Fatalf("Expected the error to be permanent: %v but got: %v", permanentError, controller.IsPermanentError(reconcileError))
8516+
prt.Test.Fatalf("Expected the error to be permanent: %v but got: %v: %v", permanentError, controller.IsPermanentError(reconcileError), reconcileError)
85178517
}
85188518
} else if ok, _ := controller.IsRequeueKey(reconcileError); ok {
85198519
// This is normal, it happens for timeouts when we otherwise successfully reconcile.
@@ -9297,6 +9297,95 @@ spec:
92979297
th.CheckPipelineRunConditionStatusAndReason(t, updatedPipelineRun.Status, corev1.ConditionFalse, v1.PipelineRunReasonCouldntGetTask.String())
92989298
}
92999299

9300+
// TestReconcileWithFailingTaskResolver checks that a PipelineRun with a failing Resolver
9301+
// field for a Task creates a ResolutionRequest object for that Resolver's type, and
9302+
// that when the request fails, the PipelineRun fails.
9303+
func TestReconcileWithTaskResolutionDryRunValidaitonErrors(t *testing.T) {
9304+
pr := parse.MustParseV1PipelineRun(t, `
9305+
metadata:
9306+
name: pr
9307+
namespace: default
9308+
spec:
9309+
pipelineSpec:
9310+
tasks:
9311+
- name: some-task
9312+
taskRef:
9313+
resolver: foobar
9314+
taskRunTemplate:
9315+
serviceAccountName: default
9316+
`)
9317+
remoteTask := parse.MustParseV1Task(t, `
9318+
metadata:
9319+
name: some-task
9320+
namespace: default
9321+
`)
9322+
taskBytes, err := yaml.Marshal(remoteTask)
9323+
if err != nil {
9324+
t.Fatal("fail to marshal task", err)
9325+
}
9326+
for _, testCase := range []struct {
9327+
name string
9328+
apiErr error
9329+
isPermanentError bool
9330+
}{
9331+
{"leader change", errors.New("etcdserver: leader changed"), false},
9332+
{"kubeapi timeout", apierrors.NewTimeoutError("roses are red, tulips are long, this dang ol' request done took too long", 5), false},
9333+
{"kubeapi throttling", apierrors.NewTooManyRequestsError("over nine thousand requests"), false},
9334+
{"unknown error", errors.New("the tranformogrifier is not tranformogrifying"), false},
9335+
{"bad request", apierrors.NewBadRequest("you've thrown off the emperors groove"), true},
9336+
{"actual validation error", apierrors.NewInvalid(schema.GroupKind{Group: "tekton.dev/v1", Kind: "Task"}, "some-invalid-task", field.ErrorList{}), true},
9337+
} {
9338+
t.Run(testCase.name, func(t *testing.T) {
9339+
taskReq := getResolvedResolutionRequest(t, "foobar", taskBytes, "default", "pr-some-task")
9340+
9341+
d := test.Data{
9342+
PipelineRuns: []*v1.PipelineRun{pr},
9343+
ServiceAccounts: []*corev1.ServiceAccount{{
9344+
ObjectMeta: metav1.ObjectMeta{Name: pr.Spec.TaskRunTemplate.ServiceAccountName, Namespace: "foo"},
9345+
}},
9346+
ConfigMaps: th.NewAlphaFeatureFlagsConfigMapInSlice(),
9347+
ResolutionRequests: []*resolutionv1beta1.ResolutionRequest{&taskReq},
9348+
}
9349+
9350+
prt := newPipelineRunTest(t, d)
9351+
defer prt.Cancel()
9352+
clients := prt.TestAssets.Clients
9353+
9354+
failDryRunCreation := true
9355+
9356+
clients.Pipeline.PrependReactor("create", "tasks", func(action ktesting.Action) (bool, runtime.Object, error) {
9357+
return failDryRunCreation, nil, testCase.apiErr
9358+
})
9359+
9360+
err := prt.TestAssets.Controller.Reconciler.Reconcile(prt.TestAssets.Ctx, "default/pr")
9361+
if err == nil {
9362+
t.Errorf("Expected error returned from reconcile after failing to cancel TaskRun but saw none!")
9363+
}
9364+
9365+
if testCase.isPermanentError && !controller.IsPermanentError(err) {
9366+
t.Errorf("Expected permanent error reconciling PipelineRun but got non-permanent error: %v", err)
9367+
} else if !testCase.isPermanentError && controller.IsPermanentError(err) {
9368+
t.Errorf("Expected non-permanent error reconciling PipelineRun but got permanent error: %v", err)
9369+
}
9370+
9371+
// Check that the PipelineRun is still running with correct error message
9372+
updatedPipelineRun, err := clients.Pipeline.TektonV1().PipelineRuns("default").Get(prt.TestAssets.Ctx, "pr", metav1.GetOptions{})
9373+
if err != nil {
9374+
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
9375+
}
9376+
9377+
// The PipelineRun should not be cancelled since the error was retryable
9378+
condition := updatedPipelineRun.Status.GetCondition(apis.ConditionSucceeded)
9379+
if testCase.isPermanentError && condition.IsUnknown() {
9380+
t.Errorf("Expected PipelineRun to be failed but succeeded condition is %v", condition.Status)
9381+
}
9382+
if !testCase.isPermanentError && !condition.IsUnknown() {
9383+
t.Errorf("Expected PipelineRun to still be running but succeeded condition is %v", condition.Status)
9384+
}
9385+
})
9386+
}
9387+
}
9388+
93009389
// TestReconcileWithTaskResolver checks that a PipelineRun with a populated Resolver
93019390
// field for a Task creates a ResolutionRequest object for that Resolver's type, and
93029391
// that when the request is successfully resolved the PipelineRun begins running.

pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,15 @@ type TaskSkipStatus struct {
5454
// TaskNotFoundError indicates that the resolution failed because a referenced Task couldn't be retrieved
5555
type TaskNotFoundError struct {
5656
Name string
57-
Msg string
57+
Err error
5858
}
5959

6060
func (e *TaskNotFoundError) Error() string {
61-
return fmt.Sprintf("Couldn't retrieve Task %q: %s", e.Name, e.Msg)
61+
return fmt.Sprintf("Couldn't retrieve Task %q: %s", e.Name, e.Err.Error())
62+
}
63+
64+
func (e *TaskNotFoundError) Unwrap() error {
65+
return e.Err
6266
}
6367

6468
// ResolvedPipelineTask contains a PipelineTask and its associated child PipelineRun(s) (Pipelines-in-Pipelines), TaskRun(s) or CustomRuns, if they exist.
@@ -820,7 +824,7 @@ func resolveTask(
820824
}
821825
return rt, &TaskNotFoundError{
822826
Name: name,
823-
Msg: err.Error(),
827+
Err: err,
824828
}
825829
default:
826830
spec := t.Spec

pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/tektoncd/pipeline/pkg/apis/config"
3131
v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
3232
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
33+
"github.com/tektoncd/pipeline/pkg/reconciler/apiserver"
3334
"github.com/tektoncd/pipeline/pkg/reconciler/pipeline/dag"
3435
"github.com/tektoncd/pipeline/pkg/reconciler/taskrun/resources"
3536
resolutioncommon "github.com/tektoncd/pipeline/pkg/resolution/common"
@@ -2746,16 +2747,32 @@ func TestResolvePipelineRun_TransientError(t *testing.T) {
27462747
TaskRef: &v1.TaskRef{Name: "task"},
27472748
}
27482749

2749-
getTask := getTaskFn(nil, apierrors.NewTimeoutError("some dang ol' timeout", 5))
2750-
getTaskRun := getTaskRunFn(nil)
2751-
pr := v1.PipelineRun{
2752-
ObjectMeta: metav1.ObjectMeta{
2753-
Name: "pipelinerun",
2754-
},
2750+
testCases := []struct {
2751+
name string
2752+
transientErr error
2753+
}{
2754+
{"kubeapi timeout error", apierrors.NewTimeoutError("some dang ol' timeout", 5)},
2755+
{"retryable resolution error", apiserver.ErrCouldntValidateObjectRetryable},
2756+
{"any knonw-retryable validation error", apiserver.ErrCouldntValidateObjectRetryable},
27552757
}
2756-
_, err := ResolvePipelineTask(t.Context(), pr, nopGetPipelineRun, getTask, getTaskRun, nopGetCustomRun, pt, nil)
2757-
if !resolutioncommon.IsErrTransient(err) {
2758-
t.Error("Transient error while getting Task did not result in a transient error")
2758+
2759+
for _, testCase := range testCases {
2760+
t.Run(testCase.name, func(t *testing.T) {
2761+
getTask := getTaskFn(nil, testCase.transientErr)
2762+
getTaskRun := getTaskRunFn(nil)
2763+
pr := v1.PipelineRun{
2764+
ObjectMeta: metav1.ObjectMeta{
2765+
Name: "pipelinerun",
2766+
},
2767+
}
2768+
_, err := ResolvePipelineTask(t.Context(), pr, nopGetPipelineRun, getTask, getTaskRun, nopGetCustomRun, pt, nil)
2769+
2770+
// This is really testing is that if the getTask function give ResolvePipelineTask a transient error,
2771+
// ResolvePipielineTask does not swallow that error
2772+
if !resolutioncommon.IsErrTransient(err) {
2773+
t.Error("Transient error while getting Task did not result in a transient error")
2774+
}
2775+
})
27592776
}
27602777
}
27612778

pkg/resolution/common/errors.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"slices"
2424
"strings"
2525

26+
"github.com/tektoncd/pipeline/pkg/reconciler/apiserver"
2627
apierrors "k8s.io/apimachinery/pkg/api/errors"
2728
)
2829

@@ -153,7 +154,7 @@ func ReasonError(err error) (string, error) {
153154
// IsErrTransient returns true if an error returned by GetTask/GetStepAction is retryable.
154155
func IsErrTransient(err error) bool {
155156
switch {
156-
case apierrors.IsConflict(err), apierrors.IsServerTimeout(err), apierrors.IsTimeout(err), apierrors.IsTooManyRequests(err):
157+
case apierrors.IsConflict(err), apierrors.IsServerTimeout(err), apierrors.IsTimeout(err), apierrors.IsTooManyRequests(err), errors.Is(err, apiserver.ErrCouldntValidateObjectRetryable):
157158
return true
158159
default:
159160
return slices.ContainsFunc([]string{errEtcdLeaderChange, context.DeadlineExceeded.Error()}, func(s string) bool {

0 commit comments

Comments
 (0)