Skip to content

Commit 561f348

Browse files
authored
Add default conditions to create PA to avoid potential race conditions (#16078)
between revision and autoscaler fix revision lifecyle check for conditions
1 parent 1ffe339 commit 561f348

File tree

7 files changed

+66
-7
lines changed

7 files changed

+66
-7
lines changed

config/core/300-resources/podautoscaler.yaml

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,32 @@ spec:
125125
status:
126126
description: Status communicates the observed state of the PodAutoscaler (from the controller).
127127
type: object
128-
required:
129-
- metricsServiceName
130-
- serviceName
128+
default:
129+
conditions:
130+
- lastTransitionTime: "1970-01-01T00:00:00Z"
131+
message: Waiting for controller
132+
reason: Pending
133+
severity: ""
134+
status: Unknown
135+
type: Active
136+
- lastTransitionTime: "1970-01-01T00:00:00Z"
137+
message: Waiting for controller
138+
reason: Pending
139+
severity: ""
140+
status: Unknown
141+
type: Ready
142+
- lastTransitionTime: "1970-01-01T00:00:00Z"
143+
message: Waiting for controller
144+
reason: Pending
145+
severity: ""
146+
status: Unknown
147+
type: SKSReady
148+
- lastTransitionTime: "1970-01-01T00:00:00Z"
149+
message: Waiting for controller
150+
reason: Pending
151+
severity: ""
152+
status: Unknown
153+
type: ScaleTargetInitialized
131154
properties:
132155
actualScale:
133156
description: ActualScale shows the actual number of replicas for the revision.

docs/serving-api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ string
459459
</em>
460460
</td>
461461
<td>
462+
<em>(Optional)</em>
462463
<p>ServiceName is the K8s Service name that serves the revision, scaled by this PA.
463464
The service is created and owned by the ServerlessService object owned by this PA.</p>
464465
</td>
@@ -471,6 +472,7 @@ string
471472
</em>
472473
</td>
473474
<td>
475+
<em>(Optional)</em>
474476
<p>MetricsServiceName is the K8s Service name that provides revision metrics.
475477
The service is managed by the PA object.</p>
476478
</td>

pkg/apis/autoscaling/v1alpha1/pa_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,17 @@ const (
112112

113113
// PodAutoscalerStatus communicates the observed state of the PodAutoscaler (from the controller).
114114
type PodAutoscalerStatus struct {
115+
// +kubebuilder:default={"conditions": {{"type":"Active", "status": "Unknown", "reason": "Pending", "message": "Waiting for controller", "severity": "", "lastTransitionTime": "1970-01-01T00:00:00Z"}, {"type":"Ready", "status": "Unknown", "reason": "Pending", "message": "Waiting for controller", "severity": "", "lastTransitionTime": "1970-01-01T00:00:00Z"}, {"type":"SKSReady", "status": "Unknown", "reason": "Pending", "message": "Waiting for controller", "severity": "", "lastTransitionTime": "1970-01-01T00:00:00Z"}, {"type":"ScaleTargetInitialized", "status": "Unknown", "reason": "Pending", "message": "Waiting for controller", "severity": "", "lastTransitionTime": "1970-01-01T00:00:00Z"}}}
115116
duckv1.Status `json:",inline"`
116117

117118
// ServiceName is the K8s Service name that serves the revision, scaled by this PA.
118119
// The service is created and owned by the ServerlessService object owned by this PA.
120+
// +optional
119121
ServiceName string `json:"serviceName"`
120122

121123
// MetricsServiceName is the K8s Service name that provides revision metrics.
122124
// The service is managed by the PA object.
125+
// +optional
123126
MetricsServiceName string `json:"metricsServiceName"`
124127

125128
// DesiredScale shows the current desired number of replicas for the revision.

pkg/apis/serving/v1/revision_lifecycle.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA
184184
rs.DesiredReplicas = ps.DesiredScale
185185
}
186186

187-
if cond == nil {
187+
sksCondition := ps.GetCondition(autoscalingv1alpha1.PodAutoscalerConditionSKSReady)
188+
if cond == nil || (cond.IsUnknown() && sksCondition != nil && sksCondition.IsUnknown()) {
188189
rs.MarkActiveUnknown(ReasonDeploying, "")
189190

190191
if !resUnavailable {
@@ -219,6 +220,7 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA
219220
// 2. Initial scale was never achieved, which means we failed to progress
220221
// towards initial scale during the progress deadline period and scaled to 0
221222
// failing to activate.
223+
// 3. Need to make sure to wait for SKS to be ready, or at least fail.
222224
// So mark the revision as failed at that point.
223225
// See #8922 for details. When we try to scale to 0, we force the Deployment's
224226
// Progress status to become `true`, since successful scale down means
@@ -228,7 +230,7 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA
228230
// ScaleTargetInitialized down the road, we would have marked resources
229231
// unavailable here, and have no way of recovering later.
230232
// If the ResourcesAvailable is already false, don't override the message.
231-
if !ps.IsScaleTargetInitialized() && !resUnavailable && ps.ServiceName != "" {
233+
if !ps.IsScaleTargetInitialized() && !resUnavailable && ps.ServiceName != "" && !sksCondition.IsUnknown() {
232234
rs.MarkResourcesAvailableFalse(ReasonProgressDeadlineExceeded,
233235
"Initial scale was never achieved")
234236
}

pkg/apis/serving/v1/revision_lifecycle_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,9 @@ func TestPropagateAutoscalerStatusNoProgress(t *testing.T) {
678678
}, {
679679
Type: autoscalingv1alpha1.PodAutoscalerConditionScaleTargetInitialized,
680680
Status: corev1.ConditionUnknown,
681+
}, {
682+
Type: autoscalingv1alpha1.PodAutoscalerConditionSKSReady,
683+
Status: corev1.ConditionFalse,
681684
}},
682685
},
683686
})
@@ -764,6 +767,7 @@ func TestPropagateAutoscalerStatusReplicas(t *testing.T) {
764767

765768
for _, tc := range testCases {
766769
t.Run(tc.name, func(t *testing.T) {
770+
tc.ps.InitializeConditions()
767771
r.PropagateAutoscalerStatus(&tc.ps)
768772

769773
if !cmp.Equal(tc.wantActualReplicas, r.ActualReplicas) {

pkg/reconciler/revision/table_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ func TestReconcile(t *testing.T) {
312312
WithPAStatusService("pa-inactive"),
313313
WithNoTraffic("NoTraffic", "This thing is inactive."),
314314
WithScaleTargetInitialized,
315+
WithPASKSNotReady(""),
315316
WithReachabilityUnreachable),
316317
readyDeploy(deploy(t, "foo", "pa-inactive")),
317318
image("foo", "pa-inactive"),
@@ -335,13 +336,14 @@ func TestReconcile(t *testing.T) {
335336
WithRevisionObservedGeneration(1)),
336337
pa("foo", "pa-inactive",
337338
WithNoTraffic("NoTraffic", "This thing is inactive."),
338-
WithPAStatusService("pa-inactive")),
339+
WithPAStatusService("pa-inactive"),
340+
WithPASKSReady),
339341
readyDeploy(deploy(t, "foo", "pa-inactive")),
340342
image("foo", "pa-inactive"),
341343
},
342344
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
343345
Object: Revision("foo", "pa-inactive",
344-
WithLogURL, withDefaultContainerStatuses(), MarkDeploying(""),
346+
WithLogURL, withDefaultContainerStatuses(), MarkContainerHealthyUnknown(""),
345347
// When we reconcile an "all ready" revision when the PA
346348
// is inactive, we should see the following change.
347349
MarkInactive("NoTraffic", "This thing is inactive."), WithRevisionObservedGeneration(1),

pkg/reconciler/testing/v1/factory.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
fakenetworkingclient "knative.dev/networking/pkg/client/injection/client/fake"
2626
fakekubeclient "knative.dev/pkg/client/injection/kube/client/fake"
2727
fakedynamicclient "knative.dev/pkg/injection/clients/dynamicclient/fake"
28+
autoscalingv1alpha1 "knative.dev/serving/pkg/apis/autoscaling/v1alpha1"
2829
v1 "knative.dev/serving/pkg/apis/serving/v1"
2930
fakeservingclient "knative.dev/serving/pkg/client/injection/client/fake"
3031

@@ -95,6 +96,28 @@ func MakeFactory(ctor Ctor) rtesting.Factory {
9596
return false, nil, nil
9697
},
9798
)
99+
100+
client.PrependReactor("create", "podautoscalers", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
101+
if createAction, ok := action.(ktesting.CreateAction); ok {
102+
if pa, ok := createAction.GetObject().(*autoscalingv1alpha1.PodAutoscaler); ok {
103+
// Initialize conditions first
104+
pa.Status.InitializeConditions()
105+
106+
// Set all conditions to match kubebuilder defaults: status="Unknown", reason="Deploying"
107+
// This matches the behavior defined in the +kubebuilder:default annotation
108+
condSet := pa.GetConditionSet()
109+
manager := condSet.Manage(&pa.Status)
110+
111+
// Set each condition to Unknown with "Deploying" reason to match kubebuilder defaults
112+
manager.MarkUnknown(autoscalingv1alpha1.PodAutoscalerConditionActive, "Pending", "Waiting for controller")
113+
manager.MarkUnknown(autoscalingv1alpha1.PodAutoscalerConditionReady, "Pending", "Waiting for controller")
114+
manager.MarkUnknown(autoscalingv1alpha1.PodAutoscalerConditionSKSReady, "Pending", "Waiting for controller")
115+
manager.MarkUnknown(autoscalingv1alpha1.PodAutoscalerConditionScaleTargetInitialized, "Pending", "Waiting for controller")
116+
}
117+
}
118+
return false, nil, nil
119+
})
120+
98121
// This is needed by the Configuration controller tests, which
99122
// use GenerateName to produce Revisions.
100123
rtesting.PrependGenerateNameReactor(&client.Fake)

0 commit comments

Comments
 (0)