@@ -115,13 +115,13 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
115
115
}
116
116
117
117
for _ , c := range checkpoints {
118
- if c == DriverPluginCheckpointFile {
118
+ if c == DriverPluginCheckpointFileBasename {
119
119
return state , nil
120
120
}
121
121
}
122
122
123
123
checkpoint := newCheckpoint ()
124
- if err := state .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
124
+ if err := state .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
125
125
return nil , fmt .Errorf ("unable to sync to checkpoint: %v" , err )
126
126
}
127
127
@@ -135,13 +135,17 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
135
135
claimUID := string (claim .UID )
136
136
137
137
checkpoint := newCheckpoint ()
138
- if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
139
- return nil , fmt .Errorf ("unable to sync from checkpoint: %v " , err )
138
+ if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
139
+ return nil , fmt .Errorf ("unable to get checkpoint: %w " , err )
140
140
}
141
- preparedClaims := checkpoint .V1 .PreparedClaims
142
141
143
- if preparedClaims [claimUID ] != nil {
144
- return preparedClaims [claimUID ].GetDevices (), nil
142
+ preparedClaim , exists := checkpoint .V1 .PreparedClaimsByUID [claimUID ]
143
+ if exists {
144
+ // Make this a noop. Associated device(s) has/have been prepared by us.
145
+ // Prepare() must be idempotent, as it may be invoked more than once per
146
+ // claim (and actual device preparation must happen at most once).
147
+ klog .V (6 ).Infof ("skip prepare: claim %v found in checkpoint" , claimUID )
148
+ return preparedClaim .PreparedDevices .GetDevices (), nil
145
149
}
146
150
147
151
preparedDevices , err := s .prepareDevices (ctx , claim )
@@ -153,50 +157,65 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
153
157
return nil , fmt .Errorf ("unable to create CDI spec file for claim: %w" , err )
154
158
}
155
159
156
- preparedClaims [claimUID ] = preparedDevices
157
- if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
158
- return nil , fmt .Errorf ("unable to sync to checkpoint: %v" , err )
160
+ // Add ResourceClaimStatus API object to node-local checkpoint: the
161
+ // 'unprepare' code path must use local state exclusively (ResourceClaim
162
+ // object might have been deleted from the API server).
163
+ checkpoint .V1 .PreparedClaimsByUID [claimUID ] = PreparedClaim {
164
+ Status : claim .Status ,
165
+ PreparedDevices : preparedDevices ,
166
+ }
167
+ if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
168
+ return nil , fmt .Errorf ("unable to create checkpoint: %w" , err )
159
169
}
170
+ klog .V (6 ).Infof ("checkpoint written for claim %v" , claimUID )
160
171
161
- return preparedClaims [ claimUID ] .GetDevices (), nil
172
+ return preparedDevices .GetDevices (), nil
162
173
}
163
174
164
// Unprepare (as of the '+' lines of this diff) undoes device preparation for
// the claim identified by claimRef, relying on the node-local checkpoint
// rather than on a ResourceClaim object. It holds s's lock for the whole call.
// If the claim UID is not present in the checkpoint it logs and returns nil.
// Otherwise it runs unprepareDevices against the checkpointed claim status,
// deletes the claim's CDI spec file, removes the claim's entry from
// PreparedClaimsByUID and writes the checkpoint back. Errors are returned
// wrapped; a failure in unprepareDevices or DeleteClaimSpecFile returns before
// the checkpoint is rewritten.
- func (s * DeviceState ) Unprepare (ctx context.Context , claim * resourceapi. ResourceClaim ) error {
175
+ func (s * DeviceState ) Unprepare (ctx context.Context , claimRef kubeletplugin. NamespacedObject ) error {
165
176
s .Lock ()
166
177
defer s .Unlock ()
167
178
168
- claimUID := string (claim .UID )
169
-
170
- if err := s .unprepareDevices (ctx , claim ); err != nil {
171
- return fmt .Errorf ("unprepare devices failed: %w" , err )
172
- }
179
+ claimUID := string (claimRef .UID )
173
180
181
+ // Rely on local checkpoint state for the ability to clean up.
174
182
checkpoint := newCheckpoint ()
175
- if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
176
- return fmt .Errorf ("unable to sync from checkpoint: %v " , err )
183
+ if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
184
+ return fmt .Errorf ("unable to get checkpoint: %w " , err )
177
185
}
178
- preparedClaims := checkpoint .V1 .PreparedClaims
179
186
180
- if preparedClaims [claimUID ] == nil {
187
+ pc , exists := checkpoint .V1 .PreparedClaimsByUID [claimUID ]
188
+ if ! exists {
189
+ // Not an error: if this claim UID is not in the checkpoint then the
190
+ // claim's devices were never prepared or have already been unprepared
191
+ // (assuming that Prepare+Checkpoint are done transactionally). Note that
192
+ // claimRef.String() contains namespace, name, UID.
193
+ klog .Infof ("unprepare noop: claim not found in checkpoint data: %v" , claimRef .String ())
181
194
return nil
182
195
}
183
196
197
+ if err := s .unprepareDevices (ctx , & pc .Status ); err != nil {
198
+ return fmt .Errorf ("unprepare devices failed: %w" , err )
199
+ }
200
+
184
201
err := s .cdi .DeleteClaimSpecFile (claimUID )
185
202
if err != nil {
186
203
return fmt .Errorf ("unable to delete CDI spec file for claim: %w" , err )
187
204
}
188
205
189
- delete (preparedClaims , claimUID )
190
- if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
191
- return fmt .Errorf ("unable to sync to checkpoint: %v" , err )
206
+ // Write new checkpoint reflecting that all devices for this claim have been
207
+ // unprepared (by virtue of removing its UID from PreparedClaimsByUID).
208
+ delete (checkpoint .V1 .PreparedClaimsByUID , claimUID )
209
+ if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
210
+ return fmt .Errorf ("create checkpoint failed: %w" , err )
192
211
}
193
212
194
213
return nil
195
214
}
196
215
197
216
func (s * DeviceState ) prepareDevices (ctx context.Context , claim * resourceapi.ResourceClaim ) (PreparedDevices , error ) {
198
217
// Generate a mapping of each OpaqueDeviceConfigs to the Device.Results it applies to
199
- configResultsMap , err := s .getConfigResultsMap (claim )
218
+ configResultsMap , err := s .getConfigResultsMap (& claim . Status )
200
219
if err != nil {
201
220
return nil , fmt .Errorf ("error generating configResultsMap: %w" , err )
202
221
}
@@ -283,9 +302,9 @@ func (s *DeviceState) prepareDevices(ctx context.Context, claim *resourceapi.Res
283
302
return preparedDevices , nil
284
303
}
285
304
286
- func (s * DeviceState ) unprepareDevices (ctx context.Context , claim * resourceapi.ResourceClaim ) error {
305
+ func (s * DeviceState ) unprepareDevices (ctx context.Context , cs * resourceapi.ResourceClaimStatus ) error {
287
306
// Generate a mapping of each OpaqueDeviceConfigs to the Device.Results it applies to
288
- configResultsMap , err := s .getConfigResultsMap (claim )
307
+ configResultsMap , err := s .getConfigResultsMap (cs )
289
308
if err != nil {
290
309
return fmt .Errorf ("error generating configResultsMap: %w" , err )
291
310
}
@@ -407,12 +426,12 @@ func (s *DeviceState) applyComputeDomainDaemonConfig(ctx context.Context, config
407
426
return & configState , nil
408
427
}
409
428
410
- func (s * DeviceState ) getConfigResultsMap (claim * resourceapi.ResourceClaim ) (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult , error ) {
429
+ func (s * DeviceState ) getConfigResultsMap (rcs * resourceapi.ResourceClaimStatus ) (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult , error ) {
411
430
// Retrieve the full set of device configs for the driver.
412
431
configs , err := GetOpaqueDeviceConfigs (
413
432
configapi .Decoder ,
414
433
DriverName ,
415
- claim . Status .Allocation .Devices .Config ,
434
+ rcs .Allocation .Devices .Config ,
416
435
)
417
436
if err != nil {
418
437
return nil , fmt .Errorf ("error getting opaque device configs: %v" , err )
@@ -433,7 +452,7 @@ func (s *DeviceState) getConfigResultsMap(claim *resourceapi.ResourceClaim) (map
433
452
// Look through the configs and figure out which one will be applied to
434
453
// each device allocation result based on their order of precedence and type.
435
454
configResultsMap := make (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult )
436
- for _ , result := range claim . Status .Allocation .Devices .Results {
455
+ for _ , result := range rcs .Allocation .Devices .Results {
437
456
device , exists := s .allocatable [result .Device ]
438
457
if ! exists {
439
458
return nil , fmt .Errorf ("requested device is not allocatable: %v" , result .Device )
0 commit comments