@@ -115,13 +115,13 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
115115 }
116116
117117 for _ , c := range checkpoints {
118- if c == DriverPluginCheckpointFile {
118+ if c == DriverPluginCheckpointFileBasename {
119119 return state , nil
120120 }
121121 }
122122
123123 checkpoint := newCheckpoint ()
124- if err := state .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
124+ if err := state .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
125125 return nil , fmt .Errorf ("unable to sync to checkpoint: %v" , err )
126126 }
127127
@@ -135,13 +135,17 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
135135 claimUID := string (claim .UID )
136136
137137 checkpoint := newCheckpoint ()
138- if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
139- return nil , fmt .Errorf ("unable to sync from checkpoint: %v " , err )
138+ if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
139+ return nil , fmt .Errorf ("unable to get checkpoint: %w " , err )
140140 }
141- preparedClaims := checkpoint .V1 .PreparedClaims
142141
143- if preparedClaims [claimUID ] != nil {
144- return preparedClaims [claimUID ].GetDevices (), nil
142+ preparedClaim , exists := checkpoint .V1 .PreparedClaims [claimUID ]
143+ if exists {
144+ // Make this a noop. Associated device(s) has/have been prepared by us.
145+ // Prepare() must be idempotent, as it may be invoked more than once per
146+ // claim (and actual device preparation must happen at most once).
147+ klog .V (6 ).Infof ("skip prepare: claim %v found in checkpoint" , claimUID )
148+ return preparedClaim .PreparedDevices .GetDevices (), nil
145149 }
146150
147151 preparedDevices , err := s .prepareDevices (ctx , claim )
@@ -153,50 +157,65 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
153157 return nil , fmt .Errorf ("unable to create CDI spec file for claim: %w" , err )
154158 }
155159
156- preparedClaims [claimUID ] = preparedDevices
157- if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
158- return nil , fmt .Errorf ("unable to sync to checkpoint: %v" , err )
160+ // Add ResourceClaimStatus API object to node-local checkpoint: the
161+ // 'unprepare' code path must use local state exclusively (ResourceClaim
162+ // object might have been deleted from the API server).
163+ checkpoint .V1 .PreparedClaims [claimUID ] = PreparedClaim {
164+ Status : claim .Status ,
165+ PreparedDevices : preparedDevices ,
166+ }
167+ if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
168+ return nil , fmt .Errorf ("unable to create checkpoint: %w" , err )
159169 }
170+ klog .V (6 ).Infof ("checkpoint written for claim %v" , claimUID )
160171
161- return preparedClaims [ claimUID ] .GetDevices (), nil
172+ return preparedDevices .GetDevices (), nil
162173}
163174
164- func (s * DeviceState ) Unprepare (ctx context.Context , claim * resourceapi. ResourceClaim ) error {
175+ func (s * DeviceState ) Unprepare (ctx context.Context , claimRef kubeletplugin. NamespacedObject ) error {
165176 s .Lock ()
166177 defer s .Unlock ()
167178
168- claimUID := string (claim .UID )
169-
170- if err := s .unprepareDevices (ctx , claim ); err != nil {
171- return fmt .Errorf ("unprepare devices failed: %w" , err )
172- }
179+ claimUID := string (claimRef .UID )
173180
181+ // Rely on local checkpoint state for ability to clean up.
174182 checkpoint := newCheckpoint ()
175- if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
176- return fmt .Errorf ("unable to sync from checkpoint: %v " , err )
183+ if err := s .checkpointManager .GetCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
184+ return fmt .Errorf ("unable to get checkpoint: %w " , err )
177185 }
178- preparedClaims := checkpoint .V1 .PreparedClaims
179186
180- if preparedClaims [claimUID ] == nil {
187+ pc , exists := checkpoint .V1 .PreparedClaims [claimUID ]
188+ if ! exists {
189+ // Not an error: if this claim UID is not in the checkpoint then this
190+ // device was never prepared or has already been unprepared (assume that
191+ // Prepare+Checkpoint are done transactionally). Note that
192+ // claimRef.String() contains namespace, name, UID.
193+ klog .Infof ("unprepare noop: claim not found in checkpoint data: %v" , claimRef .String ())
181194 return nil
182195 }
183196
197+ if err := s .unprepareDevices (ctx , & pc .Status ); err != nil {
198+ return fmt .Errorf ("unprepare devices failed: %w" , err )
199+ }
200+
184201 err := s .cdi .DeleteClaimSpecFile (claimUID )
185202 if err != nil {
186203 return fmt .Errorf ("unable to delete CDI spec file for claim: %w" , err )
187204 }
188205
189- delete (preparedClaims , claimUID )
190- if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFile , checkpoint ); err != nil {
191- return fmt .Errorf ("unable to sync to checkpoint: %v" , err )
206+ // Write new checkpoint reflecting that all devices for this claim have been
207+ // unprepared (by virtue of removing its UID from all mappings).
208+ delete (checkpoint .V1 .PreparedClaims , claimUID )
209+ if err := s .checkpointManager .CreateCheckpoint (DriverPluginCheckpointFileBasename , checkpoint ); err != nil {
210+ return fmt .Errorf ("create checkpoint failed: %w" , err )
192211 }
193212
194213 return nil
195214}
196215
197216func (s * DeviceState ) prepareDevices (ctx context.Context , claim * resourceapi.ResourceClaim ) (PreparedDevices , error ) {
198217 // Generate a mapping of each OpaqueDeviceConfigs to the Device.Results it applies to
199- configResultsMap , err := s .getConfigResultsMap (claim )
218+ configResultsMap , err := s .getConfigResultsMap (& claim . Status )
200219 if err != nil {
201220 return nil , fmt .Errorf ("error generating configResultsMap: %w" , err )
202221 }
@@ -283,9 +302,9 @@ func (s *DeviceState) prepareDevices(ctx context.Context, claim *resourceapi.Res
283302 return preparedDevices , nil
284303}
285304
286- func (s * DeviceState ) unprepareDevices (ctx context.Context , claim * resourceapi.ResourceClaim ) error {
305+ func (s * DeviceState ) unprepareDevices (ctx context.Context , cs * resourceapi.ResourceClaimStatus ) error {
287306 // Generate a mapping of each OpaqueDeviceConfigs to the Device.Results it applies to
288- configResultsMap , err := s .getConfigResultsMap (claim )
307+ configResultsMap , err := s .getConfigResultsMap (cs )
289308 if err != nil {
290309 return fmt .Errorf ("error generating configResultsMap: %w" , err )
291310 }
@@ -407,12 +426,12 @@ func (s *DeviceState) applyComputeDomainDaemonConfig(ctx context.Context, config
407426 return & configState , nil
408427}
409428
410- func (s * DeviceState ) getConfigResultsMap (claim * resourceapi.ResourceClaim ) (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult , error ) {
429+ func (s * DeviceState ) getConfigResultsMap (rcs * resourceapi.ResourceClaimStatus ) (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult , error ) {
411430 // Retrieve the full set of device configs for the driver.
412431 configs , err := GetOpaqueDeviceConfigs (
413432 configapi .Decoder ,
414433 DriverName ,
415- claim . Status .Allocation .Devices .Config ,
434+ rcs .Allocation .Devices .Config ,
416435 )
417436 if err != nil {
418437 return nil , fmt .Errorf ("error getting opaque device configs: %v" , err )
@@ -433,7 +452,7 @@ func (s *DeviceState) getConfigResultsMap(claim *resourceapi.ResourceClaim) (map
433452 // Look through the configs and figure out which one will be applied to
434453 // each device allocation result based on their order of precedence and type.
435454 configResultsMap := make (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult )
436- for _ , result := range claim . Status .Allocation .Devices .Results {
455+ for _ , result := range rcs .Allocation .Devices .Results {
437456 device , exists := s .allocatable [result .Device ]
438457 if ! exists {
439458 return nil , fmt .Errorf ("requested device is not allocatable: %v" , result .Device )
0 commit comments