@@ -425,12 +425,6 @@ func (s *DeviceState) applyComputeDomainChannelConfig(ctx context.Context, confi
425425 return nil , fmt .Errorf ("applyComputeDomainChannelConfig: unexpected results %v" , results )
426426 }
427427
428- // For now, we treat each request as a request for channel zero, even if
429- // AllocationModeAll.
430- if err := s .allocateImexChannel (0 ); err != nil {
431- return nil , fmt .Errorf ("allocation failed: %w" , err )
432- }
433-
434428 // If explicitly requested, inject all channels instead of just one.
435429 chancount := 1
436430 if config .AllocationMode == configapi .ComputeDomainChannelAllocationModeAll {
@@ -443,6 +437,12 @@ func (s *DeviceState) applyComputeDomainChannelConfig(ctx context.Context, confi
443437 ComputeDomain : config .DomainID ,
444438 }
445439
440+ // Treat each request as a request for channel zero, even if
441+ // AllocationModeAll.
442+ if err := s .assertImexChannelNotAllocated (0 ); err != nil {
443+ return nil , fmt .Errorf ("allocation failed: %w" , err )
444+ }
445+
446446 // Create any necessary ComputeDomain channels and gather their CDI container edits.
447447 if err := s .computeDomainManager .AssertComputeDomainNamespace (ctx , claim .Namespace , config .DomainID ); err != nil {
448448 return nil , permanentError {fmt .Errorf ("error asserting ComputeDomain's namespace: %w" , err )}
@@ -578,26 +578,30 @@ func (s *DeviceState) getConfigResultsMap(rcs *resourceapi.ResourceClaimStatus,
578578 return configResultsMap , nil
579579}
580580
581- // allocateImexChannel () consults the ( absolute, node-local) source of truth,
582- // which currently is the checkpoint data. For now, It fails with an error when
583- // the channel with the given `id` is already allocated for/ by another resource
584- // claim (soon, this implementation may become more involved when the same IMEX
585- // channel may be shared across pods on the same node). Note that generally, we
586- // must expect prepare () and unprepare () calls acting on the same resource to
587- // arrive out-of- order (cf.
581+ // assertImexChannelNotAllocated () consults the absolute, node-local source of
582+ // truth ( the checkpoint data) and fails when the channel with ID `id` is
583+ // already in use by another resource claim.
584+ //
585+ // Must be performed in the Prepare() path for any claim asking for a channel to
586+ // force processing Prepare () and Unprepare () calls acting on the same resource
587+ // in the correct order (to prevent unprepare-after-prepare, cf.
588588// https://github.com/NVIDIA/k8s-dra-driver-gpu/issues/641).
589- func (s * DeviceState ) allocateImexChannel (id int ) error {
589+ //
590+ // This implementation may become more involved when the same IMEX channel may
591+ // be shared across pods on the same node.
592+ func (s * DeviceState ) assertImexChannelNotAllocated (id int ) error {
590593 cp , err := s .getCheckpoint ()
591594 if err != nil {
592595 return fmt .Errorf ("unable to get checkpoint: %w" , err )
593596 }
594597
595598 for claimUID , claim := range cp .V2 .PreparedClaims {
596- // Ignore non-completed preparations: only one instance of this program
597- // is running, and we only run one Prepare() at any given time. Is that
598- // true during upgrades though? If this is not true, then we must fail
599- // allocation also on PrepareStarted -- which leads to the question of
600- // how we clean up long-term stale PrepareStarted entries.
599+ // Ignore non-completed preparations: file-based locking guarantees that
600+ // only one Prepare() runs at any given time. If a claim is in the
601+ // `PrepareStarted` state then it is not actually being prepared right
602+ // now; it will either be retried soon (in which case we are faster and
603+ // win over it) or never be retried (in which case we can also safely
604+ // allocate).
601605 if claim .CheckpointState != "PrepareCompleted" {
602606 continue
603607 }
0 commit comments