CD plugin: channel prepare: fail if allocated as of checkpoint

jgehrcke · jgehrcke · commit 8deb7895e4b9 · 2025-10-06T11:35:26.000+02:00
Signed-off-by: Dr. Jan-Philip Gehrcke <jgehrcke@nvidia.com>
diff --git a/cmd/compute-domain-kubelet-plugin/device_state.go b/cmd/compute-domain-kubelet-plugin/device_state.go
@@ -425,6 +425,12 @@ func (s *DeviceState) applyComputeDomainChannelConfig(ctx context.Context, confi
 		return nil, fmt.Errorf("applyComputeDomainChannelConfig: unexpected results %v", results)
 	}
 
+	// For now, we treat each request as a request for channel zero, even if
+	// the allocation mode is AllocationModeAll.
+	if err := s.allocateImexChannel(0); err != nil {
+		return nil, fmt.Errorf("allocation failed: %w", err)
+	}
+
 	// If explicitly requested, inject all channels instead of just one.
 	chancount := 1
 	if config.AllocationMode == configapi.ComputeDomainChannelAllocationModeAll {
@@ -572,6 +578,43 @@ func (s *DeviceState) getConfigResultsMap(rcs *resourceapi.ResourceClaimStatus,
 	return configResultsMap, nil
 }
 
+// allocateImexChannel() consults the (absolute, node-local) source of truth,
+// which currently is the checkpoint data. For now, it fails with an error when
+// the channel with the given `id` is already allocated for/by another resource
+// claim (soon, this implementation may become more involved when the same IMEX
+// channel may be shared across pods on the same node). Note that generally, we
+// must expect prepare() and unprepare() calls acting on the same resource to
+// arrive out-of-order (cf.
+// https://github.com/NVIDIA/k8s-dra-driver-gpu/issues/641).
+func (s *DeviceState) allocateImexChannel(id int) error {
+	cp, err := s.getCheckpoint()
+	if err != nil {
+		return fmt.Errorf("unable to get checkpoint: %w", err)
+	}
+
+	for claimUID, claim := range cp.V2.PreparedClaims {
+		// Ignore non-completed preparations: only one instance of this program
+		// is running, and we only run one Prepare() at any given time. Is that
+		// true during upgrades though? If this is not true, then we must fail
+		// allocation also on PrepareStarted -- which leads to the question of
+		// how we clean up long-term stale PrepareStarted entries.
+		if claim.CheckpointState != "PrepareCompleted" {
+			continue
+		}
+
+		for _, preparedDevice := range claim.PreparedDevices {
+			for _, device := range preparedDevice.Devices {
+				if device.Channel != nil && device.Channel.Info.ID == id {
+					// Maybe log something based on `claim.Status.ReservedFor`
+					// to facilitate debugging.
+					return fmt.Errorf("channel %d already allocated by claim %s (according to checkpoint)", id, claimUID)
+				}
+			}
+		}
+	}
+	return nil
+}
+
 // validateDriverVersionForIMEXDaemonsWithDNSNames validates that the driver version
 // meets the minimum requirement for the IMEXDaemonsWithDNSNames feature gate.
 func validateDriverVersionForIMEXDaemonsWithDNSNames(flags *Flags, nvdevlib *deviceLib) error {