Skip to content

Commit f2a857c

Browse files
committed
squash: review feedback
Signed-off-by: Dr. Jan-Philip Gehrcke <[email protected]>
1 parent fa67ff1 commit f2a857c

File tree

3 files changed

+34
-28
lines changed

3 files changed

+34
-28
lines changed

cmd/compute-domain-daemon/computedomain.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,7 @@ func (m *ComputeDomainManager) UpdateComputeDomainNodeInfo(ctx context.Context,
259259
Name: m.config.nodeName,
260260
CliqueID: m.config.cliqueID,
261261
Index: nextIndex,
262-
// Initialize as NotReady (will be updated by podmanager).
263-
Status: nvapi.ComputeDomainStatusNotReady,
262+
Status: nvapi.ComputeDomainStatusNotReady,
264263
}
265264

266265
klog.Infof("CD status does not contain node name '%s' yet, try to insert myself: %v", m.config.nodeName, nodeInfo)

cmd/compute-domain-kubelet-plugin/computedomain.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,38 @@ func (m *ComputeDomainManager) GetComputeDomainChannelContainerEdits(devRoot str
154154
}
155155
}
156156

157+
// GetComputeDomainDaemonContainerEdits returns the CDI spec edits always
158+
// required for launching the CD Daemon (whether or not it tries to launch an
159+
// IMEX daemon internally).
160+
func (m *ComputeDomainManager) GetComputeDomainDaemonContainerEdits(ctx context.Context, domainID string) (*cdiapi.ContainerEdits, error) {
161+
cd, err := m.GetComputeDomain(ctx, domainID)
162+
if err != nil {
163+
return nil, fmt.Errorf("error getting compute domain %s: %w", domainID, err)
164+
}
165+
if cd == nil {
166+
return nil, fmt.Errorf("compute domain not found: %s", domainID)
167+
}
168+
169+
edits := &cdiapi.ContainerEdits{
170+
ContainerEdits: &cdispec.ContainerEdits{
171+
Env: []string{
172+
fmt.Sprintf("CLIQUE_ID=%s", m.cliqueID),
173+
fmt.Sprintf("COMPUTE_DOMAIN_UUID=%s", cd.UID),
174+
fmt.Sprintf("COMPUTE_DOMAIN_NAME=%s", cd.Name),
175+
fmt.Sprintf("COMPUTE_DOMAIN_NAMESPACE=%s", cd.Namespace),
176+
},
177+
},
178+
}
179+
return edits, nil
157180
}
158181

159-
func (s *ComputeDomainDaemonSettings) GetCDIContainerEdits(ctx context.Context, devRoot string, info *nvcapDeviceInfo) (*cdiapi.ContainerEdits, error) {
160182
func (s *ComputeDomainDaemonSettings) GetDomainID() string {
161183
return s.domainID
162184
}
163185

186+
// GetCDIContainerEditsForImex returns the CDI spec edits only required for
187+
// launching the CD Daemon when it actually wraps an IMEX daemon.
188+
func (s *ComputeDomainDaemonSettings) GetCDIContainerEditsForImex(ctx context.Context, devRoot string, info *nvcapDeviceInfo) *cdiapi.ContainerEdits {
164189
edits := &cdiapi.ContainerEdits{
165190
ContainerEdits: &cdispec.ContainerEdits{
166191
Mounts: []*cdispec.Mount{
@@ -181,7 +206,7 @@ func (s *ComputeDomainDaemonSettings) GetDomainID() string {
181206
},
182207
},
183208
}
184-
return edits, nil
209+
return edits
185210
}
186211

187212
func (s *ComputeDomainDaemonSettings) Prepare(ctx context.Context) error {

cmd/compute-domain-kubelet-plugin/device_state.go

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"k8s.io/klog/v2"
3131
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
3232
cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
33-
cdispec "tags.cncf.io/container-device-interface/specs-go"
3433

3534
configapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
3635
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
@@ -492,26 +491,12 @@ func (s *DeviceState) applyComputeDomainDaemonConfig(ctx context.Context, config
492491
ComputeDomain: config.DomainID,
493492
}
494493

495-
cd, err := s.computeDomainManager.GetComputeDomain(ctx, config.DomainID)
494+
// Always inject CD details into the CD daemon. In a heterogeneous CD, the clique
495+
// ID below may be empty (while other CD details are set, and consumed by
496+
// the CD daemon).
497+
edits, err := s.computeDomainManager.GetComputeDomainDaemonContainerEdits(ctx, config.DomainID)
496498
if err != nil {
497-
return nil, fmt.Errorf("error getting compute domain: %w", err)
498-
}
499-
if cd == nil {
500-
return nil, fmt.Errorf("compute domain not found: %s", config.DomainID)
501-
}
502-
503-
// Always inject Compute Domain details into the CD daemon, in a
504-
// heterogeneous CD clique ID below may be empty (while other CD details are
505-
// set, and consumed by the CD daemon).
506-
edits := &cdiapi.ContainerEdits{
507-
ContainerEdits: &cdispec.ContainerEdits{
508-
Env: []string{
509-
fmt.Sprintf("CLIQUE_ID=%s", s.computeDomainManager.cliqueID),
510-
fmt.Sprintf("COMPUTE_DOMAIN_UUID=%s", cd.UID),
511-
fmt.Sprintf("COMPUTE_DOMAIN_NAME=%s", cd.Name),
512-
fmt.Sprintf("COMPUTE_DOMAIN_NAMESPACE=%s", cd.Namespace),
513-
},
514-
},
499+
return nil, fmt.Errorf("error preparing ComputeDomain daemon settings: %w", err)
515500
}
516501
configState.containerEdits = configState.containerEdits.Append(edits)
517502

@@ -532,10 +517,7 @@ func (s *DeviceState) applyComputeDomainDaemonConfig(ctx context.Context, config
532517
}
533518

534519
// Store information about the ComputeDomain daemon in the configState.
535-
edits, err := computeDomainDaemonSettings.GetCDIContainerEdits(ctx, s.cdi.devRoot, nvcapDeviceInfo)
536-
if err != nil {
537-
return nil, fmt.Errorf("error getting container edits for ComputeDomain daemon for requests '%v' in claim '%v': %w", requests, claim.UID, err)
538-
}
520+
edits := computeDomainDaemonSettings.GetCDIContainerEditsForImex(ctx, s.cdi.devRoot, nvcapDeviceInfo)
539521
configState.containerEdits = configState.containerEdits.Append(edits)
540522
}
541523

0 commit comments

Comments
 (0)