@@ -39,7 +39,10 @@ type ComputeDomain struct {
3939 metav1.TypeMeta `json:",inline"`
4040 metav1.ObjectMeta `json:"metadata,omitempty"`
4141
42- Spec ComputeDomainSpec `json:"spec,omitempty"`
42+ Spec ComputeDomainSpec `json:"spec,omitempty"`
43+ // Global ComputeDomain status. Updated only when `Spec.numNodes` is
44+ // non-zero. Can be used to guide debugging efforts. Workloads, however,
45+ // should not rely on inspecting this field at any point in their lifecycle.
4346 Status ComputeDomainStatus `json:"status,omitempty"`
4447}
4548
@@ -57,6 +60,17 @@ type ComputeDomainList struct {
5760
5861// ComputeDomainSpec provides the spec for a ComputeDomain.
5962type ComputeDomainSpec struct {
63+ // Intended number of IMEX daemons (i.e., individual compute nodes) in the
64+ // ComputeDomain. Must be zero or greater. Recommended to be set to zero:
65+ // a workload must implement and consult its own source of truth for the
66+ // number of workers online before trying to share GPU memory (and hence
67+ // triggering IMEX interaction). When non-zero, `numNodes` is used only for
68+ // setting the global ComputeDomain `Status` (indicating `Ready` when the
69+ // number of ready IMEX daemons equals `numNodes`). In particular,
70+ // `numNodes` no longer gates the startup of IMEX daemons and their
71+ // corresponding workload pods (to restore this behavior, set
72+ // `featureGates.IMEXDaemonsWithDNSNames=false`). This parameter is
73+ // deprecated and will be removed in the next API version.
6074 NumNodes int `json:"numNodes"`
6175 Channel * ComputeDomainChannelSpec `json:"channel"`
6276}
0 commit comments