@@ -37,17 +37,21 @@ import (
3737)
3838
3939const (
40- DriverName = "gpu.nvidia.com"
41- ImexDomainLabel = "nvidia.com/gpu.imex-domain"
42- ImexChannelLimit = 128
43- DriverChannelLimit = 2048
// DriverName is the driver name handed to the resource slice controller.
40+ DriverName = "gpu.nvidia.com"
// ImexDomainLabel is the label key naming an IMEX domain.
// NOTE(review): presumably read from node labels; confirm against streamImexDomains.
41+ ImexDomainLabel = "nvidia.com/gpu.imex-domain"
// ResourceSliceImexChannelLimit is the number of IMEX channels generated for
// one pool, and therefore the stride between two consecutive channel offsets.
42+ ResourceSliceImexChannelLimit = 128
// DriverImexChannelLimit is the exclusive upper bound for channel offsets;
// an offset at or above this value is rejected as "channel limit reached".
43+ DriverImexChannelLimit = 2048
4444)
4545
// ImexManager publishes IMEX channel pools through a resource slice controller.
4646type ImexManager struct {
// waitGroup tracks the background goroutine started by manageResourceSlices.
4747 waitGroup sync.WaitGroup
// clientset is the Kubernetes client passed to the resource slice controller.
4848 clientset kubernetes.Interface
4949}
5050
51+ // imexDomainOffsets records the channel offset assigned to each
52+ // <imex-domain, cliqueid> combination when building its ResourceSlice pool.
// The outer key is the part of the domain name before the first "." and the
// inner key is the part after it; the value is the first channel number of
// the range generated for that combination.
53+ type imexDomainOffsets map [string ]map [string ]int
54+
5155func StartIMEXManager (ctx context.Context , config * Config ) (* ImexManager , error ) {
5256 // Build a client set config
5357 csconfig , err := config .flags .kubeClientConfig .NewClientSetConfig ()
@@ -99,37 +103,34 @@ func StartIMEXManager(ctx context.Context, config *Config) (*ImexManager, error)
99103
100104// manageResourceSlices reacts to added and removed IMEX domains and triggers the creation / removal of resource slices accordingly.
101105func (m * ImexManager ) manageResourceSlices (ctx context.Context , owner resourceslice.Owner , addedDomainsCh <- chan string , removedDomainsCh <- chan string ) error {
102- driverResources := resourceslice.DriverResources {
103- Pools : make (map [string ]resourceslice.Pool ),
104- }
105- controller , err := resourceslice .StartController (ctx , m .clientset , DriverName , owner , & driverResources )
106+ driverResources := & resourceslice.DriverResources {}
107+ controller , err := resourceslice .StartController (ctx , m .clientset , DriverName , owner , driverResources )
106108 if err != nil {
107109 return fmt .Errorf ("error starting resource slice controller: %w" , err )
108110 }
109111
110- imexChannelOffsets := make (map [string ]map [string ]int )
111-
112+ ico := new (imexDomainOffsets )
112113 m .waitGroup .Add (1 )
113114 go func () {
114115 defer m .waitGroup .Done ()
115116 for {
116117 select {
117118 case addedDomain := <- addedDomainsCh :
118- klog . Infof ( "Adding channels for new IMEX domain: %v" , addedDomain )
119- if err := calculateImexChannelLimit ( addedDomain , & imexChannelOffsets ); err != nil {
120- klog .Error ( err )
119+ offset , err := ico . getOffset ( addedDomain )
120+ if err != nil {
121+ klog .Errorf ( "Error calculating channel offset for IMEX domain %s: %v" , addedDomain , err )
121122 return
122123 }
124+ klog .Infof ("Adding channels for new IMEX domain: %v" , addedDomain )
125+ driverResources .Pools [addedDomain ] = generateImexChannelPool (addedDomain , offset , ResourceSliceImexChannelLimit )
123126 newDriverResources := driverResources .DeepCopy ()
124- newDriverResources .Pools [addedDomain ] = generateImexChannelPool (addedDomain , imexChannelOffsets )
125127 controller .Update (newDriverResources )
126- driverResources = * newDriverResources
127128 case removedDomain := <- removedDomainsCh :
128129 klog .Infof ("Removing channels for removed IMEX domain: %v" , removedDomain )
130+ delete (driverResources .Pools , removedDomain )
129131 newDriverResources := driverResources .DeepCopy ()
130- delete (newDriverResources .Pools , removedDomain )
131132 controller .Update (newDriverResources )
132- driverResources = * newDriverResources
133+ ico . delete ( removedDomain )
133134 case <- ctx .Done ():
134135 return
135136 }
@@ -246,13 +247,10 @@ func (m *ImexManager) streamImexDomains(ctx context.Context) (<-chan string, <-c
246247}
247248
248249// generateImexChannelPool generates the contents of a ResourceSlice pool for a given IMEX domain.
249- func generateImexChannelPool (imexDomain string , imexChannelOffsets map [string ]map [string ]int ) resourceslice.Pool {
250- id := strings .Split (imexDomain , "." )
251- numChannels := imexChannelOffsets [id [0 ]][id [1 ]] + ImexChannelLimit
252-
250+ func generateImexChannelPool (imexDomain string , offset int , numChannels int ) resourceslice.Pool {
253251 // Generate channels from offset to offset+numChannels (exclusive)
254252 var devices []resourceapi.Device
255- for i := imexChannelOffsets [ id [ 0 ]][ id [ 1 ]] ; i < numChannels ; i ++ {
253+ for i := offset ; i < ( offset + numChannels ) ; i ++ {
256254 d := resourceapi.Device {
257255 Name : fmt .Sprintf ("imex-channel-%d" , i ),
258256 Basic : & resourceapi.BasicDevice {
@@ -313,24 +311,32 @@ func (m *ImexManager) cleanupResourceSlices() error {
313311 return nil
314312}
315313
316- // calculateImexChannelLimit calculates the number of IMEX channels that can be allocated to a given IMEX domain.
317- // if the limit is reached, it will return an error.
318- func calculateImexChannelLimit ( addedDomainsCh string , imexChannelOffsets * map [ string ] map [ string ] int ) error {
314+ // getOffset set the offset where an imex domain's channels should start counting from
315+ func ( i * imexDomainOffsets ) getOffset ( addedDomainsCh string ) ( int , error ) {
316+ var offset int
319317 id := strings .Split (addedDomainsCh , "." )
320318
321319 // Check if the IMEX domain is already in the map
322- if _ , ok := (* imexChannelOffsets )[id [0 ]]; ! ok {
323- (* imexChannelOffsets )[id [0 ]] = make (map [string ]int )
320+ if _ , ok := (* i )[id [0 ]]; ! ok {
321+ (* i )[id [0 ]] = make (map [string ]int )
324322 }
325323
326324 // Check if the clique is already in the map
327- if _ , ok := (* imexChannelOffsets )[id [0 ]][id [1 ]]; ! ok {
328- offset : = (len ((* imexChannelOffsets )[id [0 ]]) * ImexChannelLimit )
329- if offset >= DriverChannelLimit {
330- return fmt .Errorf ("error adding IMEX domain %s: channel limit reached" , id [0 ])
325+ if _ , ok := (* i )[id [0 ]][id [1 ]]; ! ok {
326+ offset = (len ((* i )[id [0 ]]) * ResourceSliceImexChannelLimit )
327+ if offset >= DriverImexChannelLimit {
328+ return 0 , fmt .Errorf ("error adding IMEX domain %s: channel limit reached" , id [0 ])
331329 }
332- (* imexChannelOffsets )[id [0 ]][id [1 ]] = offset
330+ (* i )[id [0 ]][id [1 ]] = offset
333331 }
334332
335- return nil
333+ return offset , nil
334+ }
335+
336+ func (i * imexDomainOffsets ) delete (removedDomainsCh string ) {
337+ id := strings .Split (removedDomainsCh , "." )
338+ delete ((* i )[id [0 ]], id [1 ])
339+ if len ((* i )[id [0 ]]) == 0 {
340+ delete (* i , id [0 ])
341+ }
336342}
0 commit comments