@@ -32,7 +32,9 @@ import (
3232
const (
	// procDevicesPath is the file getDeviceMajor scans to map a device name
	// to its major number.
	procDevicesPath = "/proc/devices"

	// nvidiaCapsDeviceName is the device name whose major number is used for
	// nvcap device nodes (see parseNVCapDeviceInfo).
	nvidiaCapsDeviceName = "nvidia-caps"

	// nvidiaCapsImexChannelsDeviceName is the device name whose major number
	// is used for IMEX channel device nodes (see
	// createComputeDomainChannelDevice).
	nvidiaCapsImexChannelsDeviceName = "nvidia-caps-imex-channels"

	// nvidiaCapFabricImexMgmtPath is the path of the fabric-imex-mgmt
	// capability file.
	// NOTE(review): presumably this is what callers pass to
	// createNvCapDevice; no caller is visible here, so confirm.
	nvidiaCapFabricImexMgmtPath = "/proc/driver/nvidia/capabilities/fabric-imex-mgmt"
)
3739
3840type deviceLib struct {
@@ -43,6 +45,14 @@ type deviceLib struct {
4345 nvidiaSMIPath string
4446}
4547
// nvcapDeviceInfo describes the device node associated with an nvidia
// capability file, as assembled by parseNVCapDeviceInfo.
type nvcapDeviceInfo struct {
	// major is the device major number looked up for nvidiaCapsDeviceName.
	major int
	// minor is the value of the "DeviceFileMinor" key in the capability file.
	minor int
	// mode is the value of the "DeviceFileMode" key in the capability file.
	mode int
	// modify is the value of the "DeviceFileModify" key in the capability file.
	modify int
	// path is the device node path, /dev/nvidia-caps/nvidia-cap<minor>.
	path string
}
55+
4656func newDeviceLib (driverRoot root ) (* deviceLib , error ) {
4757 driverLibraryPath , err := driverRoot .getDriverLibraryPath ()
4858 if err != nil {
@@ -128,7 +138,7 @@ func (l deviceLib) getImexChannelCount() (int, error) {
128138 return 2048 , nil
129139}
130140
131- func (l deviceLib ) getImexChannelMajor ( ) (int , error ) {
141+ func (l deviceLib ) getDeviceMajor ( name string ) (int , error ) {
132142 file , err := os .Open (procDevicesPath )
133143 if err != nil {
134144 return - 1 , err
@@ -163,7 +173,7 @@ func (l deviceLib) getImexChannelMajor() (int, error) {
163173 // If we've passed the character devices section, check for the requested device name
164174 if foundCharDevices {
165175 parts := strings .Fields (line )
166- if len (parts ) == 2 && parts [1 ] == nvidiaCapsImexChannelsDeviceName {
176+ if len (parts ) == 2 && parts [1 ] == name {
167177 return strconv .Atoi (parts [0 ])
168178 }
169179 }
@@ -172,14 +182,57 @@ func (l deviceLib) getImexChannelMajor() (int, error) {
172182 return - 1 , scanner .Err ()
173183}
174184
185+ func (l deviceLib ) parseNVCapDeviceInfo (nvcapsFilePath string ) (* nvcapDeviceInfo , error ) {
186+ file , err := os .Open (nvcapsFilePath )
187+ if err != nil {
188+ return nil , err
189+ }
190+ defer file .Close ()
191+
192+ info := & nvcapDeviceInfo {}
193+
194+ major , err := l .getDeviceMajor (nvidiaCapsDeviceName )
195+ if err != nil {
196+ return nil , fmt .Errorf ("error getting device major: %w" , err )
197+ }
198+ info .major = major
199+
200+ scanner := bufio .NewScanner (file )
201+ for scanner .Scan () {
202+ line := scanner .Text ()
203+ parts := strings .SplitN (line , ":" , 2 )
204+ if len (parts ) != 2 {
205+ continue
206+ }
207+ key := strings .TrimSpace (parts [0 ])
208+ value := strings .TrimSpace (parts [1 ])
209+
210+ switch key {
211+ case "DeviceFileMinor" :
212+ _ , _ = fmt .Sscanf (value , "%d" , & info .minor )
213+ case "DeviceFileMode" :
214+ _ , _ = fmt .Sscanf (value , "%d" , & info .mode )
215+ case "DeviceFileModify" :
216+ _ , _ = fmt .Sscanf (value , "%d" , & info .modify )
217+ }
218+ }
219+ info .path = fmt .Sprintf ("/dev/nvidia-caps/nvidia-cap%d" , info .minor )
220+
221+ if err := scanner .Err (); err != nil {
222+ return nil , err
223+ }
224+
225+ return info , nil
226+ }
227+
175228func (l deviceLib ) createComputeDomainChannelDevice (channel int ) error {
176229 // Construct the properties of the device node to create.
177230 path := fmt .Sprintf ("/dev/nvidia-caps-imex-channels/channel%d" , channel )
178231 path = filepath .Join (l .devRoot , path )
179232 mode := uint32 (unix .S_IFCHR | 0666 )
180233
181234 // Get the IMEX channel major and build a /dev device from it
182- major , err := l .getImexChannelMajor ( )
235+ major , err := l .getDeviceMajor ( nvidiaCapsImexChannelsDeviceName )
183236 if err != nil {
184237 return fmt .Errorf ("error getting IMEX channel major: %w" , err )
185238 }
@@ -202,3 +255,33 @@ func (l deviceLib) createComputeDomainChannelDevice(channel int) error {
202255
203256 return nil
204257}
258+
259+ func (l deviceLib ) createNvCapDevice (nvcapFilePath string ) error {
260+ // Get the nvcapDeviceInfo for the nvcap file.
261+ deviceInfo , err := l .parseNVCapDeviceInfo (nvcapFilePath )
262+ if err != nil {
263+ return fmt .Errorf ("error parsing nvcap file for fabric-imex-mgmt: %w" , err )
264+ }
265+
266+ // Construct the necessary information to create the device node
267+ path := filepath .Join (l .devRoot , deviceInfo .path )
268+ mode := unix .S_IFCHR | uint32 (deviceInfo .mode )
269+ dev := unix .Mkdev (uint32 (deviceInfo .major ), uint32 (deviceInfo .minor ))
270+
271+ // Recursively create any parent directories of the device.
272+ if err := os .MkdirAll (filepath .Dir (path ), 0755 ); err != nil {
273+ return fmt .Errorf ("error creating directory for nvcaps device nodes: %w" , err )
274+ }
275+
276+ // Remove the device if it already exists.
277+ if err := os .Remove (path ); err != nil && ! os .IsNotExist (err ) {
278+ return fmt .Errorf ("error removing existing nvcap device node: %w" , err )
279+ }
280+
281+ // Create the device node using syscall.Mknod
282+ if err := unix .Mknod (path , mode , int (dev )); err != nil {
283+ return fmt .Errorf ("mknod of nvcap device failed: %w" , err )
284+ }
285+
286+ return nil
287+ }
0 commit comments