Skip to content

Commit 62450eb

Browse files
authored
Merge pull request #122 from isaac-400/icf/nvidia-persistenced
feat: nvidia-persistenced to Nvidia kmod packages
2 parents 169984b + 98e0cc1 commit 62450eb

13 files changed

+102
-0
lines changed

packages/ecs-gpu-init/ecs-gpu-init.service

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Description=Initialize ECS GPU config
44
# otherwise the userspace component of the driver will fail to
55
# query the /dev devices
66
After=load-tesla-kernel-modules.service load-open-gpu-kernel-modules.service
7+
# Running this unit after nvidia-persistenced ensures that
8+
# the /dev devices are created and the hardware is set to
9+
# persistence mode.
10+
Requires=nvidia-persistenced.service
11+
After=nvidia-persistenced.service
712
# Block manual interactions with this service. It doesn't
813
# make sense to regenerate the GPU config file if the ECS
914
# agent won't read it when it changes

packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Source2: NVidiaEULAforAWS.pdf
1919
# Common NVIDIA conf files from 200 to 299
2020
Source200: nvidia-tmpfiles.conf.in
2121
Source202: nvidia-dependencies-modules-load.conf
22+
Source203: nvidia-sysusers.conf
23+
Source204: nvidia-persistenced.service.in
2224

2325
# NVIDIA tesla conf files from 300 to 399
2426
Source300: nvidia-tesla-tmpfiles.conf.in
@@ -86,6 +88,7 @@ install -d %{buildroot}%{_cross_libexecdir}
8688
install -d %{buildroot}%{_cross_libdir}
8789
install -d %{buildroot}%{_cross_tmpfilesdir}
8890
install -d %{buildroot}%{_cross_unitdir}
91+
install -d %{buildroot}%{_cross_bindir}
8992
install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/{drivers,ld.so.conf.d}
9093

9194
KERNEL_VERSION=$(cat %{kernel_sources}/include/config/kernel.release)
@@ -105,6 +108,7 @@ install -d %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
105108
install -d %{buildroot}%{tesla_470_libdir}
106109
install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d
107110
install -d %{buildroot}%{_cross_factorydir}/nvidia/tesla/%{tesla_470}
111+
install -d %{buildroot}%{_cross_sysusersdir}
108112

109113
sed -e 's|__NVIDIA_VERSION__|%{tesla_470}|' %{S:300} > nvidia-tesla-%{tesla_470}.conf
110114
install -m 0644 nvidia-tesla-%{tesla_470}.conf %{buildroot}%{_cross_tmpfilesdir}/
@@ -158,10 +162,19 @@ install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{te
158162
install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
159163
install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
160164
install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
165+
install -m 755 nvidia-persistenced %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
166+
install -m 4755 nvidia-modprobe %{buildroot}%{_cross_bindir}
161167
%if "%{_cross_arch}" == "x86_64"
162168
install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}
163169
%endif
164170

171+
# Users
172+
install -m 0644 %{S:203} %{buildroot}%{_cross_sysusersdir}/nvidia.conf
173+
174+
# Systemd units
175+
sed -e 's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}|' %{S:204} > nvidia-persistenced.service
176+
install -m 0644 nvidia-persistenced.service %{buildroot}%{_cross_unitdir}
177+
165178
# We install all the libraries, and filter them out in the 'files' section, so we can catch
166179
# when new libraries are added
167180
install -m 755 *.so* %{buildroot}/%{tesla_470_libdir}/
@@ -206,6 +219,8 @@ popd
206219
# Binaries
207220
%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-debugdump
208221
%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-smi
222+
%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-persistenced
223+
%{_cross_bindir}/nvidia-modprobe
209224

210225
# Configuration files
211226
%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-tesla-%{tesla_470}.toml
@@ -229,6 +244,12 @@ popd
229244
# tmpfiles
230245
%{_cross_tmpfilesdir}/nvidia-tesla-%{tesla_470}.conf
231246

247+
# sysuser files
248+
%{_cross_sysusersdir}/nvidia.conf
249+
250+
# systemd units
251+
%{_cross_unitdir}/nvidia-persistenced.service
252+
232253
# We only install the libraries required by all the DRIVER_CAPABILITIES, described here:
233254
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
234255

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[Unit]
2+
Description=NVIDIA Persistence Daemon
3+
After=load-tesla-kernel-modules.service load-open-gpu-kernel-modules.service
4+
5+
[Service]
6+
Type=forking
7+
ExecStart=__NVIDIA_BINDIR__/nvidia-persistenced --user nvidia --verbose
8+
9+
[Install]
10+
RequiredBy=preconfigured.target
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
u nvidia - "nvidia-persistenced user"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla - - - - -
22
d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla 0755 root root - -
3+
D /var/run/nvidia-persistenced 0755 nvidia nvidia - -

packages/kmod-5.15-nvidia/kmod-5.15-nvidia.spec

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ Source200: nvidia-tmpfiles.conf.in
4040
Source202: nvidia-dependencies-modules-load.conf
4141
Source203: nvidia-fabricmanager.service
4242
Source204: nvidia-fabricmanager.cfg
43+
Source205: nvidia-sysusers.conf
44+
Source206: nvidia-persistenced.service
4345

4446
# NVIDIA tesla conf files from 300 to 399
4547
Source300: nvidia-tesla-tmpfiles.conf
@@ -173,6 +175,8 @@ install -d %{buildroot}%{_cross_libdir}
173175
install -d %{buildroot}%{_cross_tmpfilesdir}
174176
install -d %{buildroot}%{_cross_unitdir}
175177
install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/{drivers,ld.so.conf.d}
178+
install -d %{buildroot}%{_cross_sysusersdir}
179+
install -d %{buildroot}%{_cross_bindir}
176180

177181
KERNEL_VERSION=$(cat %{kernel_sources}/include/config/kernel.release)
178182
sed \
@@ -279,10 +283,18 @@ install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
279283
install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
280284
install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
281285
install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
286+
install -m 755 nvidia-persistenced %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/
287+
install -m 4755 nvidia-modprobe %{buildroot}%{_cross_bindir}
282288
%if "%{_cross_arch}" == "x86_64"
283289
install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
284290
%endif
285291

292+
# Users
293+
install -m 0644 %{S:205} %{buildroot}%{_cross_sysusersdir}/nvidia.conf
294+
295+
# Systemd units
296+
install -m 0644 %{S:206} %{buildroot}%{_cross_unitdir}
297+
286298
# We install all the libraries, and filter them out in the 'files' section, so we can catch
287299
# when new libraries are added
288300
install -m 755 *.so* %{buildroot}/%{_cross_libdir}/nvidia/tesla/
@@ -353,6 +365,8 @@ popd
353365
%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-smi
354366
%{_cross_libexecdir}/nvidia/tesla/bin/nv-fabricmanager
355367
%{_cross_libexecdir}/nvidia/tesla/bin/nvswitch-audit
368+
%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-persistenced
369+
%{_cross_bindir}/nvidia-modprobe
356370

357371
# nvswitch topologies
358372
%dir %{_cross_datadir}/nvidia/tesla/nvswitch
@@ -386,6 +400,12 @@ popd
386400
# tmpfiles
387401
%{_cross_tmpfilesdir}/nvidia-tesla.conf
388402

403+
# sysuser files
404+
%{_cross_sysusersdir}/nvidia.conf
405+
406+
# systemd units
407+
%{_cross_unitdir}/nvidia-persistenced.service
408+
389409
# We only install the libraries required by all the DRIVER_CAPABILITIES, described here:
390410
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
391411

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[Unit]
2+
Description=NVIDIA Persistence Daemon
3+
After=load-tesla-kernel-modules.service load-open-gpu-kernel-modules.service
4+
5+
[Service]
6+
Type=forking
7+
ExecStart=/usr/libexec/nvidia/tesla/bin/nvidia-persistenced --user nvidia --verbose
8+
9+
[Install]
10+
RequiredBy=preconfigured.target
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
u nvidia - "nvidia-persistenced user"

packages/kmod-5.15-nvidia/nvidia-tmpfiles.conf.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/op
44
d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/open-gpu 0755 root root - -
55
C /etc/nvidia/fabricmanager.cfg - - - -
66
d /run/nvidia 0700 root root -
7+
D /var/run/nvidia-persistenced 0755 nvidia nvidia - -

packages/kmod-6.1-nvidia/kmod-6.1-nvidia.spec

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ Source200: nvidia-tmpfiles.conf.in
4040
Source202: nvidia-dependencies-modules-load.conf
4141
Source203: nvidia-fabricmanager.service
4242
Source204: nvidia-fabricmanager.cfg
43+
Source205: nvidia-sysusers.conf
44+
Source206: nvidia-persistenced.service
4345

4446
# NVIDIA tesla conf files from 300 to 399
4547
Source300: nvidia-tesla-tmpfiles.conf
@@ -173,6 +175,8 @@ install -d %{buildroot}%{_cross_libdir}
173175
install -d %{buildroot}%{_cross_tmpfilesdir}
174176
install -d %{buildroot}%{_cross_unitdir}
175177
install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/{drivers,ld.so.conf.d}
178+
install -d %{buildroot}%{_cross_sysusersdir}
179+
install -d %{buildroot}%{_cross_bindir}
176180

177181
KERNEL_VERSION=$(cat %{kernel_sources}/include/config/kernel.release)
178182
sed \
@@ -279,10 +283,18 @@ install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
279283
install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
280284
install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
281285
install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
286+
install -m 755 nvidia-persistenced %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/
287+
install -m 4755 nvidia-modprobe %{buildroot}%{_cross_bindir}
282288
%if "%{_cross_arch}" == "x86_64"
283289
install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin
284290
%endif
285291

292+
# Users
293+
install -m 0644 %{S:205} %{buildroot}%{_cross_sysusersdir}/nvidia.conf
294+
295+
# Systemd units
296+
install -m 0644 %{S:206} %{buildroot}%{_cross_unitdir}
297+
286298
# We install all the libraries, and filter them out in the 'files' section, so we can catch
287299
# when new libraries are added
288300
install -m 755 *.so* %{buildroot}/%{_cross_libdir}/nvidia/tesla/
@@ -353,6 +365,8 @@ popd
353365
%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-smi
354366
%{_cross_libexecdir}/nvidia/tesla/bin/nv-fabricmanager
355367
%{_cross_libexecdir}/nvidia/tesla/bin/nvswitch-audit
368+
%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-persistenced
369+
%{_cross_bindir}/nvidia-modprobe
356370

357371
# nvswitch topologies
358372
%dir %{_cross_datadir}/nvidia/tesla/nvswitch
@@ -386,6 +400,12 @@ popd
386400
# tmpfiles
387401
%{_cross_tmpfilesdir}/nvidia-tesla.conf
388402

403+
# sysuser files
404+
%{_cross_sysusersdir}/nvidia.conf
405+
406+
# systemd units
407+
%{_cross_unitdir}/nvidia-persistenced.service
408+
389409
# We only install the libraries required by all the DRIVER_CAPABILITIES, described here:
390410
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
391411

0 commit comments

Comments
 (0)