From 9aed02c2117eaade1e9e57198b27e16fa875b1fd Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sat, 25 Jan 2025 22:25:55 +0800 Subject: [PATCH 1/4] fix file copy logic in nvidia mode --- udocker/engine/nvidia.py | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/udocker/engine/nvidia.py b/udocker/engine/nvidia.py index 7f598375..d3f99249 100644 --- a/udocker/engine/nvidia.py +++ b/udocker/engine/nvidia.py @@ -35,8 +35,24 @@ def _files_exist(self, cont_dst_dir, files_list): if os.path.exists(dstname): raise OSError("file already exists", dstname) + def _copy_single_file(self, src_path, dst_path): + """Copy a single file to destination and change mask, won't overwrite""" + if os.path.exists(dst_path): + Msg().out("Debug: dest file", dst_path, "already exists, skipped copy", l=Msg.DBG) + return + shutil.copy2(src_path, dst_path) + try: + mask = stat.S_IMODE(os.stat(src_path).st_mode) | \ + stat.S_IWUSR | stat.S_IRUSR + if os.access(src_path, os.X_OK): + mask = mask | stat.S_IXUSR + os.chmod(dst_path, mask) + except (IOError, OSError) as error: + Msg().err("Error: change mask of nvidia file", error) + Msg().out("Debug: copied file from", src_path, "to", dst_path, l=Msg.DBG) + def _copy_files(self, host_src_dir, cont_dst_dir, files_list, force=False): - """copy or link file to destination creating directories as needed""" + """Copy or link file to destination creating directories as needed""" Msg().out("Debug: Source (host) dir ", host_src_dir, l=Msg.DBG) Msg().out("Debug: Destination (container) dir ", cont_dst_dir, l=Msg.DBG) for fname in files_list: @@ -65,19 +81,19 @@ def _copy_files(self, host_src_dir, cont_dst_dir, files_list, force=False): if os.path.islink(srcname): linkto = os.readlink(srcname) - os.symlink(linkto, dstname) - Msg().out("Debug: is link", srcname, "to", dstname, l=Msg.DBG) + if os.path.basename(linkto) == linkto: + # Link to a file in the same directory as srcname, copy link + Msg().out("Debug: is link", srcname, "to", dstname, l=Msg.DBG) + os.symlink(linkto, dstname) + else: + # Link to a file in a different directory, copy real file + real_src_path = os.path.realpath(srcname) + Msg().out("Debug: is file", real_src_path, "to", dstname, l=Msg.DBG) + self._copy_single_file(real_src_path, dstname) + elif os.path.isfile(srcname): - shutil.copy2(srcname, dstname) Msg().out("Debug: is file", srcname, "to", dstname, l=Msg.DBG) - try: - mask = stat.S_IMODE(os.stat(srcname).st_mode) | \ - stat.S_IWUSR | stat.S_IRUSR - if os.access(srcname, os.X_OK): - mask = mask | stat.S_IXUSR - os.chmod(dstname, mask) - except (IOError, OSError) as error: - Msg().err("Error: change mask of nvidia file", error) + self._copy_single_file(srcname, dstname) else: Msg().err("Warn: nvidia file in config not found", srcname) @@ -132,6 +148,7 @@ def _find_host_dir(self): def _find_cont_dir(self): """Find the location of the host target directory for libraries""" + # Ensure these directories exist in the container for dst_dir in ("/usr/lib/x86_64-linux-gnu", "/usr/lib64"): if os.path.isdir(self.container_root + '/' + dst_dir): Msg().out("Debug: Cont. location nvidia", dst_dir, l=Msg.DBG) From cecce409c4c9ad39cbefd3664616f907309f922e Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sun, 26 Jan 2025 12:20:16 +0800 Subject: [PATCH 2/4] improve dir creation logic --- udocker/engine/nvidia.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/udocker/engine/nvidia.py b/udocker/engine/nvidia.py index d3f99249..5cebce6d 100644 --- a/udocker/engine/nvidia.py +++ b/udocker/engine/nvidia.py @@ -71,7 +71,8 @@ def _copy_files(self, host_src_dir, cont_dst_dir, files_list, force=False): srcdir = os.path.dirname(srcname) dstdir = os.path.dirname(dstname) - if not os.path.isdir(dstdir): + if os.path.isdir(srcdir) and not os.path.isdir(dstdir): + # Create necessary directories only if srcdir exists try: os.makedirs(dstdir) os.chmod(dstdir, stat.S_IMODE(os.stat(srcdir).st_mode) | From d35e1e0ab3a9ae6c8201a9927c19236f176e4fa2 Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Sun, 26 Jan 2025 17:16:04 +0800 Subject: [PATCH 3/4] add unit tests for nvidia file copy logic --- tests/unit/test_nvidia.py | 129 ++++++++++++++++++++++++++++++++------ 1 file changed, 110 insertions(+), 19 deletions(-) diff --git a/tests/unit/test_nvidia.py b/tests/unit/test_nvidia.py index 4a10fb02..e4863baf 100755 --- a/tests/unit/test_nvidia.py +++ b/tests/unit/test_nvidia.py @@ -3,17 +3,18 @@ udocker unit tests: NVIDIA mode """ +import collections + from unittest import TestCase, main from unittest.mock import patch, Mock from udocker.config import Config from udocker.engine.nvidia import NvidiaMode -import collections collections.Callable = collections.abc.Callable class NvidiaModeTestCase(TestCase): - """Test PRootEngine() class for containers execution.""" + """Test NvidiaMode() class for nvidia mode setup.""" def setUp(self): Config().getconf() @@ -32,6 +33,11 @@ def tearDown(self): self.lrepo.stop() self.cdcont.stop() + def _reset_mocks(self,*mocks): + """Reset mocks.""" + for mock in mocks: + mock.reset_mock() + def test_01_init(self): """Test01 NvidiaMode() constructor.""" cdir = "/" + self.cont_id @@ -53,54 +59,105 @@ def test_02__files_exist(self, mock_exists): @patch('udocker.engine.nvidia.Msg') @patch('udocker.engine.nvidia.os.access') - @patch('udocker.engine.nvidia.stat') @patch('udocker.engine.nvidia.shutil.copy2') + @patch('udocker.engine.nvidia.stat') @patch('udocker.engine.nvidia.os.symlink') @patch('udocker.engine.nvidia.os.readlink') @patch('udocker.engine.nvidia.os.chmod') @patch('udocker.engine.nvidia.os.makedirs') + @patch('udocker.engine.nvidia.os.path.exists') @patch('udocker.engine.nvidia.os.path.isdir') - @patch('udocker.engine.nvidia.os.path.dirname') @patch('udocker.engine.nvidia.os.remove') + @patch('udocker.engine.nvidia.os.stat') @patch('udocker.engine.nvidia.os.path.islink') @patch('udocker.engine.nvidia.os.path.isfile') - def test_03__copy_files(self, mock_isfile, mock_islink, mock_rm, - mock_dirname, mock_isdir, mock_mkdir, mock_chmod, - mock_readln, mock_symln, mock_copy2, mock_stat, - mock_access, mock_msg): + @patch('udocker.engine.nvidia.os.path.realpath') + def test_03__copy_files(self, mock_realpath, mock_isfile, mock_islink, + mock_stat, mock_rm, mock_isdir, mock_exists, + mock_mkdir, mock_chmod, mock_readln, mock_symln, + mock_statmod, mock_copy2, mock_access, mock_msg): """Test03 NvidiaMode._copy_files.""" hsrc_dir = "/usr/lib" - cdst_dir = "/hone/.udocker/cont/ROOT/usr/lib" + cdst_dir = "/home/.udocker/cont/ROOT/usr/lib" flist = ["a"] + os_stat_result = Mock() + os_stat_result.st_mode = 0o100777 + mock_stat.return_value = os_stat_result + nvmode = NvidiaMode(self.local, self.cont_id) + + # Test force = False force = False mock_msg.level = 0 + + # Case 1: nvidia file already exists, force = False mock_isfile.side_effect = [True, False] mock_islink.side_effect = [True, False] mock_rm.return_value = None - nvmode = NvidiaMode(self.local, self.cont_id) status = nvmode._copy_files(hsrc_dir, cdst_dir, flist, force) - self.assertFalse(status) + self.assertFalse(status) # should return False self.assertTrue(mock_isfile.called) + self._reset_mocks(mock_isfile) + # Case 2: symlink to a file in the same directory + mock_isfile.side_effect = [False, False] + mock_islink.side_effect = [False, True] + mock_isdir.side_effect = [True, False] + mock_mkdir.return_value = None + mock_chmod.return_value = 644 + mock_statmod.side_effect = [444, 222] + mock_readln.return_value = "libOpenCL.so.1.0.0" + mock_symln.return_value = None + status = nvmode._copy_files(hsrc_dir, cdst_dir, flist, force) + self.assertTrue(status) + self.assertTrue(mock_isdir.called) + self.assertTrue(mock_mkdir.called) + self.assertTrue(mock_chmod.called) + self.assertTrue(mock_islink.called) + self.assertTrue(mock_symln.called) # should create symlink + self.assertFalse(mock_copy2.called) + self._reset_mocks(mock_isdir, mock_mkdir, mock_chmod, mock_islink, mock_symln, mock_copy2) + + # Case 3: symlink to a file in another directory + mock_isfile.side_effect = [False, False] + mock_islink.side_effect = [False, True] + mock_isdir.side_effect = [True, False] + mock_readln.return_value = "/usr/xxx" + mock_realpath.return_value = "/usr/lib/x86_64/xxx" + mock_exists.return_value = False + mock_copy2.return_value = None + mock_statmod.side_effect = [444, 222, 111, 111] + mock_access.return_value = False + mock_chmod.side_effect = [644, 755] + status = nvmode._copy_files(hsrc_dir, cdst_dir, flist, force) + self.assertTrue(status) + self.assertTrue(mock_copy2.called) # should copy file + self._reset_mocks(mock_copy2, mock_mkdir) + + # Case 4: srcname not exists + mock_isfile.side_effect = [False, False] + mock_islink.side_effect = [False, False] + mock_isdir.side_effect = [False, False] + status = nvmode._copy_files(hsrc_dir, cdst_dir, flist, force) + self.assertTrue(status) + self.assertFalse(mock_mkdir.called) # srcname not exists, should not mkdir + self._reset_mocks(mock_mkdir, mock_rm, mock_isfile) + + # Test force = True force = True mock_msg.level = 0 mock_isfile.side_effect = [True, True] mock_islink.side_effect = [True, False] - mock_dirname.side_effect = [None, None] + mock_isdir.side_effect = [True, True] mock_rm.return_value = None - mock_isdir.return_value = True mock_mkdir.return_value = None mock_chmod.side_effect = [644, 755] - mock_readln.return_value = "/usr/xxx" - mock_symln.return_value = None mock_copy2.return_value = None - mock_stat.side_effect = [444, 222, 111, 111] - mock_access.return_value = False - nvmode = NvidiaMode(self.local, self.cont_id) + mock_statmod.side_effect = [444, 222, 111, 111] + mock_access.return_value = True status = nvmode._copy_files(hsrc_dir, cdst_dir, flist, force) self.assertTrue(status) + self.assertTrue(mock_rm.called) # should remove existing file self.assertTrue(mock_isfile.called) - self.assertTrue(mock_rm.called) @patch('udocker.engine.nvidia.glob.glob') def test_04__get_nvidia_libs(self, mock_glob): @@ -259,6 +316,40 @@ def test_12_get_devices(self, mock_glob): status = nvmode.get_devices() self.assertEqual(status, Config().conf['nvi_dev_list']) + @patch('udocker.engine.nvidia.os.chmod') + @patch('udocker.engine.nvidia.os.access') + @patch('udocker.engine.nvidia.os.stat') + @patch('udocker.engine.nvidia.shutil.copy2') + @patch('udocker.engine.nvidia.stat') + @patch('udocker.engine.nvidia.os.path.exists') + def test_13__copy_single_file(self, mock_exists, mock_statmod, mock_copy2, + mock_stat, mock_access, mock_chmod): + """Test13 NvidiaMode._copy_single_file.""" + src = "/usr/lib/libnvidia.so" + dst = "/home/.udocker/cont/usr/lib/libnvidia.so" + os_stat_result = Mock() + os_stat_result.st_mode = 0o100777 + mock_stat.return_value = os_stat_result + nvmode = NvidiaMode(self.local, self.cont_id) + + mock_copy2.return_value = None + mock_statmod.side_effect = [444, 222, 111, 111] + mock_access.return_value = True + mock_chmod.return_value = 644 + + # Case 1: dst file exists + mock_exists.return_value = True + nvmode._copy_single_file(src, dst) + self.assertTrue(mock_exists.called) + self.assertFalse(mock_copy2.called) # should not overwrite + self._reset_mocks(mock_exists, mock_copy2) + + # Case 2: dst file does not exist + mock_exists.return_value = False + nvmode._copy_single_file(src, dst) + self.assertTrue(mock_copy2.called) # should copy file + self.assertTrue(mock_chmod.called) + if __name__ == '__main__': main() From 7f0c93d73c8ddf6b9a43eaed082e0325cff5cb1d Mon Sep 17 00:00:00 2001 From: SomeBottle Date: Thu, 30 Jan 2025 01:11:27 +0800 Subject: [PATCH 4/4] add nvidia lib path to env before running --- tests/unit/test_nvidia.py | 24 ++++++++++++++++++++++++ udocker/engine/base.py | 5 +++++ udocker/engine/nvidia.py | 21 +++++++++++++++++++-- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_nvidia.py b/tests/unit/test_nvidia.py index e4863baf..bddd24cb 100755 --- a/tests/unit/test_nvidia.py +++ b/tests/unit/test_nvidia.py @@ -9,6 +9,7 @@ from unittest.mock import patch, Mock from udocker.config import Config from udocker.engine.nvidia import NvidiaMode +from udocker.utils.uenv import Uenv collections.Callable = collections.abc.Callable @@ -350,6 +351,29 @@ def test_13__copy_single_file(self, mock_exists, mock_statmod, mock_copy2, self.assertTrue(mock_copy2.called) # should copy file self.assertTrue(mock_chmod.called) + @patch('udocker.engine.nvidia.os.path.isdir') + def test_14_merge_path_env(self, mock_isdir): + """Test14 NvidiaMode.merge_path_env().""" + nvmode = NvidiaMode(self.local, self.cont_id) + env_path = "/opt/python/bin" + nvi_lib_dir = nvmode._find_cont_dir() + mock_isdir.return_value = True + + # Case 1: LD_LIBRARY_PATH not in env + env = Uenv(["PATH=%s" % env_path]) + nvmode.merge_path_env(env) + self.assertEqual(nvi_lib_dir, env.getenv("LD_LIBRARY_PATH")) + self.assertEqual(env_path, env.getenv("PATH")) + self.assertTrue(mock_isdir.called) + + # Case 2: LD_LIBRARY_PATH in env + original_ld_library_path = "/usr/local/nvidia/lib:/usr/local/nvidia/lib64" + env = Uenv(["PATH=%s" % env_path, "LD_LIBRARY_PATH=%s" % original_ld_library_path]) + nvmode.merge_path_env(env) + self.assertTrue(nvi_lib_dir in env.getenv("LD_LIBRARY_PATH")) + self.assertTrue(original_ld_library_path in env.getenv("LD_LIBRARY_PATH")) + self.assertEqual(env_path, env.getenv("PATH")) + if __name__ == '__main__': main() diff --git a/udocker/engine/base.py b/udocker/engine/base.py index 06f8afd9..fa2fe4b7 100644 --- a/udocker/engine/base.py +++ b/udocker/engine/base.py @@ -9,6 +9,7 @@ from udocker.genstr import is_genstr from udocker.msg import Msg from udocker.config import Config +from udocker.engine.nvidia import NvidiaMode from udocker.helper.nixauth import NixAuthentication from udocker.helper.hostinfo import HostInfo from udocker.helper.osinfo import OSInfo @@ -566,6 +567,10 @@ def _run_env_set(self): self.opt["env"].append("container_execmode=" + self.exec_mode.get_mode()) cont_name = self.container_names + # Add nvidia library path to LD_LIBRARY_PATH + nvidia_mode = NvidiaMode(self.localrepo, self.container_id) + if nvidia_mode.get_mode(): + nvidia_mode.merge_path_env(self.opt["env"]) # if Python 3 if sys.version_info[0] >= 3: names = str(cont_name).translate(str.maketrans('', '', " '\"[]")) diff --git a/udocker/engine/nvidia.py b/udocker/engine/nvidia.py index 5cebce6d..4a8277d7 100644 --- a/udocker/engine/nvidia.py +++ b/udocker/engine/nvidia.py @@ -149,7 +149,7 @@ def _find_host_dir(self): def _find_cont_dir(self): """Find the location of the host target directory for libraries""" - # Ensure these directories exist in the container + # Support different linux distributions for dst_dir in ("/usr/lib/x86_64-linux-gnu", "/usr/lib64"): if os.path.isdir(self.container_root + '/' + dst_dir): Msg().out("Debug: Cont. location nvidia", dst_dir, l=Msg.DBG) @@ -164,8 +164,9 @@ def _installation_exists(self, nvi_host_dir_list, nvi_cont_dir): self._files_exist(nvi_cont_dir, lib_list) self._files_exist("/etc", Config.conf['nvi_etc_list']) self._files_exist("/usr/bin", Config.conf['nvi_bin_list']) - Msg().out("Info: Cont, has files from previous nvidia install") + Msg().out("Info: Cont, nvidia not set up") except OSError: + Msg().out("Info: Cont, has files from previous nvidia install") return True return False @@ -209,3 +210,19 @@ def get_devices(self): dev_list.append(expanded_devs) Msg().out("Debug: nvidia device list", dev_list, l=Msg.DBG) return dev_list + + def merge_path_env(self, env): + """Add nvidia library path into LD_LIBRARY_PATH of env(Uenv)""" + nvi_cont_dir = self._find_cont_dir() + if not nvi_cont_dir: + return + ld_library_path = env.getenv("LD_LIBRARY_PATH") + if not ld_library_path: + ld_library_list = [] + else: + ld_library_list = [p.strip() for p in ld_library_path.split(':')] + if nvi_cont_dir in ld_library_list: + return + ld_library_list.append(nvi_cont_dir) + env.setenv("LD_LIBRARY_PATH", ':'.join(ld_library_list)) + Msg().out("Debug: add nvidia library path", nvi_cont_dir, "to LD_LIBRARY_PATH", l=Msg.DBG)