Skip to content

Commit b69158a

Browse files
committed
Allow update-ldcache to work when pivot-root is not supported
This change updates the update-ldcache logic to use an alternative to pivot-root when this is not supported. This includes cases where the root filesystem is in a ramfs (e.g. when running from the kata-agent). Signed-off-by: Evan Lezar <[email protected]>
1 parent 25f7bfa commit b69158a

File tree

3 files changed

+146
-1
lines changed

3 files changed

+146
-1
lines changed

internal/ldconfig/ldconfig.go

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ import (
2727
"runtime"
2828
"strings"
2929

30+
"github.com/prometheus/procfs"
31+
3032
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
3133
)
3234

@@ -44,6 +46,7 @@ type Ldconfig struct {
4446
inRoot string
4547
isDebianLikeHost bool
4648
isDebianLikeContainer bool
49+
noPivotRoot bool
4750
directories []string
4851
}
4952

@@ -57,6 +60,11 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
5760
if isDebian() {
5861
args = append(args, "--is-debian-like-host")
5962
}
63+
64+
if noPivotRoot() {
65+
args = append(args, "--no-pivot")
66+
}
67+
6068
args = append(args, additionalargs...)
6169

6270
return createReexecCommand(args)
@@ -76,6 +84,7 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
7684
// The following flags are optional:
7785
//
7886
// --is-debian-like-host Indicates that the host system is debian-based.
87+
// --no-pivot pivot_root should not be used to provide process isolation.
7988
//
8089
// The remaining args are folders where soname symlinks need to be created.
8190
func NewFromArgs(args ...string) (*Ldconfig, error) {
@@ -86,6 +95,7 @@ func NewFromArgs(args ...string) (*Ldconfig, error) {
8695
ldconfigPath := fs.String("ldconfig-path", "", "the path to ldconfig on the host")
8796
containerRoot := fs.String("container-root", "", "the path in which ldconfig must be run")
8897
isDebianLikeHost := fs.Bool("is-debian-like-host", false, "the hook is running from a Debian-like host")
98+
noPivot := fs.Bool("no-pivot", false, "don't use pivot_root to perform isolation")
8999
if err := fs.Parse(args[1:]); err != nil {
90100
return nil, err
91101
}
@@ -101,6 +111,7 @@ func NewFromArgs(args ...string) (*Ldconfig, error) {
101111
ldconfigPath: *ldconfigPath,
102112
inRoot: *containerRoot,
103113
isDebianLikeHost: *isDebianLikeHost,
114+
noPivotRoot: *noPivot,
104115
directories: fs.Args(),
105116
}
106117
return l, nil
@@ -158,7 +169,7 @@ func (l *Ldconfig) prepareRoot() (string, error) {
158169

159170
// We pivot to the container root for the new process, this further limits
160171
// access to the host.
161-
if err := pivotRoot(root.Name()); err != nil {
172+
if err := l.pivotRoot(root); err != nil {
162173
return "", fmt.Errorf("error running pivot_root: %w", err)
163174
}
164175

@@ -339,3 +350,50 @@ func debianSystemSearchPaths() []string {
339350

340351
return paths
341352
}
353+
354+
func (l *Ldconfig) pivotRoot(root *os.Root) error {
355+
rootDir := root.Name()
356+
// We select the function to pivot the root based on whether pivot_root is
357+
// supported.
358+
// See https://github.com/opencontainers/runc/blob/c3d127f6e8d9f6c06d78b8329cafa8dd39f6236e/libcontainer/rootfs_linux.go#L207-L216
359+
if l.noPivotRoot {
360+
return msMoveRoot(rootDir)
361+
}
362+
return pivotRoot(rootDir)
363+
}
364+
365+
// noPivotRoot checks whether the current root filesystem supports a pivot_root.
366+
// See https://github.com/opencontainers/runc/blob/main/libcontainer/SPEC.md#filesystem
367+
// for a discussion on when this is not the case.
368+
// If we fail to detect whether pivot-root is supported, we assume that it is supported.
369+
// The logic to check for support is adapted from kata-containers:
370+
//
371+
// https://github.com/kata-containers/kata-containers/blob/e7b9eddcede4bbe2edeb9c3af7b2358dc65da76f/src/agent/src/sandbox.rs#L150
372+
//
373+
// and checks whether "/" is mounted as a rootfs.
374+
func noPivotRoot() bool {
375+
rootFsType, err := getRootfsType("/")
376+
if err != nil {
377+
return false
378+
}
379+
return rootFsType == "rootfs"
380+
}
381+
382+
func getRootfsType(path string) (string, error) {
383+
procSelf, err := procfs.Self()
384+
if err != nil {
385+
return "", err
386+
}
387+
388+
mountStats, err := procSelf.MountStats()
389+
if err != nil {
390+
return "", err
391+
}
392+
393+
for _, mountStat := range mountStats {
394+
if mountStat.Mount == path {
395+
return mountStat.Type, nil
396+
}
397+
}
398+
return "", fmt.Errorf("mount stats for %q not found", path)
399+
}

internal/ldconfig/ldconfig_linux.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@
2020
package ldconfig
2121

2222
import (
23+
"errors"
2324
"fmt"
2425
"os"
2526
"os/exec"
2627
"path/filepath"
2728
"strconv"
29+
"strings"
2830
"syscall"
2931

3032
"github.com/google/uuid"
33+
"github.com/moby/sys/mountinfo"
3134
"github.com/moby/sys/reexec"
3235

3336
"github.com/opencontainers/runc/libcontainer/utils"
@@ -98,6 +101,86 @@ func pivotRoot(rootfs string) error {
98101
return nil
99102
}
100103

104+
// msMoveRoot is used in cases where pivot root is not supported.
105+
// This includes initramfs filesystems where the root is read-only.
106+
// This is adapted from the implementation here:
107+
//
108+
// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1115
109+
//
110+
// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls.
111+
func msMoveRoot(rootfs string) error {
112+
// Before we move the root and chroot we have to mask all "full" sysfs and
113+
// procfs mounts which exist on the host. This is because while the kernel
114+
// has protections against mounting procfs if it has masks, when using
115+
// chroot(2) the *host* procfs mount is still reachable in the mount
116+
// namespace and the kernel permits procfs mounts inside --no-pivot
117+
// containers.
118+
//
119+
// Users shouldn't be using --no-pivot except in exceptional circumstances,
120+
// but to avoid such a trivial security flaw we apply a best-effort
121+
// protection here. The kernel only allows a mount of a pseudo-filesystem
122+
// like procfs or sysfs if there is a *full* mount (the root of the
123+
// filesystem is mounted) without any other locked mount points covering a
124+
// subtree of the mount.
125+
//
126+
// So we try to unmount (or mount tmpfs on top of) any mountpoint which is
127+
// a full mount of either sysfs or procfs (since those are the most
128+
// concerning filesystems to us).
129+
mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) {
130+
// Collect every sysfs and procfs filesystem, except for those which
131+
// are non-full mounts or are inside the rootfs of the container.
132+
if info.Root != "/" ||
133+
(info.FSType != "proc" && info.FSType != "sysfs") ||
134+
strings.HasPrefix(info.Mountpoint, rootfs) {
135+
skip = true
136+
}
137+
return
138+
})
139+
if err != nil {
140+
return err
141+
}
142+
for _, info := range mountinfos {
143+
p := info.Mountpoint
144+
// Be sure umount events are not propagated to the host.
145+
if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
146+
if errors.Is(err, unix.ENOENT) {
147+
// If the mountpoint doesn't exist that means that we've
148+
// already blasted away some parent directory of the mountpoint
149+
// and so we don't care about this error.
150+
continue
151+
}
152+
return err
153+
}
154+
if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
155+
if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) {
156+
return err
157+
} else {
158+
// If we have not privileges for umounting (e.g. rootless), then
159+
// cover the path.
160+
if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
161+
return err
162+
}
163+
}
164+
}
165+
}
166+
167+
// Move the rootfs on top of "/" in our mount namespace.
168+
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
169+
return err
170+
}
171+
return chroot()
172+
}
173+
174+
func chroot() error {
175+
if err := unix.Chroot("."); err != nil {
176+
return &os.PathError{Op: "chroot", Path: ".", Err: err}
177+
}
178+
if err := unix.Chdir("/"); err != nil {
179+
return &os.PathError{Op: "chdir", Path: "/", Err: err}
180+
}
181+
return nil
182+
}
183+
101184
// mountLdConfig mounts the host ldconfig to the mount namespace of the hook.
102185
// We use WithProcfd to perform the mount operations to ensure that the changes
103186
// are persisted across the pivot root.

internal/ldconfig/ldconfig_other.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ func pivotRoot(newroot string) error {
2929
return fmt.Errorf("not supported")
3030
}
3131

32+
// msMoveRoot is the placeholder used by this file in place of the
// implementation in ldconfig_linux.go. Moving the root is not available here,
// so it always returns a "not supported" error.
func msMoveRoot(rootfs string) error {
	// rootfs is accepted only so that the signature matches the Linux variant.
	return fmt.Errorf("not supported")
}
35+
3236
func mountLdConfig(hostLdconfigPath string, containerRoot *os.Root) (string, error) {
3337
return "", fmt.Errorf("not supported")
3438
}

0 commit comments

Comments
 (0)