diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 302708fcb..f297deb8b 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -102,9 +102,9 @@ jobs: - uses: actions/checkout@v3 - name: vars run: | - echo 'VERSION=v0.7.2' >> $GITHUB_ENV + echo 'VERSION=v0.8.0' >> $GITHUB_ENV echo 'BASEURL=https://github.com/koalaman/shellcheck/releases/download' >> $GITHUB_ENV - echo 'SHA256SUM=12ee2e0b90a3d1e9cae24ac9b2838be66b48573cb2c8e8f3c566b959df6f050c' >> $GITHUB_ENV + echo 'SHA256SUM=f4bce23c11c3919c1b20bcb0f206f6b44c44e26f2bc95f8aa708716095fa0651' >> $GITHUB_ENV echo ~/bin >> $GITHUB_PATH - name: install shellcheck run: | diff --git a/features.go b/features.go index c9cd15cd0..c86adc0a2 100644 --- a/features.go +++ b/features.go @@ -59,10 +59,12 @@ var featuresCommand = cli.Command{ if seccomp.Enabled { feat.Linux.Seccomp = &features.Seccomp{ - Enabled: &tru, - Actions: seccomp.KnownActions(), - Operators: seccomp.KnownOperators(), - Archs: seccomp.KnownArchs(), + Enabled: &tru, + Actions: seccomp.KnownActions(), + Operators: seccomp.KnownOperators(), + Archs: seccomp.KnownArchs(), + KnownFlags: seccomp.KnownFlags(), + SupportedFlags: seccomp.SupportedFlags(), } major, minor, patch := seccomp.Version() feat.Annotations[features.AnnotationLibseccompVersion] = fmt.Sprintf("%d.%d.%d", major, minor, patch) diff --git a/go.mod b/go.mod index 7d5e04b16..d458f1d41 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/godbus/dbus/v5 v5.0.6 github.com/moby/sys/mountinfo v0.5.0 github.com/mrunalp/fileutils v0.5.0 - github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 + github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b github.com/opencontainers/selinux v1.10.0 github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 github.com/sirupsen/logrus v1.8.1 diff --git a/go.sum b/go.sum index b813e8bb7..62b15705a 100644 --- a/go.sum +++ b/go.sum @@ -33,8 +33,8 @@ github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9 github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b h1:udwtfS44rxYE/ViMLchHQBjfE60GZSB1arY7BFbyxLs= +github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index c1b4a0041..643f8bbd9 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -31,12 +31,13 @@ type IDMap struct { // for syscalls. Additional architectures can be added by specifying them in // Architectures. type Seccomp struct { - DefaultAction Action `json:"default_action"` - Architectures []string `json:"architectures"` - Syscalls []*Syscall `json:"syscalls"` - DefaultErrnoRet *uint `json:"default_errno_ret"` - ListenerPath string `json:"listener_path,omitempty"` - ListenerMetadata string `json:"listener_metadata,omitempty"` + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Flags []specs.LinuxSeccompFlag `json:"flags"` + Syscalls []*Syscall `json:"syscalls"` + DefaultErrnoRet *uint `json:"default_errno_ret"` + ListenerPath string `json:"listener_path,omitempty"` + ListenerMetadata string `json:"listener_metadata,omitempty"` } // Action is taken upon rule match in Seccomp diff --git a/libcontainer/seccomp/config.go b/libcontainer/seccomp/config.go index 98e08e8f0..8b866bc1f 100644 --- a/libcontainer/seccomp/config.go +++ b/libcontainer/seccomp/config.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" ) var operators = map[string]configs.Operator{ @@ -110,3 +111,35 @@ func ConvertStringToArch(in string) (string, error) { } return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) } + +// List of flags known to this version of runc. +var flags = []string{ + "SECCOMP_FILTER_FLAG_TSYNC", + string(specs.LinuxSeccompFlagSpecAllow), + string(specs.LinuxSeccompFlagLog), +} + +// KnownFlags returns the list of the known filter flags. +// Used by `runc features`. +func KnownFlags() []string { + return flags +} + +// SupportedFlags returns the list of the supported filter flags. +// This list may be a subset of one returned by KnownFlags due to +// some flags not supported by the current kernel and/or libseccomp. +// Used by `runc features`. +func SupportedFlags() []string { + if !Enabled { + return nil + } + + var res []string + for _, flag := range flags { + if FlagSupported(specs.LinuxSeccompFlag(flag)) == nil { + res = append(res, flag) + } + } + + return res +} diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go index 7d4ec6a42..14345269b 100644 --- a/libcontainer/seccomp/patchbpf/enosys_linux.go +++ b/libcontainer/seccomp/patchbpf/enosys_linux.go @@ -43,6 +43,11 @@ const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER; #endif const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG; +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#endif +const uintptr_t C_FILTER_FLAG_SPEC_ALLOW = SECCOMP_FILTER_FLAG_SPEC_ALLOW; + #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER # define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) #endif @@ -629,8 +634,13 @@ func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags flags |= uint(C.C_FILTER_FLAG_LOG) } } - - // TODO: Support seccomp flags not yet added to libseccomp-golang... + if apiLevel >= 4 { + if ssb, err := filter.GetSSB(); err != nil { + return 0, false, fmt.Errorf("unable to fetch SECCOMP_FILTER_FLAG_SPEC_ALLOW bit: %w", err) + } else if ssb { + flags |= uint(C.C_FILTER_FLAG_SPEC_ALLOW) + } + } for _, call := range config.Syscalls { if call.Action == configs.Notify { @@ -643,6 +653,9 @@ func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags } func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err error) { + // This debug output is validated in tests/integration/seccomp.bats + // by the SECCOMP_FILTER_FLAG_* test. + logrus.Debugf("seccomp filter flags: %d", flags) fprog := unix.SockFprog{ Len: uint16(len(filter)), Filter: &filter[0], diff --git a/libcontainer/seccomp/seccomp_linux.go b/libcontainer/seccomp/seccomp_linux.go index 8c12af72b..030bd4c51 100644 --- a/libcontainer/seccomp/seccomp_linux.go +++ b/libcontainer/seccomp/seccomp_linux.go @@ -13,6 +13,7 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf" + "github.com/opencontainers/runtime-spec/specs-go" ) var ( @@ -86,6 +87,13 @@ func InitSeccomp(config *configs.Seccomp) (int, error) { } } + // Add extra flags. + for _, flag := range config.Flags { + if err := setFlag(filter, flag); err != nil { + return -1, err + } + } + // Unset no new privs bit if err := filter.SetNoNewPrivsBit(false); err != nil { return -1, fmt.Errorf("error setting no new privileges: %w", err) @@ -110,6 +118,67 @@ func InitSeccomp(config *configs.Seccomp) (int, error) { return seccompFd, nil } +type unknownFlagError struct { + flag specs.LinuxSeccompFlag +} + +func (e *unknownFlagError) Error() string { + return "seccomp flag " + string(e.flag) + " is not known to runc" +} + +func setFlag(filter *libseccomp.ScmpFilter, flag specs.LinuxSeccompFlag) error { + switch flag { + case "SECCOMP_FILTER_FLAG_TSYNC": + // libseccomp-golang always use filterAttrTsync when + // possible so all goroutines will receive the same + // rules, so there is nothing to do. It does not make + // sense to apply the seccomp filter on only one + // thread; other threads will be terminated after exec + // anyway. + return nil + case specs.LinuxSeccompFlagLog: + if err := filter.SetLogBit(true); err != nil { + return fmt.Errorf("error adding log flag to seccomp filter: %w", err) + } + return nil + case specs.LinuxSeccompFlagSpecAllow: + if err := filter.SetSSB(true); err != nil { + return fmt.Errorf("error adding SSB flag to seccomp filter: %w", err) + } + return nil + } + // NOTE when adding more flags above, do not forget to also: + // - add new flags to `flags` array in config.go; + // - add new flags to tests/integration/seccomp.bats flags test; + // - modify func filterFlags in patchbpf/ accordingly. + + return &unknownFlagError{flag: flag} +} + +// FlagSupported checks if the flag is known to runc and supported by +// currently used libseccomp and kernel (i.e. it can be set). +func FlagSupported(flag specs.LinuxSeccompFlag) error { + filter := &libseccomp.ScmpFilter{} + err := setFlag(filter, flag) + + // For flags we don't know, setFlag returns unknownFlagError. + var uf *unknownFlagError + if errors.As(err, &uf) { + return err + } + // For flags that are known to runc and libseccomp-golang but can not + // be applied because either libseccomp or the kernel is too old, + // seccomp.VersionError is returned. + var verErr *libseccomp.VersionError + if errors.As(err, &verErr) { + // Not supported by libseccomp or the kernel. + return err + } + + // All other flags are known and supported. + return nil +} + // Convert Libcontainer Action to Libseccomp ScmpAction func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) { switch act { diff --git a/libcontainer/seccomp/seccomp_unsupported.go b/libcontainer/seccomp/seccomp_unsupported.go index be2b324e0..885529dc7 100644 --- a/libcontainer/seccomp/seccomp_unsupported.go +++ b/libcontainer/seccomp/seccomp_unsupported.go @@ -7,6 +7,7 @@ import ( "errors" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" ) var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") @@ -19,6 +20,11 @@ func InitSeccomp(config *configs.Seccomp) (int, error) { return -1, nil } +// FlagSupported tells if a provided seccomp flag is supported. +func FlagSupported(_ specs.LinuxSeccompFlag) error { + return ErrSeccompNotEnabled +} + // Version returns major, minor, and micro. func Version() (uint, uint, uint) { return 0, 0, 0 diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index c7ca4c8af..8816d459a 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -1015,14 +1015,28 @@ func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { return nil, nil } - // We don't currently support seccomp flags. - if len(config.Flags) != 0 { - return nil, errors.New("seccomp flags are not yet supported by runc") - } - newConfig := new(configs.Seccomp) newConfig.Syscalls = []*configs.Syscall{} + // The list of flags defined in runtime-spec is a subset of the flags + // in the seccomp() syscall. + if config.Flags == nil { + // No flags are set explicitly (not even the empty set); + // set the default of specs.LinuxSeccompFlagSpecAllow, + // if it is supported by the libseccomp and the kernel. + if err := seccomp.FlagSupported(specs.LinuxSeccompFlagSpecAllow); err == nil { + newConfig.Flags = []specs.LinuxSeccompFlag{specs.LinuxSeccompFlagSpecAllow} + } + } else { + // Fail early if some flags are unknown or unsupported. + for _, flag := range config.Flags { + if err := seccomp.FlagSupported(flag); err != nil { + return nil, err + } + newConfig.Flags = append(newConfig.Flags, flag) + } + } + if len(config.Architectures) > 0 { newConfig.Architectures = []string{} for _, arch := range config.Architectures { diff --git a/tests/integration/events.bats b/tests/integration/events.bats index 9d28420e5..a8fe52baf 100644 --- a/tests/integration/events.bats +++ b/tests/integration/events.bats @@ -10,6 +10,7 @@ function teardown() { teardown_bundle } +# shellcheck disable=SC2030 @test "events --stats" { # XXX: currently cgroups require root containers. requires root @@ -38,6 +39,7 @@ function test_events() { fi runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + # shellcheck disable=SC2031 [ "$status" -eq 0 ] # Spawn two subshels: diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index ea68bfc86..b55d1056e 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -26,7 +26,7 @@ TESTDATA="${INTEGRATION_ROOT}/testdata" # Kernel version KERNEL_VERSION="$(uname -r)" KERNEL_MAJOR="${KERNEL_VERSION%%.*}" -KERNEL_MINOR="${KERNEL_VERSION#$KERNEL_MAJOR.}" +KERNEL_MINOR="${KERNEL_VERSION#"$KERNEL_MAJOR".}" KERNEL_MINOR="${KERNEL_MINOR%%.*}" ARCH=$(uname -m) diff --git a/tests/integration/seccomp.bats b/tests/integration/seccomp.bats index e81beca33..2700e4353 100644 --- a/tests/integration/seccomp.bats +++ b/tests/integration/seccomp.bats @@ -66,6 +66,92 @@ function teardown() { [[ "$output" == *"Network is down"* ]] } +# Prints the numeric value of provided seccomp flags combination. +# The parameter is flags string, as supplied in OCI spec, for example +# '"SECCOMP_FILTER_FLAG_TSYNC","SECCOMP_FILTER_FLAG_LOG"'. +function flags_value() { + # Numeric values of seccomp flags. + declare -A values=( + ['"SECCOMP_FILTER_FLAG_TSYNC"']=0 # Supported but ignored by runc, thus 0. + ['"SECCOMP_FILTER_FLAG_LOG"']=2 + ['"SECCOMP_FILTER_FLAG_SPEC_ALLOW"']=4 + # XXX: add new values above this line. + ) + # Split the flags. + IFS=',' read -ra flags <<<"$1" + + local flag v sum=0 + for flag in "${flags[@]}"; do + # This will produce "values[$flag]: unbound variable" + # error for a new flag yet unknown to the test. + v=${values[$flag]} + ((sum += v)) || true + done + + echo $sum +} + +@test "runc run [seccomp] (SECCOMP_FILTER_FLAG_*)" { + update_config ' .process.args = ["/bin/sh", "-c", "mkdir /dev/shm/foo"] + | .process.noNewPrivileges = false + | .linux.seccomp = { + "defaultAction":"SCMP_ACT_ALLOW", + "architectures":["SCMP_ARCH_X86","SCMP_ARCH_X32","SCMP_ARCH_X86_64","SCMP_ARCH_AARCH64","SCMP_ARCH_ARM"], + "syscalls":[{"names":["mkdir"], "action":"SCMP_ACT_ERRNO"}] + }' + + # Get the list of flags supported by runc/seccomp/kernel, + # or "null" if no flags are supported or runc is too old. + mapfile -t flags < <(__runc features | jq -c '.linux.seccomp.supportedFlags' | + tr -d '[]\n' | tr ',' '\n') + + # This is a set of all possible flag combinations to test. + declare -A TEST_CASES=( + ['EMPTY']=0 # Special value: empty set of flags. + ['REMOVE']=0 # Special value: no flags set. + ) + + # If supported, runc should set SPEC_ALLOW if no flags are set. + if [[ " ${flags[*]} " == *' "SECCOMP_FILTER_FLAG_SPEC_ALLOW" '* ]]; then + TEST_CASES['REMOVE']=$(flags_value '"SECCOMP_FILTER_FLAG_SPEC_ALLOW"') + fi + + # Add all possible combinations of seccomp flags + # and their expected numeric values to TEST_CASES. + if [ "${flags[0]}" != "null" ]; then + # Use shell {a,}{b,}{c,} to generate the powerset. + for fc in $(eval echo "$(printf "{'%s,',}" "${flags[@]}")"); do + # Remove the last comma. + fc="${fc/%,/}" + TEST_CASES[$fc]=$(flags_value "$fc") + done + fi + + # Finally, run the tests. + for key in "${!TEST_CASES[@]}"; do + case "$key" in + 'REMOVE') + update_config ' del(.linux.seccomp.flags)' + ;; + 'EMPTY') + update_config ' .linux.seccomp.flags = []' + ;; + *) + update_config ' .linux.seccomp.flags = [ '"${key}"' ]' + ;; + esac + + runc --debug run test_busybox + [ "$status" -ne 0 ] + [[ "$output" == *"mkdir:"*"/dev/shm/foo"*"Operation not permitted"* ]] + + # Check the numeric flags value, as printed in the debug log, is as expected. + exp="\"seccomp filter flags: ${TEST_CASES[$key]}\"" + echo "flags $key, expecting $exp" + [[ "$output" == *"$exp"* ]] + done +} + @test "runc run [seccomp] (SCMP_ACT_KILL)" { update_config ' .process.args = ["/bin/sh", "-c", "mkdir /dev/shm/foo"] | .process.noNewPrivileges = false diff --git a/types/features/features.go b/types/features/features.go index c6269ca63..4ea629eea 100644 --- a/types/features/features.go +++ b/types/features/features.go @@ -53,13 +53,23 @@ type Seccomp struct { // Nil value means "unknown", not "no support for any action". Actions []string `json:"actions,omitempty"` - // Operators is the list of the recognized actions, e.g., "SCMP_CMP_NE". + // Operators is the list of the recognized operators, e.g., "SCMP_CMP_NE". // Nil value means "unknown", not "no support for any operator". Operators []string `json:"operators,omitempty"` - // Operators is the list of the recognized archs, e.g., "SCMP_ARCH_X86_64". + // Archs is the list of the recognized archs, e.g., "SCMP_ARCH_X86_64". // Nil value means "unknown", not "no support for any arch". Archs []string `json:"archs,omitempty"` + + // KnownFlags is the list of the recognized filter flags, e.g., "SECCOMP_FILTER_FLAG_LOG". + // Nil value means "unknown", not "no flags are recognized". + KnownFlags []string `json:"knownFlags,omitempty"` + + // SupportedFlags is the list of the supported filter flags, e.g., "SECCOMP_FILTER_FLAG_LOG". + // This list may be a subset of KnownFlags due to some flags + // not supported by the current kernel and/or libseccomp. + // Nil value means "unknown", not "no flags are supported". + SupportedFlags []string `json:"supportedFlags,omitempty"` } // Apparmor represents the "apparmor" field. diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go index 6a7a91e55..cf1b338c8 100644 --- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -15,7 +15,7 @@ type Spec struct { // Mounts configures additional mounts (on top of Root). Mounts []Mount `json:"mounts,omitempty"` // Hooks configures callbacks for container lifecycle events. - Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` + Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"` // Annotations contains arbitrary metadata for the container. Annotations map[string]string `json:"annotations,omitempty"` @@ -27,6 +27,8 @@ type Spec struct { Windows *Windows `json:"windows,omitempty" platform:"windows"` // VM specifies configuration for virtual-machine-based containers. VM *VM `json:"vm,omitempty" platform:"vm"` + // ZOS is platform-specific configuration for z/OS based containers. + ZOS *ZOS `json:"zos,omitempty" platform:"zos"` } // Process contains information to start a specific application inside the container. @@ -49,7 +51,7 @@ type Process struct { // Capabilities are Linux capabilities that are kept for the process. Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` // Rlimits specifies rlimit options to apply to the process. - Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` + Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"` // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` // ApparmorProfile specifies the apparmor profile for the container. @@ -86,11 +88,11 @@ type Box struct { // User specifies specific user (and group) information for the container process. type User struct { // UID is the user id. - UID uint32 `json:"uid" platform:"linux,solaris"` + UID uint32 `json:"uid" platform:"linux,solaris,zos"` // GID is the group id. - GID uint32 `json:"gid" platform:"linux,solaris"` + GID uint32 `json:"gid" platform:"linux,solaris,zos"` // Umask is the umask for the init process. - Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris"` + Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"` // AdditionalGids are additional group ids set for the container's process. AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"` // Username is the user name. @@ -110,11 +112,16 @@ type Mount struct { // Destination is the absolute path where the mount will be placed in the container. Destination string `json:"destination"` // Type specifies the mount kind. - Type string `json:"type,omitempty" platform:"linux,solaris"` + Type string `json:"type,omitempty" platform:"linux,solaris,zos"` // Source specifies the source path of the mount. Source string `json:"source,omitempty"` // Options are fstab style mount options. Options []string `json:"options,omitempty"` + + // UID/GID mappings used for changing file owners w/o calling chown, fs should support it. + // Every mount point could have its own mapping. + UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty" platform:"linux"` + GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty" platform:"linux"` } // Hook specifies a command that is run at a particular event in the lifecycle of a container @@ -178,7 +185,7 @@ type Linux struct { // MountLabel specifies the selinux context for the mounts in the container. MountLabel string `json:"mountLabel,omitempty"` // IntelRdt contains Intel Resource Director Technology (RDT) information for - // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container + // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` // Personality contains configuration for the Linux personality syscall Personality *LinuxPersonality `json:"personality,omitempty"` @@ -250,8 +257,8 @@ type LinuxInterfacePriority struct { Priority uint32 `json:"priority"` } -// linuxBlockIODevice holds major:minor format supported in blkio cgroup -type linuxBlockIODevice struct { +// LinuxBlockIODevice holds major:minor format supported in blkio cgroup +type LinuxBlockIODevice struct { // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. @@ -260,7 +267,7 @@ type linuxBlockIODevice struct { // LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice type LinuxWeightDevice struct { - linuxBlockIODevice + LinuxBlockIODevice // Weight is the bandwidth rate for the device. Weight *uint16 `json:"weight,omitempty"` // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only @@ -269,7 +276,7 @@ type LinuxWeightDevice struct { // LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair type LinuxThrottleDevice struct { - linuxBlockIODevice + LinuxBlockIODevice // Rate is the IO rate limit per cgroup per device Rate uint64 `json:"rate"` } @@ -328,6 +335,8 @@ type LinuxCPU struct { Cpus string `json:"cpus,omitempty"` // List of memory nodes in the cpuset. Default is to use any available memory node. Mems string `json:"mems,omitempty"` + // cgroups are configured with minimum weight, 0: default behavior, 1: SCHED_IDLE. + Idle *int64 `json:"idle,omitempty"` } // LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3) @@ -522,11 +531,21 @@ type WindowsMemoryResources struct { // WindowsCPUResources contains CPU resource management settings. type WindowsCPUResources struct { - // Number of CPUs available to the container. + // Count is the number of CPUs available to the container. It represents the + // fraction of the configured processor `count` in a container in relation + // to the processors available in the host. The fraction ultimately + // determines the portion of processor cycles that the threads in a + // container can use during each scheduling interval, as the number of + // cycles per 10,000 cycles. Count *uint64 `json:"count,omitempty"` - // CPU shares (relative weight to other containers with cpu shares). + // Shares limits the share of processor time given to the container relative + // to other workloads on the processor. The processor `shares` (`weight` at + // the platform level) is a value between 0 and 10000. Shares *uint16 `json:"shares,omitempty"` - // Specifies the portion of processor cycles that this container can use as a percentage times 100. + // Maximum determines the portion of processor cycles that the threads in a + // container can use during each scheduling interval, as the number of + // cycles per 10,000 cycles. Set processor `maximum` to a percentage times + // 100. Maximum *uint16 `json:"maximum,omitempty"` } @@ -613,6 +632,19 @@ type Arch string // LinuxSeccompFlag is a flag to pass to seccomp(2). type LinuxSeccompFlag string +const ( + // LinuxSeccompFlagLog is a seccomp flag to request all returned + // actions except SECCOMP_RET_ALLOW to be logged. An administrator may + // override this filter flag by preventing specific actions from being + // logged via the /proc/sys/kernel/seccomp/actions_logged file. (since + // Linux 4.14) + LinuxSeccompFlagLog LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_LOG" + + // LinuxSeccompFlagSpecAllow can be used to disable Speculative Store + // Bypass mitigation. (since Linux 4.17) + LinuxSeccompFlagSpecAllow LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_SPEC_ALLOW" +) + // Additional architectures permitted to be used for system calls // By default only the native architecture of the kernel is permitted const ( @@ -683,8 +715,9 @@ type LinuxSyscall struct { Args []LinuxSeccompArg `json:"args,omitempty"` } -// LinuxIntelRdt has container runtime resource constraints for Intel RDT -// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel +// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA +// features and flags enabling Intel RDT CMT and MBM features. +// Intel RDT features are available in Linux 4.14 and newer kernel versions. type LinuxIntelRdt struct { // The identity for RDT Class of Service ClosID string `json:"closID,omitempty"` @@ -697,4 +730,36 @@ type LinuxIntelRdt struct { // The unit of memory bandwidth is specified in "percentages" by // default, and in "MBps" if MBA Software Controller is enabled. MemBwSchema string `json:"memBwSchema,omitempty"` + + // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of + // the last-level cache (LLC) occupancy for the container. + EnableCMT bool `json:"enableCMT,omitempty"` + + // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of + // total and local memory bandwidth for the container. + EnableMBM bool `json:"enableMBM,omitempty"` +} + +// ZOS contains platform-specific configuration for z/OS based containers. +type ZOS struct { + // Devices are a list of device nodes that are created for the container + Devices []ZOSDevice `json:"devices,omitempty"` +} + +// ZOSDevice represents the mknod information for a z/OS special device file +type ZOSDevice struct { + // Path to the device. + Path string `json:"path"` + // Device type, block, char, etc. + Type string `json:"type"` + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` + // FileMode permission bits for the device. + FileMode *os.FileMode `json:"fileMode,omitempty"` + // UID of the device. + UID *uint32 `json:"uid,omitempty"` + // Gid of the device. + GID *uint32 `json:"gid,omitempty"` } diff --git a/vendor/modules.txt b/vendor/modules.txt index 346a5f654..932b18a1c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -34,7 +34,7 @@ github.com/moby/sys/mountinfo # github.com/mrunalp/fileutils v0.5.0 ## explicit github.com/mrunalp/fileutils -# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 +# github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b ## explicit github.com/opencontainers/runtime-spec/specs-go # github.com/opencontainers/selinux v1.10.0