Skip to content

Commit 06415e0

Browse files
committed
Add memory policy support
Implement support for Linux memory policy as specified in the OCI runtime-spec PR opencontainers/runtime-spec#1282.

Signed-off-by: Antti Kervinen <[email protected]>
1 parent 9902a3d commit 06415e0

File tree

16 files changed

+376
-16
lines changed

16 files changed

+376
-16
lines changed

features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ var featuresCommand = cli.Command{
5858
IntelRdt: &features.IntelRdt{
5959
Enabled: &t,
6060
},
61+
MemoryPolicy: &features.MemoryPolicy{
62+
Modes: specconv.KnownMemoryPolicyModes(),
63+
Flags: specconv.KnownMemoryPolicyFlags(),
64+
},
6165
MountExtensions: &features.MountExtensions{
6266
IDMap: &features.IDMap{
6367
Enabled: &t,

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ require (
1515
github.com/moby/sys/userns v0.1.0
1616
github.com/mrunalp/fileutils v0.5.1
1717
github.com/opencontainers/cgroups v0.0.4
18-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67
18+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6
1919
github.com/opencontainers/selinux v1.12.0
2020
github.com/seccomp/libseccomp-golang v0.11.0
2121
github.com/sirupsen/logrus v1.9.3

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm
4747
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
4848
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4=
4949
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
50-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67 h1:Q+KewUGTMamIe6Q39xCD/T1NC1POmaTlWnhjikCrZHA=
51-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
50+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6 h1:6S6r1L8VO9b1UfgIQi+nteqlElma9KDlzZw/nM3ctI0=
51+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
5252
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
5353
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
5454
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

internal/linux/linux.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -72,3 +73,26 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7273
})
7374
return os.NewSyscallError("sendmsg", err)
7475
}
76+
77+
// bitmaskFromInts converts a list of bit positions into a bitmask stored as
// a slice of 64-bit words. Bit position N is stored in word N/64 at bit N%64.
//
// The returned slice always has at least one word, so an empty or nil input
// yields a single zero word. Its length is just enough to hold the highest
// position found in bits.
//
// Negative positions have no place in the mask and are skipped. Without this
// guard they would cause a runtime panic (a negative shift amount or an
// out-of-range index).
func bitmaskFromInts(bits []int) []uint64 {
	maxBit := 0
	for _, bit := range bits {
		if bit > maxBit {
			maxBit = bit
		}
	}
	mask := make([]uint64, maxBit/64+1)
	for _, bit := range bits {
		if bit < 0 {
			continue
		}
		// Shift a typed uint64 by an unsigned count so the operation
		// is well defined for every position 0..63 within the word.
		mask[bit/64] |= uint64(1) << (uint(bit) % 64)
	}
	return mask
}
90+
91+
// SetMempolicy wraps set_mempolicy.
92+
func SetMempolicy(mode uint, mask *unix.CPUSet) error {
93+
_, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), uintptr(unsafe.Sizeof(*mask)*8))
94+
if errno != 0 {
95+
return os.NewSyscallError("set_mempolicy", errno)
96+
}
97+
return nil
98+
}

libcontainer/configs/config.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ type Config struct {
214214
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
215215
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
216216

217+
// MemoryPolicy specifies NUMA memory policy for the container.
218+
MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
219+
217220
// RootlessEUID is set when the runc was launched with non-zero EUID.
218221
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
219222
// When RootlessEUID is set, runc creates a new userns for the container.
@@ -305,7 +308,8 @@ type CPUAffinity struct {
305308
Initial, Final *unix.CPUSet
306309
}
307310

308-
func toCPUSet(str string) (*unix.CPUSet, error) {
311+
// ToCPUSet parses a string representing a CPU set into a unix.CPUSet.
312+
func ToCPUSet(str string) (*unix.CPUSet, error) {
309313
if str == "" {
310314
return nil, nil
311315
}
@@ -356,7 +360,7 @@ func toCPUSet(str string) (*unix.CPUSet, error) {
356360
}
357361
}
358362
if s.Count() == 0 {
359-
return nil, fmt.Errorf("no CPUs found in %q", str)
363+
return nil, fmt.Errorf("no members found in set %q", str)
360364
}
361365

362366
return s, nil
@@ -367,11 +371,11 @@ func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
367371
if sa == nil {
368372
return nil, nil
369373
}
370-
initial, err := toCPUSet(sa.Initial)
374+
initial, err := ToCPUSet(sa.Initial)
371375
if err != nil {
372376
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
373377
}
374-
final, err := toCPUSet(sa.Final)
378+
final, err := ToCPUSet(sa.Final)
375379
if err != nil {
376380
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
377381
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package configs
2+
3+
import "golang.org/x/sys/unix"
4+
5+
// Memory policy modes, numbered consecutively starting at zero.
const (
	MPOL_DEFAULT             = 0
	MPOL_PREFERRED           = 1
	MPOL_BIND                = 2
	MPOL_INTERLEAVE          = 3
	MPOL_LOCAL               = 4
	MPOL_PREFERRED_MANY      = 5
	MPOL_WEIGHTED_INTERLEAVE = 6
)

// Mode flags. Each flag occupies a single distinct bit.
const (
	MPOL_F_STATIC_NODES   = 1 << 15
	MPOL_F_RELATIVE_NODES = 1 << 14
	MPOL_F_NUMA_BALANCING = 1 << 13
)
20+
21+
// LinuxMemoryPolicy contains memory policy configuration.
22+
type LinuxMemoryPolicy struct {
23+
// Mode specifies memory policy mode without mode flags. See
24+
// set_mempolicy() documentation for details.
25+
Mode uint
26+
// Flags contains mode flags.
27+
Flags []uint
28+
// Nodes contains NUMA nodes to which the mode applies.
29+
// Using unix.CPUSet to represent a bitmask of nodes.
30+
Nodes *unix.CPUSet
31+
}

libcontainer/configs/tocpuset_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ func TestToCPUSet(t *testing.T) {
5959
for _, tc := range testCases {
6060
tc := tc
6161
t.Run(tc.in, func(t *testing.T) {
62-
out, err := toCPUSet(tc.in)
63-
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
62+
out, err := ToCPUSet(tc.in)
63+
t.Logf("ToCPUSet(%q) = %v (error: %v)", tc.in, out, err)
6464
// Check the error.
6565
if tc.isErr {
6666
if err == nil {

libcontainer/configs/validate/validator.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ func Validate(config *configs.Config) error {
3434
mountsStrict,
3535
scheduler,
3636
ioPriority,
37+
memoryPolicy,
3738
}
3839
for _, c := range checks {
3940
if err := c(config); err != nil {
@@ -454,3 +455,26 @@ func ioPriority(config *configs.Config) error {
454455

455456
return nil
456457
}
458+
459+
func memoryPolicy(config *configs.Config) error {
460+
mpol := config.MemoryPolicy
461+
if mpol == nil {
462+
return nil
463+
}
464+
switch mpol.Mode {
465+
case configs.MPOL_DEFAULT, configs.MPOL_LOCAL:
466+
if mpol.Nodes != nil && mpol.Nodes.Count() != 0 {
467+
return fmt.Errorf("memory policy mode requires 0 nodes but got %d", mpol.Nodes.Count())
468+
}
469+
case configs.MPOL_BIND, configs.MPOL_INTERLEAVE,
470+
configs.MPOL_PREFERRED_MANY, configs.MPOL_WEIGHTED_INTERLEAVE:
471+
if mpol.Nodes == nil || mpol.Nodes.Count() == 0 {
472+
return fmt.Errorf("memory policy mode requires at least one node but got 0")
473+
}
474+
case configs.MPOL_PREFERRED:
475+
// Zero or more nodes are allowed by the kernel.
476+
default:
477+
return fmt.Errorf("invalid memory policy mode: %d", mpol.Mode)
478+
}
479+
return nil
480+
}

libcontainer/init_linux.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,18 @@ func setupIOPriority(config *initConfig) error {
659659
return nil
660660
}
661661

662+
func setupMemoryPolicy(config *configs.Config) error {
663+
mpol := config.MemoryPolicy
664+
if mpol == nil {
665+
return nil
666+
}
667+
modeWithFlags := mpol.Mode
668+
for _, flag := range mpol.Flags {
669+
modeWithFlags |= flag
670+
}
671+
return linux.SetMempolicy(modeWithFlags, config.MemoryPolicy.Nodes)
672+
}
673+
662674
func setupPersonality(config *configs.Config) error {
663675
return system.SetLinuxPersonality(config.Personality.Domain)
664676
}

libcontainer/setns_init_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ func (l *linuxSetnsInit) Init() error {
8080
if err := setupIOPriority(l.config); err != nil {
8181
return err
8282
}
83+
84+
if err := setupMemoryPolicy(l.config.Config); err != nil {
85+
return err
86+
}
87+
8388
// Tell our parent that we're ready to exec. This must be done before the
8489
// Seccomp rules have been applied, because we need to be able to read and
8590
// write to a socket.

0 commit comments

Comments
 (0)