Skip to content

Commit 10ca66b

Browse files
committed
runc exec: implement CPU affinity
As per - opencontainers/runtime-spec#1253 - opencontainers/runtime-spec#1261 CPU affinity can be set in two ways: 1. When creating/starting a container, in config.json's Process.ExecCPUAffinity, which is then applied to all execs. 2. When running an exec, in process.json's CPUAffinity, which is applied to a given exec and overrides the value from (1). Add some basic tests. Note that older kernels (RHEL8, Ubuntu 20.04) change CPU affinity of a process to that of a container's cgroup, as soon as it is moved to that cgroup, while newer kernels (Ubuntu 24.04, Fedora 41) don't do that. Because of the above, - it's impossible to really test initial CPU affinity without adding debug logging to libcontainer/nsenter; - for older kernels, there can be a brief moment when exec's affinity is different than either initial or final affinity being set; - exec's final CPU affinity, if not specified, can be different depending on the kernel, therefore we don't test it. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
1 parent 701516b commit 10ca66b

13 files changed

+389
-5
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323
methods no longer ignore `Process.IOPriority` and `Process.Scheduler`
2424
settings. (#4585)
2525

26+
### Added
27+
* CPU affinity support for `runc exec`. (#4327)
28+
2629
## [1.2.5] - 2025-02-13
2730

2831
> Мороз и солнце; день чудесный!

libcontainer/configs/config.go

+91
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,13 @@ package configs
33
import (
44
"bytes"
55
"encoding/json"
6+
"errors"
67
"fmt"
78
"os/exec"
9+
"strconv"
10+
"strings"
811
"time"
12+
"unsafe"
913

1014
"github.com/sirupsen/logrus"
1115
"golang.org/x/sys/unix"
@@ -225,6 +229,9 @@ type Config struct {
225229

226230
// IOPriority is the container's I/O priority.
227231
IOPriority *IOPriority `json:"io_priority,omitempty"`
232+
233+
// ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
234+
ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
228235
}
229236

230237
// Scheduler is based on the Linux sched_setattr(2) syscall.
@@ -288,6 +295,90 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
288295

289296
type IOPriority = specs.LinuxIOPriority
290297

298+
type CPUAffinity struct {
299+
Initial, Final *unix.CPUSet
300+
}
301+
302+
func toCPUSet(str string) (*unix.CPUSet, error) {
303+
if str == "" {
304+
return nil, nil
305+
}
306+
s := new(unix.CPUSet)
307+
308+
// Since (*CPUset).Set silently ignores too high CPU values,
309+
// find out what the maximum is, and return an error.
310+
maxCPU := uint64(unsafe.Sizeof(*s) * 8)
311+
toInt := func(v string) (int, error) {
312+
ret, err := strconv.ParseUint(v, 10, 32)
313+
if err != nil {
314+
return 0, err
315+
}
316+
if ret >= maxCPU {
317+
return 0, fmt.Errorf("values larger than %d are not supported", maxCPU-1)
318+
}
319+
return int(ret), nil
320+
}
321+
322+
for _, r := range strings.Split(str, ",") {
323+
// Allow extra spaces around.
324+
r = strings.TrimSpace(r)
325+
// Allow empty elements (extra commas).
326+
if r == "" {
327+
continue
328+
}
329+
if r0, r1, found := strings.Cut(r, "-"); found {
330+
start, err := toInt(r0)
331+
if err != nil {
332+
return nil, err
333+
}
334+
end, err := toInt(r1)
335+
if err != nil {
336+
return nil, err
337+
}
338+
if start > end {
339+
return nil, errors.New("invalid range: " + r)
340+
}
341+
for i := start; i <= end; i++ {
342+
s.Set(i)
343+
}
344+
} else {
345+
val, err := toInt(r)
346+
if err != nil {
347+
return nil, err
348+
}
349+
s.Set(val)
350+
}
351+
}
352+
if s.Count() == 0 {
353+
return nil, fmt.Errorf("no CPUs found in %q", str)
354+
}
355+
356+
return s, nil
357+
}
358+
359+
// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity].
360+
func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
361+
if sa == nil {
362+
return nil, nil
363+
}
364+
initial, err := toCPUSet(sa.Initial)
365+
if err != nil {
366+
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
367+
}
368+
final, err := toCPUSet(sa.Final)
369+
if err != nil {
370+
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
371+
}
372+
if initial == nil && final == nil {
373+
return nil, nil
374+
}
375+
376+
return &CPUAffinity{
377+
Initial: initial,
378+
Final: final,
379+
}, nil
380+
}
381+
291382
type (
292383
HookName string
293384
HookList []Hook

libcontainer/configs/tocpuset_test.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package configs
2+
3+
import (
4+
"testing"
5+
6+
"golang.org/x/sys/unix"
7+
)
8+
9+
func TestToCPUSet(t *testing.T) {
10+
set := func(cpus ...int) *unix.CPUSet {
11+
r := &unix.CPUSet{}
12+
for _, cpu := range cpus {
13+
r.Set(cpu)
14+
}
15+
return r
16+
}
17+
18+
testCases := []struct {
19+
in string
20+
out *unix.CPUSet
21+
isErr bool
22+
}{
23+
{in: ""}, // Empty means unset.
24+
25+
// Valid cases.
26+
{in: "0", out: &unix.CPUSet{1}},
27+
{in: "1", out: &unix.CPUSet{2}},
28+
{in: "0-1", out: &unix.CPUSet{3}},
29+
{in: "0,1", out: &unix.CPUSet{3}},
30+
{in: ",0,1,", out: &unix.CPUSet{3}},
31+
{in: "0-3", out: &unix.CPUSet{0x0f}},
32+
{in: "0,1,2-3", out: &unix.CPUSet{0x0f}},
33+
{in: "4-7", out: &unix.CPUSet{0xf0}},
34+
{in: "0-7", out: &unix.CPUSet{0xff}},
35+
{in: "0-15", out: &unix.CPUSet{0xffff}},
36+
{in: "16", out: &unix.CPUSet{0x10000}},
37+
// Extra whitespace in between ranges are OK.
38+
{in: "1, 2, 1-2", out: &unix.CPUSet{6}},
39+
{in: " , 1 , 3 , 5-7, ", out: &unix.CPUSet{0xea}},
40+
// Somewhat large values. The underlying type in unix.CPUSet
41+
// can either be uint32 or uint64, so we have to use a helper.
42+
{in: "0-3,32-33", out: set(0, 1, 2, 3, 32, 33)},
43+
{in: "127-129, 1", out: set(1, 127, 128, 129)},
44+
{in: "1023", out: set(1023)},
45+
46+
// Error cases.
47+
{in: "-", isErr: true},
48+
{in: "1-", isErr: true},
49+
{in: "-3", isErr: true},
50+
{in: ",", isErr: true},
51+
{in: " ", isErr: true},
52+
// Bad range (start > end).
53+
{in: "54-53", isErr: true},
54+
// Extra spaces inside a range is not OK.
55+
{in: "1 - 2", isErr: true},
56+
{in: "1024", isErr: true}, // Too big for unix.CPUSet.
57+
}
58+
59+
for _, tc := range testCases {
60+
tc := tc
61+
t.Run(tc.in, func(t *testing.T) {
62+
out, err := toCPUSet(tc.in)
63+
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
64+
// Check the error.
65+
if tc.isErr {
66+
if err == nil {
67+
t.Error("want error, got nil")
68+
}
69+
return // No more checks.
70+
}
71+
if err != nil {
72+
t.Fatalf("want no error, got %v", err)
73+
}
74+
// Check the value.
75+
if tc.out == nil {
76+
if out != nil {
77+
t.Fatalf("want nil, got %v", out)
78+
}
79+
return // No more checks.
80+
}
81+
if out == nil {
82+
t.Fatalf("want %v, got nil", tc.out)
83+
}
84+
if *out != *tc.out {
85+
t.Errorf("case %q: want %v, got %v", tc.in, tc.out, out)
86+
}
87+
})
88+
}
89+
}

libcontainer/container_linux.go

+4
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
709709
Rlimits: c.config.Rlimits,
710710
IOPriority: c.config.IOPriority,
711711
Scheduler: c.config.Scheduler,
712+
CPUAffinity: c.config.ExecCPUAffinity,
712713
CreateConsole: process.ConsoleSocket != nil,
713714
ConsoleWidth: process.ConsoleWidth,
714715
ConsoleHeight: process.ConsoleHeight,
@@ -737,6 +738,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
737738
if process.Scheduler != nil {
738739
cfg.Scheduler = process.Scheduler
739740
}
741+
if process.CPUAffinity != nil {
742+
cfg.CPUAffinity = process.CPUAffinity
743+
}
740744

741745
// Set misc properties.
742746

libcontainer/init_linux.go

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ type initConfig struct {
8383
Rlimits []configs.Rlimit `json:"rlimits"`
8484
IOPriority *configs.IOPriority `json:"io_priority,omitempty"`
8585
Scheduler *configs.Scheduler `json:"scheduler,omitempty"`
86+
CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"`
8687

8788
// Miscellaneous properties, filled in by [Container.newInitConfig]
8889
// unless documented otherwise.

libcontainer/nsenter/log.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ void setup_logpipe(void)
3131
loglevel = i;
3232
}
3333

34+
bool log_enabled_for(int level)
35+
{
36+
return (logfd >= 0 && level <= loglevel);
37+
}
38+
3439
/* Defined in nsexec.c */
3540
extern int current_stage;
3641

@@ -40,8 +45,8 @@ void write_log(int level, const char *format, ...)
4045
va_list args;
4146
int ret;
4247

43-
if (logfd < 0 || level > loglevel)
44-
goto out;
48+
if (!log_enabled_for(level))
49+
return;
4550

4651
va_start(args, format);
4752
ret = vasprintf(&message, format, args);

libcontainer/nsenter/log.h

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef NSENTER_LOG_H
22
#define NSENTER_LOG_H
33

4+
#include <stdbool.h>
45
#include <stdio.h>
56

67
/*
@@ -20,6 +21,8 @@
2021
*/
2122
void setup_logpipe(void);
2223

24+
bool log_enabled_for(int level);
25+
2326
void write_log(int level, const char *format, ...) __attribute__((format(printf, 2, 3)));
2427

2528
extern int logfd;

libcontainer/nsenter/nsexec.c

+31
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,28 @@ static void update_timens_offsets(pid_t pid, char *map, size_t map_len)
673673
bail("failed to update /proc/%d/timens_offsets", pid);
674674
}
675675

676+
static void log_cpu_affinity()
677+
{
678+
cpu_set_t cpus = { };
679+
size_t i, mask = 0;
680+
681+
if (!log_enabled_for(DEBUG))
682+
return;
683+
684+
if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) {
685+
write_log(WARNING, "sched_getaffinity: %m");
686+
return;
687+
}
688+
689+
/* Do not print the complete mask, we only need a few first CPUs. */
690+
for (i = 0; i < sizeof(mask) * 8; i++) {
691+
if (CPU_ISSET(i, &cpus))
692+
mask |= 1 << i;
693+
}
694+
695+
write_log(DEBUG, "affinity: 0x%zx", mask);
696+
}
697+
676698
void nsexec(void)
677699
{
678700
int pipenum;
@@ -699,6 +721,15 @@ void nsexec(void)
699721

700722
write_log(DEBUG, "=> nsexec container setup");
701723

724+
/* Log initial CPU affinity, this is solely for the tests in
725+
* ../../tests/integration/cpu_affinity.bats.
726+
*
727+
* Logging this from Go code might be too late as some kernels
728+
* change the process' CPU affinity to that of container's cpuset
729+
* as soon as the process is moved into container's cgroup.
730+
*/
731+
log_cpu_affinity();
732+
702733
/* Parse all of the netlink configuration. */
703734
nl_parse(pipenum, &config);
704735

libcontainer/process.go

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ type Process struct {
121121
//
122122
// If not empty, takes precedence over container's [configs.Config.IOPriority].
123123
IOPriority *configs.IOPriority
124+
125+
CPUAffinity *configs.CPUAffinity
124126
}
125127

126128
// Wait waits for the process to exit.

0 commit comments

Comments
 (0)