Skip to content

Commit 80643c5

Browse files
committed
runc exec: implement CPU affinity
As per opencontainers/runtime-spec#1253 and opencontainers/runtime-spec#1261. Add some tests (alas, it's impossible to test initial CPU affinity without adding debug logging to nsexec). Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
1 parent 6c749bb commit 80643c5

File tree

11 files changed

+278
-7
lines changed

11 files changed

+278
-7
lines changed

libcontainer/configs/config.go

+72
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ package configs
33
import (
44
"bytes"
55
"encoding/json"
6+
"errors"
67
"fmt"
78
"os/exec"
9+
"strconv"
10+
"strings"
811
"time"
912

1013
"github.com/sirupsen/logrus"
@@ -225,6 +228,9 @@ type Config struct {
225228

226229
// IOPriority is the container's I/O priority.
227230
IOPriority *IOPriority `json:"io_priority,omitempty"`
231+
232+
// ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
233+
ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
228234
}
229235

230236
// Scheduler is based on the Linux sched_setattr(2) syscall.
@@ -288,6 +294,72 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
288294

289295
type IOPriority = specs.LinuxIOPriority
290296

297+
type CPUAffinity struct {
298+
Initial, Final *unix.CPUSet
299+
}
300+
301+
func toCPUSet(str string) (*unix.CPUSet, error) {
302+
if str == "" {
303+
return nil, nil
304+
}
305+
s := new(unix.CPUSet)
306+
for _, r := range strings.Split(str, ",") {
307+
// Allow extra spaces around.
308+
r = strings.TrimSpace(r)
309+
// Allow empty elements (extra commas).
310+
if r == "" {
311+
continue
312+
}
313+
if r0, r1, found := strings.Cut(r, "-"); found {
314+
start, err := strconv.ParseUint(r0, 10, 32)
315+
if err != nil {
316+
return nil, err
317+
}
318+
end, err := strconv.ParseUint(r1, 10, 32)
319+
if err != nil {
320+
return nil, err
321+
}
322+
if start > end {
323+
return nil, errors.New("invalid range: " + r)
324+
}
325+
for i := int(start); i <= int(end); i++ {
326+
s.Set(i)
327+
}
328+
} else {
329+
val, err := strconv.ParseUint(r, 10, 32)
330+
if err != nil {
331+
return nil, err
332+
}
333+
s.Set(int(val))
334+
}
335+
}
336+
337+
return s, nil
338+
}
339+
340+
// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity].
341+
func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
342+
if sa == nil {
343+
return nil, nil
344+
}
345+
initial, err := toCPUSet(sa.Initial)
346+
if err != nil {
347+
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
348+
}
349+
final, err := toCPUSet(sa.Final)
350+
if err != nil {
351+
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
352+
}
353+
if initial == nil && final == nil {
354+
return nil, nil
355+
}
356+
357+
return &CPUAffinity{
358+
Initial: initial,
359+
Final: final,
360+
}, nil
361+
}
362+
291363
type (
292364
HookName string
293365
HookList []Hook

libcontainer/container_linux.go

+4
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
697697
AppArmorProfile: c.config.AppArmorProfile,
698698
ProcessLabel: c.config.ProcessLabel,
699699
Rlimits: c.config.Rlimits,
700+
CPUAffinity: c.config.ExecCPUAffinity,
700701
CreateConsole: process.ConsoleSocket != nil,
701702
ConsoleWidth: process.ConsoleWidth,
702703
ConsoleHeight: process.ConsoleHeight,
@@ -713,6 +714,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
713714
if len(process.Rlimits) > 0 {
714715
cfg.Rlimits = process.Rlimits
715716
}
717+
if process.CPUAffinity != nil {
718+
cfg.CPUAffinity = process.CPUAffinity
719+
}
716720
if cgroups.IsCgroup2UnifiedMode() {
717721
cfg.Cgroup2Path = c.cgroupManager.Path("")
718722
}

libcontainer/init_linux.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ type initConfig struct {
7171
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
7272
SpecState *specs.State `json:"spec_state,omitempty"`
7373
Cgroup2Path string `json:"cgroup2_path,omitempty"`
74+
CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"`
7475
}
7576

7677
// Init is part of "runc init" implementation.
@@ -150,7 +151,7 @@ func startInitialization() (retErr error) {
150151

151152
logrus.SetOutput(logPipe)
152153
logrus.SetFormatter(new(logrus.JSONFormatter))
153-
logrus.Debug("child process in init()")
154+
logrus.Debugf("child process in init()")
154155

155156
// Only init processes have FIFOFD.
156157
var fifoFile *os.File

libcontainer/nsenter/log.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ void setup_logpipe(void)
3131
loglevel = i;
3232
}
3333

34+
bool log_enabled_for(int level)
35+
{
36+
return (logfd >= 0 && level <= loglevel);
37+
}
38+
3439
/* Defined in nsexec.c */
3540
extern int current_stage;
3641

@@ -40,8 +45,8 @@ void write_log(int level, const char *format, ...)
4045
va_list args;
4146
int ret;
4247

43-
if (logfd < 0 || level > loglevel)
44-
goto out;
48+
if (!log_enabled_for(level))
49+
return;
4550

4651
va_start(args, format);
4752
ret = vasprintf(&message, format, args);

libcontainer/nsenter/log.h

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef NSENTER_LOG_H
22
#define NSENTER_LOG_H
33

4+
#include <stdbool.h>
45
#include <stdio.h>
56

67
/*
@@ -20,6 +21,8 @@
2021
*/
2122
void setup_logpipe(void);
2223

24+
bool log_enabled_for(int level);
25+
2326
void write_log(int level, const char *format, ...) __attribute__((format(printf, 2, 3)));
2427

2528
extern int logfd;

libcontainer/nsenter/nsexec.c

+29
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,25 @@ static void update_timens_offsets(pid_t pid, char *map, size_t map_len)
673673
bail("failed to update /proc/%d/timens_offsets", pid);
674674
}
675675

676+
void print_cpu_affinity()
677+
{
678+
cpu_set_t cpus = { };
679+
size_t i, mask = 0;
680+
681+
if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) {
682+
write_log(WARNING, "sched_getaffinity: %m");
683+
return;
684+
}
685+
686+
/* Do not print the complete mask, we only need a few first CPUs. */
687+
for (i = 0; i < sizeof(mask) * 8; i++) {
688+
if (CPU_ISSET(i, &cpus))
689+
mask |= 1 << i;
690+
}
691+
692+
write_log(DEBUG, "affinity: 0x%zx", mask);
693+
}
694+
676695
void nsexec(void)
677696
{
678697
int pipenum;
@@ -699,6 +718,16 @@ void nsexec(void)
699718

700719
write_log(DEBUG, "=> nsexec container setup");
701720

721+
/* This is for ../../tests/integration/cpu_affinity.bats test only.
722+
*
723+
* Printing this from Go code might be too late as some kernels
724+
* change the process' CPU affinity to that of container's cpuset
725+
* as soon as the process is moved into container's cgroup.
726+
*/
727+
if (log_enabled_for(DEBUG)) {
728+
print_cpu_affinity();
729+
}
730+
702731
/* Parse all of the netlink configuration. */
703732
nl_parse(pipenum, &config);
704733

libcontainer/process.go

+2
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ type Process struct {
102102
Scheduler *configs.Scheduler
103103

104104
IOPriority *configs.IOPriority
105+
106+
CPUAffinity *configs.CPUAffinity
105107
}
106108

107109
// Wait waits for the process to exit.

libcontainer/process_linux.go

+47-4
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,53 @@ type setnsProcess struct {
163163
initProcessPid int
164164
}
165165

166+
// Starts setns process with specified initial CPU affinity.
167+
func (p *setnsProcess) startWithCPUAffinity() error {
168+
aff := p.config.CPUAffinity
169+
if aff == nil || aff.Initial == nil {
170+
return p.cmd.Start()
171+
}
172+
errCh := make(chan error)
173+
defer close(errCh)
174+
175+
// Use a goroutine to dedicate an OS thread.
176+
go func() {
177+
runtime.LockOSThread()
178+
// Command inherits the CPU affinity.
179+
if err := unix.SchedSetaffinity(unix.Gettid(), aff.Initial); err != nil {
180+
runtime.UnlockOSThread()
181+
errCh <- fmt.Errorf("error setting initial CPU affinity: %w", err)
182+
return
183+
}
184+
185+
errCh <- p.cmd.Start()
186+
// Deliberately omit runtime.UnlockOSThread here.
187+
// https://pkg.go.dev/runtime#LockOSThread says:
188+
// "If the calling goroutine exits without unlocking the
189+
// thread, the thread will be terminated".
190+
}()
191+
192+
return <-errCh
193+
}
194+
195+
func (p *setnsProcess) setFinalCPUAffinity() error {
196+
aff := p.config.CPUAffinity
197+
if aff == nil || aff.Final == nil {
198+
return nil
199+
}
200+
if err := unix.SchedSetaffinity(p.pid(), aff.Final); err != nil {
201+
return fmt.Errorf("error setting final CPU affinity: %w", err)
202+
}
203+
return nil
204+
}
205+
166206
func (p *setnsProcess) start() (retErr error) {
167207
defer p.comm.closeParent()
168208

169-
// get the "before" value of oom kill count
209+
// Get the "before" value of oom kill count.
170210
oom, _ := p.manager.OOMKillCount()
171-
err := p.cmd.Start()
172-
// close the child-side of the pipes (controlled by child)
211+
err := p.startWithCPUAffinity()
212+
// Close the child-side of the pipes (controlled by child).
173213
p.comm.closeChild()
174214
if err != nil {
175215
return fmt.Errorf("error starting setns process: %w", err)
@@ -219,6 +259,10 @@ func (p *setnsProcess) start() (retErr error) {
219259
}
220260
}
221261
}
262+
// Set final CPU affinity right after the process is moved into container's cgroup.
263+
if err := p.setFinalCPUAffinity(); err != nil {
264+
return err
265+
}
222266
if p.intelRdtPath != "" {
223267
// if Intel RDT "resource control" filesystem path exists
224268
_, err := os.Stat(p.intelRdtPath)
@@ -228,7 +272,6 @@ func (p *setnsProcess) start() (retErr error) {
228272
}
229273
}
230274
}
231-
232275
if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil {
233276
return fmt.Errorf("error writing config to pipe: %w", err)
234277
}

libcontainer/specconv/spec_linux.go

+5
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
556556
ioPriority := *spec.Process.IOPriority
557557
config.IOPriority = &ioPriority
558558
}
559+
config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity)
560+
if err != nil {
561+
return nil, err
562+
}
563+
559564
}
560565
createHooks(spec, config)
561566
config.Version = specs.Version

0 commit comments

Comments
 (0)