Skip to content

Commit 770728e

Browse files
utam0klifubang
andcommitted
Support process.scheduler
Spec: opencontainers/runtime-spec#1188 Fix: #3895 Co-authored-by: lifubang <lifubang@acmcoder.com> Signed-off-by: utam0k <k0ma@utam0k.jp> Signed-off-by: lifubang <lifubang@acmcoder.com>
1 parent 634280f commit 770728e

11 files changed

+208
-1
lines changed

docs/spec-conformance.md

-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack
1212
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
1313
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
1414
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
15-
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
1615
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)
1716

1817

libcontainer/configs/config.go

+63
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/sirupsen/logrus"
11+
"golang.org/x/sys/unix"
1112

1213
"github.com/opencontainers/runc/libcontainer/devices"
1314
"github.com/opencontainers/runtime-spec/specs-go"
@@ -219,6 +220,68 @@ type Config struct {
219220

220221
// TimeOffsets specifies the offset for supporting time namespaces.
221222
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`
223+
224+
// Scheduler represents the scheduling attributes for a process.
225+
Scheduler *Scheduler `json:"scheduler,omitempty"`
226+
}
227+
228+
// Scheduler is based on the Linux sched_setattr(2) syscall.
229+
type Scheduler = specs.Scheduler
230+
231+
// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr.
232+
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
233+
var policy uint32
234+
switch scheduler.Policy {
235+
case specs.SchedOther:
236+
policy = 0
237+
case specs.SchedFIFO:
238+
policy = 1
239+
case specs.SchedRR:
240+
policy = 2
241+
case specs.SchedBatch:
242+
policy = 3
243+
case specs.SchedISO:
244+
policy = 4
245+
case specs.SchedIdle:
246+
policy = 5
247+
case specs.SchedDeadline:
248+
policy = 6
249+
default:
250+
return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
251+
}
252+
253+
var flags uint64
254+
for _, flag := range scheduler.Flags {
255+
switch flag {
256+
case specs.SchedFlagResetOnFork:
257+
flags |= 0x01
258+
case specs.SchedFlagReclaim:
259+
flags |= 0x02
260+
case specs.SchedFlagDLOverrun:
261+
flags |= 0x04
262+
case specs.SchedFlagKeepPolicy:
263+
flags |= 0x08
264+
case specs.SchedFlagKeepParams:
265+
flags |= 0x10
266+
case specs.SchedFlagUtilClampMin:
267+
flags |= 0x20
268+
case specs.SchedFlagUtilClampMax:
269+
flags |= 0x40
270+
default:
271+
return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
272+
}
273+
}
274+
275+
return &unix.SchedAttr{
276+
Size: unix.SizeofSchedAttr,
277+
Policy: policy,
278+
Flags: flags,
279+
Nice: scheduler.Nice,
280+
Priority: uint32(scheduler.Priority),
281+
Runtime: scheduler.Runtime,
282+
Deadline: scheduler.Deadline,
283+
Period: scheduler.Period,
284+
}, nil
222285
}
223286

224287
type (

libcontainer/configs/validate/validator.go

+23
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/opencontainers/runc/libcontainer/cgroups"
1212
"github.com/opencontainers/runc/libcontainer/configs"
1313
"github.com/opencontainers/runc/libcontainer/intelrdt"
14+
"github.com/opencontainers/runtime-spec/specs-go"
1415
selinux "github.com/opencontainers/selinux/go-selinux"
1516
"github.com/sirupsen/logrus"
1617
"golang.org/x/sys/unix"
@@ -30,6 +31,7 @@ func Validate(config *configs.Config) error {
3031
intelrdtCheck,
3132
rootlessEUIDCheck,
3233
mountsStrict,
34+
scheduler,
3335
}
3436
for _, c := range checks {
3537
if err := c(config); err != nil {
@@ -353,3 +355,24 @@ func isHostNetNS(path string) (bool, error) {
353355

354356
return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
355357
}
358+
359+
// scheduler is to validate scheduler configs according to https://man7.org/linux/man-pages/man2/sched_setattr.2.html
360+
func scheduler(config *configs.Config) error {
361+
s := config.Scheduler
362+
if s == nil {
363+
return nil
364+
}
365+
if s.Policy == "" {
366+
return errors.New("scheduler policy is required")
367+
}
368+
if s.Nice < -20 || s.Nice > 19 {
369+
return fmt.Errorf("invalid scheduler.nice: %d", s.Nice)
370+
}
371+
if s.Priority != 0 && (s.Policy != specs.SchedFIFO && s.Policy != specs.SchedRR) {
372+
return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy")
373+
}
374+
if s.Policy != specs.SchedDeadline && (s.Runtime != 0 || s.Deadline != 0 || s.Period != 0) {
375+
return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy")
376+
}
377+
return nil
378+
}

libcontainer/configs/validate/validator_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -616,3 +616,53 @@ func TestValidateIDMapMounts(t *testing.T) {
616616
})
617617
}
618618
}
619+
620+
func TestValidateScheduler(t *testing.T) {
621+
testCases := []struct {
622+
isErr bool
623+
policy string
624+
niceValue int32
625+
priority int32
626+
runtime uint64
627+
deadline uint64
628+
period uint64
629+
}{
630+
{isErr: true, niceValue: 0},
631+
{isErr: false, policy: "SCHED_OTHER", niceValue: 19},
632+
{isErr: false, policy: "SCHED_OTHER", niceValue: -20},
633+
{isErr: true, policy: "SCHED_OTHER", niceValue: 20},
634+
{isErr: true, policy: "SCHED_OTHER", niceValue: -21},
635+
{isErr: true, policy: "SCHED_OTHER", priority: 100},
636+
{isErr: false, policy: "SCHED_FIFO", priority: 100},
637+
{isErr: true, policy: "SCHED_FIFO", runtime: 20},
638+
{isErr: true, policy: "SCHED_BATCH", deadline: 30},
639+
{isErr: true, policy: "SCHED_IDLE", period: 40},
640+
{isErr: true, policy: "SCHED_DEADLINE", priority: 100},
641+
{isErr: false, policy: "SCHED_DEADLINE", runtime: 200},
642+
{isErr: false, policy: "SCHED_DEADLINE", deadline: 300},
643+
{isErr: false, policy: "SCHED_DEADLINE", period: 400},
644+
}
645+
646+
for _, tc := range testCases {
647+
scheduler := configs.Scheduler{
648+
Policy: specs.LinuxSchedulerPolicy(tc.policy),
649+
Nice: tc.niceValue,
650+
Priority: tc.priority,
651+
Runtime: tc.runtime,
652+
Deadline: tc.deadline,
653+
Period: tc.period,
654+
}
655+
config := &configs.Config{
656+
Rootfs: "/var",
657+
Scheduler: &scheduler,
658+
}
659+
660+
err := Validate(config)
661+
if tc.isErr && err == nil {
662+
t.Errorf("scheduler: %d, expected error, got nil", tc.niceValue)
663+
}
664+
if !tc.isErr && err != nil {
665+
t.Errorf("scheduler: %d, expected nil, got error %v", tc.niceValue, err)
666+
}
667+
}
668+
}

libcontainer/init_linux.go

+14
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
640640
return nil
641641
}
642642

643+
func setupScheduler(config *configs.Config) error {
644+
attr, err := configs.ToSchedAttr(config.Scheduler)
645+
if err != nil {
646+
return err
647+
}
648+
if err := unix.SchedSetAttr(0, attr, 0); err != nil {
649+
if errors.Is(err, unix.EPERM) && config.Cgroups.CpusetCpus != "" {
650+
return errors.New("process scheduler can't be used together with AllowedCPUs")
651+
}
652+
return fmt.Errorf("error setting scheduler: %w", err)
653+
}
654+
return nil
655+
}
656+
643657
// signalAllProcesses freezes then iterates over all the processes inside the
644658
// manager's cgroups sending the signal s to them.
645659
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {

libcontainer/process.go

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ type Process struct {
9595
//
9696
// For cgroup v2, the only key allowed is "".
9797
SubCgroupPaths map[string]string
98+
99+
Scheduler *configs.Scheduler
98100
}
99101

100102
// Wait waits for the process to exit.

libcontainer/setns_init_linux.go

+6
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ func (l *linuxSetnsInit) Init() error {
6565
unix.Umask(int(*l.config.Config.Umask))
6666
}
6767

68+
if l.config.Config.Scheduler != nil {
69+
if err := setupScheduler(l.config.Config); err != nil {
70+
return err
71+
}
72+
}
73+
6874
if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
6975
return err
7076
}

libcontainer/specconv/spec_linux.go

+4
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
494494
Ambient: spec.Process.Capabilities.Ambient,
495495
}
496496
}
497+
if spec.Process.Scheduler != nil {
498+
s := *spec.Process.Scheduler
499+
config.Scheduler = &s
500+
}
497501
}
498502
createHooks(spec, config)
499503
config.Version = specs.Version

libcontainer/standard_init_linux.go

+7
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,13 @@ func (l *linuxStandardInit) Init() error {
159159
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
160160
}
161161
}
162+
163+
if l.config.Config.Scheduler != nil {
164+
if err := setupScheduler(l.config.Config); err != nil {
165+
return err
166+
}
167+
}
168+
162169
// Tell our parent that we're ready to Execv. This must be done before the
163170
// Seccomp rules have been applied, because we need to be able to read and
164171
// write to a socket.

tests/integration/scheduler.bats

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bats
2+
3+
load helpers
4+
5+
function setup() {
6+
requires root
7+
setup_debian
8+
}
9+
10+
function teardown() {
11+
teardown_bundle
12+
}
13+
14+
@test "scheduler is applied" {
15+
update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'
16+
17+
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
18+
[ "$status" -eq 0 ]
19+
20+
runc exec test_scheduler chrt -p 1
21+
[ "$status" -eq 0 ]
22+
23+
[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]]
24+
[[ "${lines[1]}" == *"priority: 0" ]]
25+
[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]]
26+
}
27+
28+
@test "scheduler vs cpus" {
29+
update_config ' .linux.resources.cpu.cpus = "0"
30+
| .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'
31+
32+
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
33+
[ "$status" -eq 1 ]
34+
}

utils_linux.go

+5
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
6161
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
6262
}
6363

64+
if p.Scheduler != nil {
65+
s := *p.Scheduler
66+
lp.Scheduler = &s
67+
}
68+
6469
if p.Capabilities != nil {
6570
lp.Capabilities = &configs.Capabilities{}
6671
lp.Capabilities.Bounding = p.Capabilities.Bounding

0 commit comments

Comments
 (0)