Skip to content

Commit 2cfe3c2

Browse files
committed
libct: use pidfd and epoll to wait the init process exit
Signed-off-by: lifubang <lifubang@acmcoder.com>
1 parent 854fb52 commit 2cfe3c2

File tree

3 files changed

+90
-15
lines changed

3 files changed

+90
-15
lines changed

delete.go

+6-13
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,16 @@ import (
55
"fmt"
66
"os"
77
"path/filepath"
8-
"time"
98

109
"github.com/opencontainers/runc/libcontainer"
1110
"github.com/urfave/cli"
12-
13-
"golang.org/x/sys/unix"
1411
)
1512

16-
func killContainer(container *libcontainer.Container) error {
17-
_ = container.Signal(unix.SIGKILL)
18-
for i := 0; i < 100; i++ {
19-
time.Sleep(100 * time.Millisecond)
20-
if err := container.Signal(unix.Signal(0)); err != nil {
21-
return container.Destroy()
22-
}
13+
func killAndDestroy(container *libcontainer.Container) error {
14+
if err := container.EnsureKilled(); err != nil {
15+
return err
2316
}
24-
return errors.New("container init still running")
17+
return container.Destroy()
2518
}
2619

2720
var deleteCommand = cli.Command{
@@ -71,7 +64,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
7164
// namespace) there may be some leftover processes in the
7265
// container's cgroup.
7366
if force {
74-
return killContainer(container)
67+
return killAndDestroy(container)
7568
}
7669
s, err := container.Status()
7770
if err != nil {
@@ -81,7 +74,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
8174
case libcontainer.Stopped:
8275
return container.Destroy()
8376
case libcontainer.Created:
84-
return killContainer(container)
77+
return killAndDestroy(container)
8578
default:
8679
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
8780
}

libcontainer/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ container.Resume()
230230
// send signal to container's init process.
231231
container.Signal(signal)
232232

233+
// send SIGKILL to container's init process and wait for the kernel to finish killing it.
234+
container.EnsureKilled()
235+
233236
// update container resource constraints.
234237
container.Set(config)
235238

libcontainer/container_linux.go

+81-2
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,13 @@ func (c *Container) start(process *Process) (retErr error) {
377377

378378
// Signal sends a specified signal to container's init.
379379
//
380-
// When s is SIGKILL and the container does not have its own PID namespace, all
381-
// the container's processes are killed. In this scenario, the libcontainer
380+
// When s is SIGKILL:
381+
// 1. If the container does not have its own PID namespace, all the
382+
// container's processes are killed. In this scenario, the libcontainer
382383
// user may be required to implement a proper child reaper.
384+
// 2. Otherwise, we just send the SIGKILL signal to the init process,
385+
// but we don't wait for the init process to disappear. If you want to
386+
// wait, please use c.EnsureKilled instead.
383387
func (c *Container) Signal(s os.Signal) error {
384388
c.m.Lock()
385389
defer c.m.Unlock()
@@ -431,6 +435,81 @@ func (c *Container) signal(s os.Signal) error {
431435
return nil
432436
}
433437

438+
func (c *Container) killViaPidfd() error {
439+
pidfd, err := unix.PidfdOpen(c.initProcess.pid(), 0)
440+
if err != nil {
441+
return err
442+
}
443+
defer unix.Close(pidfd)
444+
445+
epollfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
446+
if err != nil {
447+
return err
448+
}
449+
defer unix.Close(epollfd)
450+
451+
event := unix.EpollEvent{
452+
Events: unix.EPOLLIN,
453+
Fd: int32(pidfd),
454+
}
455+
if err := unix.EpollCtl(epollfd, unix.EPOLL_CTL_ADD, pidfd, &event); err != nil {
456+
return err
457+
}
458+
459+
if err := unix.PidfdSendSignal(pidfd, unix.SIGKILL, nil, 0); err != nil {
460+
return err
461+
}
462+
463+
events := make([]unix.EpollEvent, 1)
464+
for {
465+
// Set the timeout to 10s, the same as in kill below.
466+
n, err := unix.EpollWait(epollfd, events, 10000)
467+
if err != nil {
468+
if err == unix.EINTR {
469+
continue
470+
}
471+
return err
472+
}
473+
474+
if n == 0 {
475+
return errors.New("container init still running")
476+
}
477+
478+
if n > 0 {
479+
event := events[0]
480+
if event.Fd == int32(pidfd) {
481+
return nil
482+
}
483+
}
484+
}
485+
}
486+
487+
func (c *Container) kill() error {
488+
_ = c.Signal(unix.SIGKILL)
489+
for i := 0; i < 100; i++ {
490+
time.Sleep(100 * time.Millisecond)
491+
if err := c.Signal(unix.Signal(0)); err != nil {
492+
return nil
493+
}
494+
}
495+
return errors.New("container init still running")
496+
}
497+
498+
// EnsureKilled kills the container and waits for the kernel to finish killing it.
499+
func (c *Container) EnsureKilled() error {
500+
// When a container doesn't have a private pidns, we have to kill all processes
501+
// in the cgroup, so it's simpler to use `cgroup.kill` or `unix.Kill`.
502+
if c.config.Namespaces.IsPrivate(configs.NEWPID) {
503+
err := c.killViaPidfd()
504+
if err == nil {
505+
return nil
506+
}
507+
508+
logrus.Debugf("pidfd & epoll failed, falling back to unix.Signal: %v", err)
509+
}
510+
return c.kill()
511+
}
512+
434513
func (c *Container) createExecFifo() (retErr error) {
435514
rootuid, err := c.config.HostRootUID()
436515
if err != nil {

0 commit comments

Comments
 (0)