Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libct/cg/stats: support PSI for cgroup v2 #3900

Merged
merged 1 commit into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
s.CPU.PSI = cg.CpuStats.PSI

s.CPUSet = types.CPUSet(cg.CPUSetStats)

Expand All @@ -138,6 +139,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Memory.PSI = cg.MemoryStats.PSI

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand All @@ -147,6 +149,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
s.Blkio.PSI = cg.BlkioStats.PSI

s.Hugetlb = make(map[string]types.Hugetlb)
for k, v := range cg.HugetlbStats {
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// PSI (since kernel 4.20).
var err error
Copy link
Member

@lifubang lifubang Jul 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kolyshkin I think this one could be improved. We should write a dedicated PSI stat function for each of cpu, memory and io. For example: add statCpuPressure in cpu.go, statMemoryPressure in memory.go, and statIOPressure in io.go. Then we can keep the code style consistent with statCpu.
But this could be done in a separate PR if the upstream projects urgently need this feature.

if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil {
errs = append(errs, err)
}
if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil {
errs = append(errs, err)
}
if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil {
errs = append(errs, err)
}
// hugetlb (since kernel 5.6)
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
89 changes: 89 additions & 0 deletions libcontainer/cgroups/fs2/psi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package fs2

import (
"bufio"
"errors"
"fmt"
"os"
"strconv"
"strings"

"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// statPSI reads and parses the PSI file named file (for example
// "cpu.pressure") in the cgroup directory dirPath.
//
// It returns (nil, nil) when the file does not exist, or when reading it
// fails with ENOTSUP, so callers can treat missing PSI support as "no data"
// rather than as an error. Blank lines and lines whose first field is neither
// "some" nor "full" are ignored. A line that cannot be parsed yields a
// *parseError carrying dirPath and file.
func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) {
	f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Kernel < 4.20, or CONFIG_PSI is not set,
			// or PSI stats are turned off for the cgroup
			// ("echo 0 > cgroup.pressure", kernel >= 6.1).
			return nil, nil
		}
		return nil, err
	}
	defer f.Close()

	var psistats cgroups.PSIStats
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		parts := strings.Fields(sc.Text())
		if len(parts) == 0 {
			// Blank or whitespace-only line: strings.Fields returns an
			// empty slice, so indexing parts[0] below would panic.
			continue
		}
		var pv *cgroups.PSIData
		switch parts[0] {
		case "some":
			pv = &psistats.Some
		case "full":
			pv = &psistats.Full
		}
		if pv != nil {
			*pv, err = parsePSIData(parts[1:])
			if err != nil {
				return nil, &parseError{Path: dirPath, File: file, Err: err}
			}
		}
	}
	if err := sc.Err(); err != nil {
		if errors.Is(err, unix.ENOTSUP) {
			// Some kernels (e.g. CS9) may return ENOTSUP on read
			// if psi=1 kernel cmdline parameter is required.
			return nil, nil
		}
		return nil, &parseError{Path: dirPath, File: file, Err: err}
	}
	return &psistats, nil
}

// parsePSIData converts the key=value fields of a single PSI line (everything
// after the leading "some"/"full" word) into a cgroups.PSIData.
//
// The keys "avg10", "avg60" and "avg300" are parsed as floats and "total" as
// an unsigned 64-bit integer; any other key is ignored. A field without "="
// or with an unparsable value produces an error, in which case the returned
// data holds only the fields parsed before the failure.
func parsePSIData(psi []string) (cgroups.PSIData, error) {
	var result cgroups.PSIData
	for _, field := range psi {
		sep := strings.IndexByte(field, '=')
		if sep < 0 {
			return result, fmt.Errorf("invalid psi data: %q", field)
		}
		key, val := field[:sep], field[sep+1:]

		// "total" is the only integer-valued key; handle it first.
		if key == "total" {
			n, err := strconv.ParseUint(val, 10, 64)
			if err != nil {
				return result, fmt.Errorf("invalid %s PSI value: %w", key, err)
			}
			result.Total = n
			continue
		}

		// The remaining known keys are all floats; select the destination.
		var dst *float64
		switch key {
		case "avg10":
			dst = &result.Avg10
		case "avg60":
			dst = &result.Avg60
		case "avg300":
			dst = &result.Avg300
		default:
			// Unknown key: skip it without validating its value.
			continue
		}
		x, err := strconv.ParseFloat(val, 64)
		if err != nil {
			return result, fmt.Errorf("invalid %s PSI value: %w", key, err)
		}
		*dst = x
	}
	return result, nil
}
47 changes: 47 additions & 0 deletions libcontainer/cgroups/fs2/psi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package fs2

import (
"os"
"path/filepath"
"reflect"
"testing"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// TestStatCPUPSI writes a sample cpu.pressure file into a temporary directory
// and checks that statPSI parses both its "some" and "full" lines into the
// expected cgroups.PSIStats value.
func TestStatCPUPSI(t *testing.T) {
	const (
		psiFile        = "cpu.pressure"
		examplePSIData = `some avg10=1.71 avg60=2.36 avg300=2.57 total=230548833
full avg10=1.00 avg60=1.01 avg300=1.00 total=157622356`
	)

	want := cgroups.PSIStats{
		Some: cgroups.PSIData{
			Avg10:  1.71,
			Avg60:  2.36,
			Avg300: 2.57,
			Total:  230548833,
		},
		Full: cgroups.PSIData{
			Avg10:  1.00,
			Avg60:  1.01,
			Avg300: 1.00,
			Total:  157622356,
		},
	}

	// We're using a fake cgroupfs.
	cgroups.TestMode = true

	dir := t.TempDir()
	if err := os.WriteFile(filepath.Join(dir, psiFile), []byte(examplePSIData), 0o644); err != nil {
		t.Fatal(err)
	}

	got, err := statPSI(dir, psiFile)
	if err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(*got, want) {
		t.Errorf("unexpected PSI result: %+v", got)
	}
}
15 changes: 15 additions & 0 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,22 @@ type CpuUsage struct {
UsageInUsermode uint64 `json:"usage_in_usermode"`
}

// PSIData holds the values of one line of a cgroup v2 *.pressure file:
// the avg10, avg60 and avg300 fields as floats and the total field as an
// unsigned integer.
// NOTE(review): units are not stated here (presumably percentages for the
// averages and a cumulative time for Total) — confirm against kernel docs.
type PSIData struct {
Avg10 float64 `json:"avg10"`
Avg60 float64 `json:"avg60"`
Avg300 float64 `json:"avg300"`
Total uint64 `json:"total"`
}

// PSIStats holds the parsed "some" and "full" lines of a cgroup v2
// *.pressure file. A line that is absent from the file leaves the
// corresponding field at its zero value.
// NOTE(review): encoding/json's omitempty has no effect on struct-typed
// fields, so both keys are always emitted — confirm whether that is intended.
type PSIStats struct {
Some PSIData `json:"some,omitempty"`
Full PSIData `json:"full,omitempty"`
}

// CpuStats holds CPU usage, throttling data and PSI statistics for a cgroup.
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
// PSI may be nil; it is then omitted from the JSON output. The cgroup v2
// (fs2) manager leaves it nil when cpu.pressure is missing or unsupported.
PSI *PSIStats `json:"psi,omitempty"`
}

type CPUSetStats struct {
Expand Down Expand Up @@ -89,6 +102,7 @@ type MemoryStats struct {
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type PageUsageByNUMA struct {
Expand Down Expand Up @@ -133,6 +147,7 @@ type BlkioStats struct {
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type HugetlbStats struct {
Expand Down
29 changes: 29 additions & 0 deletions tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,35 @@ function teardown() {
[[ "${lines[0]}" == *"data"* ]]
}

# Checks that "runc events --stats" reports CPU PSI data for a container:
# every avg10/avg60/avg300/total value under both "some" and "full" must be
# non-zero. Skipped unless running as root on cgroup v2 with PSI available.
# shellcheck disable=SC2030
@test "events --stats with psi data" {
requires root cgroups_v2 psi
init_cgroup_paths

# Apply a CPU quota to the container.
# NOTE(review): presumably so the workload below is throttled and builds up
# CPU pressure — confirm.
update_config '.linux.resources.cpu |= { "quota": 1000 }'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

# Stress the CPU a bit. Need something that runs for more than 10s.
runc exec test_busybox dd if=/dev/zero bs=1 count=128K of=/dev/null
[ "$status" -eq 0 ]

# Dump the raw pressure files from inside the container; the result is not
# checked (presumably debugging output for failed runs).
runc exec test_busybox sh -c 'tail /sys/fs/cgroup/*.pressure'

runc events --stats test_busybox
[ "$status" -eq 0 ]

# Check PSI metrics.
# Print the whole cpu.psi object first, then assert each metric with jq -e,
# which exits non-zero when the comparison yields false or null.
jq '.data.cpu.psi' <<<"${lines[0]}"
for psi_type in some full; do
for psi_metric in avg10 avg60 avg300 total; do
echo -n "checking .data.cpu.psi.$psi_type.$psi_metric != 0: "
jq -e '.data.cpu.psi.'$psi_type.$psi_metric' != 0' <<<"${lines[0]}"
done
done
}

function test_events() {
# XXX: currently cgroups require root containers.
requires root
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/helpers.bash
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ function requires() {
skip_me=1
fi
;;
psi)
# If PSI is not compiled in the kernel, the file will not exist.
# If PSI is compiled, but not enabled, read will fail with ENOTSUPP.
if ! cat /sys/fs/cgroup/cpu.pressure &>/dev/null; then
skip_me=1
fi
;;
*)
fail "BUG: Invalid requires $var."
;;
Expand Down
12 changes: 11 additions & 1 deletion types/events.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package types

import "github.com/opencontainers/runc/libcontainer/intelrdt"
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
)

// Event struct for encoding the event data to json.
type Event struct {
Expand All @@ -21,6 +24,10 @@ type Stats struct {
NetworkInterfaces []*NetworkInterface `json:"network_interfaces"`
}

// PSIData is an alias for cgroups.PSIData.
type PSIData = cgroups.PSIData

// PSIStats is an alias for cgroups.PSIStats.
type PSIStats = cgroups.PSIStats

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand All @@ -43,6 +50,7 @@ type Blkio struct {
IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"`
IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"`
SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type Pids struct {
Expand All @@ -69,6 +77,7 @@ type CpuUsage struct {
// Cpu holds CPU usage, throttling and PSI statistics.
type Cpu struct {
Usage CpuUsage `json:"usage,omitempty"`
Throttling Throttling `json:"throttling,omitempty"`
// PSI is omitted from the JSON output when nil.
PSI *PSIStats `json:"psi,omitempty"`
}

type CPUSet struct {
Expand Down Expand Up @@ -99,6 +108,7 @@ type Memory struct {
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type L3CacheInfo struct {
Expand Down