diff options
Diffstat (limited to 'vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go')
-rw-r--r-- | vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go | 990 |
1 files changed, 0 insertions, 990 deletions
diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go b/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go deleted file mode 100644 index fc9fcf453..000000000 --- a/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go +++ /dev/null @@ -1,990 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package cgroup2 - -import ( - "bufio" - "context" - "errors" - "fmt" - "io" - "math" - "os" - "path/filepath" - "strconv" - "strings" - "syscall" - "time" - - "github.com/containerd/cgroups/v3/cgroup2/stats" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - "github.com/godbus/dbus/v5" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -const ( - subtreeControl = "cgroup.subtree_control" - controllersFile = "cgroup.controllers" - killFile = "cgroup.kill" - defaultCgroup2Path = "/sys/fs/cgroup" - defaultSlice = "system.slice" -) - -var ( - canDelegate bool -) - -type Event struct { - Low uint64 - High uint64 - Max uint64 - OOM uint64 - OOMKill uint64 -} - -// Resources for a cgroups v2 unified hierarchy -type Resources struct { - CPU *CPU - Memory *Memory - Pids *Pids - IO *IO - RDMA *RDMA - HugeTlb *HugeTlb - // When len(Devices) is zero, devices are not controlled - Devices []specs.LinuxDeviceCgroup -} - -// Values returns the raw filenames and values that -// can be written to the unified hierarchy -func (r *Resources) Values() (o []Value) { - if r.CPU != nil { - o = append(o, r.CPU.Values()...) - } - if r.Memory != nil { - o = append(o, r.Memory.Values()...) - } - if r.Pids != nil { - o = append(o, r.Pids.Values()...) - } - if r.IO != nil { - o = append(o, r.IO.Values()...) - } - if r.RDMA != nil { - o = append(o, r.RDMA.Values()...) - } - if r.HugeTlb != nil { - o = append(o, r.HugeTlb.Values()...) - } - return o -} - -// EnabledControllers returns the list of all not nil resource controllers -func (r *Resources) EnabledControllers() (c []string) { - if r.CPU != nil { - c = append(c, "cpu") - c = append(c, "cpuset") - } - if r.Memory != nil { - c = append(c, "memory") - } - if r.Pids != nil { - c = append(c, "pids") - } - if r.IO != nil { - c = append(c, "io") - } - if r.RDMA != nil { - c = append(c, "rdma") - } - if r.HugeTlb != nil { - c = append(c, "hugetlb") - } - return -} - -// Value of a cgroup setting -type Value struct { - filename string - value interface{} -} - -// write the value to the full, absolute path, of a unified hierarchy -func (c *Value) write(path string, perm os.FileMode) error { - var data []byte - switch t := c.value.(type) { - case uint64: - data = []byte(strconv.FormatUint(t, 10)) - case uint16: - data = []byte(strconv.FormatUint(uint64(t), 10)) - case int64: - data = []byte(strconv.FormatInt(t, 10)) - case []byte: - data = t - case string: - data = []byte(t) - case CPUMax: - data = []byte(t) - default: - return ErrInvalidFormat - } - - return os.WriteFile( - filepath.Join(path, c.filename), - data, - perm, - ) -} - -func writeValues(path string, values []Value) error { - for _, o := range values { - if err := o.write(path, defaultFilePerm); err != nil { - return err - } - } - return nil -} - -func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) { - if resources == nil { - return nil, errors.New("resources reference is nil") - } - if err := VerifyGroupPath(group); err != nil { - return nil, err - } - path := filepath.Join(mountpoint, group) - if err := os.MkdirAll(path, defaultDirPerm); err != nil { - return nil, err - } - m := Manager{ - unifiedMountpoint: mountpoint, - path: path, - } - if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil { - // clean up cgroup dir on failure - os.Remove(path) - return nil, err - } - if err := setResources(path, resources); err != nil { - os.Remove(path) - return nil, err - } - return &m, nil -} - -type InitConfig struct { - mountpoint string -} - -type InitOpts func(c *InitConfig) error - -// WithMountpoint sets the unified mountpoint. The default path is /sys/fs/cgroup. -func WithMountpoint(path string) InitOpts { - return func(c *InitConfig) error { - c.mountpoint = path - return nil - } -} - -// Load a cgroup. -func Load(group string, opts ...InitOpts) (*Manager, error) { - c := InitConfig{mountpoint: defaultCgroup2Path} - for _, opt := range opts { - if err := opt(&c); err != nil { - return nil, err - } - } - - if err := VerifyGroupPath(group); err != nil { - return nil, err - } - path := filepath.Join(c.mountpoint, group) - return &Manager{ - unifiedMountpoint: c.mountpoint, - path: path, - }, nil -} - -type Manager struct { - unifiedMountpoint string - path string -} - -func setResources(path string, resources *Resources) error { - if resources != nil { - if err := writeValues(path, resources.Values()); err != nil { - return err - } - if err := setDevices(path, resources.Devices); err != nil { - return err - } - } - return nil -} - -func (c *Manager) RootControllers() ([]string, error) { - b, err := os.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile)) - if err != nil { - return nil, err - } - return strings.Fields(string(b)), nil -} - -func (c *Manager) Controllers() ([]string, error) { - b, err := os.ReadFile(filepath.Join(c.path, controllersFile)) - if err != nil { - return nil, err - } - return strings.Fields(string(b)), nil -} - -func (c *Manager) Update(resources *Resources) error { - return setResources(c.path, resources) -} - -type ControllerToggle int - -const ( - Enable ControllerToggle = iota + 1 - Disable -) - -func toggleFunc(controllers []string, prefix string) []string { - out := make([]string, len(controllers)) - for i, c := range controllers { - out[i] = prefix + c - } - return out -} - -func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error { - // when c.path is like /foo/bar/baz, the following files need to be written: - // * /sys/fs/cgroup/cgroup.subtree_control - // * /sys/fs/cgroup/foo/cgroup.subtree_control - // * /sys/fs/cgroup/foo/bar/cgroup.subtree_control - // Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written. - split := strings.Split(c.path, "/") - var lastErr error - for i := range split { - f := strings.Join(split[:i], "/") - if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path { - continue - } - filePath := filepath.Join(f, subtreeControl) - if err := c.writeSubtreeControl(filePath, controllers, t); err != nil { - // When running as rootless, the user may face EPERM on parent groups, but it is neglible when the - // controller is already written. - // So we only return the last error. - lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err) - } else { - lastErr = nil - } - } - return lastErr -} - -func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error { - f, err := os.OpenFile(filePath, os.O_WRONLY, 0) - if err != nil { - return err - } - defer f.Close() - switch t { - case Enable: - controllers = toggleFunc(controllers, "+") - case Disable: - controllers = toggleFunc(controllers, "-") - } - _, err = f.WriteString(strings.Join(controllers, " ")) - return err -} - -func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) { - if strings.HasPrefix(name, "/") { - return nil, errors.New("name must be relative") - } - path := filepath.Join(c.path, name) - if err := os.MkdirAll(path, defaultDirPerm); err != nil { - return nil, err - } - m := Manager{ - unifiedMountpoint: c.unifiedMountpoint, - path: path, - } - if resources != nil { - if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil { - // clean up cgroup dir on failure - os.Remove(path) - return nil, err - } - } - if err := setResources(path, resources); err != nil { - // clean up cgroup dir on failure - os.Remove(path) - return nil, err - } - return &m, nil -} - -func (c *Manager) AddProc(pid uint64) error { - v := Value{ - filename: cgroupProcs, - value: pid, - } - return writeValues(c.path, []Value{v}) -} - -func (c *Manager) AddThread(tid uint64) error { - v := Value{ - filename: cgroupThreads, - value: tid, - } - return writeValues(c.path, []Value{v}) -} - -// Kill will try to forcibly exit all of the processes in the cgroup. This is -// equivalent to sending a SIGKILL to every process. On kernels 5.14 and greater -// this will use the cgroup.kill file, on anything that doesn't have the cgroup.kill -// file, a manual process of freezing -> sending a SIGKILL to every process -> thawing -// will be used. -func (c *Manager) Kill() error { - v := Value{ - filename: killFile, - value: "1", - } - err := writeValues(c.path, []Value{v}) - if err == nil { - return nil - } - logrus.Warnf("falling back to slower kill implementation: %s", err) - // Fallback to slow method. - return c.fallbackKill() -} - -// fallbackKill is a slower fallback to the more modern (kernels 5.14+) -// approach of writing to the cgroup.kill file. This is heavily pulled -// from runc's same approach (in signalAllProcesses), with the only differences -// being this is just tailored to the API exposed in this library, and we don't -// need to care about signals other than SIGKILL. -// -// https://github.com/opencontainers/runc/blob/8da0a0b5675764feaaaaad466f6567a9983fcd08/libcontainer/init_linux.go#L523-L529 -func (c *Manager) fallbackKill() error { - if err := c.Freeze(); err != nil { - logrus.Warn(err) - } - pids, err := c.Procs(true) - if err != nil { - if err := c.Thaw(); err != nil { - logrus.Warn(err) - } - return err - } - var procs []*os.Process - for _, pid := range pids { - p, err := os.FindProcess(int(pid)) - if err != nil { - logrus.Warn(err) - continue - } - procs = append(procs, p) - if err := p.Signal(unix.SIGKILL); err != nil { - logrus.Warn(err) - } - } - if err := c.Thaw(); err != nil { - logrus.Warn(err) - } - - subreaper, err := getSubreaper() - if err != nil { - // The error here means that PR_GET_CHILD_SUBREAPER is not - // supported because this code might run on a kernel older - // than 3.4. We don't want to throw an error in that case, - // and we simplify things, considering there is no subreaper - // set. - subreaper = 0 - } - - for _, p := range procs { - // In case a subreaper has been setup, this code must not - // wait for the process. Otherwise, we cannot be sure the - // current process will be reaped by the subreaper, while - // the subreaper might be waiting for this process in order - // to retrieve its exit code. - if subreaper == 0 { - if _, err := p.Wait(); err != nil { - if !errors.Is(err, unix.ECHILD) { - logrus.Warnf("wait on pid %d failed: %s", p.Pid, err) - } - } - } - } - return nil -} - -func (c *Manager) Delete() error { - // kernel prevents cgroups with running process from being removed, check the tree is empty - processes, err := c.Procs(true) - if err != nil { - return err - } - if len(processes) > 0 { - return fmt.Errorf("cgroups: unable to remove path %q: still contains running processes", c.path) - } - return remove(c.path) -} - -func (c *Manager) Procs(recursive bool) ([]uint64, error) { - var processes []uint64 - err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !recursive && info.IsDir() { - if p == c.path { - return nil - } - return filepath.SkipDir - } - _, name := filepath.Split(p) - if name != cgroupProcs { - return nil - } - procs, err := parseCgroupProcsFile(p) - if err != nil { - return err - } - processes = append(processes, procs...) - return nil - }) - return processes, err -} - -func (c *Manager) MoveTo(destination *Manager) error { - processes, err := c.Procs(true) - if err != nil { - return err - } - for _, p := range processes { - if err := destination.AddProc(p); err != nil { - if strings.Contains(err.Error(), "no such process") { - continue - } - return err - } - } - return nil -} - -var singleValueFiles = []string{ - "pids.current", - "pids.max", -} - -func (c *Manager) Stat() (*stats.Metrics, error) { - controllers, err := c.Controllers() - if err != nil { - return nil, err - } - out := make(map[string]interface{}) - for _, controller := range controllers { - switch controller { - case "cpu", "memory": - if err := readKVStatsFile(c.path, controller+".stat", out); err != nil { - if os.IsNotExist(err) { - continue - } - return nil, err - } - } - } - for _, name := range singleValueFiles { - if err := readSingleFile(c.path, name, out); err != nil { - if os.IsNotExist(err) { - continue - } - return nil, err - } - } - memoryEvents := make(map[string]interface{}) - if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil { - if !os.IsNotExist(err) { - return nil, err - } - } - var metrics stats.Metrics - - metrics.Pids = &stats.PidsStat{ - Current: getPidValue("pids.current", out), - Limit: getPidValue("pids.max", out), - } - metrics.CPU = &stats.CPUStat{ - UsageUsec: getUint64Value("usage_usec", out), - UserUsec: getUint64Value("user_usec", out), - SystemUsec: getUint64Value("system_usec", out), - NrPeriods: getUint64Value("nr_periods", out), - NrThrottled: getUint64Value("nr_throttled", out), - ThrottledUsec: getUint64Value("throttled_usec", out), - } - metrics.Memory = &stats.MemoryStat{ - Anon: getUint64Value("anon", out), - File: getUint64Value("file", out), - KernelStack: getUint64Value("kernel_stack", out), - Slab: getUint64Value("slab", out), - Sock: getUint64Value("sock", out), - Shmem: getUint64Value("shmem", out), - FileMapped: getUint64Value("file_mapped", out), - FileDirty: getUint64Value("file_dirty", out), - FileWriteback: getUint64Value("file_writeback", out), - AnonThp: getUint64Value("anon_thp", out), - InactiveAnon: getUint64Value("inactive_anon", out), - ActiveAnon: getUint64Value("active_anon", out), - InactiveFile: getUint64Value("inactive_file", out), - ActiveFile: getUint64Value("active_file", out), - Unevictable: getUint64Value("unevictable", out), - SlabReclaimable: getUint64Value("slab_reclaimable", out), - SlabUnreclaimable: getUint64Value("slab_unreclaimable", out), - Pgfault: getUint64Value("pgfault", out), - Pgmajfault: getUint64Value("pgmajfault", out), - WorkingsetRefault: getUint64Value("workingset_refault", out), - WorkingsetActivate: getUint64Value("workingset_activate", out), - WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out), - Pgrefill: getUint64Value("pgrefill", out), - Pgscan: getUint64Value("pgscan", out), - Pgsteal: getUint64Value("pgsteal", out), - Pgactivate: getUint64Value("pgactivate", out), - Pgdeactivate: getUint64Value("pgdeactivate", out), - Pglazyfree: getUint64Value("pglazyfree", out), - Pglazyfreed: getUint64Value("pglazyfreed", out), - ThpFaultAlloc: getUint64Value("thp_fault_alloc", out), - ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out), - Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")), - UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")), - SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")), - SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")), - } - if len(memoryEvents) > 0 { - metrics.MemoryEvents = &stats.MemoryEvents{ - Low: getUint64Value("low", memoryEvents), - High: getUint64Value("high", memoryEvents), - Max: getUint64Value("max", memoryEvents), - Oom: getUint64Value("oom", memoryEvents), - OomKill: getUint64Value("oom_kill", memoryEvents), - } - } - metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)} - metrics.Rdma = &stats.RdmaStat{ - Current: rdmaStats(filepath.Join(c.path, "rdma.current")), - Limit: rdmaStats(filepath.Join(c.path, "rdma.max")), - } - metrics.Hugetlb = readHugeTlbStats(c.path) - - return &metrics, nil -} - -func getUint64Value(key string, out map[string]interface{}) uint64 { - v, ok := out[key] - if !ok { - return 0 - } - switch t := v.(type) { - case uint64: - return t - } - return 0 -} - -func getPidValue(key string, out map[string]interface{}) uint64 { - v, ok := out[key] - if !ok { - return 0 - } - switch t := v.(type) { - case uint64: - return t - case string: - if t == "max" { - return math.MaxUint64 - } - } - return 0 -} - -func readSingleFile(path string, file string, out map[string]interface{}) error { - f, err := os.Open(filepath.Join(path, file)) - if err != nil { - return err - } - defer f.Close() - data, err := io.ReadAll(f) - if err != nil { - return err - } - s := strings.TrimSpace(string(data)) - v, err := parseUint(s, 10, 64) - if err != nil { - // if we cannot parse as a uint, parse as a string - out[file] = s - return nil - } - out[file] = v - return nil -} - -func readKVStatsFile(path string, file string, out map[string]interface{}) error { - f, err := os.Open(filepath.Join(path, file)) - if err != nil { - return err - } - defer f.Close() - - s := bufio.NewScanner(f) - for s.Scan() { - name, value, err := parseKV(s.Text()) - if err != nil { - return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err) - } - out[name] = value - } - return s.Err() -} - -func (c *Manager) Freeze() error { - return c.freeze(c.path, Frozen) -} - -func (c *Manager) Thaw() error { - return c.freeze(c.path, Thawed) -} - -func (c *Manager) freeze(path string, state State) error { - values := state.Values() - for { - if err := writeValues(path, values); err != nil { - return err - } - current, err := fetchState(path) - if err != nil { - return err - } - if current == state { - return nil - } - time.Sleep(1 * time.Millisecond) - } -} - -func (c *Manager) isCgroupEmpty() bool { - // In case of any error we return true so that we exit and don't leak resources - out := make(map[string]interface{}) - if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil { - return true - } - if v, ok := out["populated"]; ok { - populated, ok := v.(uint64) - if !ok { - return true - } - return populated == 0 - } - return true -} - -// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor -func (c *Manager) MemoryEventFD() (int, uint32, error) { - fpath := filepath.Join(c.path, "memory.events") - fd, err := syscall.InotifyInit() - if err != nil { - return 0, 0, errors.New("failed to create inotify fd") - } - wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY) - if err != nil { - syscall.Close(fd) - return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err) - } - // monitor to detect process exit/cgroup deletion - evpath := filepath.Join(c.path, "cgroup.events") - if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil { - syscall.Close(fd) - return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err) - } - - return fd, uint32(wd), nil -} - -func (c *Manager) EventChan() (<-chan Event, <-chan error) { - ec := make(chan Event) - errCh := make(chan error, 1) - go c.waitForEvents(ec, errCh) - - return ec, errCh -} - -func parseMemoryEvents(out map[string]interface{}) (Event, error) { - e := Event{} - if v, ok := out["high"]; ok { - e.High, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v) - } - } - if v, ok := out["low"]; ok { - e.Low, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v) - } - } - if v, ok := out["max"]; ok { - e.Max, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v) - } - } - if v, ok := out["oom"]; ok { - e.OOM, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v) - } - } - if v, ok := out["oom_kill"]; ok { - e.OOMKill, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v) - } - } - return e, nil -} - -func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) { - defer close(errCh) - - fd, _, err := c.MemoryEventFD() - if err != nil { - errCh <- err - return - } - defer syscall.Close(fd) - - for { - buffer := make([]byte, syscall.SizeofInotifyEvent*10) - bytesRead, err := syscall.Read(fd, buffer) - if err != nil { - errCh <- err - return - } - if bytesRead >= syscall.SizeofInotifyEvent { - out := make(map[string]interface{}) - if err := readKVStatsFile(c.path, "memory.events", out); err != nil { - // When cgroup is deleted read may return -ENODEV instead of -ENOENT from open. - if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) { - errCh <- err - } - return - } - e, err := parseMemoryEvents(out) - if err != nil { - errCh <- err - return - } - ec <- e - if c.isCgroupEmpty() { - return - } - } - } -} - -func setDevices(path string, devices []specs.LinuxDeviceCgroup) error { - if len(devices) == 0 { - return nil - } - insts, license, err := DeviceFilter(devices) - if err != nil { - return err - } - dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600) - if err != nil { - return fmt.Errorf("cannot get dir FD for %s", path) - } - defer unix.Close(dirFD) - if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { - if !canSkipEBPFError(devices) { - return err - } - } - return nil -} - -// getSystemdFullPath returns the full systemd path when creating a systemd slice group. -// the reason this is necessary is because the "-" character has a special meaning in -// systemd slice. For example, when creating a slice called "my-group-112233.slice", -// systemd will create a hierarchy like this: -// -// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice -func getSystemdFullPath(slice, group string) string { - return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group)) -} - -// dashesToPath converts a slice name with dashes to it's corresponding systemd filesystem path. -func dashesToPath(in string) string { - path := "" - if strings.HasSuffix(in, ".slice") && strings.Contains(in, "-") { - parts := strings.Split(in, "-") - for i := range parts { - s := strings.Join(parts[0:i+1], "-") - if !strings.HasSuffix(s, ".slice") { - s += ".slice" - } - path = filepath.Join(path, s) - } - } else { - path = filepath.Join(path, in) - } - return path -} - -func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) { - if slice == "" { - slice = defaultSlice - } - ctx := context.TODO() - path := getSystemdFullPath(slice, group) - conn, err := systemdDbus.NewWithContext(ctx) - if err != nil { - return &Manager{}, err - } - defer conn.Close() - - properties := []systemdDbus.Property{ - systemdDbus.PropDescription("cgroup " + group), - newSystemdProperty("DefaultDependencies", false), - newSystemdProperty("MemoryAccounting", true), - newSystemdProperty("CPUAccounting", true), - newSystemdProperty("IOAccounting", true), - } - - // if we create a slice, the parent is defined via a Wants= - if strings.HasSuffix(group, ".slice") { - properties = append(properties, systemdDbus.PropWants(defaultSlice)) - } else { - // otherwise, we use Slice= - properties = append(properties, systemdDbus.PropSlice(defaultSlice)) - } - - // only add pid if its valid, -1 is used w/ general slice creation. - if pid != -1 { - properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)})) - } - - if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 { - properties = append(properties, - newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min))) - } - - if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 { - properties = append(properties, - newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max))) - } - - if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 { - properties = append(properties, - newSystemdProperty("CPUWeight", *resources.CPU.Weight)) - } - - if resources.CPU != nil && resources.CPU.Max != "" { - quota, period := resources.CPU.Max.extractQuotaAndPeriod() - // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. - // corresponds to USEC_INFINITY in systemd - // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd - // always setting a property value ensures we can apply a quota and remove it later - cpuQuotaPerSecUSec := uint64(math.MaxUint64) - if quota > 0 { - // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota - // (integer percentage of CPU) internally. This means that if a fractional percent of - // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest - // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. - cpuQuotaPerSecUSec = uint64(quota*1000000) / period - if cpuQuotaPerSecUSec%10000 != 0 { - cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 - } - } - properties = append(properties, - newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) - } - - // If we can delegate, we add the property back in - if canDelegate { - properties = append(properties, newSystemdProperty("Delegate", true)) - } - - if resources.Pids != nil && resources.Pids.Max > 0 { - properties = append(properties, - newSystemdProperty("TasksAccounting", true), - newSystemdProperty("TasksMax", uint64(resources.Pids.Max))) - } - - statusChan := make(chan string, 1) - if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err == nil { - select { - case <-statusChan: - case <-time.After(time.Second): - logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group) - } - } else if !isUnitExists(err) { - return &Manager{}, err - } - - return &Manager{ - path: path, - }, nil -} - -func LoadSystemd(slice, group string) (*Manager, error) { - if slice == "" { - slice = defaultSlice - } - path := getSystemdFullPath(slice, group) - return &Manager{ - path: path, - }, nil -} - -func (c *Manager) DeleteSystemd() error { - ctx := context.TODO() - conn, err := systemdDbus.NewWithContext(ctx) - if err != nil { - return err - } - defer conn.Close() - group := systemdUnitFromPath(c.path) - ch := make(chan string) - _, err = conn.StopUnitContext(ctx, group, "replace", ch) - if err != nil { - return err - } - <-ch - return nil -} - -func newSystemdProperty(name string, units interface{}) systemdDbus.Property { - return systemdDbus.Property{ - Name: name, - Value: dbus.MakeVariant(units), - } -} |