1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
|
package memlimit
import (
"bufio"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
"slices"
"strconv"
"strings"
)
// ErrNoCgroup is returned when no cgroup mount (v1 or v2) is detected for the
// current process.
var ErrNoCgroup = errors.New("process is not in cgroup")

// ErrCgroupsNotSupported is returned when the system does not support cgroups.
var ErrCgroupsNotSupported = errors.New("cgroups is not supported on this system")
// fromCgroup returns the memory limit that the cgroup of the current process
// imposes.
//
// versionDetector is given the parsed entries of /proc/self/mountinfo and
// reports, in this order, whether cgroup v1 and cgroup v2 are available.
// When it reports neither, ErrNoCgroup is returned.
//
// If cgroup v2 is available it is consulted first; when that lookup fails and
// cgroup v1 is available as well, the v1 hierarchy is used as a fallback and
// the v2 error is discarded.
func fromCgroup(versionDetector func(mis []mountInfo) (bool, bool)) (uint64, error) {
	mountFile, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return 0, fmt.Errorf("failed to open /proc/self/mountinfo: %w", err)
	}
	defer mountFile.Close()

	mounts, err := parseMountInfo(mountFile)
	if err != nil {
		return 0, fmt.Errorf("failed to parse mountinfo: %w", err)
	}

	hasV1, hasV2 := versionDetector(mounts)
	if !hasV1 && !hasV2 {
		return 0, ErrNoCgroup
	}

	cgroupFile, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return 0, fmt.Errorf("failed to open /proc/self/cgroup: %w", err)
	}
	defer cgroupFile.Close()

	hierarchies, err := parseCgroupFile(cgroupFile)
	if err != nil {
		return 0, fmt.Errorf("failed to parse cgroup file: %w", err)
	}

	// v1 only: nothing to fall back from.
	if !hasV2 {
		return getMemoryLimitV1(hierarchies, mounts)
	}

	limit, err := getMemoryLimitV2(hierarchies, mounts)
	switch {
	case err == nil:
		return limit, nil
	case !hasV1:
		// v2 only: its error is final.
		return 0, err
	}

	// hybrid setup: v2 failed, so try the v1 memory controller.
	return getMemoryLimitV1(hierarchies, mounts)
}
// detectCgroupVersion scans the mountinfo entries and reports whether a
// cgroup v1 mount (filesystem type "cgroup") and a cgroup v2 mount
// (filesystem type "cgroup2") are present. The first result is v1, the
// second is v2.
func detectCgroupVersion(mis []mountInfo) (bool, bool) {
	hasV1, hasV2 := false, false
	for i := range mis {
		fsType := mis[i].FilesystemType
		if fsType == "cgroup" {
			hasV1 = true
		} else if fsType == "cgroup2" {
			hasV2 = true
		}
	}
	return hasV1, hasV2
}
// getMemoryLimitV2 returns the memory limit configured in the cgroup v2
// (unified) hierarchy.
//
// chs are the parsed entries of the cgroup file and mis the parsed mountinfo
// entries. The first matching entry of each is used.
func getMemoryLimitV2(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
	// The unified hierarchy is the entry with hierarchy ID "0" and an empty
	// controller list.
	unified := -1
	for i := range chs {
		if chs[i].HierarchyID == "0" && chs[i].ControllerList == "" {
			unified = i
			break
		}
	}
	if unified < 0 {
		return 0, errors.New("cgroup v2 path not found")
	}

	// Locate where the cgroup2 filesystem is mounted.
	mount := -1
	for i := range mis {
		if mis[i].FilesystemType == "cgroup2" {
			mount = i
			break
		}
	}
	if mount < 0 {
		return 0, errors.New("cgroup v2 mountpoint not found")
	}

	// Translate the cgroup path into a directory below the mount point.
	dir, err := resolveCgroupPath(mis[mount].MountPoint, mis[mount].Root, chs[unified].CgroupPath)
	if err != nil {
		return 0, err
	}

	// The limit itself lives in the memory.max file of that directory.
	return readMemoryLimitV2FromPath(filepath.Join(dir, "memory.max"))
}
// readMemoryLimitV2FromPath reads a cgroup v2 memory limit.
//
// path must point at the memory.max file itself. ErrNoLimit is returned when
// the file does not exist or when its content is the literal "max"; otherwise
// the trimmed content is parsed as an unsigned decimal number of bytes.
func readMemoryLimitV2FromPath(path string) (uint64, error) {
	raw, err := os.ReadFile(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return 0, ErrNoLimit
	case err != nil:
		return 0, fmt.Errorf("failed to read memory.max: %w", err)
	}

	value := strings.TrimSpace(string(raw))
	if value == "max" {
		return 0, ErrNoLimit
	}

	parsed, err := strconv.ParseUint(value, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse memory.max value: %w", err)
	}
	return parsed, nil
}
// getMemoryLimitV1 returns the memory limit configured in the cgroup v1
// memory controller.
//
// chs are the parsed entries of the cgroup file and mis the parsed mountinfo
// entries. The first matching entry of each is used.
func getMemoryLimitV1(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) {
	// listsMemory reports whether a comma-separated list has a "memory" item.
	listsMemory := func(commaSeparated string) bool {
		return slices.Contains(strings.Split(commaSeparated, ","), "memory")
	}

	// Pick the hierarchy that has the memory controller attached.
	hierarchy := -1
	for i := range chs {
		if listsMemory(chs[i].ControllerList) {
			hierarchy = i
			break
		}
	}
	if hierarchy < 0 {
		return 0, errors.New("cgroup v1 path for memory controller not found")
	}

	// Pick the cgroup mount whose super options name the memory controller.
	mount := -1
	for i := range mis {
		if mis[i].FilesystemType == "cgroup" && listsMemory(mis[i].SuperOptions) {
			mount = i
			break
		}
	}
	if mount < 0 {
		return 0, errors.New("cgroup v1 mountpoint for memory controller not found")
	}

	// Translate the cgroup path into a directory below the mount point.
	dir, err := resolveCgroupPath(mis[mount].MountPoint, mis[mount].Root, chs[hierarchy].CgroupPath)
	if err != nil {
		return 0, err
	}

	// The limit is read from the control files inside that directory.
	return readMemoryLimitV1FromPath(dir)
}
// getCgroupV1NoLimit returns the value that stands for "no limit" in
// cgroup v1: the largest multiple of the system page size that does not
// exceed the maximum int64.
func getCgroupV1NoLimit() uint64 {
	const maxInt64 = uint64(math.MaxInt64)

	pageSize := uint64(os.Getpagesize())
	// Drop the remainder to round down to a page boundary.
	return maxInt64 - maxInt64%pageSize
}
// readMemoryLimitV1FromPath reads the cgroup v1 memory limit from the cgroup
// directory cgroupPath.
//
// Two sources are consulted and the smaller value wins:
//   - hierarchical_memory_limit from the memory.stat file, and
//   - the content of the memory.limit_in_bytes file.
//
// A source whose file does not exist, or whose value is 0, is treated as
// "not set" and does not constrain the result. Any other read or parse
// failure is returned as an error. ErrNoLimit is returned when the resulting
// value is at or above the cgroup v1 "no limit" value.
func readMemoryLimitV1FromPath(cgroupPath string) (uint64, error) {
	// The cgroup v1 statistics file is named memory.stat.
	hml, err := readHierarchicalMemoryLimit(filepath.Join(cgroupPath, "memory.stat"))
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		return 0, fmt.Errorf("failed to read hierarchical_memory_limit: %w", err)
	}
	if hml == 0 {
		// file missing or key absent: do not let it constrain the minimum.
		hml = math.MaxUint64
	}

	// Start from "not set" so that a missing file is tolerated instead of
	// failing later while parsing an empty string.
	lib := uint64(math.MaxUint64)
	b, err := os.ReadFile(filepath.Join(cgroupPath, "memory.limit_in_bytes"))
	switch {
	case err == nil:
		lib, err = strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
		if err != nil {
			return 0, fmt.Errorf("failed to parse memory.limit_in_bytes value: %w", err)
		}
		if lib == 0 {
			lib = math.MaxUint64
		}
	case !errors.Is(err, os.ErrNotExist):
		return 0, fmt.Errorf("failed to read memory.limit_in_bytes: %w", err)
	}

	// use the minimum of both sources; a value at or above the "no limit"
	// value means that no limit is configured.
	limit := min(hml, lib)
	if limit >= getCgroupV1NoLimit() {
		return 0, ErrNoLimit
	}
	return limit, nil
}
// readHierarchicalMemoryLimit extracts the hierarchical_memory_limit value
// from a cgroup v1 memory.stat file.
//
// path must point at the memory.stat file itself. An error from opening the
// file is returned unwrapped, so callers can test it with errors.Is (for
// example against os.ErrNotExist). When the file has no
// hierarchical_memory_limit entry the function returns (0, nil).
//
// Only the hierarchical_memory_limit line is validated; other lines,
// including blank ones, are skipped so that an unrelated entry cannot make
// the lookup fail.
func readHierarchicalMemoryLimit(path string) (uint64, error) {
	const key = "hierarchical_memory_limit"

	file, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		if len(fields) == 0 || fields[0] != key {
			continue
		}
		// the entry must be exactly "<key> <value>".
		switch {
		case len(fields) < 2:
			return 0, fmt.Errorf("failed to parse memory.stat %q: not enough fields", line)
		case len(fields) > 2:
			return 0, fmt.Errorf("failed to parse memory.stat %q: too many fields for hierarchical_memory_limit", line)
		}
		return strconv.ParseUint(fields[1], 10, 64)
	}
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	return 0, nil
}
// mountInfo holds the fields of a single mountinfo line that are needed to
// locate a cgroup mount.
//
// See https://www.man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html
//
//	731 771 0:59 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw
//
//	36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
//	(1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
//
//	(1)  mount ID: a unique ID for the mount (may be reused after umount(2)).
//	(2)  parent ID: the ID of the parent mount (or of self for the root of
//	     this mount namespace's mount tree).
//	(3)  major:minor: the value of st_dev for files on this filesystem (see stat(2)).
//	(4)  root: the pathname of the directory in the filesystem which forms
//	     the root of this mount.
//	(5)  mount point: the pathname of the mount point relative to the
//	     process's root directory.
//	(6)  mount options: per-mount options (see mount(2)).
//	(7)  optional fields: zero or more fields of the form "tag[:value]".
//	(8)  separator: the end of the optional fields is marked by a single hyphen.
//	(9)  filesystem type: the filesystem type in the form "type[.subtype]".
//	(10) mount source: filesystem-specific information or "none".
//	(11) super options: per-superblock options (see mount(2)).
type mountInfo struct {
	// Root is field (4): the directory inside the filesystem that forms the
	// root of this mount.
	Root string
	// MountPoint is field (5): where the mount is attached, relative to the
	// process's root directory.
	MountPoint string
	// FilesystemType is field (9), for example "cgroup" or "cgroup2".
	FilesystemType string
	// SuperOptions is field (11): the comma-separated per-superblock options.
	SuperOptions string
}
// parseMountInfoLine parses one line of a mountinfo file.
//
// The line is split at the first " - " separator. The part before it must
// hold at least the six mandatory fields (mount ID, parent ID, major:minor,
// root, mount point, mount options); any number of optional fields may
// follow them. The part after it must hold at least the filesystem type,
// the mount source and the super options.
//
// An error is returned for an empty line, a missing separator, or too few
// fields on either side of the separator.
func parseMountInfoLine(line string) (mountInfo, error) {
	if line == "" {
		return mountInfo{}, errors.New("empty line")
	}

	before, after, found := strings.Cut(line, " - ")
	if !found {
		return mountInfo{}, errors.New("invalid separator")
	}

	// Limit the split to 7 parts: the six mandatory fields plus whatever
	// optional fields follow them. The optional fields are "zero or more",
	// so their count must not be validated.
	pre := strings.SplitN(before, " ", 7)
	if len(pre) < 6 {
		return mountInfo{}, fmt.Errorf("not enough fields before separator: %v", pre)
	}

	// Anything beyond the third field is kept as part of the super options
	// rather than rejected, so that an unexpected space there does not make
	// the whole file unparseable.
	post := strings.SplitN(after, " ", 3)
	if len(post) < 3 {
		return mountInfo{}, fmt.Errorf("not enough fields after separator: %v", post)
	}

	return mountInfo{
		Root:           pre[3],
		MountPoint:     pre[4],
		FilesystemType: post[0],
		SuperOptions:   post[2],
	}, nil
}
// parseMountInfo reads mountinfo content from r line by line and returns one
// mountInfo per line, in input order.
//
// The first line that cannot be parsed aborts the whole operation; the
// returned error quotes that line. A read error from r is returned as is.
// In both cases the returned slice is nil.
func parseMountInfo(r io.Reader) ([]mountInfo, error) {
	var entries []mountInfo

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		text := scanner.Text()

		entry, parseErr := parseMountInfoLine(text)
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse mountinfo file %q: %w", text, parseErr)
		}
		entries = append(entries, entry)
	}

	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return entries, nil
}
// cgroupHierarchy is one parsed line of a cgroup file.
//
// See https://www.man7.org/linux/man-pages/man7/cgroups.7.html
//
//	5:cpuacct,cpu,cpuset:/daemons
//	(1)       (2)           (3)
//
// (1) hierarchy ID: for cgroups version 1 hierarchies, this field contains a
// unique hierarchy ID number that can be matched to a hierarchy ID in
// /proc/cgroups. For the cgroups version 2 hierarchy, this field contains
// the value 0.
//
// (2) controller list: for cgroups version 1 hierarchies, this field contains
// a comma-separated list of the controllers bound to the hierarchy. For the
// cgroups version 2 hierarchy, this field is empty.
//
// (3) cgroup path: this field contains the pathname of the control group in
// the hierarchy to which the process belongs. This pathname is relative to
// the mount point of the hierarchy.
type cgroupHierarchy struct {
	// HierarchyID is field (1), kept as the string found in the file.
	HierarchyID string
	// ControllerList is field (2), the raw comma-separated controller list.
	ControllerList string
	// CgroupPath is field (3), relative to the mount point of the hierarchy.
	CgroupPath string
}
// parseCgroupHierarchyLine parses one line of a cgroup file, which has the
// form "hierarchy-ID:controller-list:cgroup-path".
//
// Only the first two colons are treated as separators, so a cgroup path that
// itself contains ':' is returned intact. An error is returned for an empty
// line or a line with fewer than three fields.
func parseCgroupHierarchyLine(line string) (cgroupHierarchy, error) {
	if line == "" {
		return cgroupHierarchy{}, errors.New("empty line")
	}

	// Split into at most three parts: everything after the second colon
	// belongs to the cgroup path.
	fields := strings.SplitN(line, ":", 3)
	if len(fields) < 3 {
		return cgroupHierarchy{}, fmt.Errorf("not enough fields: %v", fields)
	}

	return cgroupHierarchy{
		HierarchyID:    fields[0],
		ControllerList: fields[1],
		CgroupPath:     fields[2],
	}, nil
}
// parseCgroupFile reads cgroup file content from r line by line and returns
// one cgroupHierarchy per line, in input order.
//
// The first line that cannot be parsed aborts the whole operation; the
// returned error quotes that line. A read error from r is returned as is.
// In both cases the returned slice is nil.
func parseCgroupFile(r io.Reader) ([]cgroupHierarchy, error) {
	var hierarchies []cgroupHierarchy

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		text := scanner.Text()

		hierarchy, parseErr := parseCgroupHierarchyLine(text)
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse cgroup file %q: %w", text, parseErr)
		}
		hierarchies = append(hierarchies, hierarchy)
	}

	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return hierarchies, nil
}
// resolveCgroupPath maps a cgroup path onto the filesystem.
//
// mountpoint is where the cgroup filesystem is mounted, root is the directory
// of the cgroup hierarchy that this mount exposes, and cgroupRelPath is the
// cgroup path of the process within the hierarchy. The result is the
// directory below mountpoint that corresponds to cgroupRelPath.
//
// An error is returned when cgroupRelPath cannot be made relative to root or
// when it lies outside of root.
func resolveCgroupPath(mountpoint, root, cgroupRelPath string) (string, error) {
	rel, err := filepath.Rel(root, cgroupRelPath)
	if err != nil {
		return "", err
	}

	// if the relative path is ".", then the cgroupRelPath is the root itself.
	if rel == "." {
		return mountpoint, nil
	}

	// The path is outside the root only when its first element is exactly
	// "..". A plain prefix check would also reject valid names that merely
	// begin with two dots, such as "..data".
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("invalid cgroup path: %s is not under root %s", cgroupRelPath, root)
	}

	return filepath.Join(mountpoint, rel), nil
}
|