/*
 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * reads unified cgroup config as proposed in
 * https://github.com/opencontainers/runtime-spec/pull/1040
 * attempt conversion from cgroup1 -> cgroup2
 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
 *
 * todo:
 * - convert cgroup1 net_prio and net_cls to eBPF program
 * - rdma (anyone?) intelrdt (anyone?)
 */
36 #include <libubox/avl.h>
37 #include <libubox/avl-cmp.h>
38 #include <libubox/blobmsg.h>
39 #include <libubox/list.h>
40 #include <libubox/utils.h>
44 #include "cgroups-bpf.h"
46 #define CGROUP_ROOT "/sys/fs/cgroup/"
47 #define CGROUP_IO_WEIGHT_MAX 10000
54 struct avl_tree cgvals
;
55 static char *cgroup_path
;
56 static bool initialized
;
58 void cgroups_prepare(void) {
62 void cgroups_init(const char *p
) {
63 avl_init(&cgvals
, avl_strcmp
, false, NULL
);
64 cgroup_path
= strdup(p
);
68 static void cgroups_set(const char *key
, const char *val
)
72 valp
= avl_find_element(&cgvals
, key
, valp
, avl
);
74 valp
= malloc(sizeof(struct cgval
));
78 valp
->avl
.key
= strdup(key
);
79 avl_insert(&cgvals
, &valp
->avl
);
81 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key
, valp
->val
);
85 valp
->val
= strdup(val
);
88 void cgroups_free(void)
90 struct cgval
*valp
, *tmp
;
93 avl_remove_all_elements(&cgvals
, valp
, avl
, tmp
) {
94 free((void *)(valp
->avl
.key
));
102 void cgroups_apply(pid_t pid
)
107 size_t maxlen
= strlen("cgroup.subtree_control");
117 char subtree_control
[64] = { 0 };
119 DEBUG("using cgroup path %s\n", cgroup_path
);
120 mkdir_p(cgroup_path
, 0700);
122 /* find which controllers need to be enabled */
123 avl_for_each_element(&cgvals
, valp
, avl
) {
124 ent
= (char *)valp
->avl
.key
;
125 if (strlen(ent
) > maxlen
)
126 maxlen
= strlen(ent
);
128 if (!strncmp("cpuset.", ent
, 7))
130 else if (!strncmp("cpu.", ent
, 4))
132 else if (!strncmp("hugetlb.", ent
, 8))
134 else if (!strncmp("io.", ent
, 3))
136 else if (!strncmp("memory.", ent
, 7))
138 else if (!strncmp("pids.", ent
, 5))
140 else if (!strncmp("rdma.", ent
, 5))
144 maxlen
+= strlen(cgroup_path
) + 2;
147 strcat(subtree_control
, "+cpuset ");
150 strcat(subtree_control
, "+cpu ");
153 strcat(subtree_control
, "+hugetlb ");
156 strcat(subtree_control
, "+io ");
159 strcat(subtree_control
, "+memory ");
162 strcat(subtree_control
, "+pids ");
165 strcat(subtree_control
, "+rdma ");
167 /* remove trailing space */
168 ent
= strchr(subtree_control
, '\0') - 1;
171 ent
= malloc(maxlen
);
175 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control
);
176 cdir
= &cgroup_path
[strlen(CGROUP_ROOT
) - 2];
177 while ((cdir
= strchr(cdir
+ 1, '/'))) {
179 snprintf(ent
, maxlen
, "%s/cgroup.subtree_control", cgroup_path
);
180 DEBUG(" * %s\n", ent
);
181 if ((fd
= open(ent
, O_WRONLY
)) == -1) {
182 ERROR("can't open %s: %m\n", ent
);
186 if (write(fd
, subtree_control
, strlen(subtree_control
)) == -1) {
187 ERROR("can't write to %s: %m\n", ent
);
195 avl_for_each_element(&cgvals
, valp
, avl
) {
196 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp
->avl
.key
, valp
->val
);
197 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, (char *)valp
->avl
.key
);
198 fd
= open(ent
, O_WRONLY
);
200 ERROR("can't open %s: %m\n", ent
);
203 if (dprintf(fd
, "%s", valp
->val
) < 0) {
204 ERROR("can't write to %s: %m\n", ent
);
209 int dirfd
= open(cgroup_path
, O_DIRECTORY
);
211 ERROR("can't open %s: %m\n", cgroup_path
);
213 attach_cgroups_ebpf(dirfd
);
217 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, "cgroup.procs");
218 fd
= open(ent
, O_WRONLY
);
220 ERROR("can't open %s: %m\n", cgroup_path
);
222 dprintf(fd
, "%d", pid
);
230 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
,
231 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
,
232 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
,
233 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
,
234 __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
,
237 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy
[] = {
238 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
239 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
240 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
241 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
245 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
,
246 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
,
247 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
,
248 __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
,
251 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy
[] = {
252 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
253 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
254 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
] = { "rate", BLOBMSG_CAST_INT64
},
258 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
,
259 OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
,
260 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
,
261 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
,
262 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
,
263 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
,
264 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
,
265 __OCI_LINUX_CGROUPS_BLOCKIO_MAX
,
268 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy
[] = {
269 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
270 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
271 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
] = { "weightDevice", BLOBMSG_TYPE_ARRAY
},
272 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY
},
273 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY
},
274 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY
},
275 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY
},
285 struct posix_dev dev
;
292 static int avl_devcmp(const void *k1
, const void *k2
, void *ptr
)
294 struct posix_dev
*d1
= (struct posix_dev
*)k1
, *d2
= (struct posix_dev
*)k2
;
296 if (d1
->major
< d2
->major
)
299 if (d1
->major
> d2
->major
)
302 if (d1
->minor
< d2
->minor
)
305 if (d1
->minor
> d2
->minor
)
311 static struct iomax_line
*get_iomax_line(struct avl_tree
*iomax
, uint64_t major
, uint64_t minor
)
313 struct iomax_line
*l
;
317 l
= avl_find_element(iomax
, &d
, l
, avl
);
319 l
= malloc(sizeof(struct iomax_line
));
323 l
->dev
.major
= d
.major
;
324 l
->dev
.minor
= d
.minor
;
325 l
->avl
.key
= &l
->dev
;
330 avl_insert(iomax
, &l
->avl
);
336 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr
*msg
)
338 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_BLOCKIO_MAX
],
339 *tbwd
[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
],
340 *tbtd
[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
],
343 int weight
= -1, leafweight
= -1;
344 size_t numweightstrs
= 0, numiomaxstrs
= 0, strtotlen
= 1;
345 char **weightstrs
= NULL
, **iomaxstrs
= NULL
, **curstr
;
346 char *weightstr
, *iomaxstr
;
347 struct avl_tree iomax
;
348 struct iomax_line
*curiomax
, *tmp
;
350 blobmsg_parse(oci_linux_cgroups_blockio_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
352 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]) {
353 weight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]);
357 if (weight
> CGROUP_IO_WEIGHT_MAX
)
360 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
])
361 leafweight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]);
363 if (leafweight
> CGROUP_IO_WEIGHT_MAX
)
366 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
)
369 weightstrs
= calloc(numweightstrs
+ 1, sizeof(char *));
376 if (asprintf(&weightstrs
[numweightstrs
++], "default %d", weight
) < 0)
379 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
) {
380 uint64_t major
, minor
;
381 int devweight
= weight
, devleafweight
= leafweight
;
383 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
, tbwd
, blobmsg_data(cur
), blobmsg_len(cur
));
384 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] ||
385 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
])
388 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] &&
389 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
392 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
])
393 devweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
]);
395 if (devweight
> CGROUP_IO_WEIGHT_MAX
)
398 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
399 devleafweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
]);
401 if (devleafweight
> CGROUP_IO_WEIGHT_MAX
)
404 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
407 major
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
]);
408 minor
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
]);
410 if (asprintf(&weightstrs
[numweightstrs
++], "%" PRIu64
":%" PRIu64
" %u", major
, minor
, devweight
) < 0)
417 strtotlen
+= strlen(*(curstr
++)) + 1;
419 weightstr
= calloc(strtotlen
, sizeof(char));
425 strcat(weightstr
, *curstr
);
426 strcat(weightstr
, "\n");
430 cgroups_set("io.bfq.weight", weightstr
);
436 avl_init(&iomax
, avl_devcmp
, false, NULL
);
438 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
], rem
) {
439 struct iomax_line
*l
;
441 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
443 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
444 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
445 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
448 l
= get_iomax_line(&iomax
,
449 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
450 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
452 l
->rbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
455 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
], rem
) {
456 struct iomax_line
*l
;
458 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
460 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
461 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
462 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
465 l
= get_iomax_line(&iomax
,
466 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
467 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
469 l
->wbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
472 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
], rem
) {
473 struct iomax_line
*l
;
475 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
477 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
478 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
479 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
482 l
= get_iomax_line(&iomax
,
483 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
484 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
486 l
->riops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
489 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
], rem
) {
490 struct iomax_line
*l
;
492 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
494 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
495 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
496 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
499 l
= get_iomax_line(&iomax
,
500 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
501 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
503 l
->wiops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
506 avl_for_each_element(&iomax
, curiomax
, avl
)
512 iomaxstrs
= calloc(numiomaxstrs
+ 1, sizeof(char *));
518 avl_for_each_element(&iomax
, curiomax
, avl
) {
522 sprintf(iomaxlstr
, "%" PRIu64
":%" PRIu64
" ", curiomax
->dev
.major
, curiomax
->dev
.minor
);
524 if (curiomax
->rbps
!= -1) {
525 sprintf(lstr
, "rbps=%" PRIu64
" ", curiomax
->rbps
);
526 strcat(iomaxlstr
, lstr
);
528 if (curiomax
->wbps
!= -1) {
529 sprintf(lstr
, "wbps=%" PRIu64
" ", curiomax
->wbps
);
530 strcat(iomaxlstr
, lstr
);
532 if (curiomax
->riops
!= -1) {
533 sprintf(lstr
, "riops=%" PRIu64
" ", curiomax
->riops
);
534 strcat(iomaxlstr
, lstr
);
536 if (curiomax
->wiops
!= -1) {
537 sprintf(lstr
, "wiops=%" PRIu64
" ", curiomax
->wiops
);
538 strcat(iomaxlstr
, lstr
);
541 iomaxstrs
[numiomaxstrs
++] = strdup(iomaxlstr
);
544 avl_for_each_element_safe(&iomax
, curiomax
, avl
, tmp
) {
545 avl_delete(&iomax
, &curiomax
->avl
);
549 strtotlen
= 1; /* 1 accounts for \0 at end of string */
553 strtotlen
+= strlen(*(curstr
++)) + 1; /* +1 accounts for \n at end of line */
555 iomaxstr
= calloc(strtotlen
, sizeof(char));
562 strcat(iomaxstr
, *curstr
);
563 strcat(iomaxstr
, "\n");
567 cgroups_set("io.max", iomaxstr
);
578 OCI_LINUX_CGROUPS_CPU_SHARES
,
579 OCI_LINUX_CGROUPS_CPU_PERIOD
,
580 OCI_LINUX_CGROUPS_CPU_QUOTA
,
581 OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
,
582 OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
,
583 OCI_LINUX_CGROUPS_CPU_CPUS
,
584 OCI_LINUX_CGROUPS_CPU_MEMS
,
585 __OCI_LINUX_CGROUPS_CPU_MAX
,
588 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy
[] = {
589 [OCI_LINUX_CGROUPS_CPU_SHARES
] = { "shares", BLOBMSG_CAST_INT64
},
590 [OCI_LINUX_CGROUPS_CPU_PERIOD
] = { "period", BLOBMSG_CAST_INT64
},
591 [OCI_LINUX_CGROUPS_CPU_QUOTA
] = { "quota", BLOBMSG_CAST_INT64
}, /* signed int64! */
592 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] = { "realtimePeriod", BLOBMSG_CAST_INT64
},
593 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
] = { "realtimeRuntime", BLOBMSG_CAST_INT64
},
594 [OCI_LINUX_CGROUPS_CPU_CPUS
] = { "cpus", BLOBMSG_TYPE_STRING
},
595 [OCI_LINUX_CGROUPS_CPU_MEMS
] = { "mems", BLOBMSG_TYPE_STRING
},
598 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr
*msg
)
600 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_CPU_MAX
];
601 uint64_t shares
, period
= 0;
602 int64_t quota
= -2; /* unset */
603 char tmp
[32] = { 0 };
605 blobmsg_parse(oci_linux_cgroups_cpu_policy
, __OCI_LINUX_CGROUPS_CPU_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
607 if (tb
[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] ||
608 tb
[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
])
609 return ENOTSUP
; /* no equivalent in cgroup2 */
611 if (tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]) {
612 shares
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]);
613 if ((shares
< 2) || (shares
> 262144))
616 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, (((uint64_t)1) + ((shares
- 2) * 9999) / 262142));
617 cgroups_set("cpu.weight", tmp
);
621 if (tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
])
622 quota
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
]);
624 if (tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
])
625 period
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
]);
629 snprintf(tmp
, sizeof(tmp
), "%" PRId64
" %" PRIu64
, quota
, period
);
631 snprintf(tmp
, sizeof(tmp
), "max %" PRIu64
, period
); /* assume default */
632 } else if (quota
>= 0) {
633 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, quota
);
634 } else if (quota
== -1) {
639 cgroups_set("cpu.max", tmp
);
641 if (tb
[OCI_LINUX_CGROUPS_CPU_CPUS
])
642 cgroups_set("cpuset.cpus", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_CPUS
]));
644 if (tb
[OCI_LINUX_CGROUPS_CPU_MEMS
])
645 cgroups_set("cpuset.mems", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_MEMS
]));
652 OCI_LINUX_CGROUPS_MEMORY_LIMIT
,
653 OCI_LINUX_CGROUPS_MEMORY_RESERVATION
,
654 OCI_LINUX_CGROUPS_MEMORY_SWAP
,
655 OCI_LINUX_CGROUPS_MEMORY_KERNEL
,
656 OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
,
657 OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
,
658 OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
,
659 OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
,
660 __OCI_LINUX_CGROUPS_MEMORY_MAX
,
663 static const struct blobmsg_policy oci_linux_cgroups_memory_policy
[] = {
664 [OCI_LINUX_CGROUPS_MEMORY_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
}, /* signed int64! */
665 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION
] = { "reservation", BLOBMSG_CAST_INT64
}, /* signed int64! */
666 [OCI_LINUX_CGROUPS_MEMORY_SWAP
] = { "swap", BLOBMSG_CAST_INT64
}, /* signed int64! */
667 [OCI_LINUX_CGROUPS_MEMORY_KERNEL
] = { "kernel", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
668 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
] = { "kernelTCP", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
669 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] = { "swappiness", BLOBMSG_CAST_INT64
},
670 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL
},
671 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
] { "useHierarchy", BLOBMSG_TYPE_BOOL
},
674 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr
*msg
)
676 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MEMORY_MAX
];
677 char tmp
[32] = { 0 };
678 int64_t limit
= -1, swap
, reservation
;
680 blobmsg_parse(oci_linux_cgroups_memory_policy
, __OCI_LINUX_CGROUPS_MEMORY_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
683 * not all properties of the OCI memory section can be mapped to cgroup2
684 * kernel memory accounting is always enabled and included in the set
685 * memory limit, hence these options can be ignored
686 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
687 * preventing self-upgrade (but allow downgrade)
689 * see also https://github.com/opencontainers/runtime-spec/issues/1005
691 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] ||
692 tb
[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] ||
693 tb
[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
])
697 if (tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]) {
698 limit
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]);
702 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, limit
);
704 cgroups_set("memory.max", tmp
);
707 if (tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]) {
708 reservation
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]);
710 if (reservation
== -1)
713 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, reservation
);
715 cgroups_set("memory.low", tmp
);
718 /* OCI 'swap' acounts for memory+swap */
719 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]) {
720 swap
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]);
724 else if (limit
== -1 || (limit
< swap
))
725 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, swap
);
727 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, limit
- swap
);
729 cgroups_set("memory.swap_max", tmp
);
737 OCI_LINUX_CGROUPS_PIDS_LIMIT
,
738 __OCI_LINUX_CGROUPS_PIDS_MAX
,
741 static const struct blobmsg_policy oci_linux_cgroups_pids_policy
[] = {
742 [OCI_LINUX_CGROUPS_PIDS_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
},
745 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr
*msg
)
747 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MEMORY_MAX
];
748 char tmp
[32] = { 0 };
750 blobmsg_parse(oci_linux_cgroups_pids_policy
, __OCI_LINUX_CGROUPS_PIDS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
752 if (!tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
])
755 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
]));
757 cgroups_set("pids.max", tmp
);
762 static int parseOCIlinuxcgroups_unified(struct blob_attr
*msg
)
764 struct blob_attr
*cur
;
767 blobmsg_for_each_attr(cur
, msg
, rem
) {
768 if (blobmsg_type(cur
) != BLOBMSG_TYPE_STRING
)
772 if (strchr(blobmsg_name(cur
), '/') ||
773 !strcmp(blobmsg_name(cur
), "cgroup.subtree_control") ||
774 !strcmp(blobmsg_name(cur
), "cgroup.procs") ||
775 !strcmp(blobmsg_name(cur
), "cgroup.threads") ||
776 !strcmp(blobmsg_name(cur
), "cgroup.freeze"))
779 cgroups_set(blobmsg_name(cur
), blobmsg_get_string(cur
));
786 OCI_LINUX_CGROUPS_BLOCKIO
,
787 OCI_LINUX_CGROUPS_CPU
,
788 OCI_LINUX_CGROUPS_DEVICES
,
789 OCI_LINUX_CGROUPS_HUGEPAGELIMITS
,
790 OCI_LINUX_CGROUPS_INTELRDT
,
791 OCI_LINUX_CGROUPS_MEMORY
,
792 OCI_LINUX_CGROUPS_NETWORK
,
793 OCI_LINUX_CGROUPS_PIDS
,
794 OCI_LINUX_CGROUPS_RDMA
,
795 OCI_LINUX_CGROUPS_UNIFIED
,
796 __OCI_LINUX_CGROUPS_MAX
,
799 static const struct blobmsg_policy oci_linux_cgroups_policy
[] = {
800 [OCI_LINUX_CGROUPS_BLOCKIO
] = { "blockIO", BLOBMSG_TYPE_TABLE
},
801 [OCI_LINUX_CGROUPS_CPU
] = { "cpu", BLOBMSG_TYPE_TABLE
},
802 [OCI_LINUX_CGROUPS_DEVICES
] = { "devices", BLOBMSG_TYPE_ARRAY
},
803 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY
},
804 [OCI_LINUX_CGROUPS_INTELRDT
] = { "intelRdt", BLOBMSG_TYPE_TABLE
},
805 [OCI_LINUX_CGROUPS_MEMORY
] = { "memory", BLOBMSG_TYPE_TABLE
},
806 [OCI_LINUX_CGROUPS_NETWORK
] = { "network", BLOBMSG_TYPE_TABLE
},
807 [OCI_LINUX_CGROUPS_PIDS
] = { "pids", BLOBMSG_TYPE_TABLE
},
808 [OCI_LINUX_CGROUPS_RDMA
] = { "rdma", BLOBMSG_TYPE_TABLE
},
809 [OCI_LINUX_CGROUPS_UNIFIED
] = { "unified", BLOBMSG_TYPE_TABLE
},
812 int parseOCIlinuxcgroups(struct blob_attr
*msg
)
814 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MAX
];
817 blobmsg_parse(oci_linux_cgroups_policy
, __OCI_LINUX_CGROUPS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
819 if (tb
[OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] ||
820 tb
[OCI_LINUX_CGROUPS_INTELRDT
] ||
821 tb
[OCI_LINUX_CGROUPS_NETWORK
] ||
822 tb
[OCI_LINUX_CGROUPS_RDMA
])
825 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO
]) {
826 ret
= parseOCIlinuxcgroups_legacy_blockio(tb
[OCI_LINUX_CGROUPS_BLOCKIO
]);
831 if (tb
[OCI_LINUX_CGROUPS_CPU
]) {
832 ret
= parseOCIlinuxcgroups_legacy_cpu(tb
[OCI_LINUX_CGROUPS_CPU
]);
837 if (tb
[OCI_LINUX_CGROUPS_DEVICES
]) {
838 ret
= parseOCIlinuxcgroups_devices(tb
[OCI_LINUX_CGROUPS_DEVICES
]);
843 if (tb
[OCI_LINUX_CGROUPS_MEMORY
]) {
844 ret
= parseOCIlinuxcgroups_legacy_memory(tb
[OCI_LINUX_CGROUPS_MEMORY
]);
849 if (tb
[OCI_LINUX_CGROUPS_PIDS
]) {
850 ret
= parseOCIlinuxcgroups_legacy_pids(tb
[OCI_LINUX_CGROUPS_PIDS
]);
855 if (tb
[OCI_LINUX_CGROUPS_UNIFIED
]) {
856 ret
= parseOCIlinuxcgroups_unified(tb
[OCI_LINUX_CGROUPS_UNIFIED
]);