initd: fix off-by-one error in mkdev.c
[project/procd.git] / jail / seccomp-oci.c
1 /*
2 * parse and setup OCI seccomp filter
3 * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
4 * seccomp example with syscall reporting
5 * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
6 * Authors:
7 * Kees Cook <keescook@chromium.org>
8 * Will Drewry <wad@chromium.org>
9 *
10 * Use of this source code is governed by a BSD-style license that can be
11 * found in the LICENSE file.
12 *
13 * BPF control flow
14 *
15  * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action)
16  *       |<f>               |<t>          |
17  *      KILL         (check_argument)<f>--+
18  *                          |<t>
19  *                        [...]
20  *                          |<t>
21  *                   (return action)
22 */
23 #define _GNU_SOURCE 1
24 #include <assert.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28
29 #include <libubox/utils.h>
30 #include <libubox/blobmsg.h>
31 #include <libubox/blobmsg_json.h>
32
33 #include "log.h"
34 #include "seccomp-bpf.h"
35 #include "seccomp-oci.h"
36 #include "../syscall-names.h"
37 #include "seccomp-syscalls-helpers.h"
38
39 static uint32_t resolve_action(char *actname)
40 {
41 if (!strcmp(actname, "SCMP_ACT_KILL"))
42 return SECCOMP_RET_KILL;
43 else if (!strcmp(actname, "SCMP_ACT_KILL_PROCESS"))
44 return SECCOMP_RET_KILLPROCESS;
45 else if (!strcmp(actname, "SCMP_ACT_TRAP"))
46 return SECCOMP_RET_TRAP;
47 else if (!strcmp(actname, "SCMP_ACT_ERRNO"))
48 return SECCOMP_RET_ERRNO;
49 else if (!strcmp(actname, "SCMP_ACT_ERROR"))
50 return SECCOMP_RET_ERRNO;
51 else if (!strcmp(actname, "SCMP_ACT_TRACE"))
52 return SECCOMP_RET_TRACE;
53 else if (!strcmp(actname, "SCMP_ACT_ALLOW"))
54 return SECCOMP_RET_ALLOW;
55 else if (!strcmp(actname, "SCMP_ACT_LOG"))
56 return SECCOMP_RET_LOGALLOW;
57 else {
58 ERROR("unknown seccomp action %s\n", actname);
59 return SECCOMP_RET_KILL;
60 }
61 }
62
63 static uint8_t resolve_op_ins(const char *op)
64 {
65 if (!strcmp(op, "SCMP_CMP_NE")) /* invert EQ */
66 return BPF_JEQ;
67 else if (!strcmp(op, "SCMP_CMP_LT")) /* invert GE */
68 return BPF_JGE;
69 else if (!strcmp(op, "SCMP_CMP_LE")) /* invert GT */
70 return BPF_JGT;
71 else if (!strcmp(op, "SCMP_CMP_EQ"))
72 return BPF_JEQ;
73 else if (!strcmp(op, "SCMP_CMP_GE"))
74 return BPF_JGE;
75 else if (!strcmp(op, "SCMP_CMP_GT"))
76 return BPF_JGT;
77 else if (!strcmp(op, "SCMP_CMP_MASKED_EQ"))
78 return BPF_JEQ;
79 else {
80 ERROR("unknown seccomp op %s\n", op);
81 return 0;
82 }
83 }
84
/* Report whether @op is the masked-equality operator "SCMP_CMP_MASKED_EQ". */
static bool resolve_op_is_masked(const char *op)
{
	return strcmp(op, "SCMP_CMP_MASKED_EQ") == 0;
}
92
/*
 * Report whether @op is one of the operators listed as inverted:
 * "SCMP_CMP_NE", "SCMP_CMP_LT" or "SCMP_CMP_LE".
 */
static bool resolve_op_inv(const char *op)
{
	static const char *const inverted_ops[] = {
		"SCMP_CMP_NE",
		"SCMP_CMP_LT",
		"SCMP_CMP_LE",
	};
	size_t n;

	for (n = 0; n < sizeof(inverted_ops) / sizeof(inverted_ops[0]); n++) {
		if (strcmp(op, inverted_ops[n]) == 0)
			return true;
	}

	return false;
}
102
103 static uint32_t resolve_architecture(char *archname)
104 {
105 if (!archname)
106 return 0;
107
108 if (!strcmp(archname, "SCMP_ARCH_X86"))
109 return AUDIT_ARCH_I386;
110 else if (!strcmp(archname, "SCMP_ARCH_X86_64"))
111 return AUDIT_ARCH_X86_64;
112 else if (!strcmp(archname, "SCMP_ARCH_X32"))
113 /*
114 * return AUDIT_ARCH_X86_64;
115 * 32-bit userland on 64-bit kernel is not supported yet
116 */
117 return 0;
118 else if (!strcmp(archname, "SCMP_ARCH_ARM"))
119 return AUDIT_ARCH_ARM;
120 else if (!strcmp(archname, "SCMP_ARCH_AARCH64"))
121 return AUDIT_ARCH_AARCH64;
122 else if (!strcmp(archname, "SCMP_ARCH_MIPS"))
123 return AUDIT_ARCH_MIPS;
124 else if (!strcmp(archname, "SCMP_ARCH_MIPS64"))
125 return AUDIT_ARCH_MIPS64;
126 else if (!strcmp(archname, "SCMP_ARCH_MIPS64N32"))
127 return AUDIT_ARCH_MIPS64N32;
128 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL"))
129 return AUDIT_ARCH_MIPSEL;
130 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64"))
131 return AUDIT_ARCH_MIPSEL64;
132 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64N32"))
133 return AUDIT_ARCH_MIPSEL64N32;
134 else if (!strcmp(archname, "SCMP_ARCH_PPC"))
135 return AUDIT_ARCH_PPC;
136 else if (!strcmp(archname, "SCMP_ARCH_PPC64"))
137 return AUDIT_ARCH_PPC64;
138 else if (!strcmp(archname, "SCMP_ARCH_PPC64LE"))
139 return AUDIT_ARCH_PPC64LE;
140 else if (!strcmp(archname, "SCMP_ARCH_S390"))
141 return AUDIT_ARCH_S390;
142 else if (!strcmp(archname, "SCMP_ARCH_S390X"))
143 return AUDIT_ARCH_S390X;
144 else if (!strcmp(archname, "SCMP_ARCH_PARISC"))
145 return AUDIT_ARCH_PARISC;
146 else if (!strcmp(archname, "SCMP_ARCH_PARISC64"))
147 return AUDIT_ARCH_PARISC64;
148 else {
149 ERROR("unknown seccomp architecture %s\n", archname);
150 return 0;
151 }
152 }
153
154 enum {
155 OCI_LINUX_SECCOMP_DEFAULTACTION,
156 OCI_LINUX_SECCOMP_ARCHITECTURES,
157 OCI_LINUX_SECCOMP_FLAGS,
158 OCI_LINUX_SECCOMP_SYSCALLS,
159 __OCI_LINUX_SECCOMP_MAX,
160 };
161
162 static const struct blobmsg_policy oci_linux_seccomp_policy[] = {
163 [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING },
164 [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY },
165 [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY },
166 [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY },
167 };
168
169 enum {
170 OCI_LINUX_SECCOMP_SYSCALLS_NAMES,
171 OCI_LINUX_SECCOMP_SYSCALLS_ACTION,
172 OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET,
173 OCI_LINUX_SECCOMP_SYSCALLS_ARGS,
174 __OCI_LINUX_SECCOMP_SYSCALLS_MAX
175 };
176
177 static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = {
178 [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY },
179 [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 },
180 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
181 [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING },
182 };
183
184 enum {
185 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX,
186 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE,
187 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO,
188 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP,
189 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
190 };
191
192 static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = {
193 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 },
194 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_CAST_INT64 },
195 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_CAST_INT64 },
196 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING },
197 };
198
199 struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg)
200 {
201 struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX];
202 struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX];
203 struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX];
204 struct blob_attr *cur, *curn, *curarg;
205 int rem, remn, remargs, sc;
206 struct sock_filter *filter;
207 struct sock_fprog *prog;
208 int sz = 4, idx = 0;
209 uint32_t default_policy = 0;
210 uint32_t seccomp_arch;
211 bool arch_matched;
212 char *op_str;
213
214 blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX,
215 tb, blobmsg_data(msg), blobmsg_len(msg));
216
217 if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) {
218 ERROR("seccomp: no default action set\n");
219 return NULL;
220 }
221
222 default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION]));
223
224 /* verify architecture while ignoring the x86_64 anomaly for now */
225 if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) {
226 arch_matched = false;
227 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) {
228 seccomp_arch = resolve_architecture(blobmsg_get_string(cur));
229 if (ARCH_NR == seccomp_arch) {
230 arch_matched = true;
231 break;
232 }
233 }
234 if (!arch_matched) {
235 ERROR("seccomp architecture doesn't match system\n");
236 return NULL;
237 }
238 }
239
240 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
241 sz += 2; /* load and return */
242
243 blobmsg_parse(oci_linux_seccomp_syscalls_policy,
244 __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
245 tbn, blobmsg_data(cur), blobmsg_len(cur));
246 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
247 sc = find_syscall(blobmsg_get_string(curn));
248 if (sc == -1) {
249 DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn));
250 /* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */
251 continue;
252 }
253 ++sz;
254 }
255
256 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS]) {
257 blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
258 sz += 2; /* load and compare */
259
260 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
261 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
262 tba, blobmsg_data(curarg), blobmsg_len(curarg));
263 if (!tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] ||
264 !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] ||
265 !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP])
266 return NULL;
267
268 if (blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]) > 5)
269 return NULL;
270
271 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
272 if (!resolve_op_ins(op_str))
273 return NULL;
274
275 if (resolve_op_is_masked(op_str))
276 ++sz; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */
277 }
278 }
279 }
280
281 if (sz < 6)
282 return NULL;
283
284 prog = malloc(sizeof(struct sock_fprog));
285 if (!prog)
286 return NULL;
287
288 filter = calloc(sz, sizeof(struct sock_filter));
289 if (!filter) {
290 ERROR("failed to allocate memory for seccomp filter\n");
291 goto errout2;
292 }
293
294 /* validate arch */
295 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
296 set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
297 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
298
299 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
300 uint32_t action;
301 uint32_t op_idx;
302 uint8_t op_ins;
303 bool op_inv, op_masked;
304 uint64_t op_val, op_val2;
305 int start_rule_idx;
306 int next_rule_idx;
307
308 blobmsg_parse(oci_linux_seccomp_syscalls_policy,
309 __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
310 tbn, blobmsg_data(cur), blobmsg_len(cur));
311 action = resolve_action(blobmsg_get_string(
312 tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]));
313 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) {
314 if (action != SECCOMP_RET_ERRNO)
315 goto errout1;
316
317 action = SECCOMP_RET_ERROR(blobmsg_get_u32(
318 tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]));
319 } else if (action == SECCOMP_RET_ERRNO)
320 action = SECCOMP_RET_ERROR(EPERM);
321
322 /* load syscall */
323 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
324
325 /* get number of syscall names */
326 next_rule_idx = idx;
327 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
328 if (find_syscall(blobmsg_get_string(curn)) == -1)
329 continue;
330
331 ++next_rule_idx;
332 }
333 start_rule_idx = next_rule_idx;
334
335 /* calculate length of argument filter rules */
336 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
337 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
338 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
339 tba, blobmsg_data(curn), blobmsg_len(curn));
340 next_rule_idx += 2;
341 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
342 if (resolve_op_is_masked(op_str))
343 ++next_rule_idx;
344 }
345
346 ++next_rule_idx; /* account for return action */
347
348 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
349 sc = find_syscall(blobmsg_get_string(curn));
350 if (sc == -1)
351 continue;
352 /*
353 * check syscall, skip other syscall checks if match is found.
354 * if no match is found, jump to next section
355 */
356 set_filter(&filter[idx], BPF_JMP + BPF_JEQ + BPF_K,
357 start_rule_idx - (idx + 1),
358 ((idx + 1) == start_rule_idx)?(next_rule_idx - (idx + 1)):0,
359 sc);
360 ++idx;
361 }
362
363 assert(idx = start_rule_idx);
364
365 /* generate argument filter rules */
366 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
367 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
368 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
369 tba, blobmsg_data(curn), blobmsg_len(curn));
370
371 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
372 op_ins = resolve_op_ins(op_str);
373 op_inv = resolve_op_inv(op_str);
374 op_masked = resolve_op_is_masked(op_str);
375 op_idx = blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]);
376 op_val = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE]);
377 if (tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO])
378 op_val2 = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]);
379 else
380 op_val2 = 0;
381
382 /* load argument */
383 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_arg(op_idx));
384
385 /* apply mask */
386 if (op_masked)
387 set_filter(&filter[idx++], BPF_ALU + BPF_K + BPF_AND, 0, 0, op_val);
388
389 set_filter(&filter[idx], BPF_JMP + op_ins + BPF_K,
390 op_inv?(next_rule_idx - (idx + 1)):0,
391 op_inv?0:(next_rule_idx - (idx + 1)),
392 op_masked?op_val2:op_val);
393 ++idx;
394 }
395
396 /* if we have reached until here, all conditions were met and we can return */
397 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action);
398
399 assert(idx == next_rule_idx);
400 }
401
402 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, default_policy);
403
404 assert(idx == sz);
405
406 prog->len = (unsigned short) idx;
407 prog->filter = filter;
408
409 DEBUG("generated seccomp-bpf program:\n");
410 if (debug) {
411 fprintf(stderr, " [idx]\tcode\t jt\t jf\tk\n");
412 for (idx=0; idx<sz; idx++)
413 fprintf(stderr, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx,
414 filter[idx].code,
415 filter[idx].jt,
416 filter[idx].jf,
417 filter[idx].k);
418 }
419
420 return prog;
421
422 errout1:
423 free(prog->filter);
424 errout2:
425 free(prog);
426 return NULL;
427 }
428
429
430 int applyOCIlinuxseccomp(struct sock_fprog *prog)
431 {
432 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
433 ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
434 goto errout;
435 }
436
437 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) {
438 ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");
439 goto errout;
440 }
441 free(prog);
442
443 return 0;
444
445 errout:
446 free(prog->filter);
447 free(prog);
448 return errno;
449 }