// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
 */
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <linux/ip.h>
#include <net/ipv6.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "qosify-bpf.h"

#define INET_ECN_MASK 3

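/*
 * Flow timestamps come from cur_time(), i.e. bpf_ktime_get_ns() >> 24, so
 * one unit is ~16.8 ms: FLOW_CHECK_INTERVAL is roughly one second and
 * FLOW_TIMEOUT roughly 30 seconds. FLOW_BULK_TIMEOUT counts check intervals.
 */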
#define FLOW_CHECK_INTERVAL ((u32)((1000000000ULL) >> 24))
#define FLOW_TIMEOUT ((u32)((30ULL * 1000000000ULL) >> 24))
#define FLOW_BULK_TIMEOUT 5

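/* Fixed-point precision used by ewma(): averages carry 12 fractional bits */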
#define EWMA_SHIFT 12

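/*
 * Filled in by user space before the program is loaded; const volatile
 * globals live in .rodata and can be patched at load time.
 */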
static const volatile uint32_t module_flags = 0;

struct flow_bucket {
	__u32 last_update;	/* cur_time() stamp of the current interval */
	__u32 pkt_len_avg;	/* EWMA of packet length (EWMA_SHIFT fixed point) */
	__u32 pkt_count;	/* packets seen in the current interval */
	__u32 bulk_timeout;	/* intervals left before bulk marking expires */
};

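/* Global configuration; a single pinned entry of struct qosify_config */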
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, struct qosify_config);
	__uint(max_entries, 1);
} config SEC(".maps");

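/*
 * Per-port DSCP values for TCP and UDP. The key is the 16-bit port exactly
 * as it appears in the header (network byte order); entry 0 of udp_ports
 * doubles as the fallback for protocols other than TCP/UDP (see
 * parse_l4proto()).
 */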
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, __u8);
	__uint(max_entries, 1 << 16);
} tcp_ports SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, __u8);
	__uint(max_entries, 1 << 16);
} udp_ports SEC(".maps");

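/* Per-flow state keyed by the skb flow hash; LRU eviction bounds memory use */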
struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, struct flow_bucket);
	__uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");

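/*
 * Per-address DSCP overrides, keyed by source address on ingress and
 * destination address on egress.
 */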
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(pinning, 1);
	__uint(key_size, sizeof(struct in_addr));
	__type(value, struct qosify_ip_map_val);
	__uint(max_entries, 100000);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} ipv4_map SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(pinning, 1);
	__uint(key_size, sizeof(struct in6_addr));
	__type(value, struct qosify_ip_map_val);
	__uint(max_entries, 100000);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");

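/* DSCP classes referenced by values carrying QOSIFY_DSCP_CLASS_FLAG */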
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, struct qosify_class);
	__uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
			    QOSIFY_DEFAULT_CLASS_ENTRIES);
} class_map SEC(".maps");

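/* Returns the global config entry, or NULL if the map lookup fails */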
static struct qosify_config *get_config(void)
{
	__u32 key = 0;

	return bpf_map_lookup_elem(&config, &key);
}

static __always_inline int proto_is_vlan(__u16 h_proto)
{
	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
		  h_proto == bpf_htons(ETH_P_8021AD));
}

static __always_inline int proto_is_ip(__u16 h_proto)
{
	return !!(h_proto == bpf_htons(ETH_P_IP) ||
		  h_proto == bpf_htons(ETH_P_IPV6));
}

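/*
 * Direct packet access helpers: skb_check() verifies that a header pointer
 * stays within skb->data_end, which the verifier requires before any access.
 */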
static __always_inline void *skb_ptr(struct __sk_buff *skb, __u32 offset)
{
	void *start = (void *)(unsigned long long)skb->data;

	return start + offset;
}

static __always_inline void *skb_end_ptr(struct __sk_buff *skb)
{
	return (void *)(unsigned long long)skb->data_end;
}

static __always_inline int skb_check(struct __sk_buff *skb, void *ptr)
{
	if (ptr > skb_end_ptr(skb))
		return -1;

	return 0;
}

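/*
 * Coarse timestamp in ~16.8 ms units (ktime >> 24); 0 is reserved to mean
 * "never updated", so it is mapped to 1.
 */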
static __always_inline __u32 cur_time(void)
{
	__u32 val = bpf_ktime_get_ns() >> 24;

	if (!val)
		val = 1;

	return val;
}

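/*
 * Exponentially weighted moving average with EWMA_SHIFT fractional bits:
 * avg = 3/4 * avg + 1/4 * val. Returns the integer part of the new average.
 */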
static __always_inline __u32 ewma(__u32 *avg, __u32 val)
{
	if (*avg)
		*avg = (*avg * 3) / 4 + (val << EWMA_SHIFT) / 4;
	else
		*avg = val << EWMA_SHIFT;

	return *avg >> EWMA_SHIFT;
}

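/* Picks the ingress or egress half of a qosify_dscp_val pair */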
static __always_inline __u8 dscp_val(struct qosify_dscp_val *val, bool ingress)
{
	__u8 ival = val->ingress;
	__u8 eval = val->egress;

	return ingress ? ival : eval;
}

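/*
 * Rewrites the IPv4 tos field, keeping the bits covered by mask (the ECN
 * bits for all callers), and patches the header checksum incrementally
 * instead of recomputing it. Unless force is set, packets whose tos already
 * has bits in mask set are left untouched.
 */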
static __always_inline void
ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
		    __u8 mask, __u8 value, bool force)
{
	struct iphdr *iph;
	__u32 check;
	__u8 dsfield;

	iph = skb_ptr(skb, offset);
	if (skb_check(skb, iph + 1))
		return;

	check = bpf_ntohs(iph->check);
	if ((iph->tos & mask) && !force)
		return;

	dsfield = (iph->tos & mask) | value;
	if (iph->tos == dsfield)
		return;

	check += iph->tos;
	if ((check + 1) >> 16)
		check = (check + 1) & 0xffff;
	check -= dsfield;
	check += check >> 16;
	iph->check = bpf_htons(check);
	iph->tos = dsfield;
}

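/*
 * IPv6 counterpart: the traffic class straddles the first two bytes of the
 * header, so the rewrite works on the leading 16-bit word with byte-order
 * adjusted masks. There is no header checksum to fix up.
 */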
static __always_inline void
ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
		    __u8 mask, __u8 value, bool force)
{
	struct ipv6hdr *ipv6h;
	__u16 *p;
	__u16 val;

	ipv6h = skb_ptr(skb, offset);
	if (skb_check(skb, ipv6h + 1))
		return;

	p = (__u16 *)ipv6h;
	if (((*p >> 4) & mask) && !force)
		return;

	val = (*p & bpf_htons((((__u16)mask << 4) | 0xf00f))) | bpf_htons((__u16)value << 4);
	if (val == *p)
		return;

	*p = val;
}

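/*
 * Returns the ethertype of the innermost payload (network byte order, as a
 * non-negative int), skipping up to two stacked VLAN tags, or -1 if the
 * packet is too short. The loop is unrolled to keep the verifier happy.
 */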
static __always_inline int
parse_ethernet(struct __sk_buff *skb, __u32 *offset)
{
	struct ethhdr *eth;
	__u16 h_proto;
	int i;

	eth = skb_ptr(skb, *offset);
	if (skb_check(skb, eth + 1))
		return -1;

	h_proto = eth->h_proto;
	*offset += sizeof(*eth);

#pragma unroll
	for (i = 0; i < 2; i++) {
		struct vlan_hdr *vlh = skb_ptr(skb, *offset);

		if (!proto_is_vlan(h_proto))
			break;

		if (skb_check(skb, vlh + 1))
			return -1;

		h_proto = vlh->h_vlan_encapsulated_proto;
		*offset += sizeof(*vlh);
	}

	return h_proto;
}

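/*
 * Looks up the per-port DSCP value for the packet. A udphdr is used for
 * both TCP and UDP, since the source/dest port fields sit at the same
 * offsets; ICMP gets the configured dscp_icmp value, and any other protocol
 * falls back to entry 0 of udp_ports. The lookup keys on the source port
 * for ingress and the destination port for egress.
 */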
static void
parse_l4proto(struct qosify_config *config, struct __sk_buff *skb,
	      __u32 offset, __u8 proto, bool ingress,
	      __u8 *out_val)
{
	struct udphdr *udp;
	__u32 src, dest, key;
	__u8 *value;

	udp = skb_ptr(skb, offset);
	if (skb_check(skb, &udp->len))
		return;

	if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
		*out_val = config->dscp_icmp;
		return;
	}

	src = READ_ONCE(udp->source);
	dest = READ_ONCE(udp->dest);
	if (ingress)
		key = src;
	else
		key = dest;

	if (proto == IPPROTO_TCP) {
		value = bpf_map_lookup_elem(&tcp_ports, &key);
	} else {
		if (proto != IPPROTO_UDP)
			key = 0;

		value = bpf_map_lookup_elem(&udp_ports, &key);
	}

	if (value)
		*out_val = *value;
}

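/*
 * Bulk flow detection: counts (GSO-aware) packets per check interval (~1 s)
 * and, once the count exceeds bulk_trigger_pps, marks the flow as bulk for
 * bulk_trigger_timeout intervals. Flow state is reset after FLOW_TIMEOUT of
 * inactivity. While the marking is active, the flow gets dscp_bulk.
 */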
static __always_inline void
check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
		struct flow_bucket *flow, __u8 *out_val)
{
	bool trigger = false;
	__s32 delta;
	__u32 time;
	int segs = 1;

	if (!config->bulk_trigger_pps)
		return;

	time = cur_time();
	if (!flow->last_update)
		goto reset;

	delta = time - flow->last_update;
	if ((u32)delta > FLOW_TIMEOUT)
		goto reset;

	if (skb->gso_segs)
		segs = skb->gso_segs;
	flow->pkt_count += segs;
	if (flow->pkt_count > config->bulk_trigger_pps) {
		flow->bulk_timeout = config->bulk_trigger_timeout + 1;
		trigger = true;
	}

	if (delta >= FLOW_CHECK_INTERVAL) {
		if (flow->bulk_timeout && !trigger)
			flow->bulk_timeout--;

		goto clear;
	}

	goto out;

reset:
	flow->pkt_len_avg = 0;
clear:
	flow->pkt_count = 1;
	flow->last_update = time;
out:
	if (flow->bulk_timeout)
		*out_val = config->dscp_bulk;
}

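/*
 * Priority flow detection: flows whose average (GSO-adjusted) packet length
 * stays at or below prio_max_avg_pkt_len get dscp_prio, unless the flow is
 * currently marked as bulk.
 */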
static __always_inline void
check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
		struct flow_bucket *flow, __u8 *out_val)
{
	int cur_len = skb->len;

	if (flow->bulk_timeout)
		return;

	if (!config->prio_max_avg_pkt_len)
		return;

	if (skb->gso_segs > 1)
		cur_len /= skb->gso_segs;

	if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len)
		*out_val = config->dscp_prio;
}

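/*
 * Looks up (or creates) the flow bucket for the skb flow hash and applies
 * the bulk and prio checks to it.
 */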
static __always_inline void
check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
	   __u8 *out_val)
{
	struct flow_bucket flow_data;
	struct flow_bucket *flow;
	__u32 hash;

	if (!config)
		return;

	hash = bpf_get_hash_recalc(skb);
	flow = bpf_map_lookup_elem(&flow_map, &hash);
	if (!flow) {
		memset(&flow_data, 0, sizeof(flow_data));
		bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
		flow = bpf_map_lookup_elem(&flow_map, &hash);
		if (!flow)
			return;
	}

	check_flow_bulk(config, skb, flow, out_val);
	check_flow_prio(config, skb, flow, out_val);
}

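/*
 * Parses the IPv4 header, pulls enough linear data to read the L4 ports,
 * resolves the per-port DSCP value and returns the per-address override
 * entry, if any. Packet pointers must be re-derived after
 * bpf_skb_pull_data() invalidates them.
 */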
static __always_inline struct qosify_ip_map_val *
parse_ipv4(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
	   bool ingress, __u8 *out_val)
{
	struct iphdr *iph;
	__u8 ipproto;
	int hdr_len;
	void *key;

	iph = skb_ptr(skb, *offset);
	if (skb_check(skb, iph + 1))
		return NULL;

	hdr_len = iph->ihl * 4;
	if (bpf_skb_pull_data(skb, *offset + hdr_len + sizeof(struct udphdr)))
		return NULL;

	iph = skb_ptr(skb, *offset);
	*offset += hdr_len;

	if (skb_check(skb, (void *)(iph + 1)))
		return NULL;

	ipproto = iph->protocol;
	parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);

	if (ingress)
		key = &iph->saddr;
	else
		key = &iph->daddr;

	return bpf_map_lookup_elem(&ipv4_map, key);
}

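/* IPv6 counterpart of parse_ipv4(); extension headers are not traversed */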
static __always_inline struct qosify_ip_map_val *
parse_ipv6(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
	   bool ingress, __u8 *out_val)
{
	struct ipv6hdr *iph;
	__u8 ipproto;
	void *key;

	if (bpf_skb_pull_data(skb, *offset + sizeof(*iph) + sizeof(struct udphdr)))
		return NULL;

	iph = skb_ptr(skb, *offset);
	*offset += sizeof(*iph);

	if (skb_check(skb, (void *)(iph + 1)))
		return NULL;

	ipproto = iph->nexthdr;
	if (ingress)
		key = &iph->saddr;
	else
		key = &iph->daddr;

	parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);

	return bpf_map_lookup_elem(&ipv6_map, key);
}

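/*
 * Resolves a DSCP value carrying QOSIFY_DSCP_CLASS_FLAG through class_map,
 * preserving the fallback flag. Returns -1 if the referenced class is
 * missing or not marked present; plain DSCP values pass through unchanged.
 */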
static __always_inline int
dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class)
{
	struct qosify_class *class;
	__u8 fallback_flag;
	__u32 key;

	if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
		return 0;

	fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
	key = *dscp & QOSIFY_DSCP_VALUE_MASK;
	class = bpf_map_lookup_elem(&class_map, &key);
	if (!class)
		return -1;

	if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
		return -1;

	*dscp = dscp_val(&class->val, ingress);
	*dscp |= fallback_flag;
	*out_class = class;

	return 0;
}

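/*
 * Main TC entry point: derives a DSCP value from port, address and class
 * lookups plus per-flow tracking, then rewrites the packet's DS field.
 * Always returns TC_ACT_UNSPEC so regular qdisc classification continues.
 */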
SEC("classifier")
int classify(struct __sk_buff *skb)
{
	bool ingress = module_flags & QOSIFY_INGRESS;
	struct qosify_config *config;
	struct qosify_class *class = NULL;
	struct qosify_ip_map_val *ip_val;
	__u32 offset = 0;
	__u32 iph_offset;
	__u8 dscp;
	bool force;
	int type;

	config = get_config();
	if (!config)
		return TC_ACT_UNSPEC;

	if (module_flags & QOSIFY_IP_ONLY)
		type = skb->protocol;
	else
		type = parse_ethernet(skb, &offset);

	iph_offset = offset;
	if (type == bpf_htons(ETH_P_IP))
		ip_val = parse_ipv4(config, skb, &offset, ingress, &dscp);
	else if (type == bpf_htons(ETH_P_IPV6))
		ip_val = parse_ipv6(config, skb, &offset, ingress, &dscp);
	else
		return TC_ACT_UNSPEC;

	if (ip_val) {
		if (!ip_val->seen)
			ip_val->seen = 1;
		dscp = ip_val->dscp;
	}

	if (dscp_lookup_class(&dscp, ingress, &class))
		return TC_ACT_UNSPEC;

	if (class) {
		check_flow(&class->config, skb, &dscp);

		if (dscp_lookup_class(&dscp, ingress, &class))
			return TC_ACT_UNSPEC;
	}

	/* Compute force before the class/fallback flag bits are masked off */
	force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
	dscp &= GENMASK(5, 0);
	dscp <<= 2;

	if (type == bpf_htons(ETH_P_IP))
		ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
	else if (type == bpf_htons(ETH_P_IPV6))
		ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);

	return TC_ACT_UNSPEC;
}

char _license[] SEC("license") = "GPL";