1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Tue, 20 Feb 2018 15:56:02 +0100
3 Subject: [PATCH] netfilter: add xt_FLOWOFFLOAD target
4
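Add an xtables target that places established TCP and UDP connections
into a netfilter flow table, so that subsequent packets of those flows
bypass the regular netfilter chains. Two flow tables are used: one
plain software table and one with NF_FLOWTABLE_HW_OFFLOAD set, selected
via the target's XT_FLOWOFFLOAD_HW flag.

Example ruleset (sketch only; assumes the matching iptables userspace
extension, which provides the FLOWOFFLOAD target and its --hw option,
is installed):

  # software fast path for established flows
  iptables -A FORWARD -m conntrack --ctstate ESTABLISHED -j FLOWOFFLOAD

  # additionally request hardware offload where the driver supports it
  iptables -A FORWARD -m conntrack --ctstate ESTABLISHED -j FLOWOFFLOAD --hw
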
5 Signed-off-by: Felix Fietkau <nbd@nbd.name>
6 ---
7 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
8
9 --- a/net/netfilter/Kconfig
10 +++ b/net/netfilter/Kconfig
11 @@ -729,7 +729,6 @@ config NF_FLOW_TABLE
12 tristate "Netfilter flow table module"
13 depends on NETFILTER_INGRESS
14 depends on NF_CONNTRACK
15 - depends on NF_TABLES
16 help
17 This option adds the flow table core infrastructure.
18
19 @@ -1025,6 +1024,15 @@ config NETFILTER_XT_TARGET_NOTRACK
20 depends on NETFILTER_ADVANCED
21 select NETFILTER_XT_TARGET_CT
22
23 +config NETFILTER_XT_TARGET_FLOWOFFLOAD
24 + tristate '"FLOWOFFLOAD" target support'
25 + depends on NF_FLOW_TABLE
26 + depends on NETFILTER_INGRESS
27 + help
28 + This option adds a `FLOWOFFLOAD' target, which uses the netfilter
29 + flow table infrastructure to speed up processing of packets by
30 + bypassing the usual netfilter chains.
31 +
32 config NETFILTER_XT_TARGET_RATEEST
33 tristate '"RATEEST" target support'
34 depends on NETFILTER_ADVANCED
35 --- a/net/netfilter/Makefile
36 +++ b/net/netfilter/Makefile
37 @@ -163,6 +163,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
38 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
39 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
40 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
41 +obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
42 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
43 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
44 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
45 --- /dev/null
46 +++ b/net/netfilter/xt_FLOWOFFLOAD.c
47 @@ -0,0 +1,703 @@
48 +/*
49 + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
50 + *
51 + * This program is free software; you can redistribute it and/or modify
52 + * it under the terms of the GNU General Public License version 2 as
53 + * published by the Free Software Foundation.
54 + */
55 +#include <linux/module.h>
56 +#include <linux/init.h>
57 +#include <linux/netfilter.h>
58 +#include <linux/netfilter/xt_FLOWOFFLOAD.h>
59 +#include <linux/if_vlan.h>
60 +#include <net/ip.h>
61 +#include <net/netfilter/nf_conntrack.h>
62 +#include <net/netfilter/nf_conntrack_extend.h>
63 +#include <net/netfilter/nf_conntrack_helper.h>
64 +#include <net/netfilter/nf_flow_table.h>
65 +
66 +struct xt_flowoffload_hook {
67 + struct hlist_node list;
68 + struct nf_hook_ops ops;
69 + struct net *net;
70 + bool registered;
71 + bool used;
72 +};
73 +
74 +struct xt_flowoffload_table {
75 + struct nf_flowtable ft;
76 + struct hlist_head hooks;
77 + struct delayed_work work;
78 +};
79 +
80 +struct nf_forward_info {
81 + const struct net_device *indev;
82 + const struct net_device *outdev;
83 + const struct net_device *hw_outdev;
84 + struct id {
85 + __u16 id;
86 + __be16 proto;
87 + } encap[NF_FLOW_TABLE_ENCAP_MAX];
88 + u8 num_encaps;
89 + u8 ingress_vlans;
90 + u8 h_source[ETH_ALEN];
91 + u8 h_dest[ETH_ALEN];
92 + enum flow_offload_xmit_type xmit_type;
93 +};
94 +
95 +static DEFINE_SPINLOCK(hooks_lock);
96 +
97 +struct xt_flowoffload_table flowtable[2];
98 +
99 +static unsigned int
100 +xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
101 + const struct nf_hook_state *state)
102 +{
103 + struct vlan_ethhdr *veth;
104 + __be16 proto;
105 +
106 + switch (skb->protocol) {
107 + case htons(ETH_P_8021Q):
108 + veth = (struct vlan_ethhdr *)skb_mac_header(skb);
109 + proto = veth->h_vlan_encapsulated_proto;
110 + break;
111 + case htons(ETH_P_PPP_SES):
112 + if (!nf_flow_pppoe_proto(skb, &proto))
113 + return NF_ACCEPT;
114 + break;
115 + default:
116 + proto = skb->protocol;
117 + break;
118 + }
119 +
120 + switch (proto) {
121 + case htons(ETH_P_IP):
122 + return nf_flow_offload_ip_hook(priv, skb, state);
123 + case htons(ETH_P_IPV6):
124 + return nf_flow_offload_ipv6_hook(priv, skb, state);
125 + }
126 +
127 + return NF_ACCEPT;
128 +}
129 +
130 +static int
131 +xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
132 + struct net_device *dev)
133 +{
134 + struct xt_flowoffload_hook *hook;
135 + struct nf_hook_ops *ops;
136 +
137 + hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
138 + if (!hook)
139 + return -ENOMEM;
140 +
141 + ops = &hook->ops;
142 + ops->pf = NFPROTO_NETDEV;
143 + ops->hooknum = NF_NETDEV_INGRESS;
144 + ops->priority = 10;
145 + ops->priv = &table->ft;
146 + ops->hook = xt_flowoffload_net_hook;
147 + ops->dev = dev;
148 +
149 + hlist_add_head(&hook->list, &table->hooks);
150 + mod_delayed_work(system_power_efficient_wq, &table->work, 0);
151 +
152 + return 0;
153 +}
154 +
155 +static struct xt_flowoffload_hook *
156 +flow_offload_lookup_hook(struct xt_flowoffload_table *table,
157 + struct net_device *dev)
158 +{
159 + struct xt_flowoffload_hook *hook;
160 +
161 + hlist_for_each_entry(hook, &table->hooks, list) {
162 + if (hook->ops.dev == dev)
163 + return hook;
164 + }
165 +
166 + return NULL;
167 +}
168 +
169 +static void
170 +xt_flowoffload_check_device(struct xt_flowoffload_table *table,
171 + struct net_device *dev)
172 +{
173 + struct xt_flowoffload_hook *hook;
174 +
175 + if (!dev)
176 + return;
177 +
178 + spin_lock_bh(&hooks_lock);
179 + hook = flow_offload_lookup_hook(table, dev);
180 + if (hook)
181 + hook->used = true;
182 + else
183 + xt_flowoffload_create_hook(table, dev);
184 + spin_unlock_bh(&hooks_lock);
185 +}
186 +
187 +static void
188 +xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
189 +{
190 + struct xt_flowoffload_hook *hook;
191 +
192 +restart:
193 + hlist_for_each_entry(hook, &table->hooks, list) {
194 + if (hook->registered)
195 + continue;
196 +
197 + hook->registered = true;
198 + hook->net = dev_net(hook->ops.dev);
199 + spin_unlock_bh(&hooks_lock);
200 + nf_register_net_hook(hook->net, &hook->ops);
201 + if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
202 + table->ft.type->setup(&table->ft, hook->ops.dev,
203 + FLOW_BLOCK_BIND);
204 + spin_lock_bh(&hooks_lock);
205 + goto restart;
206 + }
207 +
208 +}
209 +
210 +static bool
211 +xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
212 +{
213 + struct xt_flowoffload_hook *hook;
214 + bool active = false;
215 +
216 +restart:
217 + spin_lock_bh(&hooks_lock);
218 + hlist_for_each_entry(hook, &table->hooks, list) {
219 + if (hook->used || !hook->registered) {
220 + active = true;
221 + continue;
222 + }
223 +
224 + hlist_del(&hook->list);
225 + spin_unlock_bh(&hooks_lock);
226 + if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
227 + table->ft.type->setup(&table->ft, hook->ops.dev,
228 + FLOW_BLOCK_UNBIND);
229 + nf_unregister_net_hook(hook->net, &hook->ops);
230 + kfree(hook);
231 + goto restart;
232 + }
233 + spin_unlock_bh(&hooks_lock);
234 +
235 + return active;
236 +}
237 +
238 +static void
239 +xt_flowoffload_check_hook(struct nf_flowtable *flowtable,
240 + struct flow_offload *flow, void *data)
241 +{
242 + struct xt_flowoffload_table *table;
243 + struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
244 + struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
245 + struct xt_flowoffload_hook *hook;
246 +
247 + table = container_of(flowtable, struct xt_flowoffload_table, ft);
248 +
249 + spin_lock_bh(&hooks_lock);
250 + hlist_for_each_entry(hook, &table->hooks, list) {
251 + if (hook->ops.dev->ifindex != tuple0->iifidx &&
252 + hook->ops.dev->ifindex != tuple1->iifidx)
253 + continue;
254 +
255 + hook->used = true;
256 + }
257 + spin_unlock_bh(&hooks_lock);
258 +}
259 +
260 +static void
261 +xt_flowoffload_hook_work(struct work_struct *work)
262 +{
263 + struct xt_flowoffload_table *table;
264 + struct xt_flowoffload_hook *hook;
265 + int err;
266 +
267 + table = container_of(work, struct xt_flowoffload_table, work.work);
268 +
269 + spin_lock_bh(&hooks_lock);
270 + xt_flowoffload_register_hooks(table);
271 + hlist_for_each_entry(hook, &table->hooks, list)
272 + hook->used = false;
273 + spin_unlock_bh(&hooks_lock);
274 +
275 + err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
276 + NULL);
277 + if (err && err != -EAGAIN)
278 + goto out;
279 +
280 + if (!xt_flowoffload_cleanup_hooks(table))
281 + return;
282 +
283 +out:
284 + queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
285 +}
286 +
287 +static bool
288 +xt_flowoffload_skip(struct sk_buff *skb, int family)
289 +{
290 + if (skb_sec_path(skb))
291 + return true;
292 +
293 + if (family == NFPROTO_IPV4) {
294 + const struct ip_options *opt = &(IPCB(skb)->opt);
295 +
296 + if (unlikely(opt->optlen))
297 + return true;
298 + }
299 +
300 + return false;
301 +}
302 +
303 +static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
304 +{
305 + if (dst_xfrm(dst))
306 + return FLOW_OFFLOAD_XMIT_XFRM;
307 +
308 + return FLOW_OFFLOAD_XMIT_NEIGH;
309 +}
310 +
311 +static void nf_default_forward_path(struct nf_flow_route *route,
312 + struct dst_entry *dst_cache,
313 + enum ip_conntrack_dir dir,
314 + struct net_device **dev)
315 +{
316 + dev[!dir] = dst_cache->dev;
317 + route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
318 + route->tuple[dir].dst = dst_cache;
319 + route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
320 +}
321 +
322 +static bool nf_is_valid_ether_device(const struct net_device *dev)
323 +{
324 + if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
325 + dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
326 + return false;
327 +
328 + return true;
329 +}
330 +
331 +static void nf_dev_path_info(const struct net_device_path_stack *stack,
332 + struct nf_forward_info *info,
333 + unsigned char *ha)
334 +{
335 + const struct net_device_path *path;
336 + int i;
337 +
338 + memcpy(info->h_dest, ha, ETH_ALEN);
339 +
340 + for (i = 0; i < stack->num_paths; i++) {
341 + path = &stack->path[i];
342 + switch (path->type) {
343 + case DEV_PATH_ETHERNET:
344 + case DEV_PATH_DSA:
345 + case DEV_PATH_VLAN:
346 + case DEV_PATH_PPPOE:
347 + info->indev = path->dev;
348 + if (is_zero_ether_addr(info->h_source))
349 + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
350 +
351 + if (path->type == DEV_PATH_ETHERNET)
352 + break;
353 + if (path->type == DEV_PATH_DSA) {
354 + i = stack->num_paths;
355 + break;
356 + }
357 +
358 + /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
359 + if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
360 + info->indev = NULL;
361 + break;
362 + }
363 + if (!info->outdev)
364 + info->outdev = path->dev;
365 + info->encap[info->num_encaps].id = path->encap.id;
366 + info->encap[info->num_encaps].proto = path->encap.proto;
367 + info->num_encaps++;
368 + if (path->type == DEV_PATH_PPPOE)
369 + memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
370 + break;
371 + case DEV_PATH_BRIDGE:
372 + if (is_zero_ether_addr(info->h_source))
373 + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
374 +
375 + switch (path->bridge.vlan_mode) {
376 + case DEV_PATH_BR_VLAN_UNTAG_HW:
377 + info->ingress_vlans |= BIT(info->num_encaps - 1);
378 + break;
379 + case DEV_PATH_BR_VLAN_TAG:
380 + info->encap[info->num_encaps].id = path->bridge.vlan_id;
381 + info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
382 + info->num_encaps++;
383 + break;
384 + case DEV_PATH_BR_VLAN_UNTAG:
385 + info->num_encaps--;
386 + break;
387 + case DEV_PATH_BR_VLAN_KEEP:
388 + break;
389 + }
390 + break;
391 + default:
392 + info->indev = NULL;
393 + break;
394 + }
395 + }
396 + if (!info->outdev)
397 + info->outdev = info->indev;
398 +
399 + info->hw_outdev = info->indev;
400 +
401 + if (nf_is_valid_ether_device(info->indev))
402 + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
403 +}
404 +
405 +static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
406 + const struct dst_entry *dst_cache,
407 + const struct nf_conn *ct,
408 + enum ip_conntrack_dir dir, u8 *ha,
409 + struct net_device_path_stack *stack)
410 +{
411 + const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
412 + struct net_device *dev = dst_cache->dev;
413 + struct neighbour *n;
414 + u8 nud_state;
415 +
416 + if (!nf_is_valid_ether_device(dev))
417 + goto out;
418 +
419 + n = dst_neigh_lookup(dst_cache, daddr);
420 + if (!n)
421 + return -1;
422 +
423 + read_lock_bh(&n->lock);
424 + nud_state = n->nud_state;
425 + ether_addr_copy(ha, n->ha);
426 + read_unlock_bh(&n->lock);
427 + neigh_release(n);
428 +
429 + if (!(nud_state & NUD_VALID))
430 + return -1;
431 +
432 +out:
433 + return dev_fill_forward_path(dev, ha, stack);
434 +}
435 +
436 +static void nf_dev_forward_path(struct nf_flow_route *route,
437 + const struct nf_conn *ct,
438 + enum ip_conntrack_dir dir,
439 + struct net_device **devs)
440 +{
441 + const struct dst_entry *dst = route->tuple[dir].dst;
442 + struct net_device_path_stack stack;
443 + struct nf_forward_info info = {};
444 + unsigned char ha[ETH_ALEN];
445 + int i;
446 +
447 + if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
448 + nf_dev_path_info(&stack, &info, ha);
449 +
450 + devs[!dir] = (struct net_device *)info.indev;
451 + if (!info.indev)
452 + return;
453 +
454 + route->tuple[!dir].in.ifindex = info.indev->ifindex;
455 + for (i = 0; i < info.num_encaps; i++) {
456 + route->tuple[!dir].in.encap[i].id = info.encap[i].id;
457 + route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
458 + }
459 + route->tuple[!dir].in.num_encaps = info.num_encaps;
460 + route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
461 +
462 + if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
463 + memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
464 + memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
465 + route->tuple[dir].out.ifindex = info.outdev->ifindex;
466 + route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
467 + route->tuple[dir].xmit_type = info.xmit_type;
468 + }
469 +}
470 +
471 +static int
472 +xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
473 + const struct xt_action_param *par,
474 + struct nf_flow_route *route, enum ip_conntrack_dir dir,
475 + struct net_device **devs)
476 +{
477 + struct dst_entry *this_dst = skb_dst(skb);
478 + struct dst_entry *other_dst = NULL;
479 + struct flowi fl;
480 +
481 + memset(&fl, 0, sizeof(fl));
482 + switch (xt_family(par)) {
483 + case NFPROTO_IPV4:
484 + fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
485 + fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
486 + break;
487 + case NFPROTO_IPV6:
488 + fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
489 + fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
490 + fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
491 + break;
492 + }
493 +
494 + if (!dst_hold_safe(this_dst))
495 + return -ENOENT;
496 +
497 + nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
498 + if (!other_dst) {
499 + dst_release(this_dst);
500 + return -ENOENT;
501 + }
502 +
503 + nf_default_forward_path(route, this_dst, dir, devs);
504 + nf_default_forward_path(route, other_dst, !dir, devs);
505 +
506 + if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
507 + route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
508 + nf_dev_forward_path(route, ct, dir, devs);
509 + nf_dev_forward_path(route, ct, !dir, devs);
510 + }
511 +
512 + return 0;
513 +}
514 +
515 +static unsigned int
516 +flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
517 +{
518 + struct xt_flowoffload_table *table;
519 + const struct xt_flowoffload_target_info *info = par->targinfo;
520 + struct tcphdr _tcph, *tcph = NULL;
521 + enum ip_conntrack_info ctinfo;
522 + enum ip_conntrack_dir dir;
523 + struct nf_flow_route route = {};
524 + struct flow_offload *flow = NULL;
525 + struct net_device *devs[2] = {};
526 + struct nf_conn *ct;
527 + struct net *net;
528 +
529 + if (xt_flowoffload_skip(skb, xt_family(par)))
530 + return XT_CONTINUE;
531 +
532 + ct = nf_ct_get(skb, &ctinfo);
533 + if (ct == NULL)
534 + return XT_CONTINUE;
535 +
536 + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
537 + case IPPROTO_TCP:
538 + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
539 + return XT_CONTINUE;
540 +
541 + tcph = skb_header_pointer(skb, par->thoff,
542 + sizeof(_tcph), &_tcph);
543 + if (unlikely(!tcph || tcph->fin || tcph->rst))
544 + return XT_CONTINUE;
545 + break;
546 + case IPPROTO_UDP:
547 + break;
548 + default:
549 + return XT_CONTINUE;
550 + }
551 +
552 + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
553 + ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH))
554 + return XT_CONTINUE;
555 +
556 + if (!nf_ct_is_confirmed(ct))
557 + return XT_CONTINUE;
558 +
559 + dir = CTINFO2DIR(ctinfo);
560 +
561 + devs[dir] = xt_out(par);
562 + devs[!dir] = xt_in(par);
563 +
564 + if (!devs[dir] || !devs[!dir])
565 + return XT_CONTINUE;
566 +
567 + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
568 + return XT_CONTINUE;
569 +
570 + if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
571 + goto err_flow_route;
572 +
573 + flow = flow_offload_alloc(ct);
574 + if (!flow)
575 + goto err_flow_alloc;
576 +
577 + flow_offload_route_init(flow, &route);
578 +
579 + if (tcph) {
580 + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
581 + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
582 + }
583 +
584 + table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
585 +
586 + net = read_pnet(&table->ft.net);
587 + if (!net)
588 + write_pnet(&table->ft.net, xt_net(par));
589 +
590 + __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
591 + if (flow_offload_add(&table->ft, flow) < 0)
592 + goto err_flow_add;
593 +
594 + xt_flowoffload_check_device(table, devs[0]);
595 + xt_flowoffload_check_device(table, devs[1]);
596 +
597 + return XT_CONTINUE;
598 +
599 +err_flow_add:
600 + flow_offload_free(flow);
601 +err_flow_alloc:
602 + dst_release(route.tuple[dir].dst);
603 + dst_release(route.tuple[!dir].dst);
604 +err_flow_route:
605 + clear_bit(IPS_OFFLOAD_BIT, &ct->status);
606 +
607 + return XT_CONTINUE;
608 +}
609 +
610 +static int flowoffload_chk(const struct xt_tgchk_param *par)
611 +{
612 + struct xt_flowoffload_target_info *info = par->targinfo;
613 +
614 + if (info->flags & ~XT_FLOWOFFLOAD_MASK)
615 + return -EINVAL;
616 +
617 + return 0;
618 +}
619 +
620 +static struct xt_target offload_tg_reg __read_mostly = {
621 + .family = NFPROTO_UNSPEC,
622 + .name = "FLOWOFFLOAD",
623 + .revision = 0,
624 + .targetsize = sizeof(struct xt_flowoffload_target_info),
625 + .usersize = sizeof(struct xt_flowoffload_target_info),
626 + .checkentry = flowoffload_chk,
627 + .target = flowoffload_tg,
628 + .me = THIS_MODULE,
629 +};
630 +
631 +static int flow_offload_netdev_event(struct notifier_block *this,
632 + unsigned long event, void *ptr)
633 +{
634 + struct xt_flowoffload_hook *hook0, *hook1;
635 + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
636 +
637 + if (event != NETDEV_UNREGISTER)
638 + return NOTIFY_DONE;
639 +
640 + spin_lock_bh(&hooks_lock);
641 + hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
642 + if (hook0)
643 + hlist_del(&hook0->list);
644 +
645 + hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
646 + if (hook1)
647 + hlist_del(&hook1->list);
648 + spin_unlock_bh(&hooks_lock);
649 +
650 + if (hook0) {
651 + nf_unregister_net_hook(hook0->net, &hook0->ops);
652 + kfree(hook0);
653 + }
654 +
655 + if (hook1) {
656 + nf_unregister_net_hook(hook1->net, &hook1->ops);
657 + kfree(hook1);
658 + }
659 +
660 + nf_flow_table_cleanup(dev);
661 +
662 + return NOTIFY_DONE;
663 +}
664 +
665 +static struct notifier_block flow_offload_netdev_notifier = {
666 + .notifier_call = flow_offload_netdev_event,
667 +};
668 +
669 +static int nf_flow_rule_route_inet(struct net *net,
670 + struct flow_offload *flow,
671 + enum flow_offload_tuple_dir dir,
672 + struct nf_flow_rule *flow_rule)
673 +{
674 + const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
675 + int err;
676 +
677 + switch (flow_tuple->l3proto) {
678 + case NFPROTO_IPV4:
679 + err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
680 + break;
681 + case NFPROTO_IPV6:
682 + err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
683 + break;
684 + default:
685 + err = -1;
686 + break;
687 + }
688 +
689 + return err;
690 +}
691 +
692 +static struct nf_flowtable_type flowtable_inet = {
693 + .family = NFPROTO_INET,
694 + .init = nf_flow_table_init,
695 + .setup = nf_flow_table_offload_setup,
696 + .action = nf_flow_rule_route_inet,
697 + .free = nf_flow_table_free,
698 + .hook = xt_flowoffload_net_hook,
699 + .owner = THIS_MODULE,
700 +};
701 +
702 +static int init_flowtable(struct xt_flowoffload_table *tbl)
703 +{
704 + INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
705 + tbl->ft.type = &flowtable_inet;
706 + tbl->ft.flags = NF_FLOWTABLE_COUNTER;
707 +
708 + return nf_flow_table_init(&tbl->ft);
709 +}
710 +
711 +static int __init xt_flowoffload_tg_init(void)
712 +{
713 + int ret;
714 +
715 + register_netdevice_notifier(&flow_offload_netdev_notifier);
716 +
717 + ret = init_flowtable(&flowtable[0]);
718 + if (ret)
719 + return ret;
720 +
721 + ret = init_flowtable(&flowtable[1]);
722 + if (ret)
723 + goto cleanup;
724 +
725 + flowtable[1].ft.flags |= NF_FLOWTABLE_HW_OFFLOAD;
726 +
727 + ret = xt_register_target(&offload_tg_reg);
728 + if (ret)
729 + goto cleanup2;
730 +
731 + return 0;
732 +
733 +cleanup2:
734 + nf_flow_table_free(&flowtable[1].ft);
735 +cleanup:
736 + nf_flow_table_free(&flowtable[0].ft);
737 + return ret;
738 +}
739 +
740 +static void __exit xt_flowoffload_tg_exit(void)
741 +{
742 + xt_unregister_target(&offload_tg_reg);
743 + unregister_netdevice_notifier(&flow_offload_netdev_notifier);
744 + nf_flow_table_free(&flowtable[0].ft);
745 + nf_flow_table_free(&flowtable[1].ft);
746 +}
747 +
748 +MODULE_LICENSE("GPL");
749 +module_init(xt_flowoffload_tg_init);
750 +module_exit(xt_flowoffload_tg_exit);
751 --- a/net/netfilter/nf_flow_table_core.c
752 +++ b/net/netfilter/nf_flow_table_core.c
753 @@ -7,7 +7,6 @@
754 #include <linux/netdevice.h>
755 #include <net/ip.h>
756 #include <net/ip6_route.h>
757 -#include <net/netfilter/nf_tables.h>
758 #include <net/netfilter/nf_flow_table.h>
759 #include <net/netfilter/nf_conntrack.h>
760 #include <net/netfilter/nf_conntrack_core.h>
761 @@ -377,8 +376,7 @@ flow_offload_lookup(struct nf_flowtable
762 }
763 EXPORT_SYMBOL_GPL(flow_offload_lookup);
764
765 -static int
766 -nf_flow_table_iterate(struct nf_flowtable *flow_table,
767 +int nf_flow_table_iterate(struct nf_flowtable *flow_table,
768 void (*iter)(struct nf_flowtable *flowtable,
769 struct flow_offload *flow, void *data),
770 void *data)
771 @@ -439,6 +437,7 @@ static void nf_flow_offload_gc_step(stru
772 nf_flow_offload_stats(flow_table, flow);
773 }
774 }
775 +EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
776
777 void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
778 {
779 --- /dev/null
780 +++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
781 @@ -0,0 +1,17 @@
782 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
783 +#ifndef _XT_FLOWOFFLOAD_H
784 +#define _XT_FLOWOFFLOAD_H
785 +
786 +#include <linux/types.h>
787 +
788 +enum {
789 + XT_FLOWOFFLOAD_HW = 1 << 0,
790 +
791 + XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
792 +};
793 +
794 +struct xt_flowoffload_target_info {
795 + __u32 flags;
796 +};
797 +
798 +#endif /* _XT_FLOWOFFLOAD_H */
799 --- a/include/net/netfilter/nf_flow_table.h
800 +++ b/include/net/netfilter/nf_flow_table.h
801 @@ -293,6 +293,11 @@ void nf_flow_table_free(struct nf_flowta
802
803 void flow_offload_teardown(struct flow_offload *flow);
804
805 +int nf_flow_table_iterate(struct nf_flowtable *flow_table,
806 + void (*iter)(struct nf_flowtable *flowtable,
807 + struct flow_offload *flow, void *data),
808 + void *data);
809 +
810 void nf_flow_snat_port(const struct flow_offload *flow,
811 struct sk_buff *skb, unsigned int thoff,
812 u8 protocol, enum flow_offload_tuple_dir dir);