openwrt/openwrt.git: target/linux/generic/backport-5.10/610-v5.13-20-netfilter-flowtable-add-vlan-support.patch
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:41 +0100
Subject: [PATCH] netfilter: flowtable: add vlan support

Add the vlan id and protocol to the flow tuple to uniquely identify
flows from the receive path. For the transmit path, dev_hard_header() on
the vlan device pushes the headers. This patch includes support for two
vlan headers (QinQ) on the ingress path.

Add a generic encap field to the flowtable entry which stores the
protocol and the tag id. This allows reusing these fields in the PPPoE
support coming in a later patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
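For context on the receive-path key: the outer tag normally arrives as the hardware-accelerated vlan tag already stripped by the driver, and for QinQ a second 802.1Q header still sits in front of the IP header; nf_flow_tuple_encap() records both in the tuple. Below is a minimal userspace sketch in the spirit of that extraction, using simplified stand-ins for sk_buff and the tuple (struct rx_frame, struct encap_key and extract_encap() are illustrative names, not kernel API):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define ETH_P_8021Q		0x8100
#define NF_FLOW_TABLE_ENCAP_MAX	2

/* Illustrative stand-in for the sk_buff fields this patch looks at. */
struct rx_frame {
	bool hwaccel_tag_present;	/* skb_vlan_tag_present() */
	uint16_t hwaccel_tag;		/* skb_vlan_tag_get() */
	uint16_t hwaccel_proto;		/* skb->vlan_proto (host order here) */
	uint16_t protocol;		/* skb->protocol (host order here) */
	uint16_t payload_vlan_tci;	/* TCI of an 802.1Q header still in the payload */
};

struct encap_key {
	uint16_t id;
	uint16_t proto;
};

/* Roughly follows nf_flow_tuple_encap(): slot 0 takes the outer tag,
 * slot 1 the inner one when a second 802.1Q header precedes the IP header. */
static int extract_encap(const struct rx_frame *f,
			 struct encap_key key[NF_FLOW_TABLE_ENCAP_MAX])
{
	int i = 0;

	if (f->hwaccel_tag_present) {
		key[i].id = f->hwaccel_tag;
		key[i].proto = f->hwaccel_proto;
		i++;
	}
	if (f->protocol == ETH_P_8021Q) {
		key[i].id = f->payload_vlan_tci;
		key[i].proto = f->protocol;
		i++;
	}
	return i;	/* number of vlan headers that became part of the key */
}

int main(void)
{
	struct encap_key key[NF_FLOW_TABLE_ENCAP_MAX] = { { 0, 0 } };
	struct rx_frame qinq = {
		.hwaccel_tag_present = true,
		.hwaccel_tag = 100,		/* outer tag, stripped by the NIC */
		.hwaccel_proto = ETH_P_8021Q,
		.protocol = ETH_P_8021Q,	/* inner 802.1Q header follows */
		.payload_vlan_tci = 200,
	};
	int n = extract_encap(&qinq, key);

	printf("%d tags keyed: outer %u, inner %u\n",
	       n, (unsigned)key[0].id, (unsigned)key[1].id);
	return 0;
}

Note that the kernel code takes the protocol of the accelerated tag from skb->vlan_proto rather than hardcoding 802.1Q.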
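The ordering of the two encap slots is also worth noting: the path walk in nft_dev_path_info() records the tag of the upper vlan device first, while on the wire (and hence on the receive path) the outer tag comes first, which is why flow_offload_fill_route() copies the array in reverse when building the tuple. A small userspace sketch of that reversal, again with simplified stand-in structs (struct encap_entry, struct route_in, struct tuple and fill_tuple_encap() are illustrative names only):

#include <stdio.h>

#define NF_FLOW_TABLE_ENCAP_MAX	2

/* Simplified stand-ins for the kernel structures touched by this patch. */
struct encap_entry {
	unsigned short id;
	unsigned short proto;	/* host byte order here, __be16 in the kernel */
};

struct route_in {
	struct encap_entry encap[NF_FLOW_TABLE_ENCAP_MAX];
	unsigned char num_encaps;
};

struct tuple {
	struct encap_entry encap[NF_FLOW_TABLE_ENCAP_MAX];
	unsigned char encap_num;
};

/* Same reversal as the loop added to flow_offload_fill_route(): the route
 * lists the tags as collected from the device stack (top vlan device first),
 * the tuple wants them as seen on the wire (outermost tag first). */
static void fill_tuple_encap(struct tuple *t, const struct route_in *in)
{
	int i, j = 0;

	for (i = in->num_encaps - 1; i >= 0; i--) {
		t->encap[j].id = in->encap[i].id;
		t->encap[j].proto = in->encap[i].proto;
		j++;
	}
	t->encap_num = in->num_encaps;
}

int main(void)
{
	/* QinQ stack eth0.100.200: the path walk records 200, then 100. */
	struct route_in in = {
		.encap = { { 200, 0x8100 }, { 100, 0x8100 } },
		.num_encaps = 2,
	};
	struct tuple t;

	fill_tuple_encap(&t, &in);
	printf("outer: %u, inner: %u\n",
	       (unsigned)t.encap[0].id, (unsigned)t.encap[1].id);
	return 0;
}

With this ordering, tuple->encap[0] normally matches the tag that nf_flow_tuple_encap() reads from the hardware-accelerated field on ingress.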
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_DIRECT,
};

+#define NF_FLOW_TABLE_ENCAP_MAX 2
+
struct flow_offload_tuple {
union {
struct in_addr src_v4;
@@ -113,13 +115,17 @@ struct flow_offload_tuple {

u8 l3proto;
u8 l4proto;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];

/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;

- u8 dir:6,
- xmit_type:2;
-
+ u8 dir:4,
+ xmit_type:2,
+ encap_num:2;
u16 mtu;
union {
struct dst_entry *dst_cache;
@@ -173,6 +179,11 @@ struct nf_flow_route {
struct dst_entry *dst;
struct {
u32 ifindex;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
} in;
struct {
u32 ifindex;
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -80,6 +80,7 @@ static int flow_offload_fill_route(struc
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
struct dst_entry *dst = route->tuple[dir].dst;
+ int i, j = 0;

switch (flow_tuple->l3proto) {
case NFPROTO_IPV4:
@@ -91,6 +92,12 @@ static int flow_offload_fill_route(struc
}

flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+ j++;
+ }
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;

switch (route->tuple[dir].xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int
return thoff != sizeof(struct iphdr);
}

+static void nf_flow_tuple_encap(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ int i = 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
+ tuple->encap[i].proto = skb->vlan_proto;
+ i++;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+ tuple->encap[i].proto = skb->protocol;
+ }
+}
+
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;

- if (!pskb_may_pull(skb, sizeof(*iph)))
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;

- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4);

if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;

+ thoff += offset;
+
switch (iph->protocol) {
case IPPROTO_TCP:
*hdrsize = sizeof(struct tcphdr);
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_bu
if (iph->ttl <= 1)
return -1;

- thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;

- iph = ip_hdr(skb);
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

tuple->src_v4.s_addr = iph->saddr;
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_bu
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);

return 0;
}
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}

+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+{
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth;
+
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ if (veth->h_vlan_encapsulated_proto == proto) {
+ *offset += VLAN_HLEN;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+ struct flow_offload_tuple_rhash *tuplehash)
+{
+ struct vlan_hdr *vlan_hdr;
+ int i;
+
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+ if (skb_vlan_tag_present(skb)) {
+ __vlan_hwaccel_clear_tag(skb);
+ continue;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ vlan_hdr = (struct vlan_hdr *)skb->data;
+ __skb_pull(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vlan_hdr);
+ skb_reset_network_header(skb);
+ break;
+ }
+ }
+}
+
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
unsigned short type)
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, stru
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
+ u32 hdrsize, offset = 0;
+ unsigned int thoff, mtu;
struct rtable *rt;
- unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
- u32 hdrsize;
int ret;

- if (skb->protocol != htons(ETH_P_IP))
+ if (skb->protocol != htons(ETH_P_IP) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
return NF_ACCEPT;

- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;

tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;

- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4) + offset;
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;

@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, stru

flow_offload_refresh(flow_table, flow);

+ nf_flow_encap_pop(skb, tuplehash);
+ thoff -= offset;
+
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);

@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struc
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;

- if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ thoff = sizeof(*ip6h) + offset;
+ if (!pskb_may_pull(skb, thoff))
return -1;

- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);

switch (ip6h->nexthdr) {
case IPPROTO_TCP:
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_
if (ip6h->hop_limit <= 1)
return -1;

- thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;

- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

tuple->src_v6 = ip6h->saddr;
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);

return 0;
}
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, st
const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
+ unsigned int thoff, mtu;
+ u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
- u32 hdrsize;
int ret;

- if (skb->protocol != htons(ETH_P_IPV6))
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
return NF_ACCEPT;

- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;

tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;

- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
- sizeof(*ip6h)))
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ thoff = sizeof(*ip6h) + offset;
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;

if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
}
}

- if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;

flow_offload_refresh(flow_table, flow);

+ nf_flow_encap_pop(skb, tuplehash);
+
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);

--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(con
struct nft_forward_info {
const struct net_device *indev;
const struct net_device *outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
enum flow_offload_xmit_type xmit_type;
@@ -84,9 +89,23 @@ static void nft_dev_path_info(const stru
path = &stack->path[i];
switch (path->type) {
case DEV_PATH_ETHERNET:
+ case DEV_PATH_VLAN:
info->indev = path->dev;
if (is_zero_ether_addr(info->h_source))
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+
+ /* DEV_PATH_VLAN */
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))
@@ -94,7 +113,6 @@ static void nft_dev_path_info(const stru

info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
break;
- case DEV_PATH_VLAN:
default:
info->indev = NULL;
break;
@@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct
struct net_device_path_stack stack;
struct nft_forward_info info = {};
unsigned char ha[ETH_ALEN];
+ int i;

if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
nft_dev_path_info(&stack, &info, ha);
@@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct
return;

route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+ route->tuple[!dir].in.num_encaps = info.num_encaps;

if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);