openwrt/openwrt.git: target/linux/generic/backport-5.10/610-v5.13-20-netfilter-flowtable-add-vlan-support.patch
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:41 +0100
Subject: [PATCH] netfilter: flowtable: add vlan support

Add the vlan id and protocol to the flow tuple to uniquely identify
flows from the receive path. For the transmit path, dev_hard_header() on
the vlan device pushes the headers. This patch includes support for two
vlan headers (QinQ) on the ingress path.

Add a generic encap field to the flowtable entry which stores the
protocol and the tag id. This allows reusing these fields in the PPPoE
support coming in a later patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
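For context on the receive-path key: the outer tag normally arrives as the hardware-accelerated vlan tag already stripped by the driver, and for QinQ a second 802.1Q header still sits in front of the IP header; nf_flow_tuple_encap() records both in the tuple. Below is a minimal userspace sketch in the spirit of that extraction, using simplified stand-ins for sk_buff and the tuple (struct rx_frame, struct encap_key and extract_encap() are illustrative names, not kernel API):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define ETH_P_8021Q		0x8100
#define NF_FLOW_TABLE_ENCAP_MAX	2

/* Illustrative stand-in for the sk_buff fields this patch looks at. */
struct rx_frame {
	bool hwaccel_tag_present;	/* skb_vlan_tag_present() */
	uint16_t hwaccel_tag;		/* skb_vlan_tag_get() */
	uint16_t hwaccel_proto;		/* skb->vlan_proto (host order here) */
	uint16_t protocol;		/* skb->protocol (host order here) */
	uint16_t payload_vlan_tci;	/* TCI of an 802.1Q header still in the payload */
};

struct encap_key {
	uint16_t id;
	uint16_t proto;
};

/* Roughly follows nf_flow_tuple_encap(): slot 0 takes the outer tag,
 * slot 1 the inner one when a second 802.1Q header precedes the IP header. */
static int extract_encap(const struct rx_frame *f,
			 struct encap_key key[NF_FLOW_TABLE_ENCAP_MAX])
{
	int i = 0;

	if (f->hwaccel_tag_present) {
		key[i].id = f->hwaccel_tag;
		key[i].proto = f->hwaccel_proto;
		i++;
	}
	if (f->protocol == ETH_P_8021Q) {
		key[i].id = f->payload_vlan_tci;
		key[i].proto = f->protocol;
		i++;
	}
	return i;	/* number of vlan headers that became part of the key */
}

int main(void)
{
	struct encap_key key[NF_FLOW_TABLE_ENCAP_MAX] = { { 0, 0 } };
	struct rx_frame qinq = {
		.hwaccel_tag_present = true,
		.hwaccel_tag = 100,		/* outer tag, stripped by the NIC */
		.hwaccel_proto = ETH_P_8021Q,
		.protocol = ETH_P_8021Q,	/* inner 802.1Q header follows */
		.payload_vlan_tci = 200,
	};
	int n = extract_encap(&qinq, key);

	printf("%d tags keyed: outer %u, inner %u\n",
	       n, (unsigned)key[0].id, (unsigned)key[1].id);
	return 0;
}

Note that the kernel code takes the protocol of the accelerated tag from skb->vlan_proto rather than hardcoding 802.1Q.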
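The ordering of the two encap slots is also worth noting: the path walk in nft_dev_path_info() records the tag of the upper vlan device first, while on the wire (and hence on the receive path) the outer tag comes first, which is why flow_offload_fill_route() copies the array in reverse when building the tuple. A small userspace sketch of that reversal, again with simplified stand-in structs (struct encap_entry, struct route_in, struct tuple and fill_tuple_encap() are illustrative names only):

#include <stdio.h>

#define NF_FLOW_TABLE_ENCAP_MAX	2

/* Simplified stand-ins for the kernel structures touched by this patch. */
struct encap_entry {
	unsigned short id;
	unsigned short proto;	/* host byte order here, __be16 in the kernel */
};

struct route_in {
	struct encap_entry encap[NF_FLOW_TABLE_ENCAP_MAX];
	unsigned char num_encaps;
};

struct tuple {
	struct encap_entry encap[NF_FLOW_TABLE_ENCAP_MAX];
	unsigned char encap_num;
};

/* Same reversal as the loop added to flow_offload_fill_route(): the route
 * lists the tags as collected from the device stack (top vlan device first),
 * the tuple wants them as seen on the wire (outermost tag first). */
static void fill_tuple_encap(struct tuple *t, const struct route_in *in)
{
	int i, j = 0;

	for (i = in->num_encaps - 1; i >= 0; i--) {
		t->encap[j].id = in->encap[i].id;
		t->encap[j].proto = in->encap[i].proto;
		j++;
	}
	t->encap_num = in->num_encaps;
}

int main(void)
{
	/* QinQ stack eth0.100.200: the path walk records 200, then 100. */
	struct route_in in = {
		.encap = { { 200, 0x8100 }, { 100, 0x8100 } },
		.num_encaps = 2,
	};
	struct tuple t;

	fill_tuple_encap(&t, &in);
	printf("outer: %u, inner: %u\n",
	       (unsigned)t.encap[0].id, (unsigned)t.encap[1].id);
	return 0;
}

With this ordering, tuple->encap[0] normally matches the tag that nf_flow_tuple_encap() reads from the hardware-accelerated field on ingress.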
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_DIRECT,
};

+#define NF_FLOW_TABLE_ENCAP_MAX 2
+
struct flow_offload_tuple {
union {
struct in_addr src_v4;
@@ -113,13 +115,17 @@ struct flow_offload_tuple {

u8 l3proto;
u8 l4proto;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];

/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;

- u8 dir:6,
- xmit_type:2;
-
+ u8 dir:4,
+ xmit_type:2,
+ encap_num:2;
u16 mtu;
union {
struct dst_entry *dst_cache;
@@ -173,6 +179,11 @@ struct nf_flow_route {
struct dst_entry *dst;
struct {
u32 ifindex;
+ struct {
+ u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
} in;
struct {
u32 ifindex;
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -80,6 +80,7 @@ static int flow_offload_fill_route(struc
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
struct dst_entry *dst = route->tuple[dir].dst;
+ int i, j = 0;

switch (flow_tuple->l3proto) {
case NFPROTO_IPV4:
@@ -91,6 +92,12 @@ static int flow_offload_fill_route(struc
}

flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+ j++;
+ }
+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;

switch (route->tuple[dir].xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int
return thoff != sizeof(struct iphdr);
}

+static void nf_flow_tuple_encap(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ int i = 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ tuple->encap[i].id = skb_vlan_tag_get(skb);
+ tuple->encap[i].proto = skb->vlan_proto;
+ i++;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+ tuple->encap[i].proto = skb->protocol;
+ }
+}
+
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;

- if (!pskb_may_pull(skb, sizeof(*iph)))
+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;

- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4);

if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;

+ thoff += offset;
+
switch (iph->protocol) {
case IPPROTO_TCP:
*hdrsize = sizeof(struct tcphdr);
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_bu
if (iph->ttl <= 1)
return -1;

- thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;

- iph = ip_hdr(skb);
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

tuple->src_v4.s_addr = iph->saddr;
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_bu
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);

return 0;
}
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(st
return NF_STOLEN;
}

+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+{
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth;
+
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ if (veth->h_vlan_encapsulated_proto == proto) {
+ *offset += VLAN_HLEN;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+ struct flow_offload_tuple_rhash *tuplehash)
+{
+ struct vlan_hdr *vlan_hdr;
+ int i;
+
+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+ if (skb_vlan_tag_present(skb)) {
+ __vlan_hwaccel_clear_tag(skb);
+ continue;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ vlan_hdr = (struct vlan_hdr *)skb->data;
+ __skb_pull(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vlan_hdr);
+ skb_reset_network_header(skb);
+ break;
+ }
+ }
+}
+
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
unsigned short type)
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, stru
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
+ u32 hdrsize, offset = 0;
+ unsigned int thoff, mtu;
struct rtable *rt;
- unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
- u32 hdrsize;
int ret;

- if (skb->protocol != htons(ETH_P_IP))
+ if (skb->protocol != htons(ETH_P_IP) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
return NF_ACCEPT;

- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;

tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;

- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ thoff = (iph->ihl * 4) + offset;
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;

@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, stru

flow_offload_refresh(flow_table, flow);

+ nf_flow_encap_pop(skb, tuplehash);
+ thoff -= offset;
+
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);

@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struc
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize)
+ struct flow_offload_tuple *tuple, u32 *hdrsize,
+ u32 offset)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;

- if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ thoff = sizeof(*ip6h) + offset;
+ if (!pskb_may_pull(skb, thoff))
return -1;

- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);

switch (ip6h->nexthdr) {
case IPPROTO_TCP:
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_
if (ip6h->hop_limit <= 1)
return -1;

- thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;

- ip6h = ipv6_hdr(skb);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

tuple->src_v6 = ip6h->saddr;
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
+ nf_flow_tuple_encap(skb, tuple);

return 0;
}
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, st
const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
+ unsigned int thoff, mtu;
+ u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
- u32 hdrsize;
int ret;

- if (skb->protocol != htons(ETH_P_IPV6))
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
return NF_ACCEPT;

- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
return NF_ACCEPT;

tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return NF_ACCEPT;

- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
- sizeof(*ip6h)))
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ thoff = sizeof(*ip6h) + offset;
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;

if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
}
}

- if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
+ if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;

flow_offload_refresh(flow_table, flow);

+ nf_flow_encap_pop(skb, tuplehash);
+
ip6h = ipv6_hdr(skb);
nf_flow_nat_ipv6(flow, skb, dir, ip6h);

--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(con
struct nft_forward_info {
const struct net_device *indev;
const struct net_device *outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
enum flow_offload_xmit_type xmit_type;
@@ -84,9 +89,23 @@ static void nft_dev_path_info(const stru
path = &stack->path[i];
switch (path->type) {
case DEV_PATH_ETHERNET:
+ case DEV_PATH_VLAN:
info->indev = path->dev;
if (is_zero_ether_addr(info->h_source))
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+
+ /* DEV_PATH_VLAN */
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))
@@ -94,7 +113,6 @@ static void nft_dev_path_info(const stru

info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
break;
- case DEV_PATH_VLAN:
default:
info->indev = NULL;
break;
@@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct
struct net_device_path_stack stack;
struct nft_forward_info info = {};
unsigned char ha[ETH_ALEN];
+ int i;

if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
nft_dev_path_info(&stack, &info, ha);
@@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct
return;

route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+ route->tuple[!dir].in.num_encaps = info.num_encaps;

if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);