kernel: cake: backport upstream tweaks & fixes
author: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Sat, 18 Jan 2020 15:35:38 +0000 (15:35 +0000)
committer: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Tue, 30 Jun 2020 15:09:18 +0000 (16:09 +0100)
From upstream:
b8392808eb3f sch_cake: add RFC 8622 LE PHB support to CAKE diffserv handling
3f608f0c4136 sch_cake: fix a few style nits
8c95eca0bb8c sch_cake: don't call diffserv parsing code when it is not needed
9208d2863ac6 sch_cake: don't try to reallocate or unshare skb unconditionally

From netdev (submitted, not yet accepted upstream):
sch_cake: fix IP protocol handling in the presence of VLAN tags

The VLAN tag handling problem actually affects more than just cake, so
upstream is working out how to fix it generically.  For now we fix it
here just for cake.

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
13 files changed:
target/linux/generic/backport-4.19/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch
target/linux/generic/backport-4.19/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/398-5.8-sch_cake-fix-a-few-style-nits.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch [new file with mode: 0644]
target/linux/generic/backport-5.4/393-v5.5-sch_cake-drop-unused-variable-tin_quantum_prio.patch
target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch
target/linux/generic/backport-5.4/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch [new file with mode: 0644]
target/linux/generic/backport-5.4/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch [new file with mode: 0644]
target/linux/generic/backport-5.4/398-5.8-sch_cake-fix-a-few-style-nits.patch [new file with mode: 0644]
target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch [new file with mode: 0644]
target/linux/generic/hack-4.19/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch [new file with mode: 0644]
target/linux/generic/hack-5.4/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch [new file with mode: 0644]

index 7b3396c6c3f87191cb1b639920b717ee0c4bf261..12962135d05efcb3d331194955f72a817bfd9908 100644 (file)
@@ -53,7 +53,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
 
 --- a/net/sched/sch_cake.c
 +++ b/net/sched/sch_cake.c
-@@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co
+@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct co
        return drop;
  }
  
@@ -110,7 +110,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
  #endif
  }
  
-@@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode)
+@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
  static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
                     int flow_mode, u16 flow_override, u16 host_override)
  {
@@ -152,7 +152,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
  
        /* flow_hash_from_keys() sorts the addresses by value, so we have
         * to preserve their order in a separate data structure to treat
-@@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat
+@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_dat
        /* This *must* be after the above switch, since as a
         * side-effect it sorts the src and dst addresses.
         */
diff --git a/target/linux/generic/backport-4.19/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch b/target/linux/generic/backport-4.19/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch
new file mode 100644 (file)
index 0000000..a36095c
--- /dev/null
@@ -0,0 +1,96 @@
+From 9208d2863ac689a563b92f2161d8d1e7127d0add Mon Sep 17 00:00:00 2001
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+Date: Thu, 25 Jun 2020 22:12:07 +0200
+Subject: [PATCH] sch_cake: don't try to reallocate or unshare skb
+ unconditionally
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+cake_handle_diffserv() tries to linearize mac and network header parts of
+skb and to make it writable unconditionally. In some cases it leads to full
+skb reallocation, which reduces throughput and increases CPU load. Some
+measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core
+CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable()
+reallocates skb, if skb was allocated in ethernet driver via so-called
+'build skb' method from page cache (it was discovered by strange increase
+of kmalloc-2048 slab at first).
+
+Obtain DSCP value via read-only skb_header_pointer() call, and leave
+linearization only for DSCP bleaching or ECN CE setting. And, as an
+additional optimisation, skip diffserv parsing entirely if it is not needed
+by the current configuration.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+[ fix a few style issues, reflow commit message ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdi
+ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+ {
+-      int wlen = skb_network_offset(skb);
++      const int offset = skb_network_offset(skb);
++      u16 *buf, buf_;
+       u8 dscp;
+       switch (tc_skb_protocol(skb)) {
+       case htons(ETH_P_IP):
+-              wlen += sizeof(struct iphdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* ToS is in the second byte of iphdr */
++              dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct iphdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_IPV6):
+-              wlen += sizeof(struct ipv6hdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* Traffic class is in the first and second bytes of ipv6hdr */
++              dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct ipv6hdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_ARP):
diff --git a/target/linux/generic/backport-4.19/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch b/target/linux/generic/backport-4.19/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch
new file mode 100644 (file)
index 0000000..b40bb36
--- /dev/null
@@ -0,0 +1,62 @@
+From 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:08 +0200
+Subject: [PATCH] sch_cake: don't call diffserv parsing code when it is not
+ needed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As a further optimisation of the diffserv parsing codepath, we can skip it
+entirely if CAKE is configured to neither use diffserv-based
+classification, nor to zero out the diffserv bits.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1551,7 +1551,7 @@ static unsigned int cake_drop(struct Qdi
+       return idx + (tin << 16);
+ }
+-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
++static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
+ {
+       const int offset = skb_network_offset(skb);
+       u16 *buf, buf_;
+@@ -1612,14 +1612,17 @@ static struct cake_tin_data *cake_select
+ {
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 tin, mark;
++      bool wash;
+       u8 dscp;
+       /* Tin selection: Default to diffserv-based selection, allow overriding
+-       * using firewall marks or skb->priority.
++       * using firewall marks or skb->priority. Call DSCP parsing early if
++       * wash is enabled, otherwise defer to below to skip unneeded parsing.
+        */
+-      dscp = cake_handle_diffserv(skb,
+-                                  q->rate_flags & CAKE_FLAG_WASH);
+       mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
++      wash = !!(q->rate_flags & CAKE_FLAG_WASH);
++      if (wash)
++              dscp = cake_handle_diffserv(skb, wash);
+       if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+               tin = 0;
+@@ -1630,6 +1633,8 @@ static struct cake_tin_data *cake_select
+               tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+       else {
++              if (!wash)
++                      dscp = cake_handle_diffserv(skb, wash);
+               tin = q->tin_index[dscp];
+               if (unlikely(tin >= q->tin_cnt))
diff --git a/target/linux/generic/backport-4.19/398-5.8-sch_cake-fix-a-few-style-nits.patch b/target/linux/generic/backport-4.19/398-5.8-sch_cake-fix-a-few-style-nits.patch
new file mode 100644 (file)
index 0000000..a1d7211
--- /dev/null
@@ -0,0 +1,40 @@
+From 3f608f0c41360b11b04c763f348b712f651c8bac Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:09 +0200
+Subject: [PATCH] sch_cake: fix a few style nits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+I spotted a few nits when comparing the in-tree version of sch_cake with
+the out-of-tree one: A redundant error variable declaration shadowing an
+outer declaration, and an indentation alignment issue. Fix both of these.
+
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2713,7 +2713,7 @@ static int cake_init(struct Qdisc *sch,
+       qdisc_watchdog_init(&q->watchdog, sch);
+       if (opt) {
+-              int err = cake_change(sch, opt, extack);
++              err = cake_change(sch, opt, extack);
+               if (err)
+                       return err;
+@@ -3030,7 +3030,7 @@ static int cake_dump_class_stats(struct
+                       PUT_STAT_S32(BLUE_TIMER_US,
+                                    ktime_to_us(
+                                            ktime_sub(now,
+-                                                   flow->cvars.blue_timer)));
++                                                     flow->cvars.blue_timer)));
+               }
+               if (flow->cvars.dropping) {
+                       PUT_STAT_S32(DROP_NEXT_US,
diff --git a/target/linux/generic/backport-4.19/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch b/target/linux/generic/backport-4.19/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch
new file mode 100644 (file)
index 0000000..e171b4c
--- /dev/null
@@ -0,0 +1,57 @@
+From b8392808eb3fc28e523e28cb258c81ca246deb9b Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Thu, 25 Jun 2020 22:18:00 +0200
+Subject: [PATCH] sch_cake: add RFC 8622 LE PHB support to CAKE diffserv
+ handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Change tin mapping on diffserv3, 4 & 8 for LE PHB support, in essence
+making LE a member of the Bulk tin.
+
+Bulk has the least priority and minimum of 1/16th total bandwidth in the
+face of higher priority traffic.
+
+NB: Diffserv 3 & 4 swap tin 0 & 1 priorities from the default order as
+found in diffserv8, in case anyone is wondering why it looks a bit odd.
+
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+[ reword commit message slightly ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ net/sched/sch_cake.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -312,8 +312,8 @@ static const u8 precedence[] = {
+ };
+ static const u8 diffserv8[] = {
+-      2, 5, 1, 2, 4, 2, 2, 2,
+-      0, 2, 1, 2, 1, 2, 1, 2,
++      2, 0, 1, 2, 4, 2, 2, 2,
++      1, 2, 1, 2, 1, 2, 1, 2,
+       5, 2, 4, 2, 4, 2, 4, 2,
+       3, 2, 3, 2, 3, 2, 3, 2,
+       6, 2, 3, 2, 3, 2, 3, 2,
+@@ -323,7 +323,7 @@ static const u8 diffserv8[] = {
+ };
+ static const u8 diffserv4[] = {
+-      0, 2, 0, 0, 2, 0, 0, 0,
++      0, 1, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+@@ -334,7 +334,7 @@ static const u8 diffserv4[] = {
+ };
+ static const u8 diffserv3[] = {
+-      0, 0, 0, 0, 2, 0, 0, 0,
++      0, 1, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
index 33e5c54b8ccf1d31ecd6c7592a9305dfcb8a049a..2be0d36392b3331f744ae6c617e2e7d35243e7d0 100644 (file)
@@ -32,7 +32,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
        s32     tin_deficit;
        u32     tin_backlog;
        u32     tin_dropped;
-@@ -1916,7 +1915,7 @@ begin:
+@@ -1919,7 +1918,7 @@ begin:
                while (b->tin_deficit < 0 ||
                       !(b->sparse_flow_count + b->bulk_flow_count)) {
                        if (b->tin_deficit <= 0)
@@ -41,7 +41,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
                        if (b->sparse_flow_count + b->bulk_flow_count)
                                empty = false;
  
-@@ -2237,8 +2236,7 @@ static int cake_config_besteffort(struct
+@@ -2241,8 +2240,7 @@ static int cake_config_besteffort(struct
  
        cake_set_rate(b, rate, mtu,
                      us_to_ns(q->target), us_to_ns(q->interval));
@@ -51,7 +51,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
  
        return 0;
  }
-@@ -2249,8 +2247,7 @@ static int cake_config_precedence(struct
+@@ -2253,8 +2251,7 @@ static int cake_config_precedence(struct
        struct cake_sched_data *q = qdisc_priv(sch);
        u32 mtu = psched_mtu(qdisc_dev(sch));
        u64 rate = q->rate_bps;
@@ -61,7 +61,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
        u32 i;
  
        q->tin_cnt = 8;
-@@ -2263,18 +2260,14 @@ static int cake_config_precedence(struct
+@@ -2267,18 +2264,14 @@ static int cake_config_precedence(struct
                cake_set_rate(b, rate, mtu, us_to_ns(q->target),
                              us_to_ns(q->interval));
  
@@ -83,7 +83,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
        }
  
        return 0;
-@@ -2343,8 +2336,7 @@ static int cake_config_diffserv8(struct
+@@ -2347,8 +2340,7 @@ static int cake_config_diffserv8(struct
        struct cake_sched_data *q = qdisc_priv(sch);
        u32 mtu = psched_mtu(qdisc_dev(sch));
        u64 rate = q->rate_bps;
@@ -93,7 +93,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
        u32 i;
  
        q->tin_cnt = 8;
-@@ -2360,18 +2352,14 @@ static int cake_config_diffserv8(struct
+@@ -2364,18 +2356,14 @@ static int cake_config_diffserv8(struct
                cake_set_rate(b, rate, mtu, us_to_ns(q->target),
                              us_to_ns(q->interval));
  
@@ -115,7 +115,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
        }
  
        return 0;
-@@ -2410,17 +2398,11 @@ static int cake_config_diffserv4(struct
+@@ -2414,17 +2402,11 @@ static int cake_config_diffserv4(struct
        cake_set_rate(&q->tins[3], rate >> 2, mtu,
                      us_to_ns(q->target), us_to_ns(q->interval));
  
@@ -137,7 +137,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net>
  
        return 0;
  }
-@@ -2451,15 +2433,10 @@ static int cake_config_diffserv3(struct
+@@ -2455,15 +2437,10 @@ static int cake_config_diffserv3(struct
        cake_set_rate(&q->tins[2], rate >> 2, mtu,
                      us_to_ns(q->target), us_to_ns(q->interval));
  
index 7b3396c6c3f87191cb1b639920b717ee0c4bf261..12962135d05efcb3d331194955f72a817bfd9908 100644 (file)
@@ -53,7 +53,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
 
 --- a/net/sched/sch_cake.c
 +++ b/net/sched/sch_cake.c
-@@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co
+@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct co
        return drop;
  }
  
@@ -110,7 +110,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
  #endif
  }
  
-@@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode)
+@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
  static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
                     int flow_mode, u16 flow_override, u16 host_override)
  {
@@ -152,7 +152,7 @@ Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
  
        /* flow_hash_from_keys() sorts the addresses by value, so we have
         * to preserve their order in a separate data structure to treat
-@@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat
+@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_dat
        /* This *must* be after the above switch, since as a
         * side-effect it sorts the src and dst addresses.
         */
diff --git a/target/linux/generic/backport-5.4/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch b/target/linux/generic/backport-5.4/396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch
new file mode 100644 (file)
index 0000000..a36095c
--- /dev/null
@@ -0,0 +1,96 @@
+From 9208d2863ac689a563b92f2161d8d1e7127d0add Mon Sep 17 00:00:00 2001
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+Date: Thu, 25 Jun 2020 22:12:07 +0200
+Subject: [PATCH] sch_cake: don't try to reallocate or unshare skb
+ unconditionally
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+cake_handle_diffserv() tries to linearize mac and network header parts of
+skb and to make it writable unconditionally. In some cases it leads to full
+skb reallocation, which reduces throughput and increases CPU load. Some
+measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core
+CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable()
+reallocates skb, if skb was allocated in ethernet driver via so-called
+'build skb' method from page cache (it was discovered by strange increase
+of kmalloc-2048 slab at first).
+
+Obtain DSCP value via read-only skb_header_pointer() call, and leave
+linearization only for DSCP bleaching or ECN CE setting. And, as an
+additional optimisation, skip diffserv parsing entirely if it is not needed
+by the current configuration.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+[ fix a few style issues, reflow commit message ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdi
+ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+ {
+-      int wlen = skb_network_offset(skb);
++      const int offset = skb_network_offset(skb);
++      u16 *buf, buf_;
+       u8 dscp;
+       switch (tc_skb_protocol(skb)) {
+       case htons(ETH_P_IP):
+-              wlen += sizeof(struct iphdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* ToS is in the second byte of iphdr */
++              dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct iphdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_IPV6):
+-              wlen += sizeof(struct ipv6hdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* Traffic class is in the first and second bytes of ipv6hdr */
++              dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct ipv6hdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_ARP):
diff --git a/target/linux/generic/backport-5.4/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch b/target/linux/generic/backport-5.4/397-5.8-sch_cake-don-t-call-diffserv-parsing-code-when-it-is.patch
new file mode 100644 (file)
index 0000000..2d9cd29
--- /dev/null
@@ -0,0 +1,62 @@
+From 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:08 +0200
+Subject: [PATCH] sch_cake: don't call diffserv parsing code when it is not
+ needed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As a further optimisation of the diffserv parsing codepath, we can skip it
+entirely if CAKE is configured to neither use diffserv-based
+classification, nor to zero out the diffserv bits.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1551,7 +1551,7 @@ static unsigned int cake_drop(struct Qdi
+       return idx + (tin << 16);
+ }
+-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
++static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
+ {
+       const int offset = skb_network_offset(skb);
+       u16 *buf, buf_;
+@@ -1612,14 +1612,17 @@ static struct cake_tin_data *cake_select
+ {
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 tin, mark;
++      bool wash;
+       u8 dscp;
+       /* Tin selection: Default to diffserv-based selection, allow overriding
+-       * using firewall marks or skb->priority.
++       * using firewall marks or skb->priority. Call DSCP parsing early if
++       * wash is enabled, otherwise defer to below to skip unneeded parsing.
+        */
+-      dscp = cake_handle_diffserv(skb,
+-                                  q->rate_flags & CAKE_FLAG_WASH);
+       mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
++      wash = !!(q->rate_flags & CAKE_FLAG_WASH);
++      if (wash)
++              dscp = cake_handle_diffserv(skb, wash);
+       if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+               tin = 0;
+@@ -1633,6 +1636,8 @@ static struct cake_tin_data *cake_select
+               tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+       else {
++              if (!wash)
++                      dscp = cake_handle_diffserv(skb, wash);
+               tin = q->tin_index[dscp];
+               if (unlikely(tin >= q->tin_cnt))
diff --git a/target/linux/generic/backport-5.4/398-5.8-sch_cake-fix-a-few-style-nits.patch b/target/linux/generic/backport-5.4/398-5.8-sch_cake-fix-a-few-style-nits.patch
new file mode 100644 (file)
index 0000000..1160489
--- /dev/null
@@ -0,0 +1,40 @@
+From 3f608f0c41360b11b04c763f348b712f651c8bac Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:09 +0200
+Subject: [PATCH] sch_cake: fix a few style nits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+I spotted a few nits when comparing the in-tree version of sch_cake with
+the out-of-tree one: A redundant error variable declaration shadowing an
+outer declaration, and an indentation alignment issue. Fix both of these.
+
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2717,7 +2717,7 @@ static int cake_init(struct Qdisc *sch,
+       qdisc_watchdog_init(&q->watchdog, sch);
+       if (opt) {
+-              int err = cake_change(sch, opt, extack);
++              err = cake_change(sch, opt, extack);
+               if (err)
+                       return err;
+@@ -3034,7 +3034,7 @@ static int cake_dump_class_stats(struct
+                       PUT_STAT_S32(BLUE_TIMER_US,
+                                    ktime_to_us(
+                                            ktime_sub(now,
+-                                                   flow->cvars.blue_timer)));
++                                                     flow->cvars.blue_timer)));
+               }
+               if (flow->cvars.dropping) {
+                       PUT_STAT_S32(DROP_NEXT_US,
diff --git a/target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch b/target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch
new file mode 100644 (file)
index 0000000..e171b4c
--- /dev/null
@@ -0,0 +1,57 @@
+From b8392808eb3fc28e523e28cb258c81ca246deb9b Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Thu, 25 Jun 2020 22:18:00 +0200
+Subject: [PATCH] sch_cake: add RFC 8622 LE PHB support to CAKE diffserv
+ handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Change tin mapping on diffserv3, 4 & 8 for LE PHB support, in essence
+making LE a member of the Bulk tin.
+
+Bulk has the least priority and minimum of 1/16th total bandwidth in the
+face of higher priority traffic.
+
+NB: Diffserv 3 & 4 swap tin 0 & 1 priorities from the default order as
+found in diffserv8, in case anyone is wondering why it looks a bit odd.
+
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+[ reword commit message slightly ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ net/sched/sch_cake.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -312,8 +312,8 @@ static const u8 precedence[] = {
+ };
+ static const u8 diffserv8[] = {
+-      2, 5, 1, 2, 4, 2, 2, 2,
+-      0, 2, 1, 2, 1, 2, 1, 2,
++      2, 0, 1, 2, 4, 2, 2, 2,
++      1, 2, 1, 2, 1, 2, 1, 2,
+       5, 2, 4, 2, 4, 2, 4, 2,
+       3, 2, 3, 2, 3, 2, 3, 2,
+       6, 2, 3, 2, 3, 2, 3, 2,
+@@ -323,7 +323,7 @@ static const u8 diffserv8[] = {
+ };
+ static const u8 diffserv4[] = {
+-      0, 2, 0, 0, 2, 0, 0, 0,
++      0, 1, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+@@ -334,7 +334,7 @@ static const u8 diffserv4[] = {
+ };
+ static const u8 diffserv3[] = {
+-      0, 0, 0, 0, 2, 0, 0, 0,
++      0, 1, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/target/linux/generic/hack-4.19/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch b/target/linux/generic/hack-4.19/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch
new file mode 100644 (file)
index 0000000..e651743
--- /dev/null
@@ -0,0 +1,114 @@
+From a00590d570212c3c633bd463cef8ec7377cc7993 Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Tue, 30 Jun 2020 12:07:44 +0100
+Subject: [PATCH] sch_cake: fix IP protocol handling in the presence of VLAN
+ tags
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+
+CAKE was using the return value of tc_skb_protocol() and expecting it to be
+the IP EtherType (IPv4 or IPv6). This can fail in the presence of QinQ VLAN
+tags, making CAKE unable to handle ECN marking and diffserv parsing.
+Fix this by implementing our own version of tc_skb_protocol(), which will
+use skb->protocol directly, but also parse and skip over any VLAN tags and
+return the inner protocol number instead.
+
+Also fix CE marking by implementing a version of INET_ECN_set_ce() that
+uses the same parsing routine.
+
+Fixes: ea82511518f4 ("sch_cake: Add NAT awareness to packet classifier")
+Fixes: b2100cc56fca ("sch_cake: Use tc_skb_protocol() helper for getting packet protocol")
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+[ squash original two patches, rewrite commit message ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 52 +++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 49 insertions(+), 3 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -497,6 +497,52 @@ static bool cobalt_queue_empty(struct co
+       return down;
+ }
++static __be16 cake_skb_proto(const struct sk_buff *skb)
++{
++      unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr);
++      __be16 proto = skb->protocol;
++      struct vlan_hdr vhdr, *vh;
++
++      while (proto == htons(ETH_P_8021Q) || proto == htons(ETH_P_8021AD)) {
++              vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr);
++              if (!vh)
++                      break;
++
++              proto = vh->h_vlan_encapsulated_proto;
++              offset += sizeof(vhdr);
++      }
++
++      return proto;
++}
++
++static int cake_set_ce(struct sk_buff *skb)
++{
++      int wlen = skb_network_offset(skb);
++
++      switch (cake_skb_proto(skb)) {
++      case htons(ETH_P_IP):
++              wlen += sizeof(struct iphdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
++              return IP_ECN_set_ce(ip_hdr(skb));
++
++      case htons(ETH_P_IPV6):
++              wlen += sizeof(struct ipv6hdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
++              return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
++
++      default:
++              return 0;
++      }
++
++      return 0;
++}
++
+ /* Call this with a freshly dequeued packet for possible congestion marking.
+  * Returns true as an instruction to drop the packet, false for delivery.
+  */
+@@ -549,7 +595,7 @@ static bool cobalt_should_drop(struct co
+       if (next_due && vars->dropping) {
+               /* Use ECN mark if possible, otherwise drop */
+-              drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
++              drop = !(vars->ecn_marked = cake_set_ce(skb));
+               vars->count++;
+               if (!vars->count)
+@@ -592,7 +638,7 @@ static bool cake_update_flowkeys(struct
+       bool rev = !skb->_nfct, upd = false;
+       __be32 ip;
+-      if (tc_skb_protocol(skb) != htons(ETH_P_IP))
++      if (cake_skb_proto(skb) != htons(ETH_P_IP))
+               return false;
+       if (!nf_ct_get_tuple_skb(&tuple, skb))
+@@ -1557,7 +1603,7 @@ static u8 cake_handle_diffserv(struct sk
+       u16 *buf, buf_;
+       u8 dscp;
+-      switch (tc_skb_protocol(skb)) {
++      switch (cake_skb_proto(skb)) {
+       case htons(ETH_P_IP):
+               buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+               if (unlikely(!buf))
diff --git a/target/linux/generic/hack-5.4/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch b/target/linux/generic/hack-5.4/641-sch_cake-fix-IP-protocol-handling-in-the-presence-of.patch
new file mode 100644 (file)
index 0000000..e651743
--- /dev/null
@@ -0,0 +1,114 @@
+From a00590d570212c3c633bd463cef8ec7377cc7993 Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Tue, 30 Jun 2020 12:07:44 +0100
+Subject: [PATCH] sch_cake: fix IP protocol handling in the presence of VLAN
+ tags
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+
+CAKE was using the return value of tc_skb_protocol() and expecting it to be
+the IP EtherType (IPv4 or IPv6). This can fail in the presence of QinQ VLAN
+tags, making CAKE unable to handle ECN marking and diffserv parsing.
+Fix this by implementing our own version of tc_skb_protocol(), which will
+use skb->protocol directly, but also parse and skip over any VLAN tags and
+return the inner protocol number instead.
+
+Also fix CE marking by implementing a version of INET_ECN_set_ce() that
+uses the same parsing routine.
+
+Fixes: ea82511518f4 ("sch_cake: Add NAT awareness to packet classifier")
+Fixes: b2100cc56fca ("sch_cake: Use tc_skb_protocol() helper for getting packet protocol")
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+[ squash original two patches, rewrite commit message ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 52 +++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 49 insertions(+), 3 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -497,6 +497,52 @@ static bool cobalt_queue_empty(struct co
+       return down;
+ }
++static __be16 cake_skb_proto(const struct sk_buff *skb)
++{
++      unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr);
++      __be16 proto = skb->protocol;
++      struct vlan_hdr vhdr, *vh;
++
++      while (proto == htons(ETH_P_8021Q) || proto == htons(ETH_P_8021AD)) {
++              vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr);
++              if (!vh)
++                      break;
++
++              proto = vh->h_vlan_encapsulated_proto;
++              offset += sizeof(vhdr);
++      }
++
++      return proto;
++}
++
++static int cake_set_ce(struct sk_buff *skb)
++{
++      int wlen = skb_network_offset(skb);
++
++      switch (cake_skb_proto(skb)) {
++      case htons(ETH_P_IP):
++              wlen += sizeof(struct iphdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
++              return IP_ECN_set_ce(ip_hdr(skb));
++
++      case htons(ETH_P_IPV6):
++              wlen += sizeof(struct ipv6hdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
++              return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
++
++      default:
++              return 0;
++      }
++
++      return 0;
++}
++
+ /* Call this with a freshly dequeued packet for possible congestion marking.
+  * Returns true as an instruction to drop the packet, false for delivery.
+  */
+@@ -549,7 +595,7 @@ static bool cobalt_should_drop(struct co
+       if (next_due && vars->dropping) {
+               /* Use ECN mark if possible, otherwise drop */
+-              drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
++              drop = !(vars->ecn_marked = cake_set_ce(skb));
+               vars->count++;
+               if (!vars->count)
+@@ -592,7 +638,7 @@ static bool cake_update_flowkeys(struct
+       bool rev = !skb->_nfct, upd = false;
+       __be32 ip;
+-      if (tc_skb_protocol(skb) != htons(ETH_P_IP))
++      if (cake_skb_proto(skb) != htons(ETH_P_IP))
+               return false;
+       if (!nf_ct_get_tuple_skb(&tuple, skb))
+@@ -1557,7 +1603,7 @@ static u8 cake_handle_diffserv(struct sk
+       u16 *buf, buf_;
+       u8 dscp;
+-      switch (tc_skb_protocol(skb)) {
++      switch (cake_skb_proto(skb)) {
+       case htons(ETH_P_IP):
+               buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+               if (unlikely(!buf))