ath25: switch default kernel to 5.15
[openwrt/openwrt.git] / target / linux / generic / backport-5.10 / 614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
1 From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001
2 From: Pablo Neira Ayuso <pablo@netfilter.org>
3 Date: Tue, 17 May 2022 10:44:14 +0200
4 Subject: netfilter: flowtable: fix TCP flow teardown
5
6 [ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ]
7
8 This patch addresses three possible problems:
9
10 1. ct gc may race to undo the timeout adjustment of the packet path, leaving
11 the conntrack entry in place with the internal offload timeout (one day).
12
13 2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
14 timeout is reached before the flow offload del.
15
16 3. tcp ct is always set to ESTABLISHED with a very long timeout
17 in flow offload teardown/delete even though the state might be already
18 CLOSED. Also as a remark we cannot assume that the FIN or RST packet
19 is hitting flow table teardown as the packet might get bumped to the
20 slow path in nftables.
21
22 This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
23 conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
24 state transition.
25
26 Moreover, return the connection's ownership to conntrack upon teardown
27 by clearing the offload flag and fixing the established timeout value.
28 The flow table GC thread will asynchronously free the flow table and
29 hardware offload entries.
30
31 Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows,
32 which is also misleading since the flow is back to classic conntrack
33 path.
34
35 If nf_ct_delete() removes the entry from the conntrack table, then it
36 calls nf_ct_put() which decrements the refcnt. This is not a problem
37 because the flowtable holds a reference to the conntrack object from
38 flow_offload_alloc() path which is released via flow_offload_free().
39
40 This patch also updates nft_flow_offload to skip packets in SYN_RECV
41 state. Since we might miss or bump packets to slow path, we do not know
42 what will happen there while we are still in SYN_RECV; this patch
43 postpones offload up to the next packet which also aligns to the
44 existing behaviour in tc-ct.
45
46 flow_offload_teardown() does not reset the existing tcp state from
47 flow_offload_fixup_tcp() to ESTABLISHED anymore; packets bumped to the slow
48 path might have already updated the state to CLOSE/FIN.
49
50 Joint work with Oz and Sven.
51
52 Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
53 Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
54 Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
55 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
56 Signed-off-by: Sasha Levin <sashal@kernel.org>
57 ---
58 net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
59 net/netfilter/nft_flow_offload.c | 3 ++-
60 2 files changed, 9 insertions(+), 27 deletions(-)
61
62 --- a/net/netfilter/nf_flow_table_core.c
63 +++ b/net/netfilter/nf_flow_table_core.c
64 @@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_ini
65
66 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
67 {
68 - tcp->state = TCP_CONNTRACK_ESTABLISHED;
69 tcp->seen[0].td_maxwin = 0;
70 tcp->seen[1].td_maxwin = 0;
71 }
72
73 -static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
74 +static void flow_offload_fixup_ct(struct nf_conn *ct)
75 {
76 struct net *net = nf_ct_net(ct);
77 int l4num = nf_ct_protonum(ct);
78 @@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeou
79 if (l4num == IPPROTO_TCP) {
80 struct nf_tcp_net *tn = nf_tcp_pernet(net);
81
82 - timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
83 + flow_offload_fixup_tcp(&ct->proto.tcp);
84 +
85 + timeout = tn->timeouts[ct->proto.tcp.state];
86 timeout -= tn->offload_timeout;
87 } else if (l4num == IPPROTO_UDP) {
88 struct nf_udp_net *tn = nf_udp_pernet(net);
89 @@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeou
90 WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
91 }
92
93 -static void flow_offload_fixup_ct_state(struct nf_conn *ct)
94 -{
95 - if (nf_ct_protonum(ct) == IPPROTO_TCP)
96 - flow_offload_fixup_tcp(&ct->proto.tcp);
97 -}
98 -
99 -static void flow_offload_fixup_ct(struct nf_conn *ct)
100 -{
101 - flow_offload_fixup_ct_state(ct);
102 - flow_offload_fixup_ct_timeout(ct);
103 -}
104 -
105 static void flow_offload_route_release(struct flow_offload *flow)
106 {
107 nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
108 @@ -353,22 +342,14 @@ static void flow_offload_del(struct nf_f
109 rhashtable_remove_fast(&flow_table->rhashtable,
110 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
111 nf_flow_offload_rhash_params);
112 -
113 - clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
114 -
115 - if (nf_flow_has_expired(flow))
116 - flow_offload_fixup_ct(flow->ct);
117 - else
118 - flow_offload_fixup_ct_timeout(flow->ct);
119 -
120 flow_offload_free(flow);
121 }
122
123 void flow_offload_teardown(struct flow_offload *flow)
124 {
125 + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
126 set_bit(NF_FLOW_TEARDOWN, &flow->flags);
127 -
128 - flow_offload_fixup_ct_state(flow->ct);
129 + flow_offload_fixup_ct(flow->ct);
130 }
131 EXPORT_SYMBOL_GPL(flow_offload_teardown);
132
133 @@ -437,7 +418,7 @@ static void nf_flow_offload_gc_step(stru
134
135 if (nf_flow_has_expired(flow) ||
136 nf_ct_is_dying(flow->ct))
137 - set_bit(NF_FLOW_TEARDOWN, &flow->flags);
138 + flow_offload_teardown(flow);
139
140 if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
141 if (test_bit(NF_FLOW_HW, &flow->flags)) {
142 --- a/net/netfilter/nft_flow_offload.c
143 +++ b/net/netfilter/nft_flow_offload.c
144 @@ -268,6 +268,12 @@ static bool nft_flow_offload_skip(struct
145 return false;
146 }
147
148 +static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
149 +{
150 + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
151 + test_bit(IPS_ASSURED_BIT, &ct->status);
152 +}
153 +
154 static void nft_flow_offload_eval(const struct nft_expr *expr,
155 struct nft_regs *regs,
156 const struct nft_pktinfo *pkt)
157 @@ -293,7 +299,8 @@ static void nft_flow_offload_eval(const
158 case IPPROTO_TCP:
159 tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
160 sizeof(_tcph), &_tcph);
161 - if (unlikely(!tcph || tcph->fin || tcph->rst))
162 + if (unlikely(!tcph || tcph->fin || tcph->rst ||
163 + !nf_conntrack_tcp_established(ct)))
164 goto out;
165 break;
166 case IPPROTO_UDP: