mac80211: fix another regression in the broadcast AQL patch
[openwrt/staging/stintel.git] / package / kernel / mac80211 / patches / subsys / 330-mac80211-add-AQL-support-for-broadcast-packets.patch
1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Fri, 9 Feb 2024 19:43:40 +0100
3 Subject: [PATCH] mac80211: add AQL support for broadcast packets
4
5 Excessive broadcast/multicast traffic with little competing unicast traffic
6 can easily flood hardware queues, leading to throughput issues. Additionally,
7 filling the hardware queues with too many packets breaks fair queueing (FQ)
8 for such traffic. Fix this by enabling AQL (airtime queue limits) for it.
9
10 Signed-off-by: Felix Fietkau <nbd@nbd.name>
11 ---
12
13 --- a/include/net/cfg80211.h
14 +++ b/include/net/cfg80211.h
15 @@ -3324,6 +3324,7 @@ enum wiphy_params_flags {
16 /* The per TXQ device queue limit in airtime */
17 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000
18 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000
19 +#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC 50000
20
21 /* The per interface airtime threshold to switch to lower queue limit */
22 #define IEEE80211_AQL_THRESHOLD 24000
23 --- a/net/mac80211/debugfs.c
24 +++ b/net/mac80211/debugfs.c
25 @@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct f
26 "VI %u us\n"
27 "BE %u us\n"
28 "BK %u us\n"
29 + "BC/MC %u us\n"
30 "total %u us\n",
31 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
32 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
33 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
34 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
35 + atomic_read(&local->aql_bc_pending_airtime),
36 atomic_read(&local->aql_total_pending_airtime));
37 return simple_read_from_buffer(user_buf, count, ppos,
38 buf, len);
39 @@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct
40 "VO %u %u\n"
41 "VI %u %u\n"
42 "BE %u %u\n"
43 - "BK %u %u\n",
44 + "BK %u %u\n"
45 + "BC/MC %u\n",
46 local->aql_txq_limit_low[IEEE80211_AC_VO],
47 local->aql_txq_limit_high[IEEE80211_AC_VO],
48 local->aql_txq_limit_low[IEEE80211_AC_VI],
49 @@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct
50 local->aql_txq_limit_low[IEEE80211_AC_BE],
51 local->aql_txq_limit_high[IEEE80211_AC_BE],
52 local->aql_txq_limit_low[IEEE80211_AC_BK],
53 - local->aql_txq_limit_high[IEEE80211_AC_BK]);
54 + local->aql_txq_limit_high[IEEE80211_AC_BK],
55 + local->aql_txq_limit_bc);
56 return simple_read_from_buffer(user_buf, count, ppos,
57 buf, len);
58 }
59 @@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struc
60 else
61 buf[count] = '\0';
62
63 + if (sscanf(buf, "mcast %u", &q_limit_low) == 1) {
64 + local->aql_txq_limit_bc = q_limit_low;
65 + return count;
66 + }
67 +
68 if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
69 return -EINVAL;
70
71 --- a/net/mac80211/ieee80211_i.h
72 +++ b/net/mac80211/ieee80211_i.h
73 @@ -1328,10 +1328,12 @@ struct ieee80211_local {
74 spinlock_t handle_wake_tx_queue_lock;
75
76 u16 airtime_flags;
77 + u32 aql_txq_limit_bc;
78 u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
79 u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
80 u32 aql_threshold;
81 atomic_t aql_total_pending_airtime;
82 + atomic_t aql_bc_pending_airtime;
83 atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];
84
85 const struct ieee80211_ops *ops;
86 --- a/net/mac80211/main.c
87 +++ b/net/mac80211/main.c
88 @@ -788,6 +788,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
89 spin_lock_init(&local->rx_path_lock);
90 spin_lock_init(&local->queue_stop_reason_lock);
91
92 + local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC;
93 for (i = 0; i < IEEE80211_NUM_ACS; i++) {
94 INIT_LIST_HEAD(&local->active_txqs[i]);
95 spin_lock_init(&local->active_txq_lock[i]);
96 --- a/net/mac80211/sta_info.c
97 +++ b/net/mac80211/sta_info.c
98 @@ -2343,13 +2343,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre
99
100 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
101 struct sta_info *sta, u8 ac,
102 - u16 tx_airtime, bool tx_completed)
103 + u16 tx_airtime, bool tx_completed,
104 + bool mcast)
105 {
106 int tx_pending;
107
108 if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
109 return;
110
111 + if (mcast) {
112 + if (!tx_completed) {
113 + atomic_add(tx_airtime, &local->aql_bc_pending_airtime);
114 + return;
115 + }
116 +
117 + tx_pending = atomic_sub_return(tx_airtime,
118 + &local->aql_bc_pending_airtime);
119 + if (tx_pending < 0)
120 + atomic_cmpxchg(&local->aql_bc_pending_airtime,
121 + tx_pending, 0);
122 + return;
123 + }
124 +
125 if (!tx_completed) {
126 if (sta)
127 atomic_add(tx_airtime,
128 --- a/net/mac80211/tx.c
129 +++ b/net/mac80211/tx.c
130 @@ -2536,7 +2536,7 @@ static u16 ieee80211_store_ack_skb(struc
131
132 spin_lock_irqsave(&local->ack_status_lock, flags);
133 id = idr_alloc(&local->ack_status_frames, ack_skb,
134 - 1, 0x2000, GFP_ATOMIC);
135 + 1, 0x1000, GFP_ATOMIC);
136 spin_unlock_irqrestore(&local->ack_status_lock, flags);
137
138 if (id >= 0) {
139 @@ -3958,20 +3958,20 @@ begin:
140 encap_out:
141 IEEE80211_SKB_CB(skb)->control.vif = vif;
142
143 - if (tx.sta &&
144 - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
145 - bool ampdu = txq->ac != IEEE80211_AC_VO;
146 + if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
147 + bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO;
148 u32 airtime;
149
150 airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
151 skb->len, ampdu);
152 - if (airtime) {
153 - airtime = ieee80211_info_set_tx_time_est(info, airtime);
154 - ieee80211_sta_update_pending_airtime(local, tx.sta,
155 - txq->ac,
156 - airtime,
157 - false);
158 - }
159 + if (!airtime)
160 + return skb;
161 +
162 + airtime = ieee80211_info_set_tx_time_est(info, airtime);
163 + info->tx_time_mc = !tx.sta;
164 + ieee80211_sta_update_pending_airtime(local, tx.sta, txq->ac,
165 + airtime, false,
166 + info->tx_time_mc);
167 }
168
169 return skb;
170 @@ -4026,6 +4026,7 @@ struct ieee80211_txq *ieee80211_next_txq
171 struct ieee80211_txq *ret = NULL;
172 struct txq_info *txqi = NULL, *head = NULL;
173 bool found_eligible_txq = false;
174 + bool aql_check;
175
176 spin_lock_bh(&local->active_txq_lock[ac]);
177
178 @@ -4049,26 +4050,26 @@ struct ieee80211_txq *ieee80211_next_txq
179 if (!head)
180 head = txqi;
181
182 + aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
183 + if (aql_check)
184 + found_eligible_txq = true;
185 +
186 if (txqi->txq.sta) {
187 struct sta_info *sta = container_of(txqi->txq.sta,
188 struct sta_info, sta);
189 - bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
190 - s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
191 -
192 - if (aql_check)
193 - found_eligible_txq = true;
194 -
195 - if (deficit < 0)
196 + if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) {
197 sta->airtime[txqi->txq.ac].deficit +=
198 sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
199 -
200 - if (deficit < 0 || !aql_check) {
201 - list_move_tail(&txqi->schedule_order,
202 - &local->active_txqs[txqi->txq.ac]);
203 - goto begin;
204 + aql_check = false;
205 }
206 }
207
208 + if (!aql_check) {
209 + list_move_tail(&txqi->schedule_order,
210 + &local->active_txqs[txqi->txq.ac]);
211 + goto begin;
212 + }
213 +
214 if (txqi->schedule_round == local->schedule_round[ac])
215 goto out;
216
217 @@ -4133,7 +4134,8 @@ bool ieee80211_txq_airtime_check(struct
218 return true;
219
220 if (!txq->sta)
221 - return true;
222 + return atomic_read(&local->aql_bc_pending_airtime) <
223 + local->aql_txq_limit_bc;
224
225 if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
226 return true;
227 @@ -4182,15 +4184,15 @@ bool ieee80211_txq_may_transmit(struct i
228
229 spin_lock_bh(&local->active_txq_lock[ac]);
230
231 - if (!txqi->txq.sta)
232 - goto out;
233 -
234 if (list_empty(&txqi->schedule_order))
235 goto out;
236
237 if (!ieee80211_txq_schedule_airtime_check(local, ac))
238 goto out;
239
240 + if (!txqi->txq.sta)
241 + goto out;
242 +
243 list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
244 schedule_order) {
245 if (iter == txqi)
246 --- a/include/net/mac80211.h
247 +++ b/include/net/mac80211.h
248 @@ -1116,6 +1116,7 @@ ieee80211_rate_get_vht_nss(const struct
249 * link the frame will be transmitted on
250 * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
251 * @ack_frame_id: internal frame ID for TX status, used internally
252 + * @tx_time_mc: TX time is for a multicast packet
253 * @tx_time_est: TX time estimate in units of 4us, used internally
254 * @control: union part for control data
255 * @control.rates: TX rates array to try
256 @@ -1155,8 +1156,9 @@ struct ieee80211_tx_info {
257 /* common information */
258 u32 flags;
259 u32 band:3,
260 - ack_frame_id:13,
261 + ack_frame_id:12,
262 hw_queue:4,
263 + tx_time_mc:1,
264 tx_time_est:10;
265 /* 2 free bits */
266
267 --- a/net/mac80211/sta_info.h
268 +++ b/net/mac80211/sta_info.h
269 @@ -147,7 +147,8 @@ struct airtime_info {
270
271 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
272 struct sta_info *sta, u8 ac,
273 - u16 tx_airtime, bool tx_completed);
274 + u16 tx_airtime, bool tx_completed,
275 + bool mcast);
276
277 struct sta_info;
278
279 --- a/net/mac80211/status.c
280 +++ b/net/mac80211/status.c
281 @@ -716,7 +716,7 @@ static void ieee80211_report_used_skb(st
282 ieee80211_sta_update_pending_airtime(local, sta,
283 skb_get_queue_mapping(skb),
284 tx_time_est,
285 - true);
286 + true, info->tx_time_mc);
287 rcu_read_unlock();
288 }
289
290 @@ -1127,10 +1127,11 @@ void ieee80211_tx_status_ext(struct ieee
291 /* Do this here to avoid the expensive lookup of the sta
292 * in ieee80211_report_used_skb().
293 */
294 + bool mcast = IEEE80211_SKB_CB(skb)->tx_time_mc;
295 ieee80211_sta_update_pending_airtime(local, sta,
296 skb_get_queue_mapping(skb),
297 tx_time_est,
298 - true);
299 + true, mcast);
300 ieee80211_info_set_tx_time_est(IEEE80211_SKB_CB(skb), 0);
301 }
302