1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Fri, 9 Feb 2024 19:43:40 +0100
3 Subject: [PATCH] mac80211: add AQL support for broadcast packets
5 Excessive broadcast traffic with little competing unicast traffic can easily
6 flood hardware queues, leading to throughput issues. Additionally, filling the
7 hardware queues with too many packets breaks fair queueing (FQ) for broadcast
8 data. Fix this by enabling airtime queue limits (AQL) for broadcast packets.
10 Signed-off-by: Felix Fietkau <nbd@nbd.name>
13 --- a/include/net/cfg80211.h
14 +++ b/include/net/cfg80211.h
15 @@ -3324,6 +3324,7 @@ enum wiphy_params_flags {
16 /* The per TXQ device queue limit in airtime */
17 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000
18 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000
19 +#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC 50000
21 /* The per interface airtime threshold to switch to lower queue limit */
22 #define IEEE80211_AQL_THRESHOLD 24000
23 --- a/net/mac80211/debugfs.c
24 +++ b/net/mac80211/debugfs.c
25 @@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct f
31 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
32 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
33 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
34 atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
35 + atomic_read(&local->aql_bc_pending_airtime),
36 atomic_read(&local->aql_total_pending_airtime));
37 return simple_read_from_buffer(user_buf, count, ppos,
39 @@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct
46 local->aql_txq_limit_low[IEEE80211_AC_VO],
47 local->aql_txq_limit_high[IEEE80211_AC_VO],
48 local->aql_txq_limit_low[IEEE80211_AC_VI],
49 @@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct
50 local->aql_txq_limit_low[IEEE80211_AC_BE],
51 local->aql_txq_limit_high[IEEE80211_AC_BE],
52 local->aql_txq_limit_low[IEEE80211_AC_BK],
53 - local->aql_txq_limit_high[IEEE80211_AC_BK]);
54 + local->aql_txq_limit_high[IEEE80211_AC_BK],
55 + local->aql_txq_limit_bc);
56 return simple_read_from_buffer(user_buf, count, ppos,
59 @@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struc
63 + if (sscanf(buf, "mcast %u", &q_limit_low) == 1) {
64 + local->aql_txq_limit_bc = q_limit_low;
68 if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
71 --- a/net/mac80211/ieee80211_i.h
72 +++ b/net/mac80211/ieee80211_i.h
73 @@ -1328,10 +1328,12 @@ struct ieee80211_local {
74 spinlock_t handle_wake_tx_queue_lock;
77 + u32 aql_txq_limit_bc;
78 u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
79 u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
81 atomic_t aql_total_pending_airtime;
82 + atomic_t aql_bc_pending_airtime;
83 atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];
85 const struct ieee80211_ops *ops;
86 --- a/net/mac80211/main.c
87 +++ b/net/mac80211/main.c
88 @@ -788,6 +788,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
89 spin_lock_init(&local->rx_path_lock);
90 spin_lock_init(&local->queue_stop_reason_lock);
92 + local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC;
93 for (i = 0; i < IEEE80211_NUM_ACS; i++) {
94 INIT_LIST_HEAD(&local->active_txqs[i]);
95 spin_lock_init(&local->active_txq_lock[i]);
96 --- a/net/mac80211/sta_info.c
97 +++ b/net/mac80211/sta_info.c
98 @@ -2343,13 +2343,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre
100 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
101 struct sta_info *sta, u8 ac,
102 - u16 tx_airtime, bool tx_completed)
103 + u16 tx_airtime, bool tx_completed,
108 if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
112 + if (!tx_completed) {
113 + atomic_add(tx_airtime, &local->aql_bc_pending_airtime);
117 + tx_pending = atomic_sub_return(tx_airtime,
118 + &local->aql_bc_pending_airtime);
119 + if (tx_pending < 0)
120 + atomic_cmpxchg(&local->aql_bc_pending_airtime,
127 atomic_add(tx_airtime,
128 --- a/net/mac80211/tx.c
129 +++ b/net/mac80211/tx.c
130 @@ -2536,7 +2536,7 @@ static u16 ieee80211_store_ack_skb(struc
132 spin_lock_irqsave(&local->ack_status_lock, flags);
133 id = idr_alloc(&local->ack_status_frames, ack_skb,
134 - 1, 0x2000, GFP_ATOMIC);
135 + 1, 0x1000, GFP_ATOMIC);
136 spin_unlock_irqrestore(&local->ack_status_lock, flags);
139 @@ -3958,20 +3958,20 @@ begin:
141 IEEE80211_SKB_CB(skb)->control.vif = vif;
144 - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
145 - bool ampdu = txq->ac != IEEE80211_AC_VO;
146 + if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
147 + bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO;
150 airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
153 - airtime = ieee80211_info_set_tx_time_est(info, airtime);
154 - ieee80211_sta_update_pending_airtime(local, tx.sta,
162 + airtime = ieee80211_info_set_tx_time_est(info, airtime);
163 + info->tx_time_mc = !tx.sta;
164 + ieee80211_sta_update_pending_airtime(local, tx.sta, txq->ac,
170 @@ -4026,6 +4026,7 @@ struct ieee80211_txq *ieee80211_next_txq
171 struct ieee80211_txq *ret = NULL;
172 struct txq_info *txqi = NULL, *head = NULL;
173 bool found_eligible_txq = false;
176 spin_lock_bh(&local->active_txq_lock[ac]);
178 @@ -4049,26 +4050,26 @@ struct ieee80211_txq *ieee80211_next_txq
182 + aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
184 + found_eligible_txq = true;
187 struct sta_info *sta = container_of(txqi->txq.sta,
188 struct sta_info, sta);
189 - bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
190 - s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
193 - found_eligible_txq = true;
196 + if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) {
197 sta->airtime[txqi->txq.ac].deficit +=
198 sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
200 - if (deficit < 0 || !aql_check) {
201 - list_move_tail(&txqi->schedule_order,
202 - &local->active_txqs[txqi->txq.ac]);
209 + list_move_tail(&txqi->schedule_order,
210 + &local->active_txqs[txqi->txq.ac]);
214 if (txqi->schedule_round == local->schedule_round[ac])
217 @@ -4133,7 +4134,8 @@ bool ieee80211_txq_airtime_check(struct
222 + return atomic_read(&local->aql_bc_pending_airtime) <
223 + local->aql_txq_limit_bc;
225 if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
227 @@ -4182,15 +4184,15 @@ bool ieee80211_txq_may_transmit(struct i
229 spin_lock_bh(&local->active_txq_lock[ac]);
231 - if (!txqi->txq.sta)
234 if (list_empty(&txqi->schedule_order))
237 if (!ieee80211_txq_schedule_airtime_check(local, ac))
240 + if (!txqi->txq.sta)
243 list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
246 --- a/include/net/mac80211.h
247 +++ b/include/net/mac80211.h
248 @@ -1116,6 +1116,7 @@ ieee80211_rate_get_vht_nss(const struct
249 * link the frame will be transmitted on
250 * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
251 * @ack_frame_id: internal frame ID for TX status, used internally
252 + * @tx_time_mc: TX time is for a multicast packet
253 * @tx_time_est: TX time estimate in units of 4us, used internally
254 * @control: union part for control data
255 * @control.rates: TX rates array to try
256 @@ -1155,8 +1156,9 @@ struct ieee80211_tx_info {
257 /* common information */
267 --- a/net/mac80211/sta_info.h
268 +++ b/net/mac80211/sta_info.h
269 @@ -147,7 +147,8 @@ struct airtime_info {
271 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
272 struct sta_info *sta, u8 ac,
273 - u16 tx_airtime, bool tx_completed);
274 + u16 tx_airtime, bool tx_completed,
279 --- a/net/mac80211/status.c
280 +++ b/net/mac80211/status.c
281 @@ -716,7 +716,7 @@ static void ieee80211_report_used_skb(st
282 ieee80211_sta_update_pending_airtime(local, sta,
283 skb_get_queue_mapping(skb),
286 + true, info->tx_time_mc);
290 @@ -1127,10 +1127,11 @@ void ieee80211_tx_status_ext(struct ieee
291 /* Do this here to avoid the expensive lookup of the sta
292 * in ieee80211_report_used_skb().
294 + bool mcast = IEEE80211_SKB_CB(skb)->tx_time_mc;
295 ieee80211_sta_update_pending_airtime(local, sta,
296 skb_get_queue_mapping(skb),
300 ieee80211_info_set_tx_time_est(IEEE80211_SKB_CB(skb), 0);