bcm27xx: update 6.1 patches to latest version
target/linux/bcm27xx/patches-6.1/950-0882-dmaengine-dw-axi-dmac-Fixes-for-RP1.patch
From 0a1cd70189daec3baf4b4a233dd8e25ffbb9d512 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Wed, 28 Apr 2021 17:46:01 +0100
Subject: [PATCH] dmaengine: dw-axi-dmac: Fixes for RP1

Don't assume that DMA addresses of devices are the same as their
physical addresses - convert correctly.
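
As a sketch, the change swaps direct use of the slave-config address
for a phys_to_dma() translation (the helper from <linux/dma-direct.h>;
see the dw_axi_dma_set_hw_desc() hunks below):

    /* chan->config.dst_addr is a CPU physical address; translate it
     * into the DMA controller's address space before programming the
     * descriptor. On most SoCs the two are identical, but not on RP1.
     */
    device_addr = phys_to_dma(chan->chip->dev, chan->config.dst_addr);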

The CFG2 register layout is used when there are more than 8 channels,
but also when the DMAC is configured for more than 16 target peripheral
devices, because the index of the handshake signal has to be made wider.
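
In parse_device_properties() the selection amounts to this sketch
(with hdata standing in for chip->dw->hdata; the actual hunks below
key the channel test off DMA_REG_MAP_CH_REF):

    if (nr_channels > 8)
        hdata->reg_map_cfg2 = true;     /* wider channel fields */
    ret = device_property_read_u32(dev, "snps,dma-targets", &tmp);
    if (!ret && tmp > 16)
        hdata->reg_map_cfg2 = true;     /* wider handshake index */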

Reset the DMAC on probe.
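
The reset bit is self-clearing, so the new code in axi_dma_hw_init()
polls it with a bounded retry count, roughly:

    axi_dma_iowrite32(chip, DMAC_RESET, 1);
    while (axi_dma_ioread32(chip, DMAC_RESET)) {
        if (!--retries)
            return;     /* the full version logs an error first */
        cpu_relax();
    }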

The driver goes to the trouble of tracking when transfers have been
paused, but then doesn't report that state when queried.
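
The fix in dma_chan_tx_status() is a simple override (see the hunk
below):

    if (chan->is_paused && status == DMA_IN_PROGRESS)
        status = DMA_PAUSED;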

Not having APB registers is not an error - for most use cases it is
expected and of no interest. Demote the message to debug level, which
is disabled by default.

Each channel has a descriptor pool, which is shared between transfers.
It is unsafe to treat the total number of descriptors allocated from a
pool as the number allocated to a specific transfer; doing so leads
to releasing buffers that shouldn't be released and walking off the
ends of descriptor lists. Instead, give each transfer descriptor its
own count.
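
Each prep routine now records that count on the descriptor it builds,
e.g. (from the dma_chan_prep_dma_memcpy() hunk below):

    desc->hw_desc_count = num;

and the teardown and dump paths size their loops from that field
instead of the channel-wide allocation counter:

    u32 count = desc->hw_desc_count;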

Support partial transfers:
Some use cases involve streaming from a device where the transfer only
proceeds when the device's FIFO occupancy exceeds a certain threshold.
In such cases (e.g. when pulling data from a UART) it is important to
know how much data has been transferred so far, so that the remaining
bytes can be read from the FIFO directly by software.

Add the necessary code to provide this "residue" value with a finer,
sub-transfer granularity.
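
Sketch of the added calculation in dma_chan_tx_status(): read back the
channel's current source or destination address and locate it within
the transfer's descriptor chain:

    if (chan->direction == DMA_MEM_TO_DEV) {
        addr = axi_chan_ioread64(chan, CH_SAR);
        completed_length = axi_dma_desc_src_pos(desc, addr);
    } else if (chan->direction == DMA_DEV_TO_MEM) {
        addr = axi_chan_ioread64(chan, CH_DAR);
        completed_length = axi_dma_desc_dst_pos(desc, addr);
    }
    bytes = desc->length - completed_length;    /* the residue */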

In order to prevent the occasional byte getting stuck in the DMA
controller's internal buffers, restrict the destination memory width
to the source register width.
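
For DMA_DEV_TO_MEM transfers this is a simple clamp in
dw_axi_dma_set_hw_desc() (see below):

    /* Prevent partial access units getting lost */
    if (mem_width > reg_width)
        mem_width = reg_width;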

Signed-off-by: Phil Elwell <phil@raspberrypi.com>
---
 .../dma/dw-axi-dmac/dw-axi-dmac-platform.c | 136 +++++++++++++++---
 drivers/dma/dw-axi-dmac/dw-axi-dmac.h      |   3 +
 2 files changed, 118 insertions(+), 21 deletions(-)

--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -12,6 +12,7 @@
 #include <linux/device.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -79,6 +80,17 @@ axi_chan_iowrite64(struct axi_dma_chan *
         iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
 }
 
+static inline u64
+axi_chan_ioread64(struct axi_dma_chan *chan, u32 reg)
+{
+        /*
+         * We split one 64 bit read into two 32 bit reads as some HW doesn't
+         * support 64 bit access.
+         */
+        return ((u64)ioread32(chan->chan_regs + reg + 4) << 32) +
+                ioread32(chan->chan_regs + reg);
+}
+
 static inline void axi_chan_config_write(struct axi_dma_chan *chan,
                                          struct axi_dma_chan_config *config)
 {
@@ -86,7 +98,7 @@ static inline void axi_chan_config_write
 
         cfg_lo = (config->dst_multblk_type << CH_CFG_L_DST_MULTBLK_TYPE_POS |
                   config->src_multblk_type << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
-        if (chan->chip->dw->hdata->reg_map_8_channels) {
+        if (!chan->chip->dw->hdata->reg_map_cfg2) {
                 cfg_hi = config->tt_fc << CH_CFG_H_TT_FC_POS |
                          config->hs_sel_src << CH_CFG_H_HS_SEL_SRC_POS |
                          config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS |
@@ -214,7 +226,18 @@ static void axi_dma_hw_init(struct axi_d
 {
         int ret;
         u32 i;
+        int retries = 1000;
 
+        axi_dma_iowrite32(chip, DMAC_RESET, 1);
+        while (axi_dma_ioread32(chip, DMAC_RESET)) {
+                retries--;
+                if (!retries) {
+                        dev_err(chip->dev, "%s: DMAC failed to reset\n",
+                                __func__);
+                        return;
+                }
+                cpu_relax();
+        }
         for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
                 axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
                 axi_chan_disable(&chip->dw->chan[i]);
@@ -276,7 +299,7 @@ static struct axi_dma_lli *axi_desc_get(
 static void axi_desc_put(struct axi_dma_desc *desc)
 {
         struct axi_dma_chan *chan = desc->chan;
-        int count = atomic_read(&chan->descs_allocated);
+        u32 count = desc->hw_desc_count;
         struct axi_dma_hw_desc *hw_desc;
         int descs_put;
 
@@ -298,6 +321,48 @@ static void vchan_desc_put(struct virt_d
         axi_desc_put(vd_to_axi_desc(vdesc));
 }
 
+static u32 axi_dma_desc_src_pos(struct axi_dma_desc *desc, dma_addr_t addr)
+{
+        unsigned int idx = 0;
+        u32 pos = 0;
+
+        while (pos < desc->length) {
+                struct axi_dma_hw_desc *hw_desc = &desc->hw_desc[idx++];
+                u32 len = hw_desc->len;
+                dma_addr_t start = le64_to_cpu(hw_desc->lli->sar);
+
+                if (addr >= start && addr <= (start + len)) {
+                        pos += addr - start;
+                        break;
+                }
+
+                pos += len;
+        }
+
+        return pos;
+}
+
+static u32 axi_dma_desc_dst_pos(struct axi_dma_desc *desc, dma_addr_t addr)
+{
+        unsigned int idx = 0;
+        u32 pos = 0;
+
+        while (pos < desc->length) {
+                struct axi_dma_hw_desc *hw_desc = &desc->hw_desc[idx++];
+                u32 len = hw_desc->len;
+                dma_addr_t start = le64_to_cpu(hw_desc->lli->dar);
+
+                if (addr >= start && addr <= (start + len)) {
+                        pos += addr - start;
+                        break;
+                }
+
+                pos += len;
+        }
+
+        return pos;
+}
+
 static enum dma_status
 dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
                    struct dma_tx_state *txstate)
@@ -307,10 +372,7 @@ dma_chan_tx_status(struct dma_chan *dcha
         enum dma_status status;
         u32 completed_length;
         unsigned long flags;
-        u32 completed_blocks;
         size_t bytes = 0;
-        u32 length;
-        u32 len;
 
         status = dma_cookie_status(dchan, cookie, txstate);
         if (status == DMA_COMPLETE || !txstate)
@@ -319,16 +381,31 @@ dma_chan_tx_status(struct dma_chan *dcha
         spin_lock_irqsave(&chan->vc.lock, flags);
 
         vdesc = vchan_find_desc(&chan->vc, cookie);
-        if (vdesc) {
-                length = vd_to_axi_desc(vdesc)->length;
-                completed_blocks = vd_to_axi_desc(vdesc)->completed_blocks;
-                len = vd_to_axi_desc(vdesc)->hw_desc[0].len;
-                completed_length = completed_blocks * len;
-                bytes = length - completed_length;
+        if (vdesc && vdesc == vchan_next_desc(&chan->vc)) {
+                /* This descriptor is in-progress */
+                struct axi_dma_desc *desc = vd_to_axi_desc(vdesc);
+                dma_addr_t addr;
+
+                if (chan->direction == DMA_MEM_TO_DEV) {
+                        addr = axi_chan_ioread64(chan, CH_SAR);
+                        completed_length = axi_dma_desc_src_pos(desc, addr);
+                } else if (chan->direction == DMA_DEV_TO_MEM) {
+                        addr = axi_chan_ioread64(chan, CH_DAR);
+                        completed_length = axi_dma_desc_dst_pos(desc, addr);
+                } else {
+                        completed_length = 0;
+                }
+                bytes = desc->length - completed_length;
+        } else if (vdesc) {
+                /* Still in the queue so not started */
+                bytes = vd_to_axi_desc(vdesc)->length;
         }
 
-        spin_unlock_irqrestore(&chan->vc.lock, flags);
+        if (chan->is_paused && status == DMA_IN_PROGRESS)
+                status = DMA_PAUSED;
+
         dma_set_residue(txstate, bytes);
+        spin_unlock_irqrestore(&chan->vc.lock, flags);
 
         return status;
 }
@@ -516,7 +593,7 @@ static void dw_axi_dma_set_hw_channel(st
         unsigned long reg_value, val;
 
         if (!chip->apb_regs) {
-                dev_err(chip->dev, "apb_regs not initialized\n");
+                dev_dbg(chip->dev, "apb_regs not initialized\n");
                 return;
         }
 
@@ -620,18 +697,25 @@ static int dw_axi_dma_set_hw_desc(struct
         switch (chan->direction) {
         case DMA_MEM_TO_DEV:
                 reg_width = __ffs(chan->config.dst_addr_width);
-                device_addr = chan->config.dst_addr;
+                device_addr = phys_to_dma(chan->chip->dev, chan->config.dst_addr);
                 ctllo = reg_width << CH_CTL_L_DST_WIDTH_POS |
                         mem_width << CH_CTL_L_SRC_WIDTH_POS |
+                        DWAXIDMAC_BURST_TRANS_LEN_1 << CH_CTL_L_DST_MSIZE_POS |
+                        DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
                         DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS |
                         DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS;
                 block_ts = len >> mem_width;
                 break;
         case DMA_DEV_TO_MEM:
                 reg_width = __ffs(chan->config.src_addr_width);
-                device_addr = chan->config.src_addr;
+                /* Prevent partial access units getting lost */
+                if (mem_width > reg_width)
+                        mem_width = reg_width;
+                device_addr = phys_to_dma(chan->chip->dev, chan->config.src_addr);
                 ctllo = reg_width << CH_CTL_L_SRC_WIDTH_POS |
                         mem_width << CH_CTL_L_DST_WIDTH_POS |
+                        DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+                        DWAXIDMAC_BURST_TRANS_LEN_1 << CH_CTL_L_SRC_MSIZE_POS |
                         DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
                         DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS;
                 block_ts = len >> reg_width;
@@ -667,9 +751,6 @@ static int dw_axi_dma_set_hw_desc(struct
         }
 
         hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
-
-        ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
-                 DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS;
         hw_desc->lli->ctl_lo = cpu_to_le32(ctllo);
 
         set_desc_src_master(hw_desc);
@@ -764,6 +845,8 @@ dw_axi_dma_chan_prep_cyclic(struct dma_c
                 src_addr += segment_len;
         }
 
+        desc->hw_desc_count = total_segments;
+
         llp = desc->hw_desc[0].llp;
 
         /* Managed transfer list */
@@ -843,6 +926,8 @@ dw_axi_dma_chan_prep_slave_sg(struct dma
                 } while (len >= segment_len);
         }
 
+        desc->hw_desc_count = loop;
+
         /* Set end-of-link to the last link descriptor of list */
         set_desc_last(&desc->hw_desc[num_sgs - 1]);
 
@@ -950,6 +1035,8 @@ dma_chan_prep_dma_memcpy(struct dma_chan
                 num++;
         }
 
+        desc->hw_desc_count = num;
+
         /* Set end-of-link to the last link descriptor of list */
         set_desc_last(&desc->hw_desc[num - 1]);
         /* Managed transfer list */
@@ -998,7 +1085,7 @@ static void axi_chan_dump_lli(struct axi
 static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
                                    struct axi_dma_desc *desc_head)
 {
-        int count = atomic_read(&chan->descs_allocated);
+        u32 count = desc_head->hw_desc_count;
         int i;
 
         for (i = 0; i < count; i++)
@@ -1041,11 +1128,11 @@ out:
 
 static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
 {
-        int count = atomic_read(&chan->descs_allocated);
         struct axi_dma_hw_desc *hw_desc;
         struct axi_dma_desc *desc;
         struct virt_dma_desc *vd;
         unsigned long flags;
+        u32 count;
         u64 llp;
         int i;
 
@@ -1067,6 +1154,7 @@ static void axi_chan_block_xfer_complete
         if (chan->cyclic) {
                 desc = vd_to_axi_desc(vd);
                 if (desc) {
+                        count = desc->hw_desc_count;
                         llp = lo_hi_readq(chan->chan_regs + CH_LLP);
                         for (i = 0; i < count; i++) {
                                 hw_desc = &desc->hw_desc[i];
@@ -1310,6 +1398,8 @@ static int parse_device_properties(struc
         chip->dw->hdata->nr_channels = tmp;
         if (tmp <= DMA_REG_MAP_CH_REF)
                 chip->dw->hdata->reg_map_8_channels = true;
+        else
+                chip->dw->hdata->reg_map_cfg2 = true;
 
         ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
         if (ret)
@@ -1319,6 +1409,10 @@ static int parse_device_properties(struc
 
         chip->dw->hdata->nr_masters = tmp;
 
+        ret = device_property_read_u32(dev, "snps,dma-targets", &tmp);
+        if (!ret && tmp > 16)
+                chip->dw->hdata->reg_map_cfg2 = true;
+
         ret = device_property_read_u32(dev, "snps,data-width", &tmp);
         if (ret)
                 return ret;
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -32,6 +32,8 @@ struct dw_axi_dma_hcfg {
         u32 axi_rw_burst_len;
         /* Register map for DMAX_NUM_CHANNELS <= 8 */
         bool reg_map_8_channels;
+        /* Register map for DMAX_NUM_CHANNELS > 8 || DMAX_NUM_HS_IF > 16 */
+        bool reg_map_cfg2;
         bool restrict_axi_burst_len;
 };
 
@@ -100,6 +102,7 @@ struct axi_dma_desc {
 
         struct virt_dma_desc vd;
         struct axi_dma_chan *chan;
+        u32 hw_desc_count;
         u32 completed_blocks;
         u32 length;
         u32 period_len;