bcm27xx: update 6.1 patches to latest version
target/linux/bcm27xx/patches-6.1/950-1031-vc_mem-Add-the-DMA-memcpy-support-from-bcm2708_fb.patch
From 2a47ccf97c6a91bc56f8cfb387d47f59cc347dd5 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Sat, 14 Oct 2023 14:57:49 +0100
Subject: [PATCH] vc_mem: Add the DMA memcpy support from bcm2708_fb

bcm2708_fb is disabled by the vc4-kms-v3d overlay, which means that the
DMA memcpy support it provides is not available to allow vclog to read
the VC logs from the top 16MB on Pi 2 and Pi 3. Add the code to the
vc_mem driver, which will still be enabled.

It ought to be possible to do a proper DMA_MEM_TO_MEM copy via the
12 generic DMA customer API, but that can be a later step.

Signed-off-by: Phil Elwell <phil@raspberrypi.com>
---
 drivers/char/broadcom/vc_mem.c | 259 +++++++++++++++++++++++++++++++++
 1 file changed, 259 insertions(+)

--- a/drivers/char/broadcom/vc_mem.c
+++ b/drivers/char/broadcom/vc_mem.c
@@ -23,9 +23,21 @@
 #include <linux/uaccess.h>
 #include <linux/dma-mapping.h>
 #include <linux/broadcom/vc_mem.h>
+#include <linux/compat.h>
+#include <linux/platform_data/dma-bcm2708.h>
+#include <soc/bcm2835/raspberrypi-firmware.h>
 
 #define DRIVER_NAME "vc-mem"
 
+/* N.B. These use a different magic value for compatibility with bcm2708_fb */
+#define VC_MEM_IOC_DMACOPY _IOW('z', 0x22, struct vc_mem_dmacopy)
+#define VC_MEM_IOC_DMACOPY32 _IOW('z', 0x22, struct vc_mem_dmacopy32)
+
+/* address with no aliases */
+#define INTALIAS_NORMAL(x) ((x) & ~0xc0000000)
+/* cache coherent but non-allocating in L1 and L2 */
+#define INTALIAS_L1L2_NONALLOCATING(x) (((x) & ~0xc0000000) | 0x80000000)
+
 /* Device (/dev) related variables */
 static dev_t vc_mem_devnum;
 static struct class *vc_mem_class;
@@ -36,6 +48,20 @@ static int vc_mem_inited;
 static struct dentry *vc_mem_debugfs_entry;
 #endif
 
+struct vc_mem_dmacopy {
+	void *dst;
+	__u32 src;
+	__u32 length;
+};
+
+#ifdef CONFIG_COMPAT
+struct vc_mem_dmacopy32 {
+	compat_uptr_t dst;
+	__u32 src;
+	__u32 length;
+};
+#endif
+
 /*
  * Videocore memory addresses and size
  *
@@ -62,6 +88,20 @@ static uint phys_addr;
 static uint mem_size;
 static uint mem_base;
 
+struct vc_mem_dma {
+	struct device *dev;
+	int dma_chan;
+	int dma_irq;
+	void __iomem *dma_chan_base;
+	wait_queue_head_t dma_waitq;
+	void *cb_base;	/* DMA control blocks */
+	dma_addr_t cb_handle;
+};
+
+struct { u32 base, length; } gpu_mem;
+static struct mutex dma_mutex;
+static struct vc_mem_dma vc_mem_dma;
+
 static int
 vc_mem_open(struct inode *inode, struct file *file)
 {
@@ -99,6 +139,189 @@ vc_mem_get_current_size(void)
 }
 EXPORT_SYMBOL_GPL(vc_mem_get_current_size);
 
+static int
+vc_mem_dma_init(void)
+{
+	struct vc_mem_dma *vcdma = &vc_mem_dma;
+	struct platform_device *pdev;
+	struct device_node *fwnode;
+	struct rpi_firmware *fw;
+	struct device *dev;
+	u32 revision;
+	int rc;
+
+	if (vcdma->dev)
+		return 0;
+
+	fwnode = of_find_node_by_path("/system");
+	rc = of_property_read_u32(fwnode, "linux,revision", &revision);
+	revision = (revision >> 12) & 0xf;
+	if (revision != 1 && revision != 2) {
+		/* Only BCM2709 and BCM2710 may have logs where the ARMs
+		 * can't see them.
+		 */
+		return -ENXIO;
+	}
+
+	fwnode = rpi_firmware_find_node();
+	if (!fwnode)
+		return -ENXIO;
+
+	pdev = of_find_device_by_node(fwnode);
+	dev = &pdev->dev;
+
+	rc = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	fw = rpi_firmware_get(fwnode);
+	if (!fw)
+		return -ENXIO;
+	rc = rpi_firmware_property(fw, RPI_FIRMWARE_GET_VC_MEMORY,
+				   &gpu_mem, sizeof(gpu_mem));
+	if (rc)
+		return rc;
+
+	gpu_mem.base = INTALIAS_NORMAL(gpu_mem.base);
+
+	if (!gpu_mem.base || !gpu_mem.length) {
+		dev_err(dev, "%s: unable to determine gpu memory (%x,%x)\n",
+			__func__, gpu_mem.base, gpu_mem.length);
+		return -EFAULT;
+	}
+
+	vcdma->cb_base = dma_alloc_wc(dev, SZ_4K, &vcdma->cb_handle, GFP_KERNEL);
+	if (!vcdma->cb_base) {
+		dev_err(dev, "failed to allocate DMA CBs\n");
+		return -ENOMEM;
+	}
+
+	rc = bcm_dma_chan_alloc(BCM_DMA_FEATURE_BULK,
+				&vcdma->dma_chan_base,
+				&vcdma->dma_irq);
+	if (rc < 0) {
+		dev_err(dev, "failed to allocate a DMA channel\n");
+		goto free_cb;
+	}
+
+	vcdma->dma_chan = rc;
+
+	init_waitqueue_head(&vcdma->dma_waitq);
+
+	vcdma->dev = dev;
+
+	return 0;
+
+free_cb:
+	dma_free_wc(dev, SZ_4K, vcdma->cb_base, vcdma->cb_handle);
+
+	return rc;
+}
+
+static void
+vc_mem_dma_uninit(void)
+{
+	struct vc_mem_dma *vcdma = &vc_mem_dma;
+
+	if (vcdma->dev) {
+		bcm_dma_chan_free(vcdma->dma_chan);
+		dma_free_wc(vcdma->dev, SZ_4K, vcdma->cb_base, vcdma->cb_handle);
+		vcdma->dev = NULL;
+	}
+}
+
+static int dma_memcpy(struct vc_mem_dma *vcdma, dma_addr_t dst, dma_addr_t src,
+		      int size)
+{
+	struct bcm2708_dma_cb *cb = vcdma->cb_base;
+	int burst_size = (vcdma->dma_chan == 0) ? 8 : 2;
+
+	cb->info = BCM2708_DMA_BURST(burst_size) | BCM2708_DMA_S_WIDTH |
+		   BCM2708_DMA_S_INC | BCM2708_DMA_D_WIDTH |
+		   BCM2708_DMA_D_INC;
+	cb->dst = dst;
+	cb->src = src;
+	cb->length = size;
+	cb->stride = 0;
+	cb->pad[0] = 0;
+	cb->pad[1] = 0;
+	cb->next = 0;
+
+	bcm_dma_start(vcdma->dma_chan_base, vcdma->cb_handle);
+	bcm_dma_wait_idle(vcdma->dma_chan_base);
+
+	return 0;
+}
+
+static long vc_mem_copy(struct vc_mem_dmacopy *ioparam)
+{
+	struct vc_mem_dma *vcdma = &vc_mem_dma;
+	size_t size = PAGE_SIZE;
+	const u32 dma_xfer_chunk = 256;
+	u32 *buf = NULL;
+	dma_addr_t bus_addr;
+	long rc = 0;
+	size_t offset;
+
+	/* restrict this to root user */
+	if (!uid_eq(current_euid(), GLOBAL_ROOT_UID))
+		return -EFAULT;
+
+	if (mutex_lock_interruptible(&dma_mutex))
+		return -EINTR;
+
+	rc = vc_mem_dma_init();
+	if (rc)
+		goto out;
+
+	vcdma = &vc_mem_dma;
+
+	if (INTALIAS_NORMAL(ioparam->src) < gpu_mem.base ||
+	    INTALIAS_NORMAL(ioparam->src) >= gpu_mem.base + gpu_mem.length) {
+		pr_err("%s: invalid memory access %x (%x-%x)", __func__,
+		       INTALIAS_NORMAL(ioparam->src), gpu_mem.base,
+		       gpu_mem.base + gpu_mem.length);
+		rc = -EFAULT;
+		goto out;
+	}
+
+	buf = dma_alloc_coherent(vcdma->dev, PAGE_ALIGN(size), &bus_addr,
+				 GFP_ATOMIC);
+	if (!buf) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (offset = 0; offset < ioparam->length; offset += size) {
+		size_t remaining = ioparam->length - offset;
+		size_t s = min(size, remaining);
+		u8 *p = (u8 *)((uintptr_t)ioparam->src + offset);
+		u8 *q = (u8 *)ioparam->dst + offset;
+
+		rc = dma_memcpy(vcdma, bus_addr,
+				INTALIAS_L1L2_NONALLOCATING((u32)(uintptr_t)p),
+				(s + dma_xfer_chunk - 1) & ~(dma_xfer_chunk - 1));
+		if (rc) {
+			dev_err(vcdma->dev, "dma_memcpy failed\n");
+			break;
+		}
+		if (copy_to_user(q, buf, s) != 0) {
+			pr_err("%s: copy_to_user failed\n", __func__);
+			rc = -EFAULT;
+			break;
+		}
+	}
+
+out:
+	if (buf)
+		dma_free_coherent(vcdma->dev, PAGE_ALIGN(size), buf,
+				  bus_addr);
+
+	mutex_unlock(&dma_mutex);
+
+	return rc;
+}
+
 static long
 vc_mem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -163,6 +386,21 @@ vc_mem_ioctl(struct file *file, unsigned
 		}
 		break;
 	}
+	case VC_MEM_IOC_DMACOPY:
+	{
+		struct vc_mem_dmacopy ioparam;
+		/* Get the parameter data.
+		 */
+		if (copy_from_user
+		    (&ioparam, (void *)arg, sizeof(ioparam))) {
+			pr_err("%s: copy_from_user failed\n", __func__);
+			rc = -EFAULT;
+			break;
+		}
+
+		rc = vc_mem_copy(&ioparam);
+		break;
+	}
 	default:
 	{
 		return -ENOTTY;
@@ -193,6 +431,24 @@ vc_mem_compat_ioctl(struct file *file, u
 
 		break;
 
+	case VC_MEM_IOC_DMACOPY32:
+	{
+		struct vc_mem_dmacopy32 param32;
+		struct vc_mem_dmacopy param;
+		/* Get the parameter data.
+		 */
+		if (copy_from_user(&param32, (void *)arg, sizeof(param32))) {
+			pr_err("%s: copy_from_user failed\n", __func__);
+			rc = -EFAULT;
+			break;
+		}
+		param.dst = compat_ptr(param32.dst);
+		param.src = param32.src;
+		param.length = param32.length;
+		rc = vc_mem_copy(&param);
+		break;
+	}
+
 	default:
 		rc = vc_mem_ioctl(file, cmd, arg);
 		break;
@@ -330,6 +586,7 @@ vc_mem_init(void)
 	vc_mem_debugfs_init(dev);
 #endif
 
+	mutex_init(&dma_mutex);
 	vc_mem_inited = 1;
 	return 0;
 
@@ -352,6 +609,7 @@ vc_mem_exit(void)
 {
 	pr_debug("%s: called\n", __func__);
 
+	vc_mem_dma_uninit();
 	if (vc_mem_inited) {
 #ifdef CONFIG_DEBUG_FS
 		vc_mem_debugfs_deinit();
@@ -360,6 +618,7 @@ vc_mem_exit(void)
 		class_destroy(vc_mem_class);
 		cdev_del(&vc_mem_cdev);
 		unregister_chrdev_region(vc_mem_devnum, 1);
+		vc_mem_inited = 0;
 	}
 }
 