bcm27xx: add linux 5.4 support
[openwrt/staging/jogo.git] target/linux/bcm27xx/patches-5.4/950-0209-pcie-brcmstb-Changes-for-BCM2711.patch
1 From 1dab5ded41ed07adc12f26e529aa64209a7c44b6 Mon Sep 17 00:00:00 2001
2 From: Phil Elwell <phil@raspberrypi.org>
3 Date: Tue, 19 Feb 2019 22:06:59 +0000
4 Subject: [PATCH] pcie-brcmstb: Changes for BCM2711
5
6 The initial brcmstb PCIe driver - originally taken from the V3(?)
7 patch set - has been modified significantly for the BCM2711.
8
9 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
10 ---
11 drivers/dma/bcm2835-dma.c | 107 ++++
12 drivers/pci/controller/Makefile | 4 +
13 drivers/pci/controller/pcie-brcmstb-bounce.c | 558 +++++++++++++++++++
14 drivers/pci/controller/pcie-brcmstb-bounce.h | 32 ++
15 drivers/pci/controller/pcie-brcmstb.c | 245 ++++----
16 drivers/soc/bcm/brcmstb/Makefile | 2 +-
17 drivers/soc/bcm/brcmstb/memory.c | 158 ++++++
18 7 files changed, 991 insertions(+), 115 deletions(-)
19 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.c
20 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.h
21 create mode 100644 drivers/soc/bcm/brcmstb/memory.c
22
23 --- a/drivers/dma/bcm2835-dma.c
24 +++ b/drivers/dma/bcm2835-dma.c
25 @@ -64,6 +64,17 @@ struct bcm2835_dma_cb {
26 uint32_t pad[2];
27 };
28
29 +struct bcm2838_dma40_scb {
30 + uint32_t ti;
31 + uint32_t src;
32 + uint32_t srci;
33 + uint32_t dst;
34 + uint32_t dsti;
35 + uint32_t len;
36 + uint32_t next_cb;
37 + uint32_t rsvd;
38 +};
39 +
40 struct bcm2835_cb_entry {
41 struct bcm2835_dma_cb *cb;
42 dma_addr_t paddr;
43 @@ -180,6 +191,45 @@ struct bcm2835_desc {
44 #define MAX_DMA_LEN SZ_1G
45 #define MAX_LITE_DMA_LEN (SZ_64K - 4)
46
47 +/* 40-bit DMA support */
48 +#define BCM2838_DMA40_CS 0x00
49 +#define BCM2838_DMA40_CB 0x04
50 +#define BCM2838_DMA40_DEBUG 0x0c
51 +#define BCM2838_DMA40_TI		0x10
52 +#define BCM2838_DMA40_SRC 0x14
53 +#define BCM2838_DMA40_SRCI 0x18
54 +#define BCM2838_DMA40_DEST 0x1c
55 +#define BCM2838_DMA40_DESTI 0x20
56 +#define BCM2838_DMA40_LEN 0x24
57 +#define BCM2838_DMA40_NEXT_CB 0x28
58 +#define BCM2838_DMA40_DEBUG2 0x2c
59 +
60 +#define BCM2838_DMA40_CS_ACTIVE BIT(0)
61 +#define BCM2838_DMA40_CS_END BIT(1)
62 +
63 +#define BCM2838_DMA40_CS_QOS(x) (((x) & 0x1f) << 16)
64 +#define BCM2838_DMA40_CS_PANIC_QOS(x) (((x) & 0x1f) << 20)
65 +#define BCM2838_DMA40_CS_WRITE_WAIT BIT(28)
66 +
67 +#define BCM2838_DMA40_BURST_LEN(x) ((((x) - 1) & 0xf) << 8)
68 +#define BCM2838_DMA40_INC BIT(12)
69 +#define BCM2838_DMA40_SIZE_128 (2 << 13)
70 +
71 +#define BCM2838_DMA40_MEMCPY_QOS \
72 + (BCM2838_DMA40_CS_QOS(0x0) | \
73 + BCM2838_DMA40_CS_PANIC_QOS(0x0) | \
74 + BCM2838_DMA40_CS_WRITE_WAIT)
75 +
76 +#define BCM2838_DMA40_MEMCPY_XFER_INFO \
77 + (BCM2838_DMA40_SIZE_128 | \
78 + BCM2838_DMA40_INC | \
79 + BCM2838_DMA40_BURST_LEN(16))
80 +
81 +static void __iomem *memcpy_chan;
82 +static struct bcm2838_dma40_scb *memcpy_scb;
83 +static dma_addr_t memcpy_scb_dma;
84 +static DEFINE_SPINLOCK(memcpy_lock);
85 +
86 static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
87 {
88 /* lite and normal channels have different max frame length */
89 @@ -866,6 +916,56 @@ static void bcm2835_dma_free(struct bcm2
90 DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
91 }
92
93 +int bcm2838_dma40_memcpy_init(struct device *dev)
94 +{
95 + if (memcpy_scb)
96 + return 0;
97 +
98 + memcpy_scb = dma_alloc_coherent(dev, sizeof(*memcpy_scb),
99 + &memcpy_scb_dma, GFP_KERNEL);
100 +
101 + if (!memcpy_scb) {
102 + pr_err("bcm2838_dma40_memcpy_init failed!\n");
103 + return -ENOMEM;
104 + }
105 +
106 + return 0;
107 +}
108 +EXPORT_SYMBOL(bcm2838_dma40_memcpy_init);
109 +
110 +void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size)
111 +{
112 + struct bcm2838_dma40_scb *scb = memcpy_scb;
113 + unsigned long flags;
114 +
115 + if (!scb) {
116 + pr_err("bcm2838_dma40_memcpy not initialised!\n");
117 + return;
118 + }
119 +
120 + spin_lock_irqsave(&memcpy_lock, flags);
121 +
122 + scb->ti = 0;
123 + scb->src = lower_32_bits(src);
124 + scb->srci = upper_32_bits(src) | BCM2838_DMA40_MEMCPY_XFER_INFO;
125 + scb->dst = lower_32_bits(dst);
126 + scb->dsti = upper_32_bits(dst) | BCM2838_DMA40_MEMCPY_XFER_INFO;
127 + scb->len = size;
128 + scb->next_cb = 0;
129 +
130 + writel((u32)(memcpy_scb_dma >> 5), memcpy_chan + BCM2838_DMA40_CB);
131 + writel(BCM2838_DMA40_MEMCPY_QOS + BCM2838_DMA40_CS_ACTIVE,
132 + memcpy_chan + BCM2838_DMA40_CS);
133 + /* Poll for completion */
134 + while (!(readl(memcpy_chan + BCM2838_DMA40_CS) & BCM2838_DMA40_CS_END))
135 + cpu_relax();
136 +
137 + writel(BCM2838_DMA40_CS_END, memcpy_chan + BCM2838_DMA40_CS);
138 +
139 + spin_unlock_irqrestore(&memcpy_lock, flags);
140 +}
141 +EXPORT_SYMBOL(bcm2838_dma40_memcpy);
142 +
143 static const struct of_device_id bcm2835_dma_of_match[] = {
144 { .compatible = "brcm,bcm2835-dma", },
145 {},
146 @@ -971,6 +1071,13 @@ static int bcm2835_dma_probe(struct plat
147 /* Channel 0 is used by the legacy API */
148 chans_available &= ~BCM2835_DMA_BULK_MASK;
149
150 + /* We can't use channels 11-13 yet */
151 + chans_available &= ~(BIT(11) | BIT(12) | BIT(13));
152 +
153 + /* Grab channel 14 for the 40-bit DMA memcpy */
154 + chans_available &= ~BIT(14);
155 + memcpy_chan = BCM2835_DMA_CHANIO(base, 14);
156 +
157 /* get irqs for each channel that we support */
158 for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
159 /* skip masked out channels */
160 --- a/drivers/pci/controller/Makefile
161 +++ b/drivers/pci/controller/Makefile
162 @@ -30,6 +30,10 @@ obj-$(CONFIG_PCIE_MEDIATEK) += pcie-medi
163 obj-$(CONFIG_PCIE_MOBIVEIL) += pcie-mobiveil.o
164 obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
165 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
166 +ifdef CONFIG_ARM
167 +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
168 +endif
169 +
170 obj-$(CONFIG_VMD) += vmd.o
171 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
172 obj-y += dwc/
173 --- /dev/null
174 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.c
175 @@ -0,0 +1,558 @@
176 +/*
177 + * This code started out as a version of arch/arm/common/dmabounce.c,
178 + * modified to cope with highmem pages. Now it has been changed heavily -
179 + * it now preallocates a large block (currently 4MB) and carves it up
180 + * sequentially in ring fashion, and DMA is used to copy the data - to the
181 + * point where very little of the original remains.
182 + *
183 + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
184 + *
185 + * Original version by Brad Parker (brad@heeltoe.com)
186 + * Re-written by Christopher Hoover <ch@murgatroid.com>
187 + * Made generic by Deepak Saxena <dsaxena@plexity.net>
188 + *
189 + * Copyright (C) 2002 Hewlett Packard Company.
190 + * Copyright (C) 2004 MontaVista Software, Inc.
191 + *
192 + * This program is free software; you can redistribute it and/or
193 + * modify it under the terms of the GNU General Public License
194 + * version 2 as published by the Free Software Foundation.
195 + */
196 +
197 +#include <linux/module.h>
198 +#include <linux/init.h>
199 +#include <linux/slab.h>
200 +#include <linux/page-flags.h>
201 +#include <linux/device.h>
202 +#include <linux/dma-mapping.h>
203 +#include <linux/dmapool.h>
204 +#include <linux/list.h>
205 +#include <linux/scatterlist.h>
206 +#include <linux/bitmap.h>
207 +
208 +#include <asm/cacheflush.h>
209 +#include <asm/dma-iommu.h>
210 +
211 +#define STATS
212 +
213 +#ifdef STATS
214 +#define DO_STATS(X) do { X ; } while (0)
215 +#else
216 +#define DO_STATS(X) do { } while (0)
217 +#endif
218 +
219 +/* ************************************************** */
220 +
221 +struct safe_buffer {
222 + struct list_head node;
223 +
224 + /* original request */
225 + size_t size;
226 + int direction;
227 +
228 + struct dmabounce_pool *pool;
229 + void *safe;
230 + dma_addr_t unsafe_dma_addr;
231 + dma_addr_t safe_dma_addr;
232 +};
233 +
234 +struct dmabounce_pool {
235 + unsigned long pages;
236 + void *virt_addr;
237 + dma_addr_t dma_addr;
238 + unsigned long *alloc_map;
239 + unsigned long alloc_pos;
240 + spinlock_t lock;
241 + struct device *dev;
242 + unsigned long num_pages;
243 +#ifdef STATS
244 + size_t max_size;
245 + unsigned long num_bufs;
246 + unsigned long max_bufs;
247 + unsigned long max_pages;
248 +#endif
249 +};
250 +
251 +struct dmabounce_device_info {
252 + struct device *dev;
253 + dma_addr_t threshold;
254 + struct list_head safe_buffers;
255 + struct dmabounce_pool pool;
256 + rwlock_t lock;
257 +#ifdef STATS
258 + unsigned long map_count;
259 + unsigned long unmap_count;
260 + unsigned long sync_dev_count;
261 + unsigned long sync_cpu_count;
262 + unsigned long fail_count;
263 + int attr_res;
264 +#endif
265 +};
266 +
267 +static struct dmabounce_device_info *g_dmabounce_device_info;
268 +
269 +extern int bcm2838_dma40_memcpy_init(struct device *dev);
270 +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
271 +
272 +#ifdef STATS
273 +static ssize_t
274 +bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
275 +{
276 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
277 + return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
278 + device_info->map_count,
279 + device_info->unmap_count,
280 + device_info->sync_dev_count,
281 + device_info->sync_cpu_count,
282 + device_info->fail_count,
283 + device_info->pool.max_size,
284 + device_info->pool.num_bufs,
285 + device_info->pool.max_bufs,
286 + device_info->pool.num_pages * PAGE_SIZE,
287 + device_info->pool.max_pages * PAGE_SIZE);
288 +}
289 +
290 +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
291 +#endif
292 +
293 +static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
294 + unsigned long buffer_size)
295 +{
296 + int ret = -ENOMEM;
297 + pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
298 + pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
299 + if (!pool->alloc_map)
300 + goto err_bitmap;
301 + pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
302 + &pool->dma_addr, GFP_KERNEL);
303 + if (!pool->virt_addr)
304 + goto err_dmabuf;
305 +
306 + pool->alloc_pos = 0;
307 + spin_lock_init(&pool->lock);
308 + pool->dev = dev;
309 + pool->num_pages = 0;
310 +
311 + DO_STATS(pool->max_size = 0);
312 + DO_STATS(pool->num_bufs = 0);
313 + DO_STATS(pool->max_bufs = 0);
314 + DO_STATS(pool->max_pages = 0);
315 +
316 + return 0;
317 +
318 +err_dmabuf:
319 + bitmap_free(pool->alloc_map);
320 +err_bitmap:
321 + return ret;
322 +}
323 +
324 +static void bounce_destroy(struct dmabounce_pool *pool)
325 +{
326 + dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
327 + pool->dma_addr);
328 +
329 + bitmap_free(pool->alloc_map);
330 +}
331 +
332 +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
333 + dma_addr_t *dmaaddrp)
334 +{
335 + unsigned long pages;
336 + unsigned long flags;
337 + unsigned long pos;
338 +
339 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
340 +
341 + DO_STATS(pool->max_size = max(size, pool->max_size));
342 +
343 + spin_lock_irqsave(&pool->lock, flags);
344 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
345 + pool->alloc_pos, pages, 0);
346 + /* If not found, try from the start */
347 + if (pos >= pool->pages && pool->alloc_pos)
348 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
349 + 0, pages, 0);
350 +
351 + if (pos >= pool->pages) {
352 + spin_unlock_irqrestore(&pool->lock, flags);
353 + return NULL;
354 + }
355 +
356 + bitmap_set(pool->alloc_map, pos, pages);
357 + pool->alloc_pos = (pos + pages) % pool->pages;
358 + pool->num_pages += pages;
359 +
360 + DO_STATS(pool->num_bufs++);
361 + DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
362 + DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
363 +
364 + spin_unlock_irqrestore(&pool->lock, flags);
365 +
366 + *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
367 +
368 + return pool->virt_addr + pos * PAGE_SIZE;
369 +}
370 +
371 +static void
372 +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
373 +{
374 + unsigned long pages;
375 + unsigned long flags;
376 + unsigned long pos;
377 +
378 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
379 + pos = (buf - pool->virt_addr)/PAGE_SIZE;
380 +
381 + BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
382 +
383 + spin_lock_irqsave(&pool->lock, flags);
384 + bitmap_clear(pool->alloc_map, pos, pages);
385 + pool->num_pages -= pages;
386 + if (pool->num_pages == 0)
387 + pool->alloc_pos = 0;
388 + DO_STATS(pool->num_bufs--);
389 + spin_unlock_irqrestore(&pool->lock, flags);
390 +}
391 +
392 +/* allocate a 'safe' buffer and keep track of it */
393 +static struct safe_buffer *
394 +alloc_safe_buffer(struct dmabounce_device_info *device_info,
395 + dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
396 +{
397 + struct safe_buffer *buf;
398 + struct dmabounce_pool *pool = &device_info->pool;
399 + struct device *dev = device_info->dev;
400 + unsigned long flags;
401 +
402 + /*
403 + * Although one might expect this to be called in thread context,
404 + * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
405 + * was previously used to select the appropriate allocation mode,
406 + * but this is unsafe.
407 + */
408 + buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
409 + if (!buf) {
410 + dev_warn(dev, "%s: kmalloc failed\n", __func__);
411 + return NULL;
412 + }
413 +
414 + buf->unsafe_dma_addr = dma_addr;
415 + buf->size = size;
416 + buf->direction = dir;
417 + buf->pool = pool;
418 +
419 + buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
420 +
421 + if (!buf->safe) {
422 + dev_warn(dev,
423 + "%s: could not alloc dma memory (size=%d)\n",
424 + __func__, size);
425 + kfree(buf);
426 + return NULL;
427 + }
428 +
429 + write_lock_irqsave(&device_info->lock, flags);
430 + list_add(&buf->node, &device_info->safe_buffers);
431 + write_unlock_irqrestore(&device_info->lock, flags);
432 +
433 + return buf;
434 +}
435 +
436 +/* determine if a buffer is from our "safe" pool */
437 +static struct safe_buffer *
438 +find_safe_buffer(struct dmabounce_device_info *device_info,
439 + dma_addr_t safe_dma_addr)
440 +{
441 + struct safe_buffer *b, *rb = NULL;
442 + unsigned long flags;
443 +
444 + read_lock_irqsave(&device_info->lock, flags);
445 +
446 + list_for_each_entry(b, &device_info->safe_buffers, node)
447 + if (b->safe_dma_addr <= safe_dma_addr &&
448 + b->safe_dma_addr + b->size > safe_dma_addr) {
449 + rb = b;
450 + break;
451 + }
452 +
453 + read_unlock_irqrestore(&device_info->lock, flags);
454 + return rb;
455 +}
456 +
457 +static void
458 +free_safe_buffer(struct dmabounce_device_info *device_info,
459 + struct safe_buffer *buf)
460 +{
461 + unsigned long flags;
462 +
463 + write_lock_irqsave(&device_info->lock, flags);
464 + list_del(&buf->node);
465 + write_unlock_irqrestore(&device_info->lock, flags);
466 +
467 + bounce_free(buf->pool, buf->safe, buf->size);
468 +
469 + kfree(buf);
470 +}
471 +
472 +/* ************************************************** */
473 +
474 +static struct safe_buffer *
475 +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
476 +{
477 + if (!dev || !g_dmabounce_device_info)
478 + return NULL;
479 + if (dma_mapping_error(dev, dma_addr)) {
480 + dev_err(dev, "Trying to %s invalid mapping\n", where);
481 + return NULL;
482 + }
483 + return find_safe_buffer(g_dmabounce_device_info, dma_addr);
484 +}
485 +
486 +static dma_addr_t
487 +map_single(struct device *dev, struct safe_buffer *buf, size_t size,
488 + enum dma_data_direction dir, unsigned long attrs)
489 +{
490 + BUG_ON(buf->size != size);
491 + BUG_ON(buf->direction != dir);
492 +
493 + dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
494 + (u64)buf->safe_dma_addr);
495 +
496 + if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
497 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
498 + bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
499 + size);
500 +
501 + return buf->safe_dma_addr;
502 +}
503 +
504 +static dma_addr_t
505 +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
506 + enum dma_data_direction dir, unsigned long attrs)
507 +{
508 + BUG_ON(buf->size != size);
509 + BUG_ON(buf->direction != dir);
510 +
511 + if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
512 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
513 + dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
514 + (u64)buf->unsafe_dma_addr);
515 +
516 + bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
517 + size);
518 + }
519 + return buf->unsafe_dma_addr;
520 +}
521 +
522 +/* ************************************************** */
523 +
524 +/*
525 + * see if a buffer address is in an 'unsafe' range. if it is
526 + * allocate a 'safe' buffer and copy the unsafe buffer into it.
527 + * substitute the safe buffer for the unsafe one.
528 + * (basically move the buffer from an unsafe area to a safe one)
529 + */
530 +static dma_addr_t
531 +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
532 + size_t size, enum dma_data_direction dir,
533 + unsigned long attrs)
534 +{
535 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
536 + dma_addr_t dma_addr;
537 +
538 + dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset;
539 +
540 + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
541 +
542 + if (device_info && (dma_addr + size) > device_info->threshold) {
543 + struct safe_buffer *buf;
544 +
545 + buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
546 + if (!buf) {
547 + DO_STATS(device_info->fail_count++);
548 + return DMA_MAPPING_ERROR;
549 + }
550 +
551 + DO_STATS(device_info->map_count++);
552 +
553 + dma_addr = map_single(dev, buf, size, dir, attrs);
554 + }
555 +
556 + return dma_addr;
557 +}
558 +
559 +/*
560 + * see if a mapped address was really a "safe" buffer and if so, copy
561 + * the data from the safe buffer back to the unsafe buffer and free up
562 + * the safe buffer. (basically return things back to the way they
563 + * should be)
564 + */
565 +static void
566 +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
567 + enum dma_data_direction dir, unsigned long attrs)
568 +{
569 + struct safe_buffer *buf;
570 +
571 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
572 + if (buf) {
573 + DO_STATS(g_dmabounce_device_info->unmap_count++);
574 + dma_addr = unmap_single(dev, buf, size, dir, attrs);
575 + free_safe_buffer(g_dmabounce_device_info, buf);
576 + }
577 +
578 + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
579 +}
580 +
581 +/*
582 + * A version of dmabounce_map_page that assumes the mapping has already
583 + * been created - intended for streaming operation.
584 + */
585 +static void
586 +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
587 + enum dma_data_direction dir)
588 +{
589 + struct safe_buffer *buf;
590 +
591 + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
592 +
593 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
594 + if (buf) {
595 + DO_STATS(g_dmabounce_device_info->sync_dev_count++);
596 + map_single(dev, buf, size, dir, 0);
597 + }
598 +}
599 +
600 +/*
601 + * A version of dmabounce_unmap_page that doesn't destroy the mapping -
602 + * intended for streaming operation.
603 + */
604 +static void
605 +dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
606 + size_t size, enum dma_data_direction dir)
607 +{
608 + struct safe_buffer *buf;
609 +
610 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
611 + if (buf) {
612 + DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
613 + dma_addr = unmap_single(dev, buf, size, dir, 0);
614 + }
615 +
616 + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
617 +}
618 +
619 +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
620 +{
621 + if (g_dmabounce_device_info)
622 + return 0;
623 +
624 + return arm_dma_ops.dma_supported(dev, dma_mask);
625 +}
626 +
627 +static const struct dma_map_ops dmabounce_ops = {
628 + .alloc = arm_dma_alloc,
629 + .free = arm_dma_free,
630 + .mmap = arm_dma_mmap,
631 + .get_sgtable = arm_dma_get_sgtable,
632 + .map_page = dmabounce_map_page,
633 + .unmap_page = dmabounce_unmap_page,
634 + .sync_single_for_cpu = dmabounce_sync_for_cpu,
635 + .sync_single_for_device = dmabounce_sync_for_device,
636 + .map_sg = arm_dma_map_sg,
637 + .unmap_sg = arm_dma_unmap_sg,
638 + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
639 + .sync_sg_for_device = arm_dma_sync_sg_for_device,
640 + .dma_supported = dmabounce_dma_supported,
641 +};
642 +
643 +int brcm_pcie_bounce_register_dev(struct device *dev,
644 + unsigned long buffer_size,
645 + dma_addr_t threshold)
646 +{
647 + struct dmabounce_device_info *device_info;
648 + int ret;
649 +
650 + /* Only support a single client */
651 + if (g_dmabounce_device_info)
652 + return -EBUSY;
653 +
654 + ret = bcm2838_dma40_memcpy_init(dev);
655 + if (ret)
656 + return ret;
657 +
658 + device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
659 + if (!device_info) {
660 + dev_err(dev,
661 + "Could not allocated dmabounce_device_info\n");
662 + return -ENOMEM;
663 + }
664 +
665 + ret = bounce_create(&device_info->pool, dev, buffer_size);
666 + if (ret) {
667 + dev_err(dev,
668 + "dmabounce: could not allocate %ld byte DMA pool\n",
669 + buffer_size);
670 + goto err_bounce;
671 + }
672 +
673 + device_info->dev = dev;
674 + device_info->threshold = threshold;
675 + INIT_LIST_HEAD(&device_info->safe_buffers);
676 + rwlock_init(&device_info->lock);
677 +
678 + DO_STATS(device_info->map_count = 0);
679 + DO_STATS(device_info->unmap_count = 0);
680 + DO_STATS(device_info->sync_dev_count = 0);
681 + DO_STATS(device_info->sync_cpu_count = 0);
682 + DO_STATS(device_info->fail_count = 0);
683 + DO_STATS(device_info->attr_res =
684 + device_create_file(dev, &dev_attr_dmabounce_stats));
685 +
686 + g_dmabounce_device_info = device_info;
687 + set_dma_ops(dev, &dmabounce_ops);
688 +
689 + dev_info(dev, "dmabounce: registered device - %ld kB, threshold %pad\n",
690 + buffer_size / 1024, &threshold);
691 +
692 + return 0;
693 +
694 + err_bounce:
695 + kfree(device_info);
696 + return ret;
697 +}
698 +EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
699 +
700 +void brcm_pcie_bounce_unregister_dev(struct device *dev)
701 +{
702 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
703 +
704 + g_dmabounce_device_info = NULL;
705 + set_dma_ops(dev, NULL);
706 +
707 + if (!device_info) {
708 + dev_warn(dev,
709 + "Never registered with dmabounce but attempting"
710 + "to unregister!\n");
711 + return;
712 + }
713 +
714 + if (!list_empty(&device_info->safe_buffers)) {
715 + dev_err(dev,
716 + "Removing from dmabounce with pending buffers!\n");
717 + BUG();
718 + }
719 +
720 + bounce_destroy(&device_info->pool);
721 +
722 + DO_STATS(if (device_info->attr_res == 0)
723 + device_remove_file(dev, &dev_attr_dmabounce_stats));
724 +
725 + kfree(device_info);
726 +
727 + dev_info(dev, "dmabounce: device unregistered\n");
728 +}
729 +EXPORT_SYMBOL(brcm_pcie_bounce_unregister_dev);
730 +
731 +MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
732 +MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
733 +MODULE_LICENSE("GPL");
734 --- /dev/null
735 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
736 @@ -0,0 +1,32 @@
737 +/* SPDX-License-Identifier: GPL-2.0 */
738 +/*
739 + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
740 + */
741 +
742 +#ifndef _PCIE_BRCMSTB_BOUNCE_H
743 +#define _PCIE_BRCMSTB_BOUNCE_H
744 +
745 +#ifdef CONFIG_ARM
746 +
747 +int brcm_pcie_bounce_register_dev(struct device *dev, unsigned long buffer_size,
748 + dma_addr_t threshold);
749 +
750 +void brcm_pcie_bounce_unregister_dev(struct device *dev);
751 +
752 +#else
753 +
754 +static inline int brcm_pcie_bounce_register_dev(struct device *dev,
755 + unsigned long buffer_size,
756 + dma_addr_t threshold)
757 +{
758 + return 0;
759 +}
760 +
761 +static inline void brcm_pcie_bounce_unregister_dev(struct device *dev)
762 +{
763 + 	return;
764 +}
765 +
766 +#endif
767 +
768 +#endif /* _PCIE_BRCMSTB_BOUNCE_H */
769 --- a/drivers/pci/controller/pcie-brcmstb.c
770 +++ b/drivers/pci/controller/pcie-brcmstb.c
771 @@ -29,6 +29,7 @@
772 #include <linux/string.h>
773 #include <linux/types.h>
774 #include "../pci.h"
775 +#include "pcie-brcmstb-bounce.h"
776
777 /* BRCM_PCIE_CAP_REGS - Offset for the mandatory capability config regs */
778 #define BRCM_PCIE_CAP_REGS 0x00ac
779 @@ -53,6 +54,7 @@
780 #define PCIE_MISC_MSI_BAR_CONFIG_LO 0x4044
781 #define PCIE_MISC_MSI_BAR_CONFIG_HI 0x4048
782 #define PCIE_MISC_MSI_DATA_CONFIG 0x404c
783 +#define PCIE_MISC_EOI_CTRL 0x4060
784 #define PCIE_MISC_PCIE_CTRL 0x4064
785 #define PCIE_MISC_PCIE_STATUS 0x4068
786 #define PCIE_MISC_REVISION 0x406c
787 @@ -260,12 +262,14 @@ struct brcm_pcie {
788 unsigned int rev;
789 const int *reg_offsets;
790 const int *reg_field_info;
791 + u32 max_burst_size;
792 enum pcie_type type;
793 };
794
795 struct pcie_cfg_data {
796 const int *reg_field_info;
797 const int *offsets;
798 + const u32 max_burst_size;
799 const enum pcie_type type;
800 };
801
802 @@ -288,24 +292,27 @@ static const int pcie_offset_bcm7425[] =
803 static const struct pcie_cfg_data bcm7425_cfg = {
804 .reg_field_info = pcie_reg_field_info,
805 .offsets = pcie_offset_bcm7425,
806 + .max_burst_size = BURST_SIZE_256,
807 .type = BCM7425,
808 };
809
810 static const int pcie_offsets[] = {
811 [RGR1_SW_INIT_1] = 0x9210,
812 [EXT_CFG_INDEX] = 0x9000,
813 - [EXT_CFG_DATA] = 0x9004,
814 + [EXT_CFG_DATA] = 0x8000,
815 };
816
817 static const struct pcie_cfg_data bcm7435_cfg = {
818 .reg_field_info = pcie_reg_field_info,
819 .offsets = pcie_offsets,
820 + .max_burst_size = BURST_SIZE_256,
821 .type = BCM7435,
822 };
823
824 static const struct pcie_cfg_data generic_cfg = {
825 .reg_field_info = pcie_reg_field_info,
826 .offsets = pcie_offsets,
827 + .max_burst_size = BURST_SIZE_128, // before BURST_SIZE_512
828 .type = GENERIC,
829 };
830
831 @@ -318,6 +325,7 @@ static const int pcie_offset_bcm7278[] =
832 static const struct pcie_cfg_data bcm7278_cfg = {
833 .reg_field_info = pcie_reg_field_info_bcm7278,
834 .offsets = pcie_offset_bcm7278,
835 + .max_burst_size = BURST_SIZE_512,
836 .type = BCM7278,
837 };
838
839 @@ -360,7 +368,6 @@ static struct pci_ops brcm_pcie_ops = {
840 (reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT)))
841
842 static const struct dma_map_ops *arch_dma_ops;
843 -static const struct dma_map_ops *brcm_dma_ops_ptr;
844 static struct of_pci_range *dma_ranges;
845 static int num_dma_ranges;
846
847 @@ -369,6 +376,16 @@ static int num_memc;
848 static int num_pcie;
849 static DEFINE_MUTEX(brcm_pcie_lock);
850
851 +static unsigned int bounce_buffer = 32*1024*1024;
852 +module_param(bounce_buffer, uint, 0644);
853 +MODULE_PARM_DESC(bounce_buffer, "Size of bounce buffer");
854 +
855 +static unsigned int bounce_threshold = 0xc0000000;
856 +module_param(bounce_threshold, uint, 0644);
857 +MODULE_PARM_DESC(bounce_threshold, "Bounce threshold");
858 +
859 +static struct brcm_pcie *g_pcie;
860 +
861 static dma_addr_t brcm_to_pci(dma_addr_t addr)
862 {
863 struct of_pci_range *p;
864 @@ -457,12 +474,10 @@ static int brcm_map_sg(struct device *de
865 struct scatterlist *sg;
866
867 for_each_sg(sgl, sg, nents, i) {
868 -#ifdef CONFIG_NEED_SG_DMA_LENGTH
869 - sg->dma_length = sg->length;
870 -#endif
871 + sg_dma_len(sg) = sg->length;
872 sg->dma_address =
873 - brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset,
874 - sg->length, dir, attrs);
875 + brcm_map_page(dev, sg_page(sg), sg->offset,
876 + sg->length, dir, attrs);
877 if (dma_mapping_error(dev, sg->dma_address))
878 goto bad_mapping;
879 }
880 @@ -470,8 +485,8 @@ static int brcm_map_sg(struct device *de
881
882 bad_mapping:
883 for_each_sg(sgl, sg, i, j)
884 - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
885 - sg_dma_len(sg), dir, attrs);
886 + brcm_unmap_page(dev, sg_dma_address(sg),
887 + sg_dma_len(sg), dir, attrs);
888 return 0;
889 }
890
891 @@ -484,8 +499,8 @@ static void brcm_unmap_sg(struct device
892 struct scatterlist *sg;
893
894 for_each_sg(sgl, sg, nents, i)
895 - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
896 - sg_dma_len(sg), dir, attrs);
897 + brcm_unmap_page(dev, sg_dma_address(sg),
898 + sg_dma_len(sg), dir, attrs);
899 }
900
901 static void brcm_sync_single_for_cpu(struct device *dev,
902 @@ -531,8 +546,8 @@ void brcm_sync_sg_for_cpu(struct device
903 int i;
904
905 for_each_sg(sgl, sg, nents, i)
906 - brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg),
907 - sg->length, dir);
908 + brcm_sync_single_for_cpu(dev, sg_dma_address(sg),
909 + sg->length, dir);
910 }
911
912 void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
913 @@ -542,14 +557,9 @@ void brcm_sync_sg_for_device(struct devi
914 int i;
915
916 for_each_sg(sgl, sg, nents, i)
917 - brcm_dma_ops_ptr->sync_single_for_device(dev,
918 - sg_dma_address(sg),
919 - sg->length, dir);
920 -}
921 -
922 -static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr)
923 -{
924 - return arch_dma_ops->mapping_error(dev, dma_addr);
925 + brcm_sync_single_for_device(dev,
926 + sg_dma_address(sg),
927 + sg->length, dir);
928 }
929
930 static int brcm_dma_supported(struct device *dev, u64 mask)
931 @@ -572,7 +582,7 @@ static int brcm_dma_supported(struct dev
932 }
933
934 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
935 -u64 brcm_get_required_mask)(struct device *dev)
936 +u64 brcm_get_required_mask(struct device *dev)
937 {
938 return arch_dma_ops->get_required_mask(dev);
939 }
940 @@ -593,7 +603,6 @@ static const struct dma_map_ops brcm_dma
941 .sync_single_for_device = brcm_sync_single_for_device,
942 .sync_sg_for_cpu = brcm_sync_sg_for_cpu,
943 .sync_sg_for_device = brcm_sync_sg_for_device,
944 - .mapping_error = brcm_mapping_error,
945 .dma_supported = brcm_dma_supported,
946 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
947 .get_required_mask = brcm_get_required_mask,
948 @@ -633,17 +642,47 @@ static void brcm_set_dma_ops(struct devi
949 set_dma_ops(dev, &brcm_dma_ops);
950 }
951
952 +static inline void brcm_pcie_perst_set(struct brcm_pcie *pcie,
953 + unsigned int val);
954 static int brcmstb_platform_notifier(struct notifier_block *nb,
955 unsigned long event, void *__dev)
956 {
957 + extern unsigned long max_pfn;
958 struct device *dev = __dev;
959 + const char *rc_name = "0000:00:00.0";
960
961 - brcm_dma_ops_ptr = &brcm_dma_ops;
962 - if (event != BUS_NOTIFY_ADD_DEVICE)
963 - return NOTIFY_DONE;
964 + switch (event) {
965 + case BUS_NOTIFY_ADD_DEVICE:
966 + if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
967 + strcmp(dev->kobj.name, rc_name)) {
968 + int ret;
969 +
970 + ret = brcm_pcie_bounce_register_dev(dev, bounce_buffer,
971 + (dma_addr_t)bounce_threshold);
972 + if (ret) {
973 + dev_err(dev,
974 + "brcm_pcie_bounce_register_dev() failed: %d\n",
975 + ret);
976 + return ret;
977 + }
978 + }
979 + brcm_set_dma_ops(dev);
980 + return NOTIFY_OK;
981
982 - brcm_set_dma_ops(dev);
983 - return NOTIFY_OK;
984 + case BUS_NOTIFY_DEL_DEVICE:
985 + if (!strcmp(dev->kobj.name, rc_name) && g_pcie) {
986 + /* Force a bus reset */
987 + brcm_pcie_perst_set(g_pcie, 1);
988 + msleep(100);
989 + brcm_pcie_perst_set(g_pcie, 0);
990 + } else if (max_pfn > (bounce_threshold/PAGE_SIZE)) {
991 + brcm_pcie_bounce_unregister_dev(dev);
992 + }
993 + return NOTIFY_OK;
994 +
995 + default:
996 + return NOTIFY_DONE;
997 + }
998 }
999
1000 static struct notifier_block brcmstb_platform_nb = {
1001 @@ -914,6 +953,7 @@ static void brcm_pcie_msi_isr(struct irq
1002 }
1003 }
1004 chained_irq_exit(chip, desc);
1005 + bcm_writel(1, msi->base + PCIE_MISC_EOI_CTRL);
1006 }
1007
1008 static void brcm_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
1009 @@ -930,7 +970,8 @@ static void brcm_compose_msi_msg(struct
1010 static int brcm_msi_set_affinity(struct irq_data *irq_data,
1011 const struct cpumask *mask, bool force)
1012 {
1013 - return -EINVAL;
1014 + struct brcm_msi *msi = irq_data_get_irq_chip_data(irq_data);
1015 + return __irq_set_affinity(msi->irq, mask, force);
1016 }
1017
1018 static struct irq_chip brcm_msi_bottom_irq_chip = {
1019 @@ -1168,9 +1209,9 @@ static void __iomem *brcm_pcie_map_conf(
1020 return PCI_SLOT(devfn) ? NULL : base + where;
1021
1022 /* For devices, write to the config space index register */
1023 - idx = cfg_index(bus->number, devfn, where);
1024 + idx = cfg_index(bus->number, devfn, 0);
1025 bcm_writel(idx, pcie->base + IDX_ADDR(pcie));
1026 - return base + DATA_ADDR(pcie) + (where & 0x3);
1027 + return base + DATA_ADDR(pcie) + where;
1028 }
1029
1030 static inline void brcm_pcie_bridge_sw_init_set(struct brcm_pcie *pcie,
1031 @@ -1238,20 +1279,6 @@ static int brcm_pcie_parse_map_dma_range
1032 num_dma_ranges++;
1033 }
1034
1035 - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1036 - u64 size = brcmstb_memory_memc_size(i);
1037 -
1038 - if (size == (u64)-1) {
1039 - dev_err(pcie->dev, "cannot get memc%d size", i);
1040 - return -EINVAL;
1041 - } else if (size) {
1042 - scb_size[i] = roundup_pow_of_two_64(size);
1043 - num_memc++;
1044 - } else {
1045 - break;
1046 - }
1047 - }
1048 -
1049 return 0;
1050 }
1051
1052 @@ -1275,26 +1302,25 @@ static int brcm_pcie_add_controller(stru
1053 if (ret)
1054 goto done;
1055
1056 - /* Determine num_memc and their sizes */
1057 - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1058 - u64 size = brcmstb_memory_memc_size(i);
1059 -
1060 - if (size == (u64)-1) {
1061 - dev_err(dev, "cannot get memc%d size\n", i);
1062 - ret = -EINVAL;
1063 - goto done;
1064 - } else if (size) {
1065 - scb_size[i] = roundup_pow_of_two_64(size);
1066 - num_memc++;
1067 - } else {
1068 - break;
1069 + if (!num_dma_ranges) {
1070 + /* Determine num_memc and their sizes by other means */
1071 + for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1072 + u64 size = brcmstb_memory_memc_size(i);
1073 +
1074 + if (size == (u64)-1) {
1075 + dev_err(dev, "cannot get memc%d size\n", i);
1076 + ret = -EINVAL;
1077 + goto done;
1078 + } else if (size) {
1079 + scb_size[i] = roundup_pow_of_two_64(size);
1080 + } else {
1081 + break;
1082 + }
1083 }
1084 - }
1085 - if (!ret && num_memc == 0) {
1086 - ret = -EINVAL;
1087 - goto done;
1088 + num_memc = i;
1089 }
1090
1091 + g_pcie = pcie;
1092 num_pcie++;
1093 done:
1094 mutex_unlock(&brcm_pcie_lock);
1095 @@ -1307,6 +1333,7 @@ static void brcm_pcie_remove_controller(
1096 if (--num_pcie > 0)
1097 goto out;
1098
1099 + g_pcie = NULL;
1100 if (brcm_unregister_notifier())
1101 dev_err(pcie->dev, "failed to unregister pci bus notifier\n");
1102 kfree(dma_ranges);
1103 @@ -1367,7 +1394,7 @@ static int brcm_pcie_setup(struct brcm_p
1104 void __iomem *base = pcie->base;
1105 unsigned int scb_size_val;
1106 u64 rc_bar2_offset, rc_bar2_size, total_mem_size = 0;
1107 - u32 tmp, burst;
1108 + u32 tmp;
1109 int i, j, ret, limit;
1110 u16 nlw, cls, lnksta;
1111 bool ssc_good = false;
1112 @@ -1400,20 +1427,15 @@ static int brcm_pcie_setup(struct brcm_p
1113 /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */
1114 tmp = INSERT_FIELD(0, PCIE_MISC_MISC_CTRL, SCB_ACCESS_EN, 1);
1115 tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, CFG_READ_UR_MODE, 1);
1116 - burst = (pcie->type == GENERIC || pcie->type == BCM7278)
1117 - ? BURST_SIZE_512 : BURST_SIZE_256;
1118 - tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE, burst);
1119 + tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE,
1120 + pcie->max_burst_size);
1121 bcm_writel(tmp, base + PCIE_MISC_MISC_CTRL);
1122
1123 /*
1124 * Set up inbound memory view for the EP (called RC_BAR2,
1125 * not to be confused with the BARs that are advertised by
1126 * the EP).
1127 - */
1128 - for (i = 0; i < num_memc; i++)
1129 - total_mem_size += scb_size[i];
1130 -
1131 - /*
1132 + *
1133 * The PCIe host controller by design must set the inbound
1134 * viewport to be a contiguous arrangement of all of the
1135 * system's memory. In addition, its size mut be a power of
1136 @@ -1429,55 +1451,49 @@ static int brcm_pcie_setup(struct brcm_p
1137 * the controller will know to send outbound memory downstream
1138 * and everything else upstream.
1139 */
1140 - rc_bar2_size = roundup_pow_of_two_64(total_mem_size);
1141
1142 - if (dma_ranges) {
1143 + if (num_dma_ranges) {
1144 /*
1145 - * The best-case scenario is to place the inbound
1146 - * region in the first 4GB of pcie-space, as some
1147 - * legacy devices can only address 32bits.
1148 - * We would also like to put the MSI under 4GB
1149 - * as well, since some devices require a 32bit
1150 - * MSI target address.
1151 + * Use the base address and size(s) provided in the dma-ranges
1152 + * property.
1153 */
1154 - if (total_mem_size <= 0xc0000000ULL &&
1155 - rc_bar2_size <= 0x100000000ULL) {
1156 - rc_bar2_offset = 0;
1157 - /* If the viewport is less then 4GB we can fit
1158 - * the MSI target address under 4GB. Otherwise
1159 - * put it right below 64GB.
1160 - */
1161 - msi_target_addr =
1162 - (rc_bar2_size == 0x100000000ULL)
1163 - ? BRCM_MSI_TARGET_ADDR_GT_4GB
1164 - : BRCM_MSI_TARGET_ADDR_LT_4GB;
1165 - } else {
1166 - /*
1167 - * The system memory is 4GB or larger so we
1168 - * cannot start the inbound region at location
1169 - * 0 (since we have to allow some space for
1170 - * outbound memory @ 3GB). So instead we
1171 - * start it at the 1x multiple of its size
1172 - */
1173 - rc_bar2_offset = rc_bar2_size;
1174 -
1175 - /* Since we are starting the viewport at 4GB or
1176 - * higher, put the MSI target address below 4GB
1177 - */
1178 - msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
1179 - }
1180 - } else {
1181 + for (i = 0; i < num_dma_ranges; i++)
1182 + scb_size[i] = roundup_pow_of_two_64(dma_ranges[i].size);
1183 +
1184 + num_memc = num_dma_ranges;
1185 + rc_bar2_offset = dma_ranges[0].pci_addr;
1186 + } else if (num_memc) {
1187 /*
1188 * Set simple configuration based on memory sizes
1189 - * only. We always start the viewport at address 0,
1190 - * and set the MSI target address accordingly.
1191 + * only. We always start the viewport at address 0.
1192 */
1193 rc_bar2_offset = 0;
1194 + } else {
1195 + return -EINVAL;
1196 + }
1197 +
1198 + for (i = 0; i < num_memc; i++)
1199 + total_mem_size += scb_size[i];
1200 +
1201 + rc_bar2_size = roundup_pow_of_two_64(total_mem_size);
1202
1203 - msi_target_addr = (rc_bar2_size >= 0x100000000ULL)
1204 - ? BRCM_MSI_TARGET_ADDR_GT_4GB
1205 - : BRCM_MSI_TARGET_ADDR_LT_4GB;
1206 + /* Verify the alignment is correct */
1207 + if (rc_bar2_offset & (rc_bar2_size - 1)) {
1208 + dev_err(dev, "inbound window is misaligned\n");
1209 + return -EINVAL;
1210 }
1211 +
1212 + /*
1213 + * Position the MSI target low if possible.
1214 + *
1215 + * TO DO: Consider outbound window when choosing MSI target and
1216 + * verifying configuration.
1217 + */
1218 + msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
1219 + if (rc_bar2_offset <= msi_target_addr &&
1220 + rc_bar2_offset + rc_bar2_size > msi_target_addr)
1221 + msi_target_addr = BRCM_MSI_TARGET_ADDR_GT_4GB;
1222 +
1223 pcie->msi_target_addr = msi_target_addr;
1224
1225 tmp = lower_32_bits(rc_bar2_offset);
1226 @@ -1713,6 +1729,7 @@ static int brcm_pcie_probe(struct platfo
1227 data = of_id->data;
1228 pcie->reg_offsets = data->offsets;
1229 pcie->reg_field_info = data->reg_field_info;
1230 + pcie->max_burst_size = data->max_burst_size;
1231 pcie->type = data->type;
1232 pcie->dn = dn;
1233 pcie->dev = &pdev->dev;
1234 @@ -1732,7 +1749,7 @@ static int brcm_pcie_probe(struct platfo
1235
1236 pcie->clk = of_clk_get_by_name(dn, "sw_pcie");
1237 if (IS_ERR(pcie->clk)) {
1238 - dev_err(&pdev->dev, "could not get clock\n");
1239 + dev_warn(&pdev->dev, "could not get clock\n");
1240 pcie->clk = NULL;
1241 }
1242 pcie->base = base;
1243 @@ -1755,7 +1772,8 @@ static int brcm_pcie_probe(struct platfo
1244
1245 ret = clk_prepare_enable(pcie->clk);
1246 if (ret) {
1247 - dev_err(&pdev->dev, "could not enable clock\n");
1248 + if (ret != -EPROBE_DEFER)
1249 + dev_err(&pdev->dev, "could not enable clock\n");
1250 return ret;
1251 }
1252
1253 @@ -1818,7 +1836,6 @@ static struct platform_driver brcm_pcie_
1254 .remove = brcm_pcie_remove,
1255 .driver = {
1256 .name = "brcm-pcie",
1257 - .owner = THIS_MODULE,
1258 .of_match_table = brcm_pcie_match,
1259 .pm = &brcm_pcie_pm_ops,
1260 },
1261 --- a/drivers/soc/bcm/brcmstb/Makefile
1262 +++ b/drivers/soc/bcm/brcmstb/Makefile
1263 @@ -1,3 +1,3 @@
1264 # SPDX-License-Identifier: GPL-2.0-only
1265 -obj-y += common.o biuctrl.o
1266 +obj-y += common.o biuctrl.o memory.o
1267 obj-$(CONFIG_BRCMSTB_PM) += pm/
1268 --- /dev/null
1269 +++ b/drivers/soc/bcm/brcmstb/memory.c
1270 @@ -0,0 +1,158 @@
1271 +// SPDX-License-Identifier: GPL-2.0
1272 +/* Copyright © 2015-2017 Broadcom */
1273 +
1274 +#include <linux/device.h>
1275 +#include <linux/io.h>
1276 +#include <linux/libfdt.h>
1277 +#include <linux/of_address.h>
1278 +#include <linux/of_fdt.h>
1279 +#include <linux/sizes.h>
1280 +#include <soc/brcmstb/memory_api.h>
1281 +
1282 +/* Macro to help extract property data */
1283 +#define DT_PROP_DATA_TO_U32(b, offs) (fdt32_to_cpu(*(u32 *)(b + offs)))
1284 +
1285 +/* Constants used when retrieving memc info */
1286 +#define NUM_BUS_RANGES 10
1287 +#define BUS_RANGE_ULIMIT_SHIFT 4
1288 +#define BUS_RANGE_LLIMIT_SHIFT 4
1289 +#define BUS_RANGE_PA_SHIFT 12
1290 +
1291 +enum {
1292 + BUSNUM_MCP0 = 0x4,
1293 + BUSNUM_MCP1 = 0x5,
1294 + BUSNUM_MCP2 = 0x6,
1295 +};
1296 +
1297 +/*
1298 + * If the DT nodes are handy, determine which MEMC holds the specified
1299 + * physical address.
1300 + */
1301 +#ifdef CONFIG_ARCH_BRCMSTB
1302 +int __brcmstb_memory_phys_addr_to_memc(phys_addr_t pa, void __iomem *base)
1303 +{
1304 + int memc = -1;
1305 + int i;
1306 +
1307 + for (i = 0; i < NUM_BUS_RANGES; i++, base += 8) {
1308 + const u64 ulimit_raw = readl(base);
1309 + const u64 llimit_raw = readl(base + 4);
1310 + const u64 ulimit =
1311 + ((ulimit_raw >> BUS_RANGE_ULIMIT_SHIFT)
1312 + << BUS_RANGE_PA_SHIFT) | 0xfff;
1313 + const u64 llimit = (llimit_raw >> BUS_RANGE_LLIMIT_SHIFT)
1314 + << BUS_RANGE_PA_SHIFT;
1315 + const u32 busnum = (u32)(ulimit_raw & 0xf);
1316 +
1317 + if (pa >= llimit && pa <= ulimit) {
1318 + if (busnum >= BUSNUM_MCP0 && busnum <= BUSNUM_MCP2) {
1319 + memc = busnum - BUSNUM_MCP0;
1320 + break;
1321 + }
1322 + }
1323 + }
1324 +
1325 + return memc;
1326 +}
1327 +
1328 +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa)
1329 +{
1330 + int memc = -1;
1331 + struct device_node *np;
1332 + void __iomem *cpubiuctrl;
1333 +
1334 + np = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
1335 + if (!np)
1336 + return memc;
1337 +
1338 + cpubiuctrl = of_iomap(np, 0);
1339 + if (!cpubiuctrl)
1340 + goto cleanup;
1341 +
1342 + memc = __brcmstb_memory_phys_addr_to_memc(pa, cpubiuctrl);
1343 + iounmap(cpubiuctrl);
1344 +
1345 +cleanup:
1346 + of_node_put(np);
1347 +
1348 + return memc;
1349 +}
1350 +
1351 +#elif defined(CONFIG_MIPS)
1352 +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa)
1353 +{
1354 + /* The logic here is fairly simple and hardcoded: if pa <= 0x5000_0000,
1355 + * then this is MEMC0, else MEMC1.
1356 + *
1357 + * For systems with 2GB on MEMC0, MEMC1 starts at 9000_0000, with 1GB
1358 + * on MEMC0, MEMC1 starts at 6000_0000.
1359 + */
1360 + if (pa >= 0x50000000ULL)
1361 + return 1;
1362 + else
1363 + return 0;
1364 +}
1365 +#endif
1366 +
1367 +u64 brcmstb_memory_memc_size(int memc)
1368 +{
1369 + const void *fdt = initial_boot_params;
1370 + const int mem_offset = fdt_path_offset(fdt, "/memory");
1371 + int addr_cells = 1, size_cells = 1;
1372 + const struct fdt_property *prop;
1373 + int proplen, cellslen;
1374 + u64 memc_size = 0;
1375 + int i;
1376 +
1377 + /* Get root size and address cells if specified */
1378 + prop = fdt_get_property(fdt, 0, "#size-cells", &proplen);
1379 + if (prop)
1380 + size_cells = DT_PROP_DATA_TO_U32(prop->data, 0);
1381 +
1382 + prop = fdt_get_property(fdt, 0, "#address-cells", &proplen);
1383 + if (prop)
1384 + addr_cells = DT_PROP_DATA_TO_U32(prop->data, 0);
1385 +
1386 + if (mem_offset < 0)
1387 + return -1;
1388 +
1389 + prop = fdt_get_property(fdt, mem_offset, "reg", &proplen);
1390 + cellslen = (int)sizeof(u32) * (addr_cells + size_cells);
1391 + if ((proplen % cellslen) != 0)
1392 + return -1;
1393 +
1394 + for (i = 0; i < proplen / cellslen; ++i) {
1395 + u64 addr = 0;
1396 + u64 size = 0;
1397 + int memc_idx;
1398 + int j;
1399 +
1400 + for (j = 0; j < addr_cells; ++j) {
1401 + int offset = (cellslen * i) + (sizeof(u32) * j);
1402 +
1403 + addr |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) <<
1404 + ((addr_cells - j - 1) * 32);
1405 + }
1406 + for (j = 0; j < size_cells; ++j) {
1407 + int offset = (cellslen * i) +
1408 + (sizeof(u32) * (j + addr_cells));
1409 +
1410 + size |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) <<
1411 + ((size_cells - j - 1) * 32);
1412 + }
1413 +
1414 + if ((phys_addr_t)addr != addr) {
1415 + pr_err("phys_addr_t is smaller than provided address 0x%llx!\n",
1416 + addr);
1417 + return -1;
1418 + }
1419 +
1420 + memc_idx = brcmstb_memory_phys_addr_to_memc((phys_addr_t)addr);
1421 + if (memc_idx == memc)
1422 + memc_size += size;
1423 + }
1424 +
1425 + return memc_size;
1426 +}
1427 +EXPORT_SYMBOL_GPL(brcmstb_memory_memc_size);
1428 +