f2a65ffba5962d81f852a1e6b625f0fa60b9e1ee
[openwrt/staging/stintel.git] / target / linux / layerscape / patches-4.9 / 818-vfio-support-layerscape.patch
1 From e6af99cc1d56322fc960d072af1a7e0e9285b90c Mon Sep 17 00:00:00 2001
2 From: Yangbo Lu <yangbo.lu@nxp.com>
3 Date: Thu, 5 Jul 2018 17:39:43 +0800
4 Subject: [PATCH 30/32] vfio: support layerscape
5
6 This is an integrated patch for layerscape vfio support.
7
8 Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
9 Signed-off-by: Eric Auger <eric.auger@redhat.com>
10 Signed-off-by: Robin Murphy <robin.murphy@arm.com>
11 Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
12 Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
13 ---
14 drivers/vfio/Kconfig | 1 +
15 drivers/vfio/Makefile | 1 +
16 drivers/vfio/fsl-mc/Kconfig | 9 +
17 drivers/vfio/fsl-mc/Makefile | 2 +
18 drivers/vfio/fsl-mc/vfio_fsl_mc.c | 752 ++++++++++++++++++++++
19 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 199 ++++++
20 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 55 ++
21 drivers/vfio/vfio_iommu_type1.c | 39 +-
22 include/uapi/linux/vfio.h | 1 +
23 9 files changed, 1057 insertions(+), 2 deletions(-)
24 create mode 100644 drivers/vfio/fsl-mc/Kconfig
25 create mode 100644 drivers/vfio/fsl-mc/Makefile
26 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc.c
27 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
28 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
29
30 --- a/drivers/vfio/Kconfig
31 +++ b/drivers/vfio/Kconfig
32 @@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
33
34 source "drivers/vfio/pci/Kconfig"
35 source "drivers/vfio/platform/Kconfig"
36 +source "drivers/vfio/fsl-mc/Kconfig"
37 source "virt/lib/Kconfig"
38 --- a/drivers/vfio/Makefile
39 +++ b/drivers/vfio/Makefile
40 @@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vf
41 obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
42 obj-$(CONFIG_VFIO_PCI) += pci/
43 obj-$(CONFIG_VFIO_PLATFORM) += platform/
44 +obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
45 --- /dev/null
46 +++ b/drivers/vfio/fsl-mc/Kconfig
47 @@ -0,0 +1,9 @@
48 +config VFIO_FSL_MC
49 + tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
50 + depends on VFIO && FSL_MC_BUS && EVENTFD
51 + help
52 + Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
53 + (Management Complex) devices. This is required to passthrough
54 + fsl-mc bus devices using the VFIO framework.
55 +
56 + If you don't know what to do here, say N.
57 --- /dev/null
58 +++ b/drivers/vfio/fsl-mc/Makefile
59 @@ -0,0 +1,2 @@
60 +vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o
61 +obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o
62 --- /dev/null
63 +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
64 @@ -0,0 +1,752 @@
65 +/*
66 + * Freescale Management Complex (MC) device passthrough using VFIO
67 + *
68 + * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
69 + * Copyright 2016-2017 NXP
70 + * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
71 + *
72 + * This file is licensed under the terms of the GNU General Public
73 + * License version 2. This program is licensed "as is" without any
74 + * warranty of any kind, whether express or implied.
75 + */
76 +
77 +#include <linux/device.h>
78 +#include <linux/iommu.h>
79 +#include <linux/module.h>
80 +#include <linux/mutex.h>
81 +#include <linux/slab.h>
82 +#include <linux/types.h>
83 +#include <linux/vfio.h>
84 +#include <linux/delay.h>
85 +
86 +#include <linux/fsl/mc.h>
87 +
88 +#include "vfio_fsl_mc_private.h"
89 +
90 +#define DRIVER_VERSION "0.10"
91 +#define DRIVER_AUTHOR "Bharat Bhushan <bharat.bhushan@nxp.com>"
92 +#define DRIVER_DESC "VFIO for FSL-MC devices - User Level meta-driver"
93 +
94 +static DEFINE_MUTEX(driver_lock);
95 +
96 +/* FSL-MC device regions (address and size) are aligned to 64K.
97 + * However, the MC firmware reports a size below 64K for some objects (the
98 + * reported size does not include reserved space beyond the valid bytes).
99 + * Align the size to PAGE_SIZE so that userspace can mmap the region.
100 + */
101 +static size_t aligned_region_size(struct fsl_mc_device *mc_dev, int index)
102 +{
103 + size_t size;
104 +
105 + size = resource_size(&mc_dev->regions[index]);
106 + return PAGE_ALIGN(size);
107 +}
108 +
109 +static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
110 +{
111 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
112 + int count = mc_dev->obj_desc.region_count;
113 + int i;
114 +
115 + vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
116 + GFP_KERNEL);
117 + if (!vdev->regions)
118 + return -ENOMEM;
119 +
120 + for (i = 0; i < mc_dev->obj_desc.region_count; i++) {
121 + vdev->regions[i].addr = mc_dev->regions[i].start;
122 + vdev->regions[i].size = aligned_region_size(mc_dev, i);
123 + vdev->regions[i].type = VFIO_FSL_MC_REGION_TYPE_MMIO;
124 + if (mc_dev->regions[i].flags & IORESOURCE_CACHEABLE)
125 + vdev->regions[i].type |=
126 + VFIO_FSL_MC_REGION_TYPE_CACHEABLE;
127 + vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
128 + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
129 + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
130 + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
131 + }
132 +
133 + vdev->num_regions = mc_dev->obj_desc.region_count;
134 + return 0;
135 +}
136 +
137 +static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
138 +{
139 + int i;
140 +
141 + for (i = 0; i < vdev->num_regions; i++)
142 + iounmap(vdev->regions[i].ioaddr);
143 +
144 + vdev->num_regions = 0;
145 + kfree(vdev->regions);
146 +}
147 +
148 +static int vfio_fsl_mc_open(void *device_data)
149 +{
150 + struct vfio_fsl_mc_device *vdev = device_data;
151 + int ret;
152 +
153 + if (!try_module_get(THIS_MODULE))
154 + return -ENODEV;
155 +
156 + mutex_lock(&driver_lock);
157 + if (!vdev->refcnt) {
158 + ret = vfio_fsl_mc_regions_init(vdev);
159 + if (ret)
160 + goto error_region_init;
161 +
162 + ret = vfio_fsl_mc_irqs_init(vdev);
163 + if (ret)
164 + goto error_irq_init;
165 + }
166 +
167 + vdev->refcnt++;
168 + mutex_unlock(&driver_lock);
169 + return 0;
170 +
171 +error_irq_init:
172 + vfio_fsl_mc_regions_cleanup(vdev);
173 +error_region_init:
174 + mutex_unlock(&driver_lock);
175 + if (ret)
176 + module_put(THIS_MODULE);
177 +
178 + return ret;
179 +}
180 +
181 +static void vfio_fsl_mc_release(void *device_data)
182 +{
183 + struct vfio_fsl_mc_device *vdev = device_data;
184 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
185 +
186 + mutex_lock(&driver_lock);
187 +
188 + if (!(--vdev->refcnt)) {
189 + vfio_fsl_mc_regions_cleanup(vdev);
190 + vfio_fsl_mc_irqs_cleanup(vdev);
191 + }
192 +
193 + if (strcmp(mc_dev->obj_desc.type, "dprc") == 0)
194 + dprc_reset_container(mc_dev->mc_io, 0, mc_dev->mc_handle,
195 + mc_dev->obj_desc.id);
196 +
197 + mutex_unlock(&driver_lock);
198 +
199 + module_put(THIS_MODULE);
200 +}
201 +
202 +static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
203 + unsigned long arg)
204 +{
205 + struct vfio_fsl_mc_device *vdev = device_data;
206 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
207 + unsigned long minsz;
208 +
209 + if (WARN_ON(!mc_dev))
210 + return -ENODEV;
211 +
212 + switch (cmd) {
213 + case VFIO_DEVICE_GET_INFO:
214 + {
215 + struct vfio_device_info info;
216 +
217 + minsz = offsetofend(struct vfio_device_info, num_irqs);
218 +
219 + if (copy_from_user(&info, (void __user *)arg, minsz))
220 + return -EFAULT;
221 +
222 + if (info.argsz < minsz)
223 + return -EINVAL;
224 +
225 + info.flags = VFIO_DEVICE_FLAGS_FSL_MC;
226 + info.num_regions = mc_dev->obj_desc.region_count;
227 + info.num_irqs = mc_dev->obj_desc.irq_count;
228 + /* copy_to_user() returns the uncopied byte count, not an errno */
229 + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
230 + }
231 + case VFIO_DEVICE_GET_REGION_INFO:
232 + {
233 + struct vfio_region_info info;
234 +
235 + minsz = offsetofend(struct vfio_region_info, offset);
236 +
237 + if (copy_from_user(&info, (void __user *)arg, minsz))
238 + return -EFAULT;
239 +
240 + if (info.argsz < minsz)
241 + return -EINVAL;
242 +
243 + if (info.index >= vdev->num_regions)
244 + return -EINVAL;
245 +
246 + /* encode the region index in the upper bits of the file offset */
247 + info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index);
248 + info.size = vdev->regions[info.index].size;
249 + info.flags = vdev->regions[info.index].flags;
250 +
251 + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
252 + }
253 + case VFIO_DEVICE_GET_IRQ_INFO:
254 + {
255 + struct vfio_irq_info info;
256 +
257 + minsz = offsetofend(struct vfio_irq_info, count);
258 + if (copy_from_user(&info, (void __user *)arg, minsz))
259 + return -EFAULT;
260 +
261 + if (info.argsz < minsz)
262 + return -EINVAL;
263 +
264 + if (info.index >= mc_dev->obj_desc.irq_count)
265 + return -EINVAL;
266 +
267 + if (vdev->mc_irqs != NULL) {
268 + info.flags = vdev->mc_irqs[info.index].flags;
269 + info.count = vdev->mc_irqs[info.index].count;
270 + } else {
271 + /*
272 + * If IRQs are not initialized then they cannot
273 + * be configured and used by user-space.
274 + */
275 + info.flags = 0;
276 + info.count = 0;
277 + }
278 +
279 + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
280 + }
281 + case VFIO_DEVICE_SET_IRQS:
282 + {
283 + struct vfio_irq_set hdr;
284 + u8 *data = NULL;
285 + int ret = 0;
286 +
287 + minsz = offsetofend(struct vfio_irq_set, count);
288 +
289 + if (copy_from_user(&hdr, (void __user *)arg, minsz))
290 + return -EFAULT;
291 +
292 + if (hdr.argsz < minsz)
293 + return -EINVAL;
294 +
295 + if (hdr.index >= mc_dev->obj_desc.irq_count)
296 + return -EINVAL;
297 +
298 + if (hdr.start != 0 || hdr.count > 1)
299 + return -EINVAL;
300 +
301 + if (hdr.count == 0 &&
302 + (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE) ||
303 + !(hdr.flags & VFIO_IRQ_SET_ACTION_TRIGGER)))
304 + return -EINVAL;
305 +
306 + if (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
307 + VFIO_IRQ_SET_ACTION_TYPE_MASK))
308 + return -EINVAL;
309 +
310 + if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
311 + size_t size;
312 +
313 + if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
314 + size = sizeof(uint8_t);
315 + else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
316 + size = sizeof(int32_t);
317 + else
318 + return -EINVAL;
319 +
320 + if (hdr.argsz - minsz < hdr.count * size)
321 + return -EINVAL;
322 +
323 + data = memdup_user((void __user *)(arg + minsz),
324 + hdr.count * size);
325 + if (IS_ERR(data))
326 + return PTR_ERR(data);
327 + }
328 +
329 + ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
330 + hdr.start, hdr.count, data);
331 + kfree(data); /* memdup_user() copy; still NULL for DATA_NONE */
332 + return ret;
333 + }
334 + case VFIO_DEVICE_RESET:
335 + {
336 + return -EINVAL;
337 + }
338 + default:
339 + return -EINVAL;
340 + }
341 +}
342 +
343 +static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
344 + size_t count, loff_t *ppos)
345 +{
346 + struct vfio_fsl_mc_device *vdev = device_data;
347 + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
348 + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
349 + struct vfio_fsl_mc_region *region;
350 + uint64_t data[8];
351 + int i;
352 +
353 + /* Reads are supported only for DPRC and DPMCP devices */
354 + if (strcmp(vdev->mc_dev->obj_desc.type, "dprc") &&
355 + strcmp(vdev->mc_dev->obj_desc.type, "dpmcp"))
356 + return -EINVAL;
357 +
358 + if (index >= vdev->num_regions)
359 + return -EINVAL;
360 +
361 + region = &vdev->regions[index];
362 +
363 + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
364 + return -EINVAL;
365 + /* parenthesized: '!' binds tighter than '&' */
366 + if (!(region->type & VFIO_FSL_MC_REGION_TYPE_MMIO))
367 + return -EINVAL;
368 +
369 + if (!region->ioaddr) {
370 + region->ioaddr = ioremap_nocache(region->addr, region->size);
371 + if (!region->ioaddr)
372 + return -ENOMEM;
373 + }
374 + /* only a whole 64-byte read at region offset 0 is accepted */
375 + if (count != 64 || off != 0)
376 + return -EINVAL;
377 +
378 + for (i = 7; i >= 0; i--)
379 + data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
380 +
381 + if (copy_to_user(buf, data, 64))
382 + return -EFAULT;
383 +
384 + return count;
385 +}
386 +
387 +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
388 +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
389 +
390 +static int vfio_fsl_mc_dprc_wait_for_response(void __iomem *ioaddr)
391 +{
392 + enum mc_cmd_status status;
393 + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
394 +
395 + for (;;) {
396 + u64 header;
397 + struct mc_cmd_header *resp_hdr;
398 +
399 + __iormb();
400 + header = readq(ioaddr);
401 + __iormb();
402 +
403 + resp_hdr = (struct mc_cmd_header *)&header;
404 + status = (enum mc_cmd_status)resp_hdr->status;
405 + if (status != MC_CMD_STATUS_READY)
406 + break;
407 +
408 + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
409 + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
410 + if (timeout_usecs == 0)
411 + return -ETIMEDOUT;
412 + }
413 +
414 + return 0;
415 +}
416 +
417 +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
418 +{
419 + int i;
420 +
421 + /* Write the command words from last to first so word 0 is written last */
422 + for (i = 7; i >= 0; i--)
423 + writeq(cmd_data[i], ioaddr + i * sizeof(uint64_t));
424 +
425 + /* Wait for the response before returning to user-space.
426 + * This can be optimized in future to even prepare the response
427 + * before returning to user-space and avoid the read ioctl.
428 + */
429 + return vfio_fsl_mc_dprc_wait_for_response(ioaddr);
430 +}
431 +
432 +static int vfio_handle_dprc_commands(void __iomem *ioaddr, uint64_t *cmd_data)
433 +{
434 + uint64_t cmd_hdr = cmd_data[0];
435 + int cmd = (cmd_hdr >> 52) & 0xfff;
436 +
437 + switch (cmd) {
438 + case DPRC_CMDID_OPEN:
439 + default:
440 + return vfio_fsl_mc_send_command(ioaddr, cmd_data);
441 + }
442 +
443 + return 0;
444 +}
445 +
446 +static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
447 + size_t count, loff_t *ppos)
448 +{
449 + struct vfio_fsl_mc_device *vdev = device_data;
450 + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
451 + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
452 + struct vfio_fsl_mc_region *region;
453 + uint64_t data[8];
454 + int ret;
455 +
456 + /* Writes are supported only for DPRC and DPMCP devices */
457 + if (strcmp(vdev->mc_dev->obj_desc.type, "dprc") &&
458 + strcmp(vdev->mc_dev->obj_desc.type, "dpmcp"))
459 + return -EINVAL;
460 +
461 + if (index >= vdev->num_regions)
462 + return -EINVAL;
463 +
464 + region = &vdev->regions[index];
465 +
466 + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
467 + return -EINVAL;
468 + /* parenthesized: '!' binds tighter than '&' */
469 + if (!(region->type & VFIO_FSL_MC_REGION_TYPE_MMIO))
470 + return -EINVAL;
471 +
472 + if (!region->ioaddr) {
473 + region->ioaddr = ioremap_nocache(region->addr, region->size);
474 + if (!region->ioaddr)
475 + return -ENOMEM;
476 + }
477 + /* only a whole 64-byte write at region offset 0 is accepted */
478 + if (count != 64 || off != 0)
479 + return -EINVAL;
480 +
481 + if (copy_from_user(&data, buf, 64))
482 + return -EFAULT;
483 +
484 + ret = vfio_handle_dprc_commands(region->ioaddr, data);
485 + if (ret)
486 + return ret;
487 +
488 + return count;
489 +}
490 +
491 +static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
492 + struct vm_area_struct *vma)
493 +{
494 + u64 size = vma->vm_end - vma->vm_start;
495 + u64 pgoff, base;
496 +
497 + pgoff = vma->vm_pgoff &
498 + ((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
499 + base = pgoff << PAGE_SHIFT;
500 +
501 + if (region.size < PAGE_SIZE || base + size > region.size)
502 + return -EINVAL;
503 + /*
504 + * Set the REGION_TYPE_CACHEABLE (QBman CENA regs) to be the
505 + * cache inhibited area of the portal to avoid coherency issues
506 + * if a user migrates to another core.
507 + */
508 + if (region.type & VFIO_FSL_MC_REGION_TYPE_CACHEABLE)
509 + vma->vm_page_prot = pgprot_cached_ns(vma->vm_page_prot);
510 + else
511 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
512 +
513 + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
514 +
515 + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
516 + size, vma->vm_page_prot);
517 +}
518 +
519 +/* Allows mmaping fsl_mc device regions in assigned DPRC */
520 +static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
521 +{
522 + struct vfio_fsl_mc_device *vdev = device_data;
523 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
524 + unsigned long size, addr;
525 + int index;
526 +
527 + index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT);
528 +
529 + if (vma->vm_end < vma->vm_start)
530 + return -EINVAL;
531 + if (vma->vm_start & ~PAGE_MASK)
532 + return -EINVAL;
533 + if (vma->vm_end & ~PAGE_MASK)
534 + return -EINVAL;
535 + if (!(vma->vm_flags & VM_SHARED))
536 + return -EINVAL;
537 + if (index >= vdev->num_regions)
538 + return -EINVAL;
539 +
540 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
541 + return -EINVAL;
542 +
543 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)
544 + && (vma->vm_flags & VM_READ))
545 + return -EINVAL;
546 +
547 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)
548 + && (vma->vm_flags & VM_WRITE))
549 + return -EINVAL;
550 +
551 + addr = vdev->regions[index].addr;
552 + size = vdev->regions[index].size;
553 +
554 + vma->vm_private_data = mc_dev;
555 +
556 + if (vdev->regions[index].type & VFIO_FSL_MC_REGION_TYPE_MMIO)
557 + return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
558 +
559 + return -EFAULT;
560 +}
561 +
562 +static const struct vfio_device_ops vfio_fsl_mc_ops = {
563 + .name = "vfio-fsl-mc",
564 + .open = vfio_fsl_mc_open,
565 + .release = vfio_fsl_mc_release,
566 + .ioctl = vfio_fsl_mc_ioctl,
567 + .read = vfio_fsl_mc_read,
568 + .write = vfio_fsl_mc_write,
569 + .mmap = vfio_fsl_mc_mmap,
570 +};
571 +
572 +static int vfio_fsl_mc_initialize_dprc(struct vfio_fsl_mc_device *vdev)
573 +{
574 + struct device *root_dprc_dev;
575 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
576 + struct device *dev = &mc_dev->dev;
577 + struct fsl_mc_bus *mc_bus;
578 + struct irq_domain *mc_msi_domain;
579 + unsigned int irq_count;
580 + int ret;
581 +
582 + /* device must be DPRC */
583 + if (strcmp(mc_dev->obj_desc.type, "dprc"))
584 + return -EINVAL;
585 +
586 + /* mc_io must be un-initialized */
587 + WARN_ON(mc_dev->mc_io);
588 +
589 + /* allocate a portal from the root DPRC for vfio use */
590 + fsl_mc_get_root_dprc(dev, &root_dprc_dev);
591 + if (WARN_ON(!root_dprc_dev))
592 + return -EINVAL;
593 +
594 + ret = fsl_mc_portal_allocate(to_fsl_mc_device(root_dprc_dev),
595 + FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
596 + &mc_dev->mc_io);
597 + if (ret < 0)
598 + goto clean_msi_domain;
599 +
600 + /* Reset MCP before move on */
601 + ret = fsl_mc_portal_reset(mc_dev->mc_io);
602 + if (ret < 0) {
603 + dev_err(dev, "dprc portal reset failed: error = %d\n", ret);
604 + goto free_mc_portal;
605 + }
606 +
607 + /* MSI domain set up */
608 + ret = fsl_mc_find_msi_domain(root_dprc_dev->parent, &mc_msi_domain);
609 + if (ret < 0)
610 + goto free_mc_portal;
611 +
612 + dev_set_msi_domain(&mc_dev->dev, mc_msi_domain);
613 +
614 + ret = dprc_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id,
615 + &mc_dev->mc_handle);
616 + if (ret) {
617 + dev_err(dev, "dprc_open() failed: error = %d\n", ret);
618 + goto free_mc_portal;
619 + }
620 +
621 + /* Initialize resource pool */
622 + fsl_mc_init_all_resource_pools(mc_dev);
623 +
624 + mc_bus = to_fsl_mc_bus(mc_dev);
625 +
626 + if (!mc_bus->irq_resources) {
627 + irq_count = FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS;
628 + ret = fsl_mc_populate_irq_pool(mc_bus, irq_count);
629 + if (ret < 0) {
630 + dev_err(dev, "%s: Failed to init irq-pool\n", __func__);
631 + goto clean_resource_pool;
632 + }
633 + }
634 +
635 + mutex_init(&mc_bus->scan_mutex);
636 +
637 + mutex_lock(&mc_bus->scan_mutex);
638 + ret = dprc_scan_objects(mc_dev, mc_dev->driver_override,
639 + &irq_count);
640 + mutex_unlock(&mc_bus->scan_mutex);
641 + if (ret) {
642 + dev_err(dev, "dprc_scan_objects() fails (%d)\n", ret);
643 + goto clean_irq_pool;
644 + }
645 +
646 + if (irq_count > FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS) {
647 + dev_warn(&mc_dev->dev,
648 + "IRQs needed (%u) exceed IRQs preallocated (%u)\n",
649 + irq_count, FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
650 + }
651 +
652 + return 0;
653 +
654 +clean_irq_pool:
655 + fsl_mc_cleanup_irq_pool(mc_bus);
656 +
657 +clean_resource_pool:
658 + fsl_mc_cleanup_all_resource_pools(mc_dev);
659 + dprc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
660 +
661 +free_mc_portal:
662 + fsl_mc_portal_free(mc_dev->mc_io);
663 +
664 +clean_msi_domain:
665 + dev_set_msi_domain(&mc_dev->dev, NULL);
666 +
667 + return ret;
668 +}
669 +
670 +static int vfio_fsl_mc_device_remove(struct device *dev, void *data)
671 +{
672 + struct fsl_mc_device *mc_dev;
673 +
674 + WARN_ON(dev == NULL);
675 +
676 + mc_dev = to_fsl_mc_device(dev);
677 + if (WARN_ON(mc_dev == NULL))
678 + return -ENODEV;
679 +
680 + fsl_mc_device_remove(mc_dev);
681 + return 0;
682 +}
683 +
684 +static void vfio_fsl_mc_cleanup_dprc(struct vfio_fsl_mc_device *vdev)
685 +{
686 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
687 + struct fsl_mc_bus *mc_bus;
688 +
689 + /* device must be DPRC */
690 + if (strcmp(mc_dev->obj_desc.type, "dprc"))
691 + return;
692 +
693 + device_for_each_child(&mc_dev->dev, NULL, vfio_fsl_mc_device_remove);
694 +
695 + mc_bus = to_fsl_mc_bus(mc_dev);
696 + if (dev_get_msi_domain(&mc_dev->dev))
697 + fsl_mc_cleanup_irq_pool(mc_bus);
698 +
699 + dev_set_msi_domain(&mc_dev->dev, NULL);
700 +
701 + fsl_mc_cleanup_all_resource_pools(mc_dev);
702 + dprc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
703 + fsl_mc_portal_free(mc_dev->mc_io);
704 +}
705 +
706 +static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
707 +{
708 + struct iommu_group *group;
709 + struct vfio_fsl_mc_device *vdev;
710 + struct device *dev = &mc_dev->dev;
711 + int ret;
712 +
713 + group = vfio_iommu_group_get(dev);
714 + if (!group) {
715 + dev_err(dev, "%s: VFIO: No IOMMU group\n", __func__);
716 + return -EINVAL;
717 + }
718 +
719 + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
720 + if (!vdev) {
721 + vfio_iommu_group_put(group, dev);
722 + return -ENOMEM;
723 + }
724 +
725 + vdev->mc_dev = mc_dev;
726 +
727 + ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
728 + if (ret) {
729 + dev_err(dev, "%s: Failed to add to vfio group\n", __func__);
730 + goto free_vfio_device;
731 + }
732 +
733 + /* A DPRC container is scanned and its children bound to the vfio driver */
734 + if (strcmp(mc_dev->obj_desc.type, "dprc") == 0) {
735 + ret = vfio_fsl_mc_initialize_dprc(vdev);
736 + if (ret) {
737 + vfio_del_group_dev(dev);
738 + goto free_vfio_device;
739 + }
740 + } else {
741 + struct fsl_mc_device *mc_bus_dev;
742 +
743 + /* Non-dprc devices share mc_io from the parent dprc */
744 + mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
745 + if (mc_bus_dev == NULL) {
746 + ret = -ENODEV; /* ret is still 0 from vfio_add_group_dev() */
747 + vfio_del_group_dev(dev);
748 + goto free_vfio_device;
749 + }
750 + mc_dev->mc_io = mc_bus_dev->mc_io;
751 +
752 + /* Inherit parent MSI domain */
753 + dev_set_msi_domain(&mc_dev->dev,
754 + dev_get_msi_domain(mc_dev->dev.parent));
755 + }
756 + return 0;
757 +
758 +free_vfio_device:
759 + kfree(vdev);
760 + vfio_iommu_group_put(group, dev);
761 + return ret;
762 +}
763 +
764 +static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
765 +{
766 + struct vfio_fsl_mc_device *vdev;
767 + struct device *dev = &mc_dev->dev;
768 +
769 + vdev = vfio_del_group_dev(dev);
770 + if (!vdev)
771 + return -EINVAL;
772 +
773 + if (strcmp(mc_dev->obj_desc.type, "dprc") == 0)
774 + vfio_fsl_mc_cleanup_dprc(vdev);
775 + else
776 + dev_set_msi_domain(&mc_dev->dev, NULL);
777 +
778 + mc_dev->mc_io = NULL;
779 +
780 + vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
781 + kfree(vdev);
782 +
783 + return 0;
784 +}
785 +
786 +/*
787 + * vfio-fsl_mc is a meta-driver, so use driver_override interface to
788 + * bind a fsl_mc container with this driver and match_id_table is NULL.
789 + */
790 +static struct fsl_mc_driver vfio_fsl_mc_driver = {
791 + .probe = vfio_fsl_mc_probe,
792 + .remove = vfio_fsl_mc_remove,
793 + .match_id_table = NULL,
794 + .driver = {
795 + .name = "vfio-fsl-mc",
796 + .owner = THIS_MODULE,
797 + },
798 +};
799 +
800 +static int __init vfio_fsl_mc_driver_init(void)
801 +{
802 + return fsl_mc_driver_register(&vfio_fsl_mc_driver);
803 +}
804 +
805 +static void __exit vfio_fsl_mc_driver_exit(void)
806 +{
807 + fsl_mc_driver_unregister(&vfio_fsl_mc_driver);
808 +}
809 +
810 +module_init(vfio_fsl_mc_driver_init);
811 +module_exit(vfio_fsl_mc_driver_exit);
812 +
813 +MODULE_VERSION(DRIVER_VERSION);
814 +MODULE_LICENSE("GPL v2");
815 +MODULE_AUTHOR(DRIVER_AUTHOR);
816 +MODULE_DESCRIPTION(DRIVER_DESC);
817 --- /dev/null
818 +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
819 @@ -0,0 +1,199 @@
820 +/*
821 + * Freescale Management Complex (MC) device passthrough using VFIO
822 + *
823 + * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
824 + * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
825 + *
826 + * This file is licensed under the terms of the GNU General Public
827 + * License version 2. This program is licensed "as is" without any
828 + * warranty of any kind, whether express or implied.
829 + */
830 +
831 +#include <linux/vfio.h>
832 +#include <linux/slab.h>
833 +#include <linux/types.h>
834 +#include <linux/eventfd.h>
835 +#include <linux/msi.h>
836 +
837 +#include "linux/fsl/mc.h"
838 +#include "vfio_fsl_mc_private.h"
839 +
840 +static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
841 +{
842 + struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
843 +
844 + eventfd_signal(mc_irq->trigger, 1);
845 + return IRQ_HANDLED;
846 +}
847 +
848 +static int vfio_fsl_mc_irq_mask(struct vfio_fsl_mc_device *vdev,
849 + unsigned int index, unsigned int start,
850 + unsigned int count, uint32_t flags,
851 + void *data)
852 +{
853 + return -EINVAL;
854 +}
855 +
856 +static int vfio_fsl_mc_irq_unmask(struct vfio_fsl_mc_device *vdev,
857 + unsigned int index, unsigned int start,
858 + unsigned int count, uint32_t flags,
859 + void *data)
860 +{
861 + return -EINVAL;
862 +}
863 +
864 +static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
865 + int index, int fd)
866 +{
867 + struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
868 + struct eventfd_ctx *trigger;
869 + int hwirq;
870 + int ret;
871 +
872 + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
873 + if (irq->trigger) {
874 + free_irq(hwirq, irq);
875 + kfree(irq->name);
876 + eventfd_ctx_put(irq->trigger);
877 + irq->trigger = NULL;
878 + }
879 +
880 + if (fd < 0) /* Disable only */
881 + return 0;
882 +
883 + irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
884 + hwirq, dev_name(&vdev->mc_dev->dev));
885 + if (!irq->name)
886 + return -ENOMEM;
887 +
888 + trigger = eventfd_ctx_fdget(fd);
889 + if (IS_ERR(trigger)) {
890 + kfree(irq->name);
891 + return PTR_ERR(trigger);
892 + }
893 +
894 + irq->trigger = trigger;
895 +
896 + ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0,
897 + irq->name, irq);
898 + if (ret) {
899 + kfree(irq->name);
900 + eventfd_ctx_put(trigger);
901 + irq->trigger = NULL;
902 + return ret;
903 + }
904 +
905 + return 0;
906 +}
907 +
908 +int vfio_fsl_mc_irqs_init(struct vfio_fsl_mc_device *vdev)
909 +{
910 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
911 + struct vfio_fsl_mc_irq *mc_irq;
912 + int irq_count;
913 + int ret, i;
914 +
915 + /* Device does not support any interrupt */
916 + if (mc_dev->obj_desc.irq_count == 0)
917 + return 0;
918 +
919 + irq_count = mc_dev->obj_desc.irq_count;
920 +
921 + mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
922 + if (mc_irq == NULL)
923 + return -ENOMEM;
924 +
925 + /* Allocate IRQs */
926 + ret = fsl_mc_allocate_irqs(mc_dev);
927 + if (ret) {
928 + kfree(mc_irq);
929 + return ret;
930 + }
931 +
932 + for (i = 0; i < irq_count; i++) {
933 + mc_irq[i].count = 1;
934 + mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD;
935 + }
936 +
937 + vdev->mc_irqs = mc_irq;
938 +
939 + return 0;
940 +}
941 +
942 +/* Free All IRQs for the given MC object */
943 +void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev)
944 +{
945 + struct fsl_mc_device *mc_dev = vdev->mc_dev;
946 + int irq_count = mc_dev->obj_desc.irq_count;
947 + int i;
948 +
949 + /* Device does not support any interrupt */
950 + if (mc_dev->obj_desc.irq_count == 0)
951 + return;
952 +
953 + for (i = 0; i < irq_count; i++)
954 + vfio_set_trigger(vdev, i, -1);
955 +
956 + fsl_mc_free_irqs(mc_dev);
957 + kfree(vdev->mc_irqs);
958 +}
959 +
960 +static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
961 + unsigned int index, unsigned int start,
962 + unsigned int count, uint32_t flags,
963 + void *data)
964 +{
965 + struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
966 + int hwirq;
967 +
968 + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
969 + return vfio_set_trigger(vdev, index, -1);
970 +
971 + if (start != 0 || count != 1)
972 + return -EINVAL;
973 +
974 + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
975 + int32_t fd = *(int32_t *)data;
976 +
977 + return vfio_set_trigger(vdev, index, fd);
978 + }
979 + /* Software trigger: the handler signals irq->trigger, so it must be set */
980 + if (!irq->trigger)
981 + return -EINVAL;
982 + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
983 + if (flags & VFIO_IRQ_SET_DATA_NONE) {
984 + vfio_fsl_mc_irq_handler(hwirq, irq);
985 + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
986 + uint8_t trigger = *(uint8_t *)data;
987 +
988 + if (trigger)
989 + vfio_fsl_mc_irq_handler(hwirq, irq);
990 + }
991 +
992 + return 0;
993 +}
994 +
995 +int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
996 + uint32_t flags, unsigned int index,
997 + unsigned int start, unsigned int count,
998 + void *data)
999 +{
1000 + int ret = -ENOTTY;
1001 +
1002 + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1003 + case VFIO_IRQ_SET_ACTION_MASK:
1004 + ret = vfio_fsl_mc_irq_mask(vdev, index, start, count,
1005 + flags, data);
1006 + break;
1007 + case VFIO_IRQ_SET_ACTION_UNMASK:
1008 + ret = vfio_fsl_mc_irq_unmask(vdev, index, start, count,
1009 + flags, data);
1010 + break;
1011 + case VFIO_IRQ_SET_ACTION_TRIGGER:
1012 + ret = vfio_fsl_mc_set_irq_trigger(vdev, index, start,
1013 + count, flags, data);
1014 + break;
1015 + }
1016 +
1017 + return ret;
1018 +}
1019 --- /dev/null
1020 +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
1021 @@ -0,0 +1,55 @@
1022 +/*
1023 + * Freescale Management Complex VFIO private declarations
1024 + *
1025 + * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
1026 + * Copyright 2016 NXP
1027 + * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
1028 + *
1029 + * This file is licensed under the terms of the GNU General Public
1030 + * License version 2. This program is licensed "as is" without any
1031 + * warranty of any kind, whether express or implied.
1032 + */
1033 +
1034 +#ifndef VFIO_FSL_MC_PRIVATE_H
1035 +#define VFIO_FSL_MC_PRIVATE_H
1036 +
1037 +#define VFIO_FSL_MC_OFFSET_SHIFT 40
1038 +#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1)
1039 +
1040 +#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT)
1041 +
1042 +#define VFIO_FSL_MC_INDEX_TO_OFFSET(index) \
1043 + ((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT)
1044 +
1045 +struct vfio_fsl_mc_irq {
1046 + u32 flags;
1047 + u32 count;
1048 + struct eventfd_ctx *trigger;
1049 + char *name;
1050 +};
1051 +
1052 +struct vfio_fsl_mc_region {
1053 + u32 flags;
1054 +#define VFIO_FSL_MC_REGION_TYPE_MMIO 1
1055 +#define VFIO_FSL_MC_REGION_TYPE_CACHEABLE 2
1056 + u32 type;
1057 + u64 addr;
1058 + resource_size_t size;
1059 + void __iomem *ioaddr;
1060 +};
1061 +
1062 +struct vfio_fsl_mc_device {
1063 + struct fsl_mc_device *mc_dev;
1064 + int refcnt;
1065 + u32 num_regions;
1066 + struct vfio_fsl_mc_region *regions;
1067 + struct vfio_fsl_mc_irq *mc_irqs;
1068 +};
1069 +
1070 +int vfio_fsl_mc_irqs_init(struct vfio_fsl_mc_device *vdev);
1071 +void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev);
1072 +int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
1073 + uint32_t flags, unsigned int index,
1074 + unsigned int start, unsigned int count,
1075 + void *data);
1076 +#endif /* VFIO_FSL_MC_PRIVATE_H */
1077 --- a/drivers/vfio/vfio_iommu_type1.c
1078 +++ b/drivers/vfio/vfio_iommu_type1.c
1079 @@ -36,6 +36,8 @@
1080 #include <linux/uaccess.h>
1081 #include <linux/vfio.h>
1082 #include <linux/workqueue.h>
1083 +#include <linux/dma-iommu.h>
1084 +#include <linux/irqdomain.h>
1085
1086 #define DRIVER_VERSION "0.2"
1087 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
1088 @@ -720,6 +722,27 @@ static void vfio_test_domain_fgsp(struct
1089 __free_pages(pages, order);
1090 }
1091
1092 +static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
1093 +{
1094 + struct list_head group_resv_regions;
1095 + struct iommu_resv_region *region, *next;
1096 + bool ret = false;
1097 +
1098 + INIT_LIST_HEAD(&group_resv_regions);
1099 + iommu_get_group_resv_regions(group, &group_resv_regions);
1100 + list_for_each_entry(region, &group_resv_regions, list) {
1101 + if (region->type == IOMMU_RESV_SW_MSI) {
1102 + *base = region->start;
1103 + ret = true;
1104 + goto out;
1105 + }
1106 + }
1107 +out:
1108 + list_for_each_entry_safe(region, next, &group_resv_regions, list)
1109 + kfree(region);
1110 + return ret;
1111 +}
1112 +
1113 static int vfio_iommu_type1_attach_group(void *iommu_data,
1114 struct iommu_group *iommu_group)
1115 {
1116 @@ -728,6 +751,8 @@ static int vfio_iommu_type1_attach_group
1117 struct vfio_domain *domain, *d;
1118 struct bus_type *bus = NULL;
1119 int ret;
1120 + bool resv_msi, msi_remap;
1121 + phys_addr_t resv_msi_base;
1122
1123 mutex_lock(&iommu->lock);
1124
1125 @@ -774,11 +799,15 @@ static int vfio_iommu_type1_attach_group
1126 if (ret)
1127 goto out_domain;
1128
1129 + resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
1130 +
1131 INIT_LIST_HEAD(&domain->group_list);
1132 list_add(&group->next, &domain->group_list);
1133
1134 - if (!allow_unsafe_interrupts &&
1135 - !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
1136 + msi_remap = resv_msi ? irq_domain_check_msi_remap() :
1137 + iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
1138 +
1139 + if (!allow_unsafe_interrupts && !msi_remap) {
1140 pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
1141 __func__);
1142 ret = -EPERM;
1143 @@ -820,6 +849,12 @@ static int vfio_iommu_type1_attach_group
1144 if (ret)
1145 goto out_detach;
1146
1147 + if (resv_msi) {
1148 + ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
1149 + if (ret)
1150 + goto out_detach;
1151 + }
1152 +
1153 list_add(&domain->next, &iommu->domain_list);
1154
1155 mutex_unlock(&iommu->lock);
1156 --- a/include/uapi/linux/vfio.h
1157 +++ b/include/uapi/linux/vfio.h
1158 @@ -198,6 +198,7 @@ struct vfio_device_info {
1159 #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
1160 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */
1161 #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
1162 +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 5) /* vfio-fsl-mc device */
1163 __u32 num_regions; /* Max region index + 1 */
1164 __u32 num_irqs; /* Max IRQ index + 1 */
1165 };