diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index b38afec35edc..d226c7a5ba8b 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -127,22 +127,22 @@ the VFIO when devices are unbound from the driver. Physical Device Driver Interface -------------------------------- -The physical device driver interface provides the parent_ops[3] structure to -define the APIs to manage work in the mediated core driver that is related to -the physical device. +The physical device driver interface provides the mdev_parent_ops[3] structure +to define the APIs to manage work in the mediated core driver that is related +to the physical device. -The structures in the parent_ops structure are as follows: +The structures in the mdev_parent_ops structure are as follows: * dev_attr_groups: attributes of the parent device * mdev_attr_groups: attributes of the mediated device * supported_config: attributes to define supported configurations -The functions in the parent_ops structure are as follows: +The functions in the mdev_parent_ops structure are as follows: * create: allocate basic resources in a driver for a mediated device * remove: free resources in a driver when a mediated device is destroyed -The callbacks in the parent_ops structure are as follows: +The callbacks in the mdev_parent_ops structure are as follows: * open: open callback of mediated device * close: close callback of mediated device @@ -151,14 +151,14 @@ The callbacks in the parent_ops structure are as follows: * write: write emulation callback * mmap: mmap emulation callback -A driver should use the parent_ops structure in the function call to register -itself with the mdev core driver: +A driver should use the mdev_parent_ops structure in the function call to +register itself with the mdev core driver: extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); -However, the parent_ops structure is not required in the function call that a -driver should use to unregister itself with the mdev core driver: +However, the mdev_parent_ops structure is not required in the function call +that a driver should use to unregister itself with the mdev core driver: extern void mdev_unregister_device(struct device *dev); @@ -223,6 +223,9 @@ Directories and files under the sysfs for Each Physical Device sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name); + (or using mdev_parent_dev(mdev) to arrive at the parent device outside + of the core mdev code) + * device_api This attribute should show which device API is being created, for example, @@ -394,5 +397,5 @@ References [1] See Documentation/vfio.txt for more information on VFIO. [2] struct mdev_driver in include/linux/mdev.h -[3] struct parent_ops in include/linux/mdev.h +[3] struct mdev_parent_ops in include/linux/mdev.h [4] struct vfio_iommu_driver_ops in include/linux/vfio.h diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 934963970288..faaae07ae487 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -169,7 +169,7 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) { - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); struct gvt_dma *this; unsigned long g1; int rc; @@ -198,7 +198,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); unsigned long gfn; mutex_lock(&vgpu->vdev.cache_lock); @@ -399,7 +399,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) struct device *pdev; void *gvt; - pdev = mdev->parent->dev; + pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); @@ -421,7 +421,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) mdev_set_drvdata(mdev, vgpu); gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", - dev_name(&mdev->dev)); + dev_name(mdev_dev(mdev))); return 0; } @@ -485,7 +485,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, &vgpu->vdev.iommu_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for iommu failed: %d\n", ret); @@ -493,7 +493,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) } events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(&mdev->dev, VFIO_GROUP_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vgpu->vdev.group_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for group failed: %d\n", ret); @@ -508,11 +508,11 @@ static int intel_vgpu_open(struct mdev_device *mdev) return ret; undo_group: - vfio_unregister_notifier(&mdev->dev, VFIO_GROUP_NOTIFY, + vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &vgpu->vdev.group_notifier); undo_iommu: - vfio_unregister_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); out: return ret; @@ -529,11 +529,11 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) return; - ret = vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_IOMMU_NOTIFY, + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); - ret = vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_GROUP_NOTIFY, + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY, &vgpu->vdev.group_notifier); WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret); @@ -1111,7 +1111,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return 0; } -static const struct parent_ops intel_vgpu_ops = { +static const struct mdev_parent_ops intel_vgpu_ops = { .supported_type_groups = intel_vgpu_type_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, @@ -1398,7 +1398,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) return pfn; pfn = INTEL_GVT_INVALID_ADDR; - dev = &info->vgpu->vdev.mdev->dev; + dev = mdev_dev(info->vgpu->vdev.mdev); rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (rc != 1) { gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc); diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index be1ee89ee917..36d75c367d22 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -27,6 +27,45 @@ static LIST_HEAD(parent_list); static DEFINE_MUTEX(parent_list_lock); static struct class_compat *mdev_bus_compat_class; +static LIST_HEAD(mdev_list); +static DEFINE_MUTEX(mdev_list_lock); + +struct device *mdev_parent_dev(struct mdev_device *mdev) +{ + return mdev->parent->dev; +} +EXPORT_SYMBOL(mdev_parent_dev); + +void *mdev_get_drvdata(struct mdev_device *mdev) +{ + return mdev->driver_data; +} +EXPORT_SYMBOL(mdev_get_drvdata); + +void mdev_set_drvdata(struct mdev_device *mdev, void *data) +{ + mdev->driver_data = data; +} +EXPORT_SYMBOL(mdev_set_drvdata); + +struct device *mdev_dev(struct mdev_device *mdev) +{ + return &mdev->dev; +} +EXPORT_SYMBOL(mdev_dev); + +struct mdev_device *mdev_from_dev(struct device *dev) +{ + return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL; +} +EXPORT_SYMBOL(mdev_from_dev); + +uuid_le mdev_uuid(struct mdev_device *mdev) +{ + return mdev->uuid; +} +EXPORT_SYMBOL(mdev_uuid); + static int _find_mdev_device(struct device *dev, void *data) { struct mdev_device *mdev; @@ -42,7 +81,7 @@ static int _find_mdev_device(struct device *dev, void *data) return 0; } -static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) +static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) { struct device *dev; @@ -56,9 +95,9 @@ static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) } /* Should be called holding parent_list_lock */ -static struct parent_device *__find_parent_device(struct device *dev) +static struct mdev_parent *__find_parent_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; list_for_each_entry(parent, &parent_list, next) { if (parent->dev == dev) @@ -69,8 +108,8 @@ static struct parent_device *__find_parent_device(struct device *dev) static void mdev_release_parent(struct kref *kref) { - struct parent_device *parent = container_of(kref, struct parent_device, - ref); + struct mdev_parent *parent = container_of(kref, struct mdev_parent, + ref); struct device *dev = parent->dev; kfree(parent); @@ -78,7 +117,7 @@ static void mdev_release_parent(struct kref *kref) } static -inline struct parent_device *mdev_get_parent(struct parent_device *parent) +inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) { if (parent) kref_get(&parent->ref); @@ -86,7 +125,7 @@ inline struct parent_device *mdev_get_parent(struct parent_device *parent) return parent; } -static inline void mdev_put_parent(struct parent_device *parent) +static inline void mdev_put_parent(struct mdev_parent *parent) { if (parent) kref_put(&parent->ref, mdev_release_parent); @@ -95,7 +134,7 @@ static inline void mdev_put_parent(struct parent_device *parent) static int mdev_device_create_ops(struct kobject *kobj, struct mdev_device *mdev) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; ret = parent->ops->create(kobj, mdev); @@ -122,7 +161,7 @@ static int mdev_device_create_ops(struct kobject *kobj, */ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; /* @@ -153,10 +192,10 @@ static int mdev_device_remove_cb(struct device *dev, void *data) * Add device to list of registered parent devices. * Returns a negative value on error, otherwise 0. */ -int mdev_register_device(struct device *dev, const struct parent_ops *ops) +int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) { int ret; - struct parent_device *parent; + struct mdev_parent *parent; /* check for mandatory ops */ if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups) @@ -229,7 +268,7 @@ EXPORT_SYMBOL(mdev_register_device); void mdev_unregister_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; bool force_remove = true; mutex_lock(&parent_list_lock); @@ -266,7 +305,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) { int ret; struct mdev_device *mdev; - struct parent_device *parent; + struct mdev_parent *parent; struct mdev_type *type = to_mdev_type(kobj); parent = mdev_get_parent(type->parent); @@ -316,6 +355,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) dev_dbg(&mdev->dev, "MDEV: created\n"); mutex_unlock(&parent->lock); + + mutex_lock(&mdev_list_lock); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + return ret; create_failed: @@ -329,12 +373,30 @@ create_err: int mdev_device_remove(struct device *dev, bool force_remove) { - struct mdev_device *mdev; - struct parent_device *parent; + struct mdev_device *mdev, *tmp; + struct mdev_parent *parent; struct mdev_type *type; int ret; + bool found = false; mdev = to_mdev_device(dev); + + mutex_lock(&mdev_list_lock); + list_for_each_entry(tmp, &mdev_list, next) { + if (tmp == mdev) { + found = true; + break; + } + } + + if (found) + list_del(&mdev->next); + + mutex_unlock(&mdev_list_lock); + + if (!found) + return -ENODEV; + type = to_mdev_type(mdev->type_kobj); parent = mdev->parent; mutex_lock(&parent->lock); @@ -342,6 +404,11 @@ int mdev_device_remove(struct device *dev, bool force_remove) ret = mdev_device_remove_ops(mdev, force_remove); if (ret) { mutex_unlock(&parent->lock); + + mutex_lock(&mdev_list_lock); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + return ret; } @@ -349,7 +416,8 @@ int mdev_device_remove(struct device *dev, bool force_remove) device_unregister(dev); mutex_unlock(&parent->lock); mdev_put_parent(parent); - return ret; + + return 0; } static int __init mdev_init(void) diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index d35097cbf3d7..a9cefd70a705 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -16,10 +16,33 @@ int mdev_bus_register(void); void mdev_bus_unregister(void); +struct mdev_parent { + struct device *dev; + const struct mdev_parent_ops *ops; + struct kref ref; + struct mutex lock; + struct list_head next; + struct kset *mdev_types_kset; + struct list_head type_list; +}; + +struct mdev_device { + struct device dev; + struct mdev_parent *parent; + uuid_le uuid; + void *driver_data; + struct kref ref; + struct list_head next; + struct kobject *type_kobj; +}; + +#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) + struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; - struct parent_device *parent; + struct mdev_parent *parent; struct list_head next; struct attribute_group *group; }; @@ -29,8 +52,8 @@ struct mdev_type { #define to_mdev_type(_kobj) \ container_of(_kobj, struct mdev_type, kobj) -int parent_create_sysfs_files(struct parent_device *parent); -void parent_remove_sysfs_files(struct parent_device *parent); +int parent_create_sysfs_files(struct mdev_parent *parent); +void parent_remove_sysfs_files(struct mdev_parent *parent); int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type); void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type); diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 1a53deb2ee10..802df210929b 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -92,7 +92,7 @@ static struct kobj_type mdev_type_ktype = { .release = mdev_type_release, }; -struct mdev_type *add_mdev_supported_type(struct parent_device *parent, +struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, struct attribute_group *group) { struct mdev_type *type; @@ -158,7 +158,7 @@ static void remove_mdev_supported_type(struct mdev_type *type) kobject_put(&type->kobj); } -static int add_mdev_supported_type_groups(struct parent_device *parent) +static int add_mdev_supported_type_groups(struct mdev_parent *parent) { int i; @@ -183,7 +183,7 @@ static int add_mdev_supported_type_groups(struct parent_device *parent) } /* mdev sysfs functions */ -void parent_remove_sysfs_files(struct parent_device *parent) +void parent_remove_sysfs_files(struct mdev_parent *parent) { struct mdev_type *type, *tmp; @@ -196,7 +196,7 @@ void parent_remove_sysfs_files(struct parent_device *parent) kset_unregister(parent->mdev_types_kset); } -int parent_create_sysfs_files(struct parent_device *parent) +int parent_create_sysfs_files(struct mdev_parent *parent) { int ret; diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index ffc36758cb84..fa848a701b8b 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -27,7 +27,7 @@ static int vfio_mdev_open(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; if (unlikely(!parent->ops->open)) @@ -46,7 +46,7 @@ static int vfio_mdev_open(void *device_data) static void vfio_mdev_release(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (likely(parent->ops->release)) parent->ops->release(mdev); @@ -58,7 +58,7 @@ static long vfio_mdev_unlocked_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->ioctl)) return -EINVAL; @@ -70,7 +70,7 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->read)) return -EINVAL; @@ -82,7 +82,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->write)) return -EINVAL; @@ -93,7 +93,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->mmap)) return -EINVAL; diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index dcd7c2a99618..324c52e3a1a4 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1142,6 +1142,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return ret; vdev->barmap[index] = pci_iomap(pdev, index, 0); + if (!vdev->barmap[index]) { + pci_release_selected_regions(pdev, 1 << index); + return -ENOMEM; + } } vma->vm_private_data = vdev; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 5ffd1d9ad4bd..357243d76f10 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -193,7 +193,10 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, if (!vdev->has_vga) return -EINVAL; - switch (pos) { + if (pos > 0xbfffful) + return -EINVAL; + + switch ((u32)pos) { case 0xa0000 ... 0xbffff: count = min(count, (size_t)(0xc0000 - pos)); iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f3726ba12aa6..9266271a787a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -268,28 +268,38 @@ static void vfio_lock_acct(struct task_struct *task, long npage) { struct vwork *vwork; struct mm_struct *mm; + bool is_current; if (!npage) return; - mm = get_task_mm(task); + is_current = (task->mm == current->mm); + + mm = is_current ? task->mm : get_task_mm(task); if (!mm) - return; /* process exited or nothing to do */ + return; /* process exited */ if (down_write_trylock(&mm->mmap_sem)) { mm->locked_vm += npage; up_write(&mm->mmap_sem); - mmput(mm); + if (!is_current) + mmput(mm); return; } + if (is_current) { + mm = get_task_mm(task); + if (!mm) + return; + } + /* * Couldn't get mmap_sem lock, so must setup to update * mm->locked_vm later. If locked_vm were atomic, we * wouldn't need this silliness */ vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); - if (!vwork) { + if (WARN_ON(!vwork)) { mmput(mm); return; } @@ -393,77 +403,71 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base) { - unsigned long limit; - bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns, - CAP_IPC_LOCK); - struct mm_struct *mm; - long ret, i = 0, lock_acct = 0; + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); + long ret, pinned = 0, lock_acct = 0; bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; - mm = get_task_mm(dma->task); - if (!mm) + /* This code path is only user initiated */ + if (!current->mm) return -ENODEV; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); + ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base); if (ret) - goto pin_pg_remote_exit; + return ret; + pinned++; rsvd = is_invalid_reserved_pfn(*pfn_base); - limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; /* * Reserved pages aren't counted against the user, externally pinned * pages are already counted against the user. */ if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && mm->locked_vm + 1 > limit) { + if (!lock_cap && current->mm->locked_vm + 1 > limit) { put_pfn(*pfn_base, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); - ret = -ENOMEM; - goto pin_pg_remote_exit; + return -ENOMEM; } lock_acct++; } - i++; - if (likely(!disable_hugepages)) { - /* Lock all the consecutive pages from pfn_base */ - for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; i < npage; - i++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - unsigned long pfn = 0; + if (unlikely(disable_hugepages)) + goto out; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, &pfn); - if (ret) - break; + /* Lock all the consecutive pages from pfn_base */ + for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; + pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { + unsigned long pfn = 0; - if (pfn != *pfn_base + i || - rsvd != is_invalid_reserved_pfn(pfn)) { + ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); + if (ret) + break; + + if (pfn != *pfn_base + pinned || + rsvd != is_invalid_reserved_pfn(pfn)) { + put_pfn(pfn, dma->prot); + break; + } + + if (!rsvd && !vfio_find_vpfn(dma, iova)) { + if (!lock_cap && + current->mm->locked_vm + lock_acct + 1 > limit) { put_pfn(pfn, dma->prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); break; } - - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && - mm->locked_vm + lock_acct + 1 > limit) { - put_pfn(pfn, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) " - "exceeded\n", __func__, - limit << PAGE_SHIFT); - break; - } - lock_acct++; - } + lock_acct++; } } - vfio_lock_acct(dma->task, lock_acct); - ret = i; +out: + vfio_lock_acct(current, lock_acct); -pin_pg_remote_exit: - mmput(mm); - return ret; + return pinned; } static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, @@ -473,10 +477,10 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, long unlocked = 0, locked = 0; long i; - for (i = 0; i < npage; i++) { + for (i = 0; i < npage; i++, iova += PAGE_SIZE) { if (put_pfn(pfn++, dma->prot)) { unlocked++; - if (vfio_find_vpfn(dma, iova + (i << PAGE_SHIFT))) + if (vfio_find_vpfn(dma, iova)) locked++; } } diff --git a/include/linux/mdev.h b/include/linux/mdev.h index ec819e9a115a..b6e048e1045f 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -13,34 +13,10 @@ #ifndef MDEV_H #define MDEV_H -/* Parent device */ -struct parent_device { - struct device *dev; - const struct parent_ops *ops; - - /* internal */ - struct kref ref; - struct mutex lock; - struct list_head next; - struct kset *mdev_types_kset; - struct list_head type_list; -}; - -/* Mediated device */ -struct mdev_device { - struct device dev; - struct parent_device *parent; - uuid_le uuid; - void *driver_data; - - /* internal */ - struct kref ref; - struct list_head next; - struct kobject *type_kobj; -}; +struct mdev_device; /** - * struct parent_ops - Structure to be registered for each parent device to + * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. * * @owner: The module owner. @@ -86,10 +62,9 @@ struct mdev_device { * @mdev: mediated device structure * @vma: vma structure * Parent device that support mediated device should be registered with mdev - * module with parent_ops structure. + * module with mdev_parent_ops structure. **/ - -struct parent_ops { +struct mdev_parent_ops { struct module *owner; const struct attribute_group **dev_attr_groups; const struct attribute_group **mdev_attr_groups; @@ -103,7 +78,7 @@ struct parent_ops { size_t count, loff_t *ppos); ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, size_t count, loff_t *ppos); - ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd, + long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); }; @@ -142,27 +117,22 @@ struct mdev_driver { }; #define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) -#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) -static inline void *mdev_get_drvdata(struct mdev_device *mdev) -{ - return mdev->driver_data; -} - -static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data) -{ - mdev->driver_data = data; -} +extern void *mdev_get_drvdata(struct mdev_device *mdev); +extern void mdev_set_drvdata(struct mdev_device *mdev, void *data); +extern uuid_le mdev_uuid(struct mdev_device *mdev); extern struct bus_type mdev_bus_type; -#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) - extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); extern void mdev_unregister_device(struct device *dev); extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); extern void mdev_unregister_driver(struct mdev_driver *drv); +extern struct device *mdev_parent_dev(struct mdev_device *mdev); +extern struct device *mdev_dev(struct mdev_device *mdev); +extern struct mdev_device *mdev_from_dev(struct device *dev); + #endif /* MDEV_H */ diff --git a/samples/Kconfig b/samples/Kconfig index a6d2a43bbf2e..b124f62ed6cb 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -105,4 +105,11 @@ config SAMPLE_BLACKFIN_GPTIMERS help Build samples of blackfin gptimers sample module. +config SAMPLE_VFIO_MDEV_MTTY + tristate "Build VFIO mtty example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual tty sample driver for use as a VFIO + mediated device + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index e17d66d77f09..86a137e451d9 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ - configfs/ connector/ v4l/ trace_printk/ blackfin/ + configfs/ connector/ v4l/ trace_printk/ blackfin/ \ + vfio-mdev/ diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile index a932edbe38eb..cbbd868a50a8 100644 --- a/samples/vfio-mdev/Makefile +++ b/samples/vfio-mdev/Makefile @@ -1,13 +1 @@ -# -# Makefile for mtty.c file -# -KERNEL_DIR:=/lib/modules/$(shell uname -r)/build - -obj-m:=mtty.o - -modules clean modules_install: - $(MAKE) -C $(KERNEL_DIR) SUBDIRS=$(PWD) $@ - -default: modules - -module: modules +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 6b633a4ea333..1fc57a5093a7 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -164,7 +164,7 @@ static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) struct mdev_state *mds; list_for_each_entry(mds, &mdev_devices_list, next) { - if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0) + if (uuid_le_cmp(mdev_uuid(mds->mdev), uuid) == 0) return mds; } @@ -341,7 +341,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: Fifo level trigger\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } } else { #if defined(DEBUG_INTR) @@ -355,7 +356,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, */ if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_RLSI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); break; @@ -374,7 +376,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: IER_THRI write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -445,7 +448,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR_OUT2 write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && @@ -453,7 +456,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR RTS/DTR write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } break; @@ -504,7 +507,8 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, #endif if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_THRI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -734,7 +738,7 @@ int mtty_create(struct kobject *kobj, struct mdev_device *mdev) for (i = 0; i < 2; i++) { snprintf(name, MTTY_STRING_LEN, "%s-%d", - dev_driver_string(mdev->parent->dev), i + 1); + dev_driver_string(mdev_parent_dev(mdev)), i + 1); if (!strcmp(kobj->name, name)) { nr_ports = i + 1; break; @@ -1298,10 +1302,8 @@ static ssize_t sample_mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = to_mdev_device(dev); - - if (mdev) - return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev)); + if (mdev_from_dev(dev)) + return sprintf(buf, "This is MDEV %s\n", dev_name(dev)); return sprintf(buf, "\n"); } @@ -1402,7 +1404,7 @@ struct attribute_group *mdev_type_groups[] = { NULL, }; -struct parent_ops mdev_fops = { +struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, .dev_attr_groups = mtty_dev_groups, .mdev_attr_groups = mdev_dev_groups, @@ -1447,6 +1449,7 @@ static int __init mtty_dev_init(void) if (IS_ERR(mtty_dev.vd_class)) { pr_err("Error: failed to register mtty_dev class\n"); + ret = PTR_ERR(mtty_dev.vd_class); goto failed1; } @@ -1458,7 +1461,8 @@ static int __init mtty_dev_init(void) if (ret) goto failed2; - if (mdev_register_device(&mtty_dev.dev, &mdev_fops) != 0) + ret = mdev_register_device(&mtty_dev.dev, &mdev_fops); + if (ret) goto failed3; mutex_init(&mdev_list_lock);