[PATCH v3 1/2] drm/i915/gvt: Fix possible recursive locking issue
Zhenyu Wang
zhenyuw at linux.intel.com
Mon Jun 26 08:29:47 UTC 2017
pushed two fixes, thanks!
On 2017.06.26 15:20:49 +0800, Chuanxiao Dong wrote:
> vfio_unpin_pages will hold a read semaphore however it is already hold
> in the same thread by vfio ioctl. It will cause below warning:
>
> [ 5102.127454] ============================================
> [ 5102.133379] WARNING: possible recursive locking detected
> [ 5102.139304] 4.12.0-rc4+ #3 Not tainted
> [ 5102.143483] --------------------------------------------
> [ 5102.149407] qemu-system-x86/1620 is trying to acquire lock:
> [ 5102.155624] (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0
> [ 5102.165626]
> but task is already holding lock:
> [ 5102.172134] (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
> [ 5102.182522]
> other info that might help us debug this:
> [ 5102.189806] Possible unsafe locking scenario:
>
> [ 5102.196411] CPU0
> [ 5102.199136] ----
> [ 5102.201861] lock(&container->group_lock);
> [ 5102.206527] lock(&container->group_lock);
> [ 5102.211191]
> *** DEADLOCK ***
>
> [ 5102.217796] May be due to missing lock nesting notation
>
> [ 5102.225370] 3 locks held by qemu-system-x86/1620:
> [ 5102.230618] #0: (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
> [ 5102.241482] #1: (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70
> [ 5102.253713] #2: (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120
> [ 5102.265163]
> stack backtrace:
> [ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3
> [ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015
> [ 5102.289445] Call Trace:
> [ 5102.292175] dump_stack+0x85/0xc7
> [ 5102.295871] validate_chain.isra.21+0x9da/0xaf0
> [ 5102.300925] __lock_acquire+0x405/0x820
> [ 5102.305202] lock_acquire+0xc7/0x220
> [ 5102.309191] ? vfio_unpin_pages+0x96/0xf0
> [ 5102.313666] down_read+0x2b/0x50
> [ 5102.317259] ? vfio_unpin_pages+0x96/0xf0
> [ 5102.321732] vfio_unpin_pages+0x96/0xf0
> [ 5102.326024] intel_vgpu_iommu_notifier+0xe5/0x120
> [ 5102.331283] notifier_call_chain+0x4a/0x70
> [ 5102.335851] __blocking_notifier_call_chain+0x4d/0x70
> [ 5102.341490] blocking_notifier_call_chain+0x16/0x20
> [ 5102.346935] vfio_iommu_type1_ioctl+0x87b/0x920
> [ 5102.351994] vfio_fops_unl_ioctl+0x81/0x280
> [ 5102.356660] ? __fget+0xf0/0x210
> [ 5102.360261] do_vfs_ioctl+0x93/0x6a0
> [ 5102.364247] ? __fget+0x111/0x210
> [ 5102.367942] SyS_ioctl+0x41/0x70
> [ 5102.371542] entry_SYSCALL_64_fastpath+0x1f/0xbe
>
> put the vfio_unpin_pages in a workqueue can fix this.
>
> v2:
> - use for style instead of do{}while(1). (Zhenyu)
> v3:
> - rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu)
>
> Fixes: 659643f7d814 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT")
> Signed-off-by: Chuanxiao Dong <chuanxiao.dong at intel.com>
> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
> Cc: stable at vger.kernel.org # v4.10+
> ---
> drivers/gpu/drm/i915/gvt/gvt.h | 3 +++
> drivers/gpu/drm/i915/gvt/kvmgt.c | 55 ++++++++++++++++++++++++++++++++--------
> 2 files changed, 48 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index 3a74e79..28d817e 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -182,6 +182,9 @@ struct intel_vgpu {
> struct kvm *kvm;
> struct work_struct release_work;
> atomic_t released;
> + struct work_struct unpin_work;
> + spinlock_t unpin_lock; /* To protect unpin_list */
> + struct list_head unpin_list;
> } vdev;
> #endif
> };
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 1ae0b40..56a8d6a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -78,6 +78,7 @@ struct gvt_dma {
> struct rb_node node;
> gfn_t gfn;
> unsigned long iova;
> + struct list_head list;
> };
>
> static inline bool handle_valid(unsigned long handle)
> @@ -166,6 +167,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
>
> new->gfn = gfn;
> new->iova = iova;
> + INIT_LIST_HEAD(&new->list);
>
> mutex_lock(&vgpu->vdev.cache_lock);
> while (*link) {
> @@ -197,26 +199,52 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
> kfree(entry);
> }
>
> -static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
> +static void intel_vgpu_unpin_work(struct work_struct *work)
> {
> + struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
> + vdev.unpin_work);
> struct device *dev = mdev_dev(vgpu->vdev.mdev);
> struct gvt_dma *this;
> - unsigned long g1;
> - int rc;
> + unsigned long gfn;
> +
> + for (;;) {
> + spin_lock(&vgpu->vdev.unpin_lock);
> + if (list_empty(&vgpu->vdev.unpin_list)) {
> + spin_unlock(&vgpu->vdev.unpin_lock);
> + break;
> + }
> + this = list_first_entry(&vgpu->vdev.unpin_list,
> + struct gvt_dma, list);
> + list_del(&this->list);
> + spin_unlock(&vgpu->vdev.unpin_lock);
> +
> + gfn = this->gfn;
> + vfio_unpin_pages(dev, &gfn, 1);
> + kfree(this);
> + }
> +}
> +
> +static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
> +{
> + struct gvt_dma *this;
>
> mutex_lock(&vgpu->vdev.cache_lock);
> this = __gvt_cache_find(vgpu, gfn);
> if (!this) {
> mutex_unlock(&vgpu->vdev.cache_lock);
> - return;
> + return false;
> }
> -
> - g1 = gfn;
> gvt_dma_unmap_iova(vgpu, this->iova);
> - rc = vfio_unpin_pages(dev, &g1, 1);
> - WARN_ON(rc != 1);
> - __gvt_cache_remove_entry(vgpu, this);
> + /* remove this from rb tree */
> + rb_erase(&this->node, &vgpu->vdev.cache);
> mutex_unlock(&vgpu->vdev.cache_lock);
> +
> + /* put this to the unpin_list */
> + spin_lock(&vgpu->vdev.unpin_lock);
> + list_move_tail(&this->list, &vgpu->vdev.unpin_list);
> + spin_unlock(&vgpu->vdev.unpin_lock);
> +
> + return true;
> }
>
> static void gvt_cache_init(struct intel_vgpu *vgpu)
> @@ -453,6 +481,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
> }
>
> INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
> + INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
> + spin_lock_init(&vgpu->vdev.unpin_lock);
> + INIT_LIST_HEAD(&vgpu->vdev.unpin_list);
>
> vgpu->vdev.mdev = mdev;
> mdev_set_drvdata(mdev, vgpu);
> @@ -482,6 +513,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> struct intel_vgpu *vgpu = container_of(nb,
> struct intel_vgpu,
> vdev.iommu_notifier);
> + bool sched_unmap = false;
>
> if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> struct vfio_iommu_type1_dma_unmap *unmap = data;
> @@ -491,7 +523,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> end_gfn = gfn + unmap->size / PAGE_SIZE;
>
> while (gfn < end_gfn)
> - gvt_cache_remove(vgpu, gfn++);
> + sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);
> +
> + if (sched_unmap)
> + schedule_work(&vgpu->vdev.unpin_work);
> }
>
> return NOTIFY_OK;
> --
> 2.7.4
>
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170626/bd94519e/attachment.sig>
More information about the intel-gvt-dev
mailing list