[Intel-gfx] [PATCH 1/3] drm/i915/userptr: Flush cancellations before mmu-notifier invalidate returns

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Apr 5 12:05:05 UTC 2016


On 03/04/16 18:14, Chris Wilson wrote:
> In order to ensure that all invalidations are completed before the
> operation returns to userspace (i.e. before the mmap() syscall returns)
> we need to flush the outstanding operations. To this end, we submit all
> the per-object invalidation work to a private workqueue and then flush
> the workqueue at the end of the function. We are allowed to block inside
> invalidate_range_start, and struct_mutex is the inner lock so the worker
> is not hiding any lock inversions. The advantage of using the workqueue
> over direct calling of cancel_userptr() is that it allows us to
> parallelise the potential waits and unpinning of large objects.

There was no direct calling before this patch - the work was being run 
from the shared system wq, which now becomes a private wq per task->mm.
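
I.e. the core of the change, lifted from the patch below:

-	schedule_work(&mo->work);	/* shared system wq */
+	queue_work(mn->wq, &mo->work);	/* private wq, one per task->mm */

with mn->wq created in i915_mmu_notifier_create() and flushed at the 
end of the invalidate callback.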

But it sounds so obvious now that the mmu notifier needs to wait for 
cancel_userptr workers to complete that I wonder how we missed that 
until now.

I would also spell out the two new bits explicitly in the commit 
message: waiting for the workers to complete and waiting on rendering.
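
As I read the patch, those two points in outline form:

  i915_gem_userptr_mn_invalidate_range_start()
      queue_work(mn->wq, &mo->work)	/* runs cancel_userptr() */
      flush_workqueue(mn->wq)		/* 1: wait for the workers */

  cancel_userptr()
      wait_rendering(obj)		/* 2: wait on rendering */
      ... unbind the vmas as before ...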

Plus some more description for the latter - why is it needed? 
i915_vma_unbind would already be doing a flavour of waiting.

Otherwise I did not find any issues with the code itself, just these 
high-level questions.

Regards,

Tvrtko

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94699
> Testcase: igt/gem_userptr_blits/sync-unmap-cycles
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_userptr.c | 48 ++++++++++++++++++++++++++++++++-
>   1 file changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 291a9393493d..92b39186b05a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -49,6 +49,7 @@ struct i915_mmu_notifier {
>   	struct hlist_node node;
>   	struct mmu_notifier mn;
>   	struct rb_root objects;
> +	struct workqueue_struct *wq;
>   };
>
>   struct i915_mmu_object {
> @@ -60,6 +61,40 @@ struct i915_mmu_object {
>   	bool attached;
>   };
>
> +static void wait_rendering(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
> +	unsigned reset_counter;
> +	int i, n;
> +
> +	if (!obj->active)
> +		return;
> +
> +	n = 0;
> +	for (i = 0; i < I915_NUM_ENGINES; i++) {
> +		struct drm_i915_gem_request *req;
> +
> +		req = obj->last_read_req[i];
> +		if (req == NULL)
> +			continue;
> +
> +		requests[n++] = i915_gem_request_reference(req);
> +	}
> +
> +	reset_counter = atomic_read(&to_i915(dev)->gpu_error.reset_counter);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	for (i = 0; i < n; i++)
> +		__i915_wait_request(requests[i], reset_counter, false,
> +				    NULL, NULL);
> +
> +	mutex_lock(&dev->struct_mutex);
> +
> +	for (i = 0; i < n; i++)
> +		i915_gem_request_unreference(requests[i]);
> +}
> +
>   static void cancel_userptr(struct work_struct *work)
>   {
>   	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> @@ -75,6 +110,8 @@ static void cancel_userptr(struct work_struct *work)
>   		struct i915_vma *vma, *tmp;
>   		bool was_interruptible;
>
> +		wait_rendering(obj);
> +
>   		was_interruptible = dev_priv->mm.interruptible;
>   		dev_priv->mm.interruptible = false;
>
> @@ -140,7 +177,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   		 */
>   		mo = container_of(it, struct i915_mmu_object, it);
>   		if (kref_get_unless_zero(&mo->obj->base.refcount))
> -			schedule_work(&mo->work);
> +			queue_work(mn->wq, &mo->work);
>
>   		list_add(&mo->link, &cancelled);
>   		it = interval_tree_iter_next(it, start, end);
> @@ -148,6 +185,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   	list_for_each_entry(mo, &cancelled, link)
>   		del_object(mo);
>   	spin_unlock(&mn->lock);
> +
> +	flush_workqueue(mn->wq);
>   }
>
>   static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -167,10 +206,16 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>   	spin_lock_init(&mn->lock);
>   	mn->mn.ops = &i915_gem_userptr_notifier;
>   	mn->objects = RB_ROOT;
> +	mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0);
> +	if (mn->wq == NULL) {
> +		kfree(mn);
> +		return ERR_PTR(-ENOMEM);
> +	}
>
>   	 /* Protected by mmap_sem (write-lock) */
>   	ret = __mmu_notifier_register(&mn->mn, mm);
>   	if (ret) {
> +		destroy_workqueue(mn->wq);
>   		kfree(mn);
>   		return ERR_PTR(ret);
>   	}
> @@ -256,6 +301,7 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
>   		return;
>
>   	mmu_notifier_unregister(&mn->mn, mm);
> +	destroy_workqueue(mn->wq);
>   	kfree(mn);
>   }
>
>
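
For reference, a minimal self-contained sketch of the queue-then-flush 
pattern the patch relies on. All demo_* names are made up for 
illustration; only the workqueue calls are the real kernel API:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_object {
	struct work_struct work;
	/* per-object state to tear down would live here */
};

static void demo_cancel(struct work_struct *work)
{
	struct demo_object *obj = container_of(work, typeof(*obj), work);

	/* potentially slow teardown: wait for the GPU, unpin, unbind */
	(void)obj;
}

/*
 * Queue one work item per object, then block until all of them have
 * run - mirroring flush_workqueue(mn->wq) at the end of
 * i915_gem_userptr_mn_invalidate_range_start() in the patch.
 */
static void demo_invalidate(struct workqueue_struct *wq,
			    struct demo_object **objs, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		INIT_WORK(&objs[i]->work, demo_cancel);
		queue_work(wq, &objs[i]->work);
	}

	flush_workqueue(wq);	/* returns only once every item has run */
}

The queue itself would be created with alloc_workqueue(..., WQ_UNBOUND, 0) 
and torn down with destroy_workqueue(), exactly as the patch does in 
i915_mmu_notifier_create() and i915_mmu_notifier_free().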

