[Intel-gfx] [PATCH 27/66] drm/i915/gem: Pull execbuf dma resv under a single critical section

Mon Jul 27 18:08:39 UTC 2020

On 7/15/20 1:51 PM, Chris Wilson wrote:
> Acquire all the objects and their backing storage, and page directories,
> as used by execbuf under a single common ww_mutex. Albeit we have to
> restart the critical section a few times in order to handle various
> restrictions (such as avoiding copy_(from|to)_user and mmap_sem).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 168 +++++++++---------
>   .../i915/gem/selftests/i915_gem_execbuffer.c  |   8 +-
>   2 files changed, 87 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index ebabc0746d50..db433f3f18ec 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -20,6 +20,7 @@
>   #include "gt/intel_gt_pm.h"
>   #include "gt/intel_gt_requests.h"
>   #include "gt/intel_ring.h"
> +#include "mm/i915_acquire_ctx.h"
>   
>   #include "i915_drv.h"
>   #include "i915_gem_clflush.h"
> @@ -244,6 +245,8 @@ struct i915_execbuffer {
>   	struct intel_context *context; /* logical state for the request */
>   	struct i915_gem_context *gem_context; /** caller's context */
>   
> +	struct i915_acquire_ctx acquire; /** lock for _all_ DMA reservations */
> +
>   	struct i915_request *request; /** our request to build */
>   	struct eb_vma *batch; /** identity of the batch obj/vma */
>   
> @@ -389,42 +392,6 @@ static void eb_vma_array_put(struct eb_vma_array *arr)
>   	kref_put(&arr->kref, eb_vma_array_destroy);
>   }
>   
> -static int
> -eb_lock_vma(struct i915_execbuffer *eb, struct ww_acquire_ctx *acquire)
> -{
> -	struct eb_vma *ev;
> -	int err = 0;
> -
> -	list_for_each_entry(ev, &eb->submit_list, submit_link) {
> -		struct i915_vma *vma = ev->vma;
> -
> -		err = ww_mutex_lock_interruptible(&vma->resv->lock, acquire);
> -		if (err == -EDEADLK) {
> -			struct eb_vma *unlock = ev, *en;
> -
> -			list_for_each_entry_safe_continue_reverse(unlock, en,
> -								  &eb->submit_list,
> -								  submit_link) {
> -				ww_mutex_unlock(&unlock->vma->resv->lock);
> -				list_move_tail(&unlock->submit_link, &eb->submit_list);
> -			}
> -
> -			GEM_BUG_ON(!list_is_first(&ev->submit_link, &eb->submit_list));
> -			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
> -							       acquire);
> -		}
> -		if (err) {
> -			list_for_each_entry_continue_reverse(ev,
> -							     &eb->submit_list,
> -							     submit_link)
> -				ww_mutex_unlock(&ev->vma->resv->lock);
> -			break;
> -		}
> -	}
> -
> -	return err;
> -}
> -
>   static int eb_create(struct i915_execbuffer *eb)
>   {
>   	/* Allocate an extra slot for use by the sentinel */
> @@ -668,6 +635,25 @@ eb_add_vma(struct i915_execbuffer *eb,
>   	}
>   }
>   
> +static int eb_lock_mm(struct i915_execbuffer *eb)
> +{
> +	struct eb_vma *ev;
> +	int err;
> +
> +	list_for_each_entry(ev, &eb->bind_list, bind_link) {
> +		err = i915_acquire_ctx_lock(&eb->acquire, ev->vma->obj);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int eb_acquire_mm(struct i915_execbuffer *eb)
> +{
> +	return i915_acquire_mm(&eb->acquire);
> +}
> +
>   struct eb_vm_work {
>   	struct dma_fence_work base;
>   	struct eb_vma_array *array;
> @@ -1390,7 +1376,15 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
>   	unsigned long count;
>   	struct eb_vma *ev;
>   	unsigned int pass;
> -	int err = 0;
> +	int err;
> +
> +	err = eb_lock_mm(eb);
> +	if (err)
> +		return err;
> +
> +	err = eb_acquire_mm(eb);
> +	if (err)
> +		return err;
>   
>   	count = 0;
>   	INIT_LIST_HEAD(&unbound);
> @@ -1416,10 +1410,15 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
>   	if (count == 0)
>   		return 0;
>   
> +	/* We need to reserve page directories, release all, start over */
> +	i915_acquire_ctx_fini(&eb->acquire);
> +
>   	pass = 0;
>   	do {
>   		struct eb_vm_work *work;
>   
> +		i915_acquire_ctx_init(&eb->acquire);

Couldn't we do a i915_acquire_ctx_rollback() here to avoid losing our 
ticket? That would mean deferring i915_acquire_ctx_done() until all 
potential rollbacks have been performed.

Or even better if we defer _ctx_done(), couldn't we just continue 
locking the pts here instead of dropping and re-acquiring everything?

> +
>   		/*
>   		 * We need to hold one lock as we bind all the vma so that
>   		 * we have a consistent view of the entire vm and can plan
> @@ -1436,6 +1435,11 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
>   		 * beneath it, so we have to stage and preallocate all the
>   		 * resources we may require before taking the mutex.
>   		 */
> +
> +		err = eb_lock_mm(eb);
> +		if (err)
> +			return err;