[Intel-gfx] [RFC PATCH 138/162] drm/i915/dg1: Eliminate eviction mutex

Fri Nov 27 12:06:54 UTC 2020

From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

We can eliminate the current evict window mutex, held over the whole
eviction process, and replace it with a wait queue which takes over the
role of co-ordinating access to pre-configured window copy vmas.

Apart from the global lock not being held over whole of the copy,
additional benefit is that, since we have two pairs of copy windows, two
evict operations can now progress independently. (One swap-in plus one
swap-out.)

Also consolidate some of the eviction code into helper functions for
readability and fix cleanup if emit_init_breadcrumb fails.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 144 ++++++++++++---------
 drivers/gpu/drm/i915/i915_drv.h            |   2 +-
 2 files changed, 85 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 3943a184fbe3..34bbefa6d67f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -856,7 +856,8 @@ int i915_setup_blt_windows(struct drm_i915_private *i915)
 		return 0;
 	}
 
-	mutex_init(&i915->mm.window_mutex);
+	init_waitqueue_head(&i915->mm.window_queue);
+
 	for (i = 0; i < ARRAY_SIZE(lmem); i++) {
 		lmem[i] = i915_window_vma_init(i915, lmem_region);
 		if (IS_ERR_OR_NULL(lmem[i])) {
@@ -904,7 +905,6 @@ void i915_teardown_blt_windows(struct drm_i915_private *i915)
 			continue;
 		i915_window_vma_teardown(i915->mm.smem_window[i]);
 	}
-	mutex_destroy(&i915->mm.window_mutex);
 }
 
 static int i915_window_blt_copy_prepare_obj(struct drm_i915_gem_object *obj)
@@ -950,6 +950,36 @@ i915_window_blt_copy_batch_prepare(struct i915_request *rq,
 	return 0;
 }
 
+static void prepare_vma(struct i915_vma *vma,
+			struct drm_i915_gem_object *obj,
+			u32 offset,
+			u32 chunk,
+			bool is_lmem)
+{
+	struct scatterlist *sgl;
+	u32 size;
+
+	/*
+	 * Source obj size could be smaller than the dst obj size,
+	 * due to the varying min_page_size of the mem regions the
+	 * obj belongs to. But when we insert the pages into vm,
+	 * the total size of the pages supposed to be multiples of
+	 * the min page size of that mem region.
+	 */
+	size = ALIGN(chunk, obj->mm.region->min_page_size) >> PAGE_SHIFT;
+	intel_partial_pages_for_sg_table(obj, vma->pages, offset, size, &sgl);
+
+	/*
+	 * Insert pages into vm, expects the pages to the full
+	 * length of VMA. But we may have the pages of <= vma_size.
+	 * Hence altering the vma size to match the total size of
+	 * the pages attached.
+	 */
+	vma->size = size << PAGE_SHIFT;
+	i915_insert_vma_pages(vma, is_lmem);
+	sg_unmark_end(sgl);
+}
+
 int i915_window_blt_copy(struct drm_i915_gem_object *dst,
 			 struct drm_i915_gem_object *src)
 {
@@ -957,24 +987,10 @@ int i915_window_blt_copy(struct drm_i915_gem_object *dst,
 	struct intel_context *ce = i915->gt.engine[BCS0]->blitter_context;
 	bool src_is_lmem = i915_gem_object_is_lmem(src);
 	bool dst_is_lmem = i915_gem_object_is_lmem(dst);
-	struct scatterlist *last_sgl;
-	struct i915_vma *src_vma, *dst_vma;
-	struct i915_request *rq;
-	u64 cur_win_sz, blt_copied, offset;
-	long timeout;
-	u32 size;
+	u64 remain = src->base.size, offset = 0;
+	struct i915_vma *src_vma, *dst_vma, **ps, **pd;
 	int err;
 
-	src_vma = src_is_lmem ? i915->mm.lmem_window[0] :
-				i915->mm.smem_window[0];
-	dst_vma = dst_is_lmem ? i915->mm.lmem_window[1] :
-				i915->mm.smem_window[1];
-
-	if (!src_vma || !dst_vma)
-		return -ENODEV;
-
-	blt_copied = 0;
-
 	err = i915_window_blt_copy_prepare_obj(src);
 	if (err)
 		return err;
@@ -985,43 +1001,42 @@ int i915_window_blt_copy(struct drm_i915_gem_object *dst,
 		return err;
 	}
 
-	mutex_lock(&i915->mm.window_mutex);
+	ps = src_is_lmem ? &i915->mm.lmem_window[0] :
+			   &i915->mm.smem_window[0];
+	pd = dst_is_lmem ? &i915->mm.lmem_window[1] :
+			   &i915->mm.smem_window[1];
+
+	spin_lock(&i915->mm.window_queue.lock);
+
+	err = wait_event_interruptible_locked(i915->mm.window_queue,
+					      *ps && *pd);
+	if (err) {
+		spin_unlock(&i915->mm.window_queue.lock);
+		i915_gem_object_unpin_pages(src);
+		i915_gem_object_unpin_pages(dst);
+		return err;
+	}
+
+	src_vma = *ps;
+	dst_vma = *pd;
+
 	src_vma->obj = src;
 	dst_vma->obj = dst;
-	do {
-		cur_win_sz = min_t(u64, BLT_WINDOW_SZ,
-				   (src->base.size - blt_copied));
-		offset = blt_copied >> PAGE_SHIFT;
-		size = ALIGN(cur_win_sz, src->mm.region->min_page_size) >>
-		       PAGE_SHIFT;
-		intel_partial_pages_for_sg_table(src, src_vma->pages, offset,
-						 size, &last_sgl);
 
-		/*
-		 * Insert pages into vm, expects the pages to the full
-		 * length of VMA. But we may have the pages of <= vma_size.
-		 * Hence altering the vma size to match the total size of
-		 * the pages attached.
-		 */
-		src_vma->size = size << PAGE_SHIFT;
-		i915_insert_vma_pages(src_vma, src_is_lmem);
-		sg_unmark_end(last_sgl);
+	*ps = NULL;
+	*pd = NULL;
 
-		/*
-		 * Source obj size could be smaller than the dst obj size,
-		 * due to the varying min_page_size of the mem regions the
-		 * obj belongs to. But when we insert the pages into vm,
-		 * the total size of the pages supposed to be multiples of
-		 * the min page size of that mem region.
-		 */
-		size = ALIGN(cur_win_sz, dst->mm.region->min_page_size) >>
-		       PAGE_SHIFT;
-		intel_partial_pages_for_sg_table(dst, dst_vma->pages, offset,
-						 size, &last_sgl);
+	spin_unlock(&i915->mm.window_queue.lock);
+
+	do {
+		struct i915_request *rq;
+		long timeout;
+		u32 chunk;
 
-		dst_vma->size = size << PAGE_SHIFT;
-		i915_insert_vma_pages(dst_vma, dst_is_lmem);
-		sg_unmark_end(last_sgl);
+		chunk = min_t(u64, BLT_WINDOW_SZ, remain);
+
+		prepare_vma(src_vma, src, offset, chunk, src_is_lmem);
+		prepare_vma(dst_vma, dst, offset, chunk, dst_is_lmem);
 
 		rq = i915_request_create(ce);
 		if (IS_ERR(rq)) {
@@ -1032,11 +1047,14 @@ int i915_window_blt_copy(struct drm_i915_gem_object *dst,
 			err = rq->engine->emit_init_breadcrumb(rq);
 			if (unlikely(err)) {
 				DRM_ERROR("init_breadcrumb failed. %d\n", err);
+				i915_request_set_error_once(rq, err);
+				__i915_request_skip(rq);
+				i915_request_add(rq);
 				break;
 			}
 		}
 		err = i915_window_blt_copy_batch_prepare(rq, src_vma, dst_vma,
-							 cur_win_sz);
+							 chunk);
 		if (err) {
 			DRM_ERROR("Batch preparation failed. %d\n", err);
 			i915_request_set_error_once(rq, -EIO);
@@ -1045,26 +1063,32 @@ int i915_window_blt_copy(struct drm_i915_gem_object *dst,
 		i915_request_get(rq);
 		i915_request_add(rq);
 
-		timeout = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-		if (timeout < 0) {
+		if (!err)
+			timeout = i915_request_wait(rq, 0,
+						    MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+		if (!err && timeout < 0) {
 			DRM_ERROR("BLT Request is not completed. %ld\n",
 				  timeout);
 			err = timeout;
-			i915_request_put(rq);
 			break;
 		}
 
-		blt_copied += cur_win_sz;
-		err = 0;
-		i915_request_put(rq);
-		flush_work(&i915->gt.engine[BCS0]->retire_work);
-	} while (src->base.size != blt_copied);
+		remain -= chunk;
+		offset += chunk >> PAGE_SHIFT;
+
+		flush_work(&ce->engine->retire_work);
+	} while (remain);
 
+	spin_lock(&i915->mm.window_queue.lock);
 	src_vma->size = BLT_WINDOW_SZ;
 	dst_vma->size = BLT_WINDOW_SZ;
 	src_vma->obj = NULL;
 	dst_vma->obj = NULL;
-	mutex_unlock(&i915->mm.window_mutex);
+	*ps = src_vma;
+	*pd = dst_vma;
+	wake_up_locked(&i915->mm.window_queue);
+	spin_unlock(&i915->mm.window_queue.lock);
 
 	dst->mm.dirty = true;
 	i915_gem_object_unpin_pages(src);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 07da059640a1..82f431cc38cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -600,7 +600,7 @@ struct i915_gem_mm {
 	struct i915_vma *smem_window[2];
 
 	/* To protect above two set of vmas */
-	struct mutex window_mutex;
+	wait_queue_head_t window_queue;
 };
 
 #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
-- 
2.26.2