[PATCH 25/27] drm/i915: Add accelerated migration to ttm

Fri Oct 8 09:22:53 UTC 2021

Expose the fence to ttm_bo->moving, which will get picked up by i915
through the i915_gem_object_get_moving_fence call. Should be sufficient
for the needs we have.

This only converts i915_ttm_move(), which is the move callback used by
TTM. We could do the same for suspend/resume, and speed that up slightly
by adding the request to resv->excl and bo->moving, and not waiting for
completion.

However, this would need a wait for completion inside the suspend path,
so I think this conversion would need to be done separately.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 129 ++++++++++++++----------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 74a1ffd0d7dd..9069c3a3b1a2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -365,8 +365,12 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 
 static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct drm_i915_gem_object *obj;
+
+	if (bo->destroy != i915_ttm_bo_destroy)
+		return;
 
+	obj = i915_ttm_to_gem(bo);
 	if (likely(obj)) {
 		__i915_gem_object_pages_fini(obj);
 		i915_ttm_free_cached_io_st(obj);
@@ -439,11 +443,11 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 	return intel_region_ttm_resource_to_st(obj->mm.region, res);
 }
 
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
-			       bool clear,
-			       struct ttm_resource *dst_mem,
-			       struct ttm_tt *dst_ttm,
-			       struct sg_table *dst_st)
+static struct i915_request *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+						bool clear,
+						struct ttm_resource *dst_mem,
+						struct ttm_tt *dst_ttm,
+						struct sg_table *dst_st)
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
@@ -451,18 +455,18 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 		ttm_manager_type(bo->bdev, bo->resource->mem_type);
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 	struct sg_table *src_st;
-	struct i915_request *rq;
+	struct i915_request *rq = NULL;
 	struct ttm_tt *src_ttm = bo->ttm;
 	enum i915_cache_level src_level, dst_level;
 	int ret;
 
 	if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
 	if (clear) {
 		if (bo->type == ttm_bo_type_kernel)
-			return -EINVAL;
+			return ERR_PTR(-EINVAL);
 
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
@@ -470,10 +474,6 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 						  gpu_binds_iomem(dst_mem),
 						  0, &rq);
 
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
 		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	} else {
 		src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
@@ -487,52 +487,41 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 						 dst_st->sgl, dst_level,
 						 gpu_binds_iomem(dst_mem),
 						 &rq);
-		if (!ret && rq) {
-			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-			i915_request_put(rq);
-		}
 		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	}
 
-	return ret;
+	return ret ? ERR_PTR(ret) : rq;
 }
 
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+static void i915_ttm_memcpy(struct ttm_buffer_object *bo, bool clear,
 			    struct ttm_resource *dst_mem,
 			    struct ttm_tt *dst_ttm,
-			    struct sg_table *dst_st,
-			    bool allow_accel)
+			    struct sg_table *dst_st)
 {
-	int ret = -EINVAL;
-
-	if (allow_accel)
-		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
-	if (ret) {
-		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-		struct intel_memory_region *dst_reg, *src_reg;
-		union {
-			struct ttm_kmap_iter_tt tt;
-			struct ttm_kmap_iter_iomap io;
-		} _dst_iter, _src_iter;
-		struct ttm_kmap_iter *dst_iter, *src_iter;
-
-		dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
-		src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
-		GEM_BUG_ON(!dst_reg || !src_reg);
-
-		dst_iter = !cpu_maps_iomem(dst_mem) ?
-			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
-			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-						 dst_st, dst_reg->region.start);
-
-		src_iter = !cpu_maps_iomem(bo->resource) ?
-			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
-			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-						 obj->ttm.cached_io_st,
-						 src_reg->region.start);
-
-		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
-	}
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct intel_memory_region *dst_reg, *src_reg;
+	union {
+		struct ttm_kmap_iter_tt tt;
+		struct ttm_kmap_iter_iomap io;
+	} _dst_iter, _src_iter;
+	struct ttm_kmap_iter *dst_iter, *src_iter;
+
+	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+	GEM_BUG_ON(!dst_reg || !src_reg);
+
+	dst_iter = !cpu_maps_iomem(dst_mem) ?
+		ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
+		ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
+					  dst_st, dst_reg->region.start);
+
+	src_iter = !cpu_maps_iomem(bo->resource) ?
+		ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
+		ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
+					  obj->ttm.cached_io_st,
+					  src_reg->region.start);
+
+	ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
 }
 
 static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
@@ -547,6 +536,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	struct sg_table *dst_st;
 	bool clear;
 	int ret;
+	struct i915_request *rq;
 
 	/* Sync for now. We could do the actual copy async. */
 	ret = ttm_bo_wait_ctx(bo, ctx);
@@ -575,10 +565,22 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 		return PTR_ERR(dst_st);
 
 	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+
+	rq = NULL;
 	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
-		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
+		rq = i915_ttm_accel_move(bo, clear, dst_mem, bo->ttm, dst_st);
+
+	if (IS_ERR(rq))
+		i915_ttm_memcpy(bo, clear, dst_mem, bo->ttm, dst_st);
+
+	/* Below dst_mem becomes bo->resource. */
+	if (!IS_ERR_OR_NULL(rq)) {
+		ttm_bo_move_accel_cleanup(bo, &rq->fence, evict, true, dst_mem);
+		i915_request_put(rq);
+	} else {
+		ttm_bo_move_sync_cleanup(bo, dst_mem);
+	}
 
-	ttm_bo_move_sync_cleanup(bo, dst_mem);
 	i915_ttm_adjust_domains_after_move(obj);
 	i915_ttm_free_cached_io_st(obj);
 
@@ -1024,6 +1026,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 		.interruptible = intr,
 	};
 	struct sg_table *dst_st;
+	struct i915_request *rq = ERR_PTR(-ENODEV);
 	int ret;
 
 	assert_object_held(dst);
@@ -1041,8 +1044,22 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	dst_st = gpu_binds_iomem(dst_bo->resource) ?
 		dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
 
-	__i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
-			dst_st, allow_accel);
+	if (allow_accel) {
+		rq = i915_ttm_accel_move(src_bo, false, dst_bo->resource,
+					 dst_bo->ttm, dst_st);
+		if (!IS_ERR_OR_NULL(rq)) {
+			unsigned int wait_flags =
+				 intr ? I915_WAIT_INTERRUPTIBLE : 0;
 
-	return 0;
+			ret = i915_request_wait(rq, wait_flags,
+						MAX_SCHEDULE_TIMEOUT);
+			i915_request_put(rq);
+		}
+	}
+
+	if (IS_ERR(rq))
+		i915_ttm_memcpy(src_bo, false, dst_bo->resource,
+				dst_bo->ttm, dst_st);
+
+	return ret;
 }
-- 
2.33.0