[PATCH 3/3] drm/i915/ttm: Failsafe migration blits
Thomas Hellström
thomas.hellstrom at linux.intel.com
Fri Oct 29 15:21:21 UTC 2021
If the initial fill blit or copy blit of an object fails, the old
content of the data might be exposed and read as soon as either CPU or
GPU PTEs are set up to point at the pages.

Intercept the blit fence with an async callback that checks the
blit fence for errors and, if there are errors, performs an async cpu
blit instead. If the async dma_fence_work allocation fails, allocate
it on the stack and wait synchronously for the blit to complete.

Add selftests that simulate gpu blit failures and failure to allocate
the async dma_fence_work.

A previous version of this patch used dma_fence_work; that is now
open-coded, which adds more code but might lower the latency
somewhat in the common non-error case.
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
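[Not part of the commit message: below is a minimal, self-contained
sketch of the fence-interception pattern the patch builds on, for
reviewers unfamiliar with the dma_fence callback flow. The names
intercept_ctx, intercept_cb() and intercept_blit() are illustrative
only and do not appear in the patch; the i915-specific memcpy fallback
and the irq_work fast path are omitted.]

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct intercept_ctx {
	struct dma_fence_cb cb;
	struct work_struct work;
};

static void intercept_work(struct work_struct *work)
{
	struct intercept_ctx *ctx =
		container_of(work, struct intercept_ctx, work);

	/* Process context: perform the CPU fallback blit here. */
	kfree(ctx);
}

static void intercept_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct intercept_ctx *ctx = container_of(cb, struct intercept_ctx, cb);

	if (fence->error) {
		/* GPU blit failed: defer the CPU fallback to a workqueue,
		 * since fence callbacks may run in atomic context. */
		INIT_WORK(&ctx->work, intercept_work);
		queue_work(system_unbound_wq, &ctx->work);
	} else {
		kfree(ctx);
	}
}

static int intercept_blit(struct dma_fence *blit_fence)
{
	struct intercept_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	int err;

	if (!ctx)
		return -ENOMEM; /* caller falls back to a synchronous wait */

	err = dma_fence_add_callback(blit_fence, &ctx->cb, intercept_cb);
	if (err == -ENOENT) {
		/* Fence already signaled: check for errors synchronously. */
		err = blit_fence->error;
		kfree(ctx);
	}
	return err;
}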
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 326 +++++++++++++++---
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h | 15 +
.../drm/i915/gem/selftests/i915_gem_migrate.c | 24 +-
3 files changed, 311 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4bc9f1a0fb2b..06695719bbf1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -18,6 +18,18 @@
#include "gt/intel_gt.h"
#include "gt/intel_migrate.h"
+I915_SELFTEST_DECLARE(static bool fail_gpu_migration;)
+I915_SELFTEST_DECLARE(static bool fail_work_allocation;)
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation)
+{
+ fail_gpu_migration = gpu_migration;
+ fail_work_allocation = work_allocation;
+}
+#endif
+
static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
struct ttm_tt *ttm)
@@ -108,11 +120,11 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
return 0;
}
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
- bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct sg_table *dst_st)
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+ bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct sg_table *dst_st)
{
struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
bdev);
@@ -123,30 +135,29 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
int ret;
if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+
+ /* With fail_gpu_migration, we always perform a GPU clear. */
+ if (I915_SELFTEST_ONLY(fail_gpu_migration))
+ clear = true;
dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
if (clear) {
- if (bo->type == ttm_bo_type_kernel)
- return -EINVAL;
+ if (bo->type == ttm_bo_type_kernel &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration))
+ return ERR_PTR(-EINVAL);
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
dst_st->sgl, dst_level,
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
-
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
- }
- intel_engine_pm_put(i915->gt.migrate.context->engine);
} else {
struct i915_refct_sgt *src_rsgt =
i915_ttm_resource_get_st(obj, bo->resource);
if (IS_ERR(src_rsgt))
- return PTR_ERR(src_rsgt);
+ return ERR_CAST(src_rsgt);
src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
intel_engine_pm_get(i915->gt.migrate.context->engine);
@@ -157,15 +168,216 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
dst_st->sgl, dst_level,
i915_ttm_gtt_binds_lmem(dst_mem),
&rq);
+
i915_refct_sgt_put(src_rsgt);
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ }
+
+ intel_engine_pm_put(i915->gt.migrate.context->engine);
+
+ if (ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+
+ return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @num_pages: Number of pages to copy or clear.
+ * @clear: Whether to clear instead of copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_arg {
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_iomap io;
+ } _dst_iter, _src_iter;
+ struct ttm_kmap_iter *dst_iter;
+ struct ttm_kmap_iter *src_iter;
+ unsigned long num_pages;
+ bool clear;
+ struct i915_refct_sgt *src_rsgt;
+ struct i915_refct_sgt *dst_rsgt;
+};
+
+struct i915_ttm_memcpy_work {
+ struct dma_fence fence;
+ struct work_struct work;
+ /* The fence lock */
+ spinlock_t lock;
+ struct irq_work irq_work;
+ struct dma_fence_cb cb;
+ struct i915_ttm_memcpy_arg arg;
+};
+
+static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
+{
+ ttm_move_memcpy(arg->clear, arg->num_pages,
+ arg->dst_iter, arg->src_iter);
+}
+
+static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
+ struct ttm_buffer_object *bo, bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct intel_memory_region *dst_reg, *src_reg;
+
+ dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+ src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+ GEM_BUG_ON(!dst_reg || !src_reg);
+
+ arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
+ ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
+ &dst_rsgt->table, dst_reg->region.start);
+
+ arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
+ ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
+ &obj->ttm.cached_io_rsgt->table,
+ src_reg->region.start);
+ arg->clear = clear;
+ arg->num_pages = bo->base.size >> PAGE_SHIFT;
+
+ arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+ arg->src_rsgt = clear ? NULL :
+ i915_ttm_resource_get_st(obj, bo->resource);
+}
+
+static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
+{
+ i915_refct_sgt_put(arg->src_rsgt);
+ i915_refct_sgt_put(arg->dst_rsgt);
+}
+
+static void __memcpy_work(struct work_struct *work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(work, typeof(*copy_work), work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+ bool cookie = dma_fence_begin_signalling();
+
+ i915_ttm_move_memcpy(arg);
+ dma_fence_end_signalling(cookie);
+
+ dma_fence_signal(&copy_work->fence);
+
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_irq_work(struct irq_work *irq_work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(irq_work, typeof(*copy_work), irq_work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+
+ dma_fence_signal(&copy_work->fence);
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(cb, typeof(*copy_work), cb);
+
+ if (fence->error || I915_SELFTEST_ONLY(fail_gpu_migration)) {
+ INIT_WORK(&copy_work->work, __memcpy_work);
+ queue_work(system_unbound_wq, &copy_work->work);
+ } else {
+ init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
+ irq_work_queue(&copy_work->irq_work);
+ }
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+ return "i915_ttm_memcpy_work";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops dma_fence_memcpy_ops = {
+ .get_driver_name = get_driver_name,
+ .get_timeline_name = get_timeline_name,
+};
+
+static struct dma_fence *
+i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
+ struct dma_fence *dependency)
+{
+ int ret;
+
+ spin_lock_init(&work->lock);
+ dma_fence_init(&work->fence, &dma_fence_memcpy_ops,
+ &work->lock, 0, 0);
+
+ dma_fence_get(&work->fence);
+ ret = dma_fence_add_callback(dependency, &work->cb, __memcpy_cb);
+
+ if (ret == -ENOENT) {
+ int err = dependency->error;
+
+ if (I915_SELFTEST_ONLY(fail_gpu_migration))
+ err = -EINVAL;
+
+ return err ? ERR_PTR(err) : NULL;
+ } else if (ret) {
+ return ERR_PTR(ret);
+ }
+
+ return &work->fence;
+}
+
+/*
+ * This is only used as a last fallback if the copy_work
+ * memory allocation fails, prohibiting async moves.
+ */
+static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt,
+ bool allow_accel)
+{
+ int ret = -EINVAL;
+
+ if (allow_accel) {
+ struct dma_fence *fence;
+
+ fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+ &dst_rsgt->table);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ } else {
+ ret = dma_fence_wait(fence, false);
+ if (!ret)
+ ret = fence->error;
+ dma_fence_put(fence);
}
- intel_engine_pm_put(i915->gt.migrate.context->engine);
}
- return ret;
+ if (ret || I915_SELFTEST_ONLY(fail_gpu_migration)) {
+ struct i915_ttm_memcpy_arg arg;
+
+ i915_ttm_memcpy_init(&arg, bo, clear, dst_mem,
+ dst_ttm, dst_rsgt);
+
+ i915_ttm_move_memcpy(&arg);
+ i915_ttm_memcpy_release(&arg);
+ }
}
/**
@@ -177,43 +389,51 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
* @allow_accel: Whether to allow acceleration.
*/
void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct i915_refct_sgt *dst_rsgt,
- bool allow_accel)
+ struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt, bool allow_accel)
{
- int ret = -EINVAL;
+ struct i915_ttm_memcpy_work *copy_work;
+ struct dma_fence *fence = ERR_PTR(-EINVAL);
+
+ if (!I915_SELFTEST_ONLY(fail_work_allocation))
+ copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+ else
+ copy_work = NULL;
+
+ if (!copy_work) {
+ /* Don't fail with -ENOMEM. Move sync instead. */
+ __i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt,
+ allow_accel);
+ return;
+ }
if (allow_accel)
- ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
- &dst_rsgt->table);
- if (ret) {
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct intel_memory_region *dst_reg, *src_reg;
- union {
- struct ttm_kmap_iter_tt tt;
- struct ttm_kmap_iter_iomap io;
- } _dst_iter, _src_iter;
- struct ttm_kmap_iter *dst_iter, *src_iter;
-
- dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
- src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
- GEM_BUG_ON(!dst_reg || !src_reg);
-
- dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
- ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
- ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
- &dst_rsgt->table,
- dst_reg->region.start);
-
- src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
- ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
- ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
- &obj->ttm.cached_io_rsgt->table,
- src_reg->region.start);
-
- ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
+ fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+ &dst_rsgt->table);
+
+ /* Set up the async memcpy fallback */
+ i915_ttm_memcpy_init(&copy_work->arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ if (!IS_ERR(fence)) {
+ struct dma_fence *dep = fence;
+
+ fence = i915_ttm_memcpy_work_arm(copy_work, dep);
+ dma_fence_put(dep);
}
+
+ if (IS_ERR_OR_NULL(fence)) {
+ if (IS_ERR(fence))
+ i915_ttm_move_memcpy(&copy_work->arg);
+ i915_ttm_memcpy_release(&copy_work->arg);
+ kfree(copy_work);
+ fence = NULL;
+ } else {
+ /* Sync for now */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
+ return;
}
/**
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
index 676c647a308c..769ae43fd5fd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
@@ -5,8 +5,23 @@
#ifndef _I915_GEM_TTM_MOVE_H_
#define _I915_GEM_TTM_MOVE_H_
+#include "i915_selftest.h"
+
+struct ttm_buffer_object;
+struct ttm_resource;
+struct ttm_tt;
+struct ttm_place;
+struct ttm_operation_ctx;
+
+struct i915_refct_sgt;
+
+struct drm_i915_gem_object;
+
int i915_ttm_move_notify(struct ttm_buffer_object *bo);
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation));
+
/* Internal I915 TTM declarations and definitions below. */
void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 28a700f08b49..4b8e6b098659 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,6 +4,7 @@
*/
#include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm_move.h"
static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
bool fill)
@@ -227,13 +228,34 @@ static int igt_lmem_pages_migrate(void *arg)
return err;
}
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+ int fail_gpu, fail_alloc, ret;
+
+ for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+ for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+ pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+ fail_gpu, fail_alloc);
+ i915_ttm_migrate_set_failure_modes(fail_gpu,
+ fail_alloc);
+ ret = igt_lmem_pages_migrate(arg);
+ if (ret)
+ goto out_err;
+ }
+ }
+
+out_err:
+ i915_ttm_migrate_set_failure_modes(false, false);
+ return ret;
+}
+
int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_smem_create_migrate),
SUBTEST(igt_lmem_create_migrate),
SUBTEST(igt_same_create_migrate),
- SUBTEST(igt_lmem_pages_migrate),
+ SUBTEST(igt_lmem_pages_failsafe_migrate),
};
if (!HAS_LMEM(i915))
--
2.31.1