[PATCH v3 2/6] drm/i915: Add support for asynchronous moving fence waiting

Thomas Hellström thomas.hellstrom at linux.intel.com
Sun Nov 14 11:12:14 UTC 2021


From: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>

For now, we will only allow async migration when TTM is used,
so the paths we care about are related to TTM.

The mmap path is handled by having the fence in ttm_bo->moving,
when pinning, the binding only becomes available after the moving
fence is signaled, and pinning a cpu map will only work after
the moving fence signals.

This should close all holes where userspace can read a buffer
before it's fully migrated.

v2:
- Fix a couple of SPARSE warnings
v3:
- Fix a NULL pointer dereference

Co-developed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_fbdev.c    |  7 ++--
 drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  6 +++
 .../i915/gem/selftests/i915_gem_coherency.c   |  4 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 22 ++++++-----
 drivers/gpu/drm/i915/i915_vma.c               | 39 ++++++++++++++++++-
 drivers/gpu/drm/i915/i915_vma.h               |  3 ++
 drivers/gpu/drm/i915/selftests/i915_vma.c     |  4 +-
 8 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index adc3a81be9f7..5902ad0c2bd8 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -265,11 +265,12 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		info->fix.smem_len = vma->node.size;
 	}
 
-	vaddr = i915_vma_pin_iomap(vma);
+	vaddr = i915_vma_pin_iomap_unlocked(vma);
 	if (IS_ERR(vaddr)) {
-		drm_err(&dev_priv->drm,
-			"Failed to remap framebuffer into virtual memory\n");
 		ret = PTR_ERR(vaddr);
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			drm_err(&dev_priv->drm,
+				"Failed to remap framebuffer into virtual memory\n");
 		goto out_unpin;
 	}
 	info->screen_base = vaddr;
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 7e3f5c6ca484..21593f3f2664 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1357,7 +1357,7 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 		overlay->flip_addr = sg_dma_address(obj->mm.pages->sgl);
 	else
 		overlay->flip_addr = i915_ggtt_offset(vma);
-	overlay->regs = i915_vma_pin_iomap(vma);
+	overlay->regs = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 
 	if (IS_ERR(overlay->regs)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index c4f684b7cc51..49c6e55c68ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 
 	if (!ptr) {
+		err = i915_gem_object_wait_moving_fence(obj, true);
+		if (err) {
+			ptr = ERR_PTR(err);
+			goto err_unpin;
+		}
+
 		if (GEM_WARN_ON(type == I915_MAP_WC &&
 				!static_cpu_has(X86_FEATURE_PAT)))
 			ptr = ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 13b088cc787e..067c512961ba 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -101,7 +101,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 
 	intel_gt_pm_get(vma->vm->gt);
 
-	map = i915_vma_pin_iomap(vma);
+	map = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
 		err = PTR_ERR(map);
@@ -134,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 
 	intel_gt_pm_get(vma->vm->gt);
 
-	map = i915_vma_pin_iomap(vma);
+	map = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
 		err = PTR_ERR(map);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 6d30cdfa80f3..5d54181c2145 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -125,12 +125,13 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 	n = page - view.partial.offset;
 	GEM_BUG_ON(n >= view.partial.size);
 
-	io = i915_vma_pin_iomap(vma);
+	io = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(io)) {
-		pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-		       page, (int)PTR_ERR(io));
 		err = PTR_ERR(io);
+		if (err != -EINTR && err != -ERESTARTSYS)
+			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+			       page, err);
 		goto out;
 	}
 
@@ -219,12 +220,15 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
 		n = page - view.partial.offset;
 		GEM_BUG_ON(n >= view.partial.size);
 
-		io = i915_vma_pin_iomap(vma);
+		io = i915_vma_pin_iomap_unlocked(vma);
 		i915_vma_unpin(vma);
 		if (IS_ERR(io)) {
-			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(io));
-			return PTR_ERR(io);
+			int err = PTR_ERR(io);
+
+			if (err != -EINTR && err != -ERESTARTSYS)
+				pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+				       page, err);
+			return err;
 		}
 
 		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
@@ -773,7 +777,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 		return PTR_ERR(vma);
 
 	intel_gt_pm_get(vma->vm->gt);
-	map = i915_vma_pin_iomap(vma);
+	map = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
 		err = PTR_ERR(map);
@@ -799,7 +803,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 		return PTR_ERR(vma);
 
 	intel_gt_pm_get(vma->vm->gt);
-	map = i915_vma_pin_iomap(vma);
+	map = i915_vma_pin_iomap_unlocked(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
 		err = PTR_ERR(map);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8781c4f61952..069f22b3cd48 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -431,6 +431,13 @@ int i915_vma_bind(struct i915_vma *vma,
 			work->pinned = i915_gem_object_get(vma->obj);
 		}
 	} else {
+		if (vma->obj) {
+			int ret;
+
+			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+			if (ret)
+				return ret;
+		}
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
@@ -455,6 +462,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
+		err = i915_gem_object_wait_moving_fence(vma->obj, true);
+		if (err)
+			goto err;
+
 		/*
 		 * TODO: consider just using i915_gem_object_pin_map() for lmem
 		 * instead, which already supports mapping non-contiguous chunks
@@ -496,6 +507,25 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	return IO_ERR_PTR(err);
 }
 
+void __iomem *i915_vma_pin_iomap_unlocked(struct i915_vma *vma)
+{
+	struct i915_gem_ww_ctx ww;
+	void __iomem *map;
+	int err;
+
+	for_i915_gem_ww(&ww, err, true) {
+		err = i915_gem_object_lock(vma->obj, &ww);
+		if (err)
+			continue;
+
+		map = i915_vma_pin_iomap(vma);
+	}
+	if (err)
+		map = IO_ERR_PTR(err);
+
+	return map;
+}
+
 void i915_vma_flush_writes(struct i915_vma *vma)
 {
 	if (i915_vma_unset_ggtt_write(vma))
@@ -870,6 +900,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
 	struct i915_vma_work *work = NULL;
+	struct dma_fence *moving = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
@@ -895,7 +926,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
+	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
+	if (flags & vma->vm->bind_async_flags || moving) {
 		/* lock VM */
 		err = i915_vm_lock_objects(vma->vm, ww);
 		if (err)
@@ -909,6 +941,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 		work->vm = i915_vm_get(vma->vm);
 
+		dma_fence_work_chain(&work->base, moving);
+
 		/* Allocate enough page directories to used PTE */
 		if (vma->vm->allocate_va_range) {
 			err = i915_vm_alloc_pt_stash(vma->vm,
@@ -1013,7 +1047,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 err_rpm:
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+	if (moving)
+		dma_fence_put(moving);
 	vma_put_pages(vma);
+
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 648dbe744c96..1812b2904a31 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -326,6 +326,9 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
+
+void __iomem *i915_vma_pin_iomap_unlocked(struct i915_vma *vma);
+
 #define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
 
 /**
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 1f10fe36619b..85f43b209890 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -1005,7 +1005,7 @@ static int igt_vma_remapped_gtt(void *arg)
 
 			GEM_BUG_ON(vma->ggtt_view.type != *t);
 
-			map = i915_vma_pin_iomap(vma);
+			map = i915_vma_pin_iomap_unlocked(vma);
 			i915_vma_unpin(vma);
 			if (IS_ERR(map)) {
 				err = PTR_ERR(map);
@@ -1036,7 +1036,7 @@ static int igt_vma_remapped_gtt(void *arg)
 
 			GEM_BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
 
-			map = i915_vma_pin_iomap(vma);
+			map = i915_vma_pin_iomap_unlocked(vma);
 			i915_vma_unpin(vma);
 			if (IS_ERR(map)) {
 				err = PTR_ERR(map);
-- 
2.31.1



More information about the dri-devel mailing list