[PATCH 32/34] drm/i915: Start returning an error from i915_vma_move_to_active()

Chris Wilson chris at chris-wilson.co.uk
Thu Jan 25 17:37:22 UTC 2018


Handling such a late error in request construction is tricky, but to
accommodate future patches which may allocate here, we potentially could
err. To handle the error after already adjusting global state to track
the new request, we must finish and submit the request. But we don't
want to use the request as not everything is being tracked by it, so we
opt to cancel the commands inside the request.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/scheduler.c               |  4 +++-
 drivers/gpu/drm/i915/i915_drv.h                    |  6 +++---
 drivers/gpu/drm/i915/i915_gem.c                    | 25 +++-------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c         | 25 ++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_render_state.c       |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c            | 19 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h            |  2 ++
 drivers/gpu/drm/i915/selftests/huge_pages.c        |  9 ++++++--
 .../gpu/drm/i915/selftests/i915_gem_coherency.c    |  4 ++--
 drivers/gpu/drm/i915/selftests/i915_gem_context.c  | 12 +++++++++--
 drivers/gpu/drm/i915/selftests/i915_gem_object.c   |  7 +++---
 drivers/gpu/drm/i915/selftests/i915_gem_request.c  |  8 +++++--
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c   | 11 ++++++++--
 13 files changed, 87 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 0056638b0c16..6b6020f8af08 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -394,7 +394,9 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 		i915_gem_obj_finish_shmem_access(bb->obj);
 		bb->accessing = false;
 
-		i915_vma_move_to_active(bb->vma, workload->req, 0);
+		ret = i915_vma_move_to_active(bb->vma, workload->req, 0);
+		if (ret)
+			goto err;
 	}
 	return 0;
 err:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2f0e38534544..895806733f29 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3287,9 +3287,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct drm_i915_gem_request *req,
-			     unsigned int flags);
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct drm_i915_gem_request *req,
+					 unsigned int flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 921183e345df..ac7cfa7b48c5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2989,25 +2989,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	return err;
 }
 
-static void skip_request(struct drm_i915_gem_request *request)
-{
-	void *vaddr = request->ring->vaddr;
-	u32 head;
-
-	/* As this request likely depends on state from the lost
-	 * context, clear out all the user operations leaving the
-	 * breadcrumb at the end (so we get the fence notifications).
-	 */
-	head = request->head;
-	if (request->postfix < head) {
-		memset(vaddr + head, 0, request->ring->size - head);
-		head = 0;
-	}
-	memset(vaddr + head, 0, request->postfix - head);
-
-	dma_fence_set_error(&request->fence, -EIO);
-}
-
 static void engine_skip_context(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -3022,10 +3003,10 @@ static void engine_skip_context(struct drm_i915_gem_request *request)
 
 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == hung_ctx)
-			skip_request(request);
+			i915_gem_request_cancel(request);
 
 	list_for_each_entry(request, &timeline->requests, link)
-		skip_request(request);
+		i915_gem_request_cancel(request);
 
 	spin_unlock(&timeline->lock);
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
@@ -3059,7 +3040,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 
 	if (engine_stalled(engine)) {
 		i915_gem_context_mark_guilty(request->ctx);
-		skip_request(request);
+		i915_gem_request_cancel(request);
 
 		/* If this context is now banned, skip all pending requests. */
 		if (i915_gem_context_is_banned(request->ctx))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 09038cc59c89..e1ca67c995c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1118,12 +1118,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_request;
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
-	i915_vma_move_to_active(batch, rq, 0);
-	i915_vma_unpin(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
 
 	rq->batch = batch;
+	i915_vma_unpin(batch);
 
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
@@ -1132,6 +1136,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	/* Return with batch mapping (cmd) still pinned */
 	return 0;
 
+skip_request:
+	i915_gem_request_cancel(rq);
 err_request:
 	i915_add_request(rq);
 err_unpin:
@@ -1795,7 +1801,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 
-		i915_vma_move_to_active(vma, eb->request, flags);
+		err = i915_vma_move_to_active(vma, eb->request, flags);
+		if (unlikely(err)) {
+			i915_gem_request_cancel(eb->request);
+			return err;
+		}
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1854,9 +1864,9 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct drm_i915_gem_request *req,
-			     unsigned int flags)
+int i915_vma_move_to_active(struct i915_vma *vma,
+			    struct drm_i915_gem_request *req,
+			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned int idx = req->engine->id;
@@ -1893,6 +1903,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		i915_gem_active_set(&vma->last_fence, req);
 
 	export_fence(vma, req, flags);
+	return 0;
 }
 
 static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index f7fc0df251ac..951701ac452b 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *rq)
 			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(so.vma, rq, 0);
+	err = i915_vma_move_to_active(so.vma, rq, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index b0a2885575f1..87af456f4f9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -972,6 +972,25 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
 	return ret;
 }
 
+ void i915_gem_request_cancel(struct drm_i915_gem_request *request)
+{
+	void *vaddr = request->ring->vaddr;
+	u32 head;
+
+	/* As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = request->head;
+	if (request->postfix < head) {
+		memset(vaddr + head, 0, request->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, request->postfix - head);
+
+	dma_fence_set_error(&request->fence, -EIO);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ee1520077757..0c5cca529ac3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -289,6 +289,8 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 void __i915_gem_request_submit(struct drm_i915_gem_request *request);
 void i915_gem_request_submit(struct drm_i915_gem_request *request);
 
+void i915_gem_request_cancel(struct drm_i915_gem_request *request);
+
 void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
 void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
 
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 24cfdfb8a74d..510a9c738726 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_request;
 	}
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto err_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
@@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		i915_gem_request_cancel(rq);
 
 err_request:
 	__i915_add_request(rq, err == 0);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 713693d330e7..1e35a6fb0c5d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
 	__i915_add_request(rq, true);
 
-	return 0;
+	return err;
 }
 
 static bool always_valid(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 0d6d2ac16f3f..2058c5197689 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -164,18 +164,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
 	__i915_add_request(rq, true);
 
 	return 0;
 
+skip_request:
+	i915_gem_request_cancel(rq);
 err_request:
 	__i915_add_request(rq, false);
 err_batch:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index f32aa6bb79e2..8bf04ef819a6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -448,12 +448,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
 	i915_add_request(rq);
 
-	i915_gem_object_set_active_reference(obj);
+	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
-	return 0;
+
+	return err;
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 647bf2bbd799..d8f0353aa262 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -679,7 +679,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_gem_request_get(request[id]);
 		i915_add_request(request[id]);
 	}
@@ -790,7 +792,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_gem_object_set_active_reference(batch->obj);
 		i915_vma_get(batch);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index d1f91a533afa..3ab0a8577cb9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -114,13 +114,19 @@ static int emit_recurse_batch(struct hang *h,
 	if (err)
 		goto unpin_vma;
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	i915_vma_move_to_active(hws, rq, 0);
+	err = i915_vma_move_to_active(hws, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(hws->obj)) {
 		i915_gem_object_get(hws->obj);
 		i915_gem_object_set_active_reference(hws->obj);
@@ -189,6 +195,7 @@ static int emit_recurse_batch(struct hang *h,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
+unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
-- 
2.15.1



More information about the Intel-gfx-trybot mailing list