[Intel-gfx] [PATCH v2] drm/i915: Nonblocking request submission

Chris Wilson <chris@chris-wilson.co.uk>
Tue Aug 30 11:18:23 UTC 2016


Now that we have fences in place to drive request submission, we can
use them to queue requests after their dependencies, rather than
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin briefly on the dependency before enabling the IRQ, as that is
faster - though contentious.)
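
In outline, these are the two await points this wires up (condensed from
the hunks below; helper names as in the diff, not a standalone listing):

	/* execbuf dependency: instead of blocking in i915_wait_request()
	 * under struct_mutex, ask the new request's submit fence to wait
	 * on the dependency's dma-fence (after a short optimistic spin).
	 */
	if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence,
						    GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	/* per-engine ordering: each new request's submit fence also waits
	 * on the previous request submitted to the same engine.
	 */
	prev = i915_gem_active_peek(&engine->last_request,
				    &req->i915->drm.struct_mutex);
	if (prev)
		ret = i915_sw_fence_await_sw_fence(&req->submit,
						   &prev->submit,
						   GFP_KERNEL);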

v2: Do the fence ordering first, where we can still fail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++-----
 drivers/gpu/drm/i915/i915_gem_request.c    | 36 +++++++++++++++++++++++-------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ef645d89e760..c767eb7bc893 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1136,12 +1136,13 @@ eb_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 39f88e72310b..e0651970ee9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -352,7 +352,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_request *req;
+	struct drm_i915_gem_request *req, *prev;
 	u32 seqno;
 	int ret;
 
@@ -362,7 +362,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err;
 
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->request_list,
@@ -399,12 +399,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
+	if (!req) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	ret = i915_gem_get_seqno(dev_priv, &seqno);
 	if (ret)
-		goto err;
+		goto err_req;
 
 	spin_lock_init(&req->lock);
 	fence_init(&req->fence,
@@ -434,12 +436,23 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
 
+	prev = i915_gem_active_peek(&engine->last_request,
+				    &req->i915->drm.struct_mutex);
+	if (prev) {
+		ret = i915_sw_fence_await_sw_fence(&req->submit,
+						   &prev->submit,
+						   GFP_KERNEL);
+		if (ret < 0)
+			goto err_ctx;
+	}
+
+	/* After this point we are committed to submitting the request
+	 * as the backend callback may track the request for global state.
+	 */
 	if (i915.enable_execlists)
 		ret = intel_logical_ring_alloc_request_extras(req);
 	else
 		ret = intel_ring_alloc_request_extras(req);
-	if (ret)
-		goto err_ctx;
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
@@ -448,12 +461,19 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	/* Submit the partial state in case we are tracking this request */
+	if (ret) {
+		__i915_add_request(req, false);
+		goto err;
+	}
+
 	return req;
 
 err_ctx:
 	i915_gem_context_put(ctx);
-err:
+err_req:
 	kmem_cache_free(dev_priv->requests, req);
+err:
 	return ERR_PTR(ret);
 }
 
-- 
2.9.3


