[Intel-gfx] [PATCH 32/40] drm/i915: Add early exit to execbuff_final() if insufficient ring space

John.C.Harrison at Intel.com John.C.Harrison at Intel.com
Fri Dec 11 05:21:20 PST 2015


From: John Harrison <John.C.Harrison at Intel.com>

One of the major purposes of the GPU scheduler is to avoid stalling
the CPU when the GPU is busy and unable to accept more work. This
change adds support to the ring submission code to allow a ring space
check to be performed before attempting to submit a batch buffer to
the hardware. If insufficient space is available then the scheduler
can go away and come back later, letting the CPU get on with other
work, rather than stalling and waiting for the hardware to catch up.

v3: Updated to use locally cached request pointer.

Change-Id: I267159ce1150cb6714d34a49b841bcbe4bf66326
For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 ++++++++++++++++------
 drivers/gpu/drm/i915/intel_lrc.c           | 57 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 24 +++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  1 +
 4 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8ba426f..bf9d804 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1101,25 +1101,19 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 {
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	int i;
 
 	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
 		DRM_DEBUG("sol reset is gen7/rcs only\n");
 		return -EINVAL;
 	}
 
-	ret = intel_ring_begin(req, 4 * 3);
-	if (ret)
-		return ret;
-
 	for (i = 0; i < 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
 		intel_ring_emit(ring, 0);
 	}
 
-	intel_ring_advance(ring);
-
 	return 0;
 }
 
@@ -1247,6 +1241,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	struct intel_engine_cs  *ring = params->ring;
 	u64 exec_start, exec_len;
 	int ret;
+	uint32_t min_space;
 
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
@@ -1268,8 +1263,36 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	if (ret)
 		return ret;
 
+	/*
+	 * It would be a bad idea to run out of space while writing commands
+	 * to the ring. One of the major aims of the scheduler is to not stall
+	 * at any point for any reason. However, doing an early exit half way
+	 * through submission could result in a partial sequence being written
+	 * which would leave the engine in an unknown state. Therefore, check in
+	 * advance that there will be enough space for the entire submission
+	 * whether emitted by the code below OR by any other functions that may
+	 * be executed before the end of final().
+	 *
+	 * NB: This test deliberately overestimates, because that's easier than
+	 * tracing every potential path that could be taken!
+	 *
+	 * Current measurements suggest that we may need to emit up to 744 bytes
+	 * (186 dwords), so this is rounded up to 256 dwords here. Then we double
+	 * that to get the free space requirement, because the block isn't allowed
+	 * to span the transition from the end to the beginning of the ring.
+	 */
+#define I915_BATCH_EXEC_MAX_LEN         256	/* max dwords emitted here	*/
+	min_space = I915_BATCH_EXEC_MAX_LEN * 2 * sizeof(uint32_t);
+	ret = intel_ring_test_space(req->ringbuf, min_space);
+	if (ret)
+		goto early_error;
+
 	intel_runtime_pm_get(dev_priv);
 
+	ret = intel_ring_begin(req, I915_BATCH_EXEC_MAX_LEN);
+	if (ret)
+		goto error;
+
 	/*
 	 * Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
@@ -1288,10 +1311,6 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    params->instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_ring_begin(req, 4);
-		if (ret)
-			goto error;
-
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 		intel_ring_emit(ring, INSTPM);
@@ -1328,6 +1347,7 @@ error:
 	 */
 	intel_runtime_pm_put(dev_priv);
 
+early_error:
 	if (ret)
 		intel_ring_reserved_space_cancel(req->ringbuf);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1fa3228..d6acd2d6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -226,6 +226,30 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 		struct drm_i915_gem_object *default_ctx_obj);
 
 
+/* Test to see if the ring has sufficient space to submit a given piece of work
+ * without causing a stall */
+static int logical_ring_test_space(struct intel_ringbuffer *ringbuf, int min_space)
+{
+	//struct intel_engine_cs *ring = ringbuf->ring;
+	//struct drm_device *dev = ring->dev;
+	//struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (ringbuf->space < min_space) {
+		/* Need to update the actual ring space. Otherwise, the system
+		 * hangs forever testing a software copy of the space value that
+		 * never changes!
+		 */
+		//ringbuf->head  = I915_READ_HEAD(ring);
+		//ringbuf->space = intel_ring_space(ringbuf);
+		intel_ring_update_space(ringbuf);
+
+		if (ringbuf->space < min_space)
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -932,6 +956,7 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	struct intel_engine_cs *ring = params->ring;
 	u64 exec_start;
 	int ret;
+	uint32_t min_space;
 
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
@@ -954,6 +979,34 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 		return ret;
 
 	/*
+	 * It would be a bad idea to run out of space while writing commands
+	 * to the ring. One of the major aims of the scheduler is to not stall
+	 * at any point for any reason. However, doing an early exit half way
+	 * through submission could result in a partial sequence being written
+	 * which would leave the engine in an unknown state. Therefore, check in
+	 * advance that there will be enough space for the entire submission
+	 * whether emitted by the code below OR by any other functions that may
+	 * be executed before the end of final().
+	 *
+	 * NB: This test deliberately overestimates, because that's easier than
+	 * tracing every potential path that could be taken!
+	 *
+	 * Current measurements suggest that we may need to emit up to ??? bytes
+	 * (186 dwords), so this is rounded up to 256 dwords here. Then we double
+	 * that to get the free space requirement, because the block isn't allowed
+	 * to span the transition from the end to the beginning of the ring.
+	 */
+#define I915_BATCH_EXEC_MAX_LEN         256	/* max dwords emitted here	*/
+	min_space = I915_BATCH_EXEC_MAX_LEN * 2 * sizeof(uint32_t);
+	ret = logical_ring_test_space(ringbuf, min_space);
+	if (ret)
+		goto err;
+
+	ret = intel_logical_ring_begin(req, I915_BATCH_EXEC_MAX_LEN);
+	if (ret)
+		goto err;
+
+	/*
 	 * Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
@@ -963,10 +1016,6 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 
 	if (ring == &dev_priv->ring[RCS] &&
 	    params->instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_logical_ring_begin(req, 4);
-		if (ret)
-			return ret;
-
 		intel_logical_ring_emit(ringbuf, MI_NOOP);
 		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
 		intel_logical_ring_emit(ringbuf, INSTPM);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2d8dc54..97b9dfa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2472,6 +2472,30 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 	return 0;
 }
 
+/* Test to see if the ring has sufficient space to submit a given piece of work
+ * without causing a stall */
+int intel_ring_test_space(struct intel_ringbuffer *ringbuf, int min_space)
+{
+	struct drm_i915_private *dev_priv = ringbuf->ring->dev->dev_private;
+
+	/* There is a separate LRC version of this code. */
+	BUG_ON(i915.enable_execlists);
+
+	if (ringbuf->space < min_space) {
+		/* Need to update the actual ring space. Otherwise, the system
+		 * hangs forever testing a software copy of the space value that
+		 * never changes!
+		 */
+		ringbuf->head  = I915_READ_HEAD(ringbuf->ring);
+		ringbuf->space = intel_ring_space(ringbuf);
+
+		if (ringbuf->space < min_space)
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 {
 	struct drm_device *dev = ring->dev;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d31c94f..c472f8a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -469,6 +469,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 
+int intel_ring_test_space(struct intel_ringbuffer *ringbuf, int min_space);
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 static inline void intel_ring_emit(struct intel_engine_cs *ring,
-- 
1.9.1



More information about the Intel-gfx mailing list