[Intel-gfx] [RFC 23/44] drm/i915: Added manipulation of OLS/PLR

Thu Jun 26 19:24:14 CEST 2014

From: John Harrison <John.C.Harrison at Intel.com>

The scheduler requires each batch buffer to be tagged with the seqno it has been
assigned and for that seqno to only be attached to the given batch buffer. Note
that the seqno assigned to a batch buffer that is being submitted to the
hardware might be very different to the next seqno that would be assigned
automatically on ring submission.

This means manipulating the lazy seqno and request values around batch buffer
submission. At the start of execbuffer() the lazy seqno should be zero, if not
it means that something has been written to the ring without a request being
added. The lazy seqno also needs to be reset back to zero at the end ready for
the next request to start.

Then, execbuffer_final() needs to manually set the lazy seqno to the batch
buffer's pre-assigned value rather than grabbing the next available value. There
is no need to explictly clear the lazy seqno at the end of _final() as the
add_request() call within _retire_commands() will do that automatically.
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   68 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_scheduler.h      |    2 +
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6bb1fd6..98cc95e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1328,10 +1328,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
 	}
 
+	/* OLS should be zero at this point. If not then this buffer is going
+	 * to be tagged as someone else's work! */
+	BUG_ON(ring->outstanding_lazy_seqno    != 0);
+	BUG_ON(ring->preallocated_lazy_request != NULL);
+
 	/* Allocate a seqno for this batch buffer nice and early. */
 	ret = intel_ring_alloc_seqno(ring);
 	if (ret)
 		goto err;
+	qe.params.seqno   = ring->outstanding_lazy_seqno;
+	qe.params.request = ring->preallocated_lazy_request;
+
+	BUG_ON(ring->outstanding_lazy_seqno    == 0);
+	BUG_ON(ring->outstanding_lazy_seqno    != qe.params.seqno);
+	BUG_ON(ring->preallocated_lazy_request != qe.params.request);
+	BUG_ON(ring->preallocated_lazy_request == NULL);
 
 	/* Save assorted stuff away to pass through to execbuffer_final() */
 	qe.params.dev                     = dev;
@@ -1373,6 +1385,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	qe.params.ctx = ctx;
 #endif // CONFIG_DRM_I915_SCHEDULER
 
+	/* OLS should have been set to something useful above */
+	BUG_ON(ring->outstanding_lazy_seqno    != qe.params.seqno);
+	BUG_ON(ring->preallocated_lazy_request != qe.params.request);
+
 	if (flags & I915_DISPATCH_SECURE)
 		qe.params.batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
 	else
@@ -1384,6 +1400,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
 
+	/* Make sure the OLS hasn't advanced (which would indicate a flush
+	 * of the work in progess which in turn would be a Bad Thing). */
+	BUG_ON(ring->outstanding_lazy_seqno    != qe.params.seqno);
+	BUG_ON(ring->preallocated_lazy_request != qe.params.request);
+
+	/*
+	 * A new seqno has been assigned to the buffer and saved away for
+	 * future reference. So clear the OLS to ensure that any further
+	 * work is assigned a brand new seqno:
+	 */
+	ring->outstanding_lazy_seqno    = 0;
+	ring->preallocated_lazy_request = NULL;
+
 	ret = i915_scheduler_queue_execbuffer(&qe);
 	if (ret)
 		goto err;
@@ -1425,6 +1454,12 @@ err:
 	}
 #endif // CONFIG_DRM_I915_SCHEDULER
 
+	/* Clear the OLS again in case the failure occurred after it had been
+	 * assigned. */
+	kfree(ring->preallocated_lazy_request);
+	ring->preallocated_lazy_request = NULL;
+	ring->outstanding_lazy_seqno    = 0;
+
 	mutex_unlock(&dev->struct_mutex);
 
 pre_mutex_err:
@@ -1443,6 +1478,7 @@ int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 	struct intel_engine_cs  *ring = params->ring;
 	u64 exec_start, exec_len;
 	int ret, i;
+	u32 seqno;
 
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
@@ -1454,6 +1490,14 @@ int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 
 	intel_runtime_pm_get(dev_priv);
 
+	/* Ensure the correct seqno gets assigned to the correct buffer: */
+	BUG_ON(ring->outstanding_lazy_seqno    != 0);
+	BUG_ON(ring->preallocated_lazy_request != NULL);
+	ring->outstanding_lazy_seqno    = params->seqno;
+	ring->preallocated_lazy_request = params->request;
+
+	seqno = params->seqno;
+
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
@@ -1466,6 +1510,10 @@ int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 	if (ret)
 		goto err;
 
+	/* Seqno matches? */
+	BUG_ON(seqno != params->seqno);
+	BUG_ON(ring->outstanding_lazy_seqno != params->seqno);
+
 	if (ring == &dev_priv->ring[RCS] &&
 	    params->mode != dev_priv->relative_constants_mode) {
 		ret = intel_ring_begin(ring, 4);
@@ -1487,6 +1535,9 @@ int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 			goto err;
 	}
 
+	/* Seqno matches? */
+	BUG_ON(ring->outstanding_lazy_seqno    != params->seqno);
+	BUG_ON(ring->preallocated_lazy_request != params->request);
 
 	exec_len   = params->args_batch_len;
 	exec_start = params->batch_obj_vm_offset +
@@ -1513,12 +1564,27 @@ int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 			goto err;
 	}
 
-	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), params->eb_flags);
+	trace_i915_gem_ring_dispatch(ring, seqno, params->eb_flags);
+
+	/* Seqno matches? */
+	BUG_ON(params->seqno   != ring->outstanding_lazy_seqno);
+	BUG_ON(params->request != ring->preallocated_lazy_request);
 
 	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
 					    params->batch_obj);
 
+	/* OLS should be zero by now! */
+	BUG_ON(ring->outstanding_lazy_seqno);
+	BUG_ON(ring->preallocated_lazy_request);
+
 err:
+	if (ret) {
+		/* Reset the OLS ready to try again later. */
+		kfree(ring->preallocated_lazy_request);
+		ring->preallocated_lazy_request = NULL;
+		ring->outstanding_lazy_seqno    = 0;
+	}
+
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7c88a26..e62254a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -42,6 +42,8 @@ struct i915_execbuffer_params {
 	uint32_t                        mask;
 	int                             mode;
 	struct intel_context            *ctx;
+	int                             seqno;
+	struct drm_i915_gem_request     *request;
 	uint32_t                        scheduler_index;
 };
 
-- 
1.7.9.5