[Intel-gfx] [PATCH 21/43] drm/i915/bdw: Emission of requests with logical rings
Daniel Vetter
daniel at ffwll.ch
Mon Aug 11 22:56:47 CEST 2014
On Thu, Jul 24, 2014 at 05:04:29PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
>
> On a previous iteration of this patch, I created an Execlists
> version of __i915_add_request and asbtracted it away as a
> vfunc. Daniel Vetter wondered then why that was needed:
>
> "with the clean split in command submission I expect every
> function to know wether it'll submit to an lrc (everything in
> intel_lrc.c) or wether it'll submit to a legacy ring (existing
> code), so I don't see a need for an add_request vfunc."
>
> The honest, hairy truth is that this patch is the glue keeping
> the whole logical ring puzzle together:
Oops, I didn't spot this and it's indeed not terribly pretty.
>
> - i915_add_request is used by intel_ring_idle, which in turn is
> used by i915_gpu_idle, which in turn is used in several places
> inside the eviction and gtt codes.
This should probably be folded in with the lrc specific version of
stop_rings and so should work out.
> - Also, it is used by i915_gem_check_olr, which is littered all
> over i915_gem.c
We now always preallocate the request struct, so olr is officially dead.
Well almost, except for non-execbuf stuff that we emit through the rings.
Which is nothing for lrc/execlist mode.
Also there's the icky-bitty problem with ringbuf->ctx which makes this
patch not apply any more. I think we need to revise or at least discuss a
bit.
> - ...
>
> If I were to duplicate all the code that directly or indirectly
> uses __i915_add_request, I'll end up creating a separate driver.
>
> To show the differences between the existing legacy version and
> the new Execlists one, this time I have special-cased
> __i915_add_request instead of adding an add_request vfunc. I
> hope this helps to untangle this Gordian knot.
>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 72 ++++++++++++++++++++++++++++----------
> drivers/gpu/drm/i915/intel_lrc.c | 30 +++++++++++++---
> drivers/gpu/drm/i915/intel_lrc.h | 1 +
> 3 files changed, 80 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9560b40..1c83b9c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2327,10 +2327,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> struct drm_i915_gem_request *request;
> + struct intel_ringbuffer *ringbuf;
> u32 request_ring_position, request_start;
> int ret;
>
> - request_start = intel_ring_get_tail(ring->buffer);
> + request = ring->preallocated_lazy_request;
> + if (WARN_ON(request == NULL))
> + return -ENOMEM;
> +
> + if (i915.enable_execlists) {
> + struct intel_context *ctx = request->ctx;
> + ringbuf = ctx->engine[ring->id].ringbuf;
> + } else
> + ringbuf = ring->buffer;
> +
> + request_start = intel_ring_get_tail(ringbuf);
> /*
> * Emit any outstanding flushes - execbuf can fail to emit the flush
> * after having emitted the batchbuffer command. Hence we need to fix
> @@ -2338,24 +2349,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
> * is that the flush _must_ happen before the next request, no matter
> * what.
> */
> - ret = intel_ring_flush_all_caches(ring);
> - if (ret)
> - return ret;
> -
> - request = ring->preallocated_lazy_request;
> - if (WARN_ON(request == NULL))
> - return -ENOMEM;
> + if (i915.enable_execlists) {
> + ret = logical_ring_flush_all_caches(ringbuf);
> + if (ret)
> + return ret;
> + } else {
> + ret = intel_ring_flush_all_caches(ring);
> + if (ret)
> + return ret;
> + }
>
> /* Record the position of the start of the request so that
> * should we detect the updated seqno part-way through the
> * GPU processing the request, we never over-estimate the
> * position of the head.
> */
> - request_ring_position = intel_ring_get_tail(ring->buffer);
> + request_ring_position = intel_ring_get_tail(ringbuf);
>
> - ret = ring->add_request(ring);
> - if (ret)
> - return ret;
> + if (i915.enable_execlists) {
> + ret = ring->emit_request(ringbuf);
> + if (ret)
> + return ret;
> + } else {
> + ret = ring->add_request(ring);
> + if (ret)
> + return ret;
> + }
>
> request->seqno = intel_ring_get_seqno(ring);
> request->ring = ring;
> @@ -2370,12 +2389,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
> */
> request->batch_obj = obj;
>
> - /* Hold a reference to the current context so that we can inspect
> - * it later in case a hangcheck error event fires.
> - */
> - request->ctx = ring->last_context;
> - if (request->ctx)
> - i915_gem_context_reference(request->ctx);
> + if (!i915.enable_execlists) {
> + /* Hold a reference to the current context so that we can inspect
> + * it later in case a hangcheck error event fires.
> + */
> + request->ctx = ring->last_context;
> + if (request->ctx)
> + i915_gem_context_reference(request->ctx);
> + }
>
> request->emitted_jiffies = jiffies;
> list_add_tail(&request->list, &ring->request_list);
> @@ -2630,6 +2651,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>
> while (!list_empty(&ring->request_list)) {
> struct drm_i915_gem_request *request;
> + struct intel_ringbuffer *ringbuf;
>
> request = list_first_entry(&ring->request_list,
> struct drm_i915_gem_request,
> @@ -2639,12 +2661,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
> break;
>
> trace_i915_gem_request_retire(ring, request->seqno);
> +
> + /* This is one of the few common intersection points
> + * between legacy ringbuffer submission and execlists:
> + * we need to tell them apart in order to find the correct
> + * ringbuffer to which the request belongs to.
> + */
> + if (i915.enable_execlists) {
> + struct intel_context *ctx = request->ctx;
> + ringbuf = ctx->engine[ring->id].ringbuf;
> + } else
> + ringbuf = ring->buffer;
> +
> /* We know the GPU must have read the request to have
> * sent us the seqno + interrupt, so use the position
> * of tail of the request to update the last known position
> * of the GPU head.
> */
> - ring->buffer->last_retired_head = request->tail;
> + ringbuf->last_retired_head = request->tail;
>
> i915_gem_free_request(request);
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5dd63d6..dcf59c6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -106,6 +106,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
> /* TODO */
> }
>
> +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
> +{
> + struct intel_engine_cs *ring = ringbuf->ring;
> + int ret;
> +
> + if (!ring->gpu_caches_dirty)
> + return 0;
> +
> + ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
> + if (ret)
> + return ret;
> +
> + ring->gpu_caches_dirty = false;
> + return 0;
> +}
> +
> void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> {
> intel_logical_ring_advance(ringbuf);
> @@ -116,7 +132,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> /* TODO: how to submit a context to the ELSP is not here yet */
> }
>
> -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> {
> if (ring->outstanding_lazy_seqno)
> return 0;
> @@ -128,6 +145,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> if (request == NULL)
> return -ENOMEM;
>
> + /* Hold a reference to the context this request belongs to
> + * (we will need it when the time comes to emit/retire the
> + * request).
> + */
> + request->ctx = ctx;
> + i915_gem_context_reference(request->ctx);
> +
> ring->preallocated_lazy_request = request;
> }
>
> @@ -165,8 +189,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes
> if (ret)
> return ret;
>
> - /* TODO: make sure we update the right ringbuffer's last_retired_head
> - * when retiring requests */
> i915_gem_retire_requests_ring(ring);
> ringbuf->head = ringbuf->last_retired_head;
> ringbuf->last_retired_head = -1;
> @@ -291,7 +313,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
> return ret;
>
> /* Preallocate the olr before touching the ring */
> - ret = logical_ring_alloc_seqno(ring);
> + ret = logical_ring_alloc_seqno(ring, ringbuf->ctx);
Ok, this is hairy. Really hairy, since this uses ringbuf->ctx. Not sure we
really want this or need this.
> if (ret)
> return ret;
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 16798b6..696e09e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
> void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
> int intel_logical_rings_init(struct drm_device *dev);
>
> +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
> void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
> static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
> {
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx
mailing list