[Intel-gfx] [PATCH v3] drm/i915/execlists: Move WA_TAIL_DWORDS to callee

Fri Feb 5 22:56:51 UTC 2016

On Fri, Feb 5, 2016 at 11:31 AM, Dave Gordon <david.s.gordon at intel.com> wrote:
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> Currently emit-request starts writing to the ring and reserves space
> for a workaround to be emitted later whilst submitting the request. It
> is easier to read if the caller only allocates sufficient space for
> its access (then the reader can quickly verify that the ring begin
> allocates the exact space for the number of dwords emitted) and closes
> the access to the ring. During submit, if we need to add the workaround,
> we can reacquire ring access, in the assurance that we reserved space
> for ourselves when beginning the request.
>
> v3:
>     Reinstated #define for WA_TAIL_DWORDS so we could accommodate GPUs
>     that required different amounts of padding, generalised NOOP fill
>     [Rodrigo Vivi], added W/A space to reserved amount and updated
>     code comments [Dave Gordon],
>
> Originally-by: Rodrigo Vivi <rodrigo.vivi at gmail.com>
> Rewritten-by: Chris Wilson <chris at chris-wilson.co.uk>
> Further-tweaked-by: Dave Gordon <david.s.gordon at intel.com>
>
> Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at gmail.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon at intel.com>
> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 80 ++++++++++++++++++++++++----------------
>  1 file changed, 49 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a03646..8b278f1 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -225,6 +225,13 @@ enum {
>  #define GEN8_CTX_ID_SHIFT 32
>  #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
>
> +/*
> + * Reserve space for 2 NOOPs at the end of each request,
> + * to be used as a workaround for not being allowed to
> + * do lite restore with HEAD==TAIL (WaIdleLiteRestore).
> + */
> +#define WA_TAIL_DWORDS 2

This patch really organize things better, but I still don't like the
WA_TAIL_DWORDS hardcoded here instead in a place where I can switch
for different platofms.

> +
>  static int intel_lr_context_pin(struct intel_context *ctx,
>                                 struct intel_engine_cs *engine);
>  static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
> @@ -462,10 +469,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>                  */
>                 if (req0->elsp_submitted) {
>                         /*
> -                        * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
> -                        * as we resubmit the request. See gen8_emit_request()
> -                        * for where we prepare the padding after the end of the
> -                        * request.
> +                        * Consume the W/A NOOPs to prevent ring:HEAD == req:TAIL as
> +                        * we resubmit the request. See intel_logical_ring_submit()
> +                        * where we prepare the padding after the end of the request.
>                          */
>                         struct intel_ringbuffer *ringbuf;
>
> @@ -752,33 +758,47 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>  }
>
>  /*
> - * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
> - * @request: Request to advance the logical ringbuffer of.
> + * intel_logical_ring_submit() - submit the workload (to GuC or execlist queue)
> + * @request: Request to submit
>   *
> - * The tail is updated in our logical ringbuffer struct, not in the actual context. What
> - * really happens during submission is that the context and current tail will be placed
> - * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
> - * point, the tail *inside* the context is updated and the ELSP written to.
> + * The tail is updated in our logical ringbuffer struct, not in the actual
> + * context. What really happens during submission is that the context and
> + * current tail will be placed on a queue waiting for the ELSP to be ready
> + * to accept a new context submission. At that point, the tail *inside* the
> + * context is updated and the ELSP written to by the submitting agent i.e.
> + * either the driver (in execlist mode), or the GuC (in GuC-submission mode).
>   */
>  static int
> -intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> +intel_logical_ring_submit(struct drm_i915_gem_request *request)
>  {
>         struct intel_ringbuffer *ringbuf = request->ringbuf;
>         struct drm_i915_private *dev_priv = request->i915;
>         struct intel_engine_cs *engine = request->ring;
>
> -       intel_logical_ring_advance(ringbuf);
>         request->tail = ringbuf->tail;
>
>         /*
> -        * Here we add two extra NOOPs as padding to avoid
> -        * lite restore of a context with HEAD==TAIL.
> -        *
> -        * Caller must reserve WA_TAIL_DWORDS for us!
> +        * Fill in a few NOOPs after the end of the request proper,
> +        * as a buffer between requests to be used as a workaround
> +        * for not being allowed to do lite restore with HEAD==TAIL.
> +        * (WaIdleLiteRestore). These words may be consumed by the
> +        * submission mechanism if a context is *re*submitted while
> +        * (apparently) still active; otherwise, they will be left
> +        * as a harmless preamble to the next request.
>          */
> -       intel_logical_ring_emit(ringbuf, MI_NOOP);
> -       intel_logical_ring_emit(ringbuf, MI_NOOP);
> -       intel_logical_ring_advance(ringbuf);
> +       if (WA_TAIL_DWORDS > 0) {
> +               int ret, i;
> +
> +               /* Safe because we reserved the space earlier */
> +               ret = intel_logical_ring_begin(request, WA_TAIL_DWORDS);
> +               if (WARN_ON(ret != 0))
> +                       return ret;
> +
> +               for (i = 0; i < WA_TAIL_DWORDS; ++i)
> +                       intel_logical_ring_emit(ringbuf, MI_NOOP);
> +
> +               intel_logical_ring_advance(ringbuf);
> +       }
>
>         if (intel_ring_stopped(engine))
>                 return 0;
> @@ -907,7 +927,8 @@ int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
>          * adding any commands to it then there might not actually be
>          * sufficient room for the submission commands.
>          */
> -       intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
> +       intel_ring_reserved_space_reserve(request->ringbuf,
> +                       MIN_SPACE_FOR_ADD_REQUEST + WA_TAIL_DWORDS);
>
>         return intel_logical_ring_begin(request, 0);
>  }
> @@ -1875,13 +1896,6 @@ static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>         intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>
> -/*
> - * Reserve space for 2 NOOPs at the end of each request to be
> - * used as a workaround for not being allowed to do lite
> - * restore with HEAD==TAIL (WaIdleLiteRestore).
> - */
> -#define WA_TAIL_DWORDS 2
> -
>  static inline u32 hws_seqno_address(struct intel_engine_cs *engine)
>  {
>         return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
> @@ -1892,7 +1906,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
>         struct intel_ringbuffer *ringbuf = request->ringbuf;
>         int ret;
>
> -       ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
> +       ret = intel_logical_ring_begin(request, 6);
>         if (ret)
>                 return ret;
>
> @@ -1908,7 +1922,9 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
>         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
>         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
>         intel_logical_ring_emit(ringbuf, MI_NOOP);
> -       return intel_logical_ring_advance_and_submit(request);
> +       intel_logical_ring_advance(ringbuf);
> +
> +       return intel_logical_ring_submit(request);
>  }
>
>  static int gen8_emit_request_render(struct drm_i915_gem_request *request)
> @@ -1916,7 +1932,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
>         struct intel_ringbuffer *ringbuf = request->ringbuf;
>         int ret;
>
> -       ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
> +       ret = intel_logical_ring_begin(request, 6);
>         if (ret)
>                 return ret;
>
> @@ -1933,7 +1949,9 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
>         intel_logical_ring_emit(ringbuf, 0);
>         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
>         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
> -       return intel_logical_ring_advance_and_submit(request);
> +       intel_logical_ring_advance(ringbuf);
> +
> +       return intel_logical_ring_submit(request);
>  }
>
>  static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
> --
> 1.9.1
>

-- 
Rodrigo Vivi
Blog: http://blog.vivi.eng.br