[Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory

Wed Jun 13 12:54:01 UTC 2018

Quoting Chris Wilson (2018-06-12 13:51:35)
> For symmetry, simplicity and ensuring the request is always truly idle
> upon its completion, always emit the closing flush prior to emitting the
> request breadcrumb. Previously, we would only emit the flush if we had
> started a user batch, but this just leaves all the other paths open to
> speculation (do they affect the GPU caches or not?) With mm switching, a
> key requirement is that the GPU is flushed and invalidated before hand,
> so for absolute safety, we want that closing flush be mandatory.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

No word about perf impact? The patch description matches what it does.

Assuming we're not murdering any testcases;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

Regards, Joonas

> ---
>  drivers/gpu/drm/i915/i915_gem.c                |  4 ++--
>  drivers/gpu/drm/i915/i915_gem_context.c        |  9 +--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c     |  4 ++--
>  drivers/gpu/drm/i915/i915_request.c            | 18 ++----------------
>  drivers/gpu/drm/i915/i915_request.h            |  4 +---
>  drivers/gpu/drm/i915/selftests/huge_pages.c    |  2 +-
>  .../drm/i915/selftests/i915_gem_coherency.c    |  4 ++--
>  .../gpu/drm/i915/selftests/i915_gem_context.c  |  4 ++--
>  drivers/gpu/drm/i915/selftests/i915_request.c  |  2 +-
>  .../gpu/drm/i915/selftests/intel_hangcheck.c   | 16 ++++++++--------
>  drivers/gpu/drm/i915/selftests/intel_lrc.c     |  2 +-
>  .../gpu/drm/i915/selftests/intel_workarounds.c |  2 +-
>  12 files changed, 24 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 93efd92362db..8dd4d35655af 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
>                         rq = i915_request_alloc(engine,
>                                                 dev_priv->kernel_context);
>                         if (!IS_ERR(rq))
> -                               __i915_request_add(rq, false);
> +                               i915_request_add(rq);
>                 }
>         }
>  
> @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>                 if (engine->init_context)
>                         err = engine->init_context(rq);
>  
> -               __i915_request_add(rq, true);
> +               i915_request_add(rq);
>                 if (err)
>                         goto err_active;
>         }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b2c7ac1b074d..ef6ea4bcd773 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
>                         i915_timeline_sync_set(rq->timeline, &prev->fence);
>                 }
>  
> -               /*
> -                * Force a flush after the switch to ensure that all rendering
> -                * and operations prior to switching to the kernel context hits
> -                * memory. This should be guaranteed by the previous request,
> -                * but an extra layer of paranoia before we declare the system
> -                * idle (on suspend etc) is advisable!
> -                */
> -               __i915_request_add(rq, true);
> +               i915_request_add(rq);
>         }
>  
>         return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2d2eb3075960..60dc2a865f5f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
>         i915_gem_object_unpin_map(cache->rq->batch->obj);
>         i915_gem_chipset_flush(cache->rq->i915);
>  
> -       __i915_request_add(cache->rq, true);
> +       i915_request_add(cache->rq);
>         cache->rq = NULL;
>  }
>  
> @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         trace_i915_request_queue(eb.request, eb.batch_flags);
>         err = eb_submit(&eb);
>  err_request:
> -       __i915_request_add(eb.request, err == 0);
> +       i915_request_add(eb.request);
>         add_to_client(eb.request, file);
>  
>         if (fences)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9092f5464c24..e1dbb544046f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1018,14 +1018,13 @@ i915_request_await_object(struct i915_request *to,
>   * request is not being tracked for completion but the work itself is
>   * going to happen on the hardware. This would be a Bad Thing(tm).
>   */
> -void __i915_request_add(struct i915_request *request, bool flush_caches)
> +void i915_request_add(struct i915_request *request)
>  {
>         struct intel_engine_cs *engine = request->engine;
>         struct i915_timeline *timeline = request->timeline;
>         struct intel_ring *ring = request->ring;
>         struct i915_request *prev;
>         u32 *cs;
> -       int err;
>  
>         GEM_TRACE("%s fence %llx:%d\n",
>                   engine->name, request->fence.context, request->fence.seqno);
> @@ -1046,20 +1045,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
>          * know that it is time to use that space up.
>          */
>         request->reserved_space = 0;
> -
> -       /*
> -        * Emit any outstanding flushes - execbuf can fail to emit the flush
> -        * after having emitted the batchbuffer command. Hence we need to fix
> -        * things up similar to emitting the lazy request. The difference here
> -        * is that the flush _must_ happen before the next request, no matter
> -        * what.
> -        */
> -       if (flush_caches) {
> -               err = engine->emit_flush(request, EMIT_FLUSH);
> -
> -               /* Not allowed to fail! */
> -               WARN(err, "engine->emit_flush() failed: %d!\n", err);
> -       }
> +       engine->emit_flush(request, EMIT_FLUSH);
>  
>         /*
>          * Record the position of the start of the breadcrumb so that
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 0e9aba53d0e4..7ee220ded9c9 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -253,9 +253,7 @@ int i915_request_await_object(struct i915_request *to,
>  int i915_request_await_dma_fence(struct i915_request *rq,
>                                  struct dma_fence *fence);
>  
> -void __i915_request_add(struct i915_request *rq, bool flush_caches);
> -#define i915_request_add(rq) \
> -       __i915_request_add(rq, false)
> +void i915_request_add(struct i915_request *rq);
>  
>  void __i915_request_submit(struct i915_request *request);
>  void i915_request_submit(struct i915_request *request);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 7846ea4a99bc..fbe4324116d7 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma,
>         reservation_object_unlock(vma->resv);
>  
>  err_request:
> -       __i915_request_add(rq, err == 0);
> +       i915_request_add(rq);
>  
>         return err;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 340a98c0c804..a4900091ae3d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>  
>         cs = intel_ring_begin(rq, 4);
>         if (IS_ERR(cs)) {
> -               __i915_request_add(rq, false);
> +               i915_request_add(rq);
>                 i915_vma_unpin(vma);
>                 return PTR_ERR(cs);
>         }
> @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>         reservation_object_add_excl_fence(obj->resv, &rq->fence);
>         reservation_object_unlock(obj->resv);
>  
> -       __i915_request_add(rq, true);
> +       i915_request_add(rq);
>  
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 708e8d721448..836f1af8b833 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -182,12 +182,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>         reservation_object_add_excl_fence(obj->resv, &rq->fence);
>         reservation_object_unlock(obj->resv);
>  
> -       __i915_request_add(rq, true);
> +       i915_request_add(rq);
>  
>         return 0;
>  
>  err_request:
> -       __i915_request_add(rq, false);
> +       i915_request_add(rq);
>  err_batch:
>         i915_vma_unpin(batch);
>  err_vma:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index a3a89aadeccb..f5d00332bb31 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -466,7 +466,7 @@ empty_request(struct intel_engine_cs *engine,
>                 goto out_request;
>  
>  out_request:
> -       __i915_request_add(request, err == 0);
> +       i915_request_add(request);
>         return err ? ERR_PTR(err) : request;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 390a157b37c3..fe7d3190ebfe 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -245,7 +245,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
>  
>         err = emit_recurse_batch(h, rq);
>         if (err) {
> -               __i915_request_add(rq, false);
> +               i915_request_add(rq);
>                 return ERR_PTR(err);
>         }
>  
> @@ -318,7 +318,7 @@ static int igt_hang_sanitycheck(void *arg)
>                 *h.batch = MI_BATCH_BUFFER_END;
>                 i915_gem_chipset_flush(i915);
>  
> -               __i915_request_add(rq, true);
> +               i915_request_add(rq);
>  
>                 timeout = i915_request_wait(rq,
>                                             I915_WAIT_LOCKED,
> @@ -464,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>                                 }
>  
>                                 i915_request_get(rq);
> -                               __i915_request_add(rq, true);
> +                               i915_request_add(rq);
>                                 mutex_unlock(&i915->drm.struct_mutex);
>  
>                                 if (!wait_until_running(&h, rq)) {
> @@ -742,7 +742,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>                                 }
>  
>                                 i915_request_get(rq);
> -                               __i915_request_add(rq, true);
> +                               i915_request_add(rq);
>                                 mutex_unlock(&i915->drm.struct_mutex);
>  
>                                 if (!wait_until_running(&h, rq)) {
> @@ -942,7 +942,7 @@ static int igt_wait_reset(void *arg)
>         }
>  
>         i915_request_get(rq);
> -       __i915_request_add(rq, true);
> +       i915_request_add(rq);
>  
>         if (!wait_until_running(&h, rq)) {
>                 struct drm_printer p = drm_info_printer(i915->drm.dev);
> @@ -1037,7 +1037,7 @@ static int igt_reset_queue(void *arg)
>                 }
>  
>                 i915_request_get(prev);
> -               __i915_request_add(prev, true);
> +               i915_request_add(prev);
>  
>                 count = 0;
>                 do {
> @@ -1051,7 +1051,7 @@ static int igt_reset_queue(void *arg)
>                         }
>  
>                         i915_request_get(rq);
> -                       __i915_request_add(rq, true);
> +                       i915_request_add(rq);
>  
>                         /*
>                          * XXX We don't handle resetting the kernel context
> @@ -1184,7 +1184,7 @@ static int igt_handle_error(void *arg)
>         }
>  
>         i915_request_get(rq);
> -       __i915_request_add(rq, true);
> +       i915_request_add(rq);
>  
>         if (!wait_until_running(&h, rq)) {
>                 struct drm_printer p = drm_info_printer(i915->drm.dev);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 0b6da08c8cae..ea27c7cfbf96 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -155,7 +155,7 @@ spinner_create_request(struct spinner *spin,
>  
>         err = emit_recurse_batch(spin, rq, arbitration_command);
>         if (err) {
> -               __i915_request_add(rq, false);
> +               i915_request_add(rq);
>                 return ERR_PTR(err);
>         }
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index f1cfb0fb6bea..e1ea2d2bedd2 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -75,7 +75,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>         i915_gem_object_get(result);
>         i915_gem_object_set_active_reference(result);
>  
> -       __i915_request_add(rq, true);
> +       i915_request_add(rq);
>         i915_vma_unpin(vma);
>  
>         return result;
> -- 
> 2.17.1
>