[Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Wed Jun 13 12:54:01 UTC 2018
Quoting Chris Wilson (2018-06-12 13:51:35)
> For symmetry, simplicity and ensuring the request is always truly idle
> upon its completion, always emit the closing flush prior to emitting the
> request breadcrumb. Previously, we would only emit the flush if we had
> started a user batch, but this just leaves all the other paths open to
> speculation (do they affect the GPU caches or not?) With mm switching, a
> key requirement is that the GPU is flushed and invalidated before hand,
> so for absolute safety, we want that closing flush be mandatory.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
No word about perf impact? The patch description matches what it does.
Assuming we're not murdering any testcases;
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Regards, Joonas
> ---
> drivers/gpu/drm/i915/i915_gem.c | 4 ++--
> drivers/gpu/drm/i915/i915_gem_context.c | 9 +--------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++--
> drivers/gpu/drm/i915/i915_request.c | 18 ++----------------
> drivers/gpu/drm/i915/i915_request.h | 4 +---
> drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +-
> .../drm/i915/selftests/i915_gem_coherency.c | 4 ++--
> .../gpu/drm/i915/selftests/i915_gem_context.c | 4 ++--
> drivers/gpu/drm/i915/selftests/i915_request.c | 2 +-
> .../gpu/drm/i915/selftests/intel_hangcheck.c | 16 ++++++++--------
> drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +-
> .../gpu/drm/i915/selftests/intel_workarounds.c | 2 +-
> 12 files changed, 24 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 93efd92362db..8dd4d35655af 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
> rq = i915_request_alloc(engine,
> dev_priv->kernel_context);
> if (!IS_ERR(rq))
> - __i915_request_add(rq, false);
> + i915_request_add(rq);
> }
> }
>
> @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> if (engine->init_context)
> err = engine->init_context(rq);
>
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
> if (err)
> goto err_active;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b2c7ac1b074d..ef6ea4bcd773 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
> i915_timeline_sync_set(rq->timeline, &prev->fence);
> }
>
> - /*
> - * Force a flush after the switch to ensure that all rendering
> - * and operations prior to switching to the kernel context hits
> - * memory. This should be guaranteed by the previous request,
> - * but an extra layer of paranoia before we declare the system
> - * idle (on suspend etc) is advisable!
> - */
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2d2eb3075960..60dc2a865f5f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
> i915_gem_object_unpin_map(cache->rq->batch->obj);
> i915_gem_chipset_flush(cache->rq->i915);
>
> - __i915_request_add(cache->rq, true);
> + i915_request_add(cache->rq);
> cache->rq = NULL;
> }
>
> @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> trace_i915_request_queue(eb.request, eb.batch_flags);
> err = eb_submit(&eb);
> err_request:
> - __i915_request_add(eb.request, err == 0);
> + i915_request_add(eb.request);
> add_to_client(eb.request, file);
>
> if (fences)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9092f5464c24..e1dbb544046f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1018,14 +1018,13 @@ i915_request_await_object(struct i915_request *to,
> * request is not being tracked for completion but the work itself is
> * going to happen on the hardware. This would be a Bad Thing(tm).
> */
> -void __i915_request_add(struct i915_request *request, bool flush_caches)
> +void i915_request_add(struct i915_request *request)
> {
> struct intel_engine_cs *engine = request->engine;
> struct i915_timeline *timeline = request->timeline;
> struct intel_ring *ring = request->ring;
> struct i915_request *prev;
> u32 *cs;
> - int err;
>
> GEM_TRACE("%s fence %llx:%d\n",
> engine->name, request->fence.context, request->fence.seqno);
> @@ -1046,20 +1045,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
> * know that it is time to use that space up.
> */
> request->reserved_space = 0;
> -
> - /*
> - * Emit any outstanding flushes - execbuf can fail to emit the flush
> - * after having emitted the batchbuffer command. Hence we need to fix
> - * things up similar to emitting the lazy request. The difference here
> - * is that the flush _must_ happen before the next request, no matter
> - * what.
> - */
> - if (flush_caches) {
> - err = engine->emit_flush(request, EMIT_FLUSH);
> -
> - /* Not allowed to fail! */
> - WARN(err, "engine->emit_flush() failed: %d!\n", err);
> - }
> + engine->emit_flush(request, EMIT_FLUSH);
>
> /*
> * Record the position of the start of the breadcrumb so that
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 0e9aba53d0e4..7ee220ded9c9 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -253,9 +253,7 @@ int i915_request_await_object(struct i915_request *to,
> int i915_request_await_dma_fence(struct i915_request *rq,
> struct dma_fence *fence);
>
> -void __i915_request_add(struct i915_request *rq, bool flush_caches);
> -#define i915_request_add(rq) \
> - __i915_request_add(rq, false)
> +void i915_request_add(struct i915_request *rq);
>
> void __i915_request_submit(struct i915_request *request);
> void i915_request_submit(struct i915_request *request);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 7846ea4a99bc..fbe4324116d7 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma,
> reservation_object_unlock(vma->resv);
>
> err_request:
> - __i915_request_add(rq, err == 0);
> + i915_request_add(rq);
>
> return err;
> }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 340a98c0c804..a4900091ae3d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>
> cs = intel_ring_begin(rq, 4);
> if (IS_ERR(cs)) {
> - __i915_request_add(rq, false);
> + i915_request_add(rq);
> i915_vma_unpin(vma);
> return PTR_ERR(cs);
> }
> @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
> reservation_object_add_excl_fence(obj->resv, &rq->fence);
> reservation_object_unlock(obj->resv);
>
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 708e8d721448..836f1af8b833 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -182,12 +182,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
> reservation_object_add_excl_fence(obj->resv, &rq->fence);
> reservation_object_unlock(obj->resv);
>
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> return 0;
>
> err_request:
> - __i915_request_add(rq, false);
> + i915_request_add(rq);
> err_batch:
> i915_vma_unpin(batch);
> err_vma:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index a3a89aadeccb..f5d00332bb31 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -466,7 +466,7 @@ empty_request(struct intel_engine_cs *engine,
> goto out_request;
>
> out_request:
> - __i915_request_add(request, err == 0);
> + i915_request_add(request);
> return err ? ERR_PTR(err) : request;
> }
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 390a157b37c3..fe7d3190ebfe 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -245,7 +245,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
>
> err = emit_recurse_batch(h, rq);
> if (err) {
> - __i915_request_add(rq, false);
> + i915_request_add(rq);
> return ERR_PTR(err);
> }
>
> @@ -318,7 +318,7 @@ static int igt_hang_sanitycheck(void *arg)
> *h.batch = MI_BATCH_BUFFER_END;
> i915_gem_chipset_flush(i915);
>
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> timeout = i915_request_wait(rq,
> I915_WAIT_LOCKED,
> @@ -464,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
> }
>
> i915_request_get(rq);
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
> mutex_unlock(&i915->drm.struct_mutex);
>
> if (!wait_until_running(&h, rq)) {
> @@ -742,7 +742,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
> }
>
> i915_request_get(rq);
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
> mutex_unlock(&i915->drm.struct_mutex);
>
> if (!wait_until_running(&h, rq)) {
> @@ -942,7 +942,7 @@ static int igt_wait_reset(void *arg)
> }
>
> i915_request_get(rq);
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> if (!wait_until_running(&h, rq)) {
> struct drm_printer p = drm_info_printer(i915->drm.dev);
> @@ -1037,7 +1037,7 @@ static int igt_reset_queue(void *arg)
> }
>
> i915_request_get(prev);
> - __i915_request_add(prev, true);
> + i915_request_add(prev);
>
> count = 0;
> do {
> @@ -1051,7 +1051,7 @@ static int igt_reset_queue(void *arg)
> }
>
> i915_request_get(rq);
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> /*
> * XXX We don't handle resetting the kernel context
> @@ -1184,7 +1184,7 @@ static int igt_handle_error(void *arg)
> }
>
> i915_request_get(rq);
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
>
> if (!wait_until_running(&h, rq)) {
> struct drm_printer p = drm_info_printer(i915->drm.dev);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 0b6da08c8cae..ea27c7cfbf96 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -155,7 +155,7 @@ spinner_create_request(struct spinner *spin,
>
> err = emit_recurse_batch(spin, rq, arbitration_command);
> if (err) {
> - __i915_request_add(rq, false);
> + i915_request_add(rq);
> return ERR_PTR(err);
> }
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index f1cfb0fb6bea..e1ea2d2bedd2 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -75,7 +75,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
> i915_gem_object_get(result);
> i915_gem_object_set_active_reference(result);
>
> - __i915_request_add(rq, true);
> + i915_request_add(rq);
> i915_vma_unpin(vma);
>
> return result;
> --
> 2.17.1
>
More information about the Intel-gfx
mailing list