[Intel-gfx] [PATCH] drm/i915: Expose exec parameter to force non IA-Coherent for Gen9+
Chris Wilson
chris at chris-wilson.co.uk
Wed Jan 13 04:53:29 PST 2016
On Wed, Jan 13, 2016 at 11:44:39AM +0100, Artur Harasimiuk wrote:
> Starting from Gen9 we can use IA-Coherent caches. Coherence may be not
> required in certain cases and can be disabled in an easy way. To do this
> we can set HDC_FORCE_NON_COHERENT bit in HDC_CHICKEN0 register. This
> register is part of HW context, however it is private and cannot be
> programmed from non-privileged batch buffer.
>
> New parameter is to override default programming and allow UMD to
> decide whether IA-Coherency is not needed for submitted batch buffer.
> When flag is set KMD emits commands to disable coherency before batch
> buffer execution starts. After execution finished state is restored.
What's the usecase for batch vs context flag?
> When WaForceEnableNonCoherent is programmed, it does not make sense to
> allow for coherency because this can lead to GPU hangs. In such
> situation flag is ignored.
>
> Signed-off-by: Artur Harasimiuk <artur.harasimiuk at intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 +++
> drivers/gpu/drm/i915/i915_drv.h | 4 ++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 28 ++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++++++
> include/uapi/drm/i915_drm.h | 8 +++++++-
> 6 files changed, 57 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 44a896c..79ecf20 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -172,6 +172,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_SOFTPIN:
> value = 1;
> break;
> + case I915_PARAM_HAS_EXEC_FORCE_NON_COHERENT:
> + value = INTEL_INFO(dev)->gen >= 9;
This should actually report the w/a status, or else you need to provide
some other means for userspace to detect if it can successfully force
non-coherency.
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 104bd18..71d739c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -886,6 +886,10 @@ struct intel_context {
> } engine[I915_NUM_RINGS];
>
> struct list_head link;
> +
> + struct {
> + unsigned int WaForceEnableNonCoherent:1;
> + } wa;
> };
>
> enum fb_op_origin {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index d469c47..2997a58 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1400,6 +1400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> if (!i915_gem_check_execbuffer(args))
> return -EINVAL;
>
> + if ((args->flags & I915_EXEC_FORCE_NON_COHERENT) &&
> + INTEL_INFO(dev)->gen < 9)
> + return -EINVAL;
It would seem easier to simply ignore this request in all situations
where it doesn't apply, rather than the current select few.
> ret = validate_exec_list(dev, exec, args->buffer_count);
> if (ret)
> return ret;
> @@ -1494,6 +1498,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>
> i915_gem_context_reference(ctx);
>
> + /* Clear this flag when WA is programmed */
> + if (ctx->wa.WaForceEnableNonCoherent)
> + args->flags &= ~I915_EXEC_FORCE_NON_COHERENT;
> +
> if (ctx->ppgtt)
> vm = &ctx->ppgtt->base;
> else
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ab344e0..4482a6a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -879,6 +879,24 @@ int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
> return intel_logical_ring_begin(request, 0);
> }
>
> +static inline void
> +intel_lr_emit_force_non_coherent(struct i915_execbuffer_params *params,
> + struct drm_i915_gem_execbuffer2 *args, bool force)
> +{
> + if (args->flags & I915_EXEC_FORCE_NON_COHERENT) {
> + struct intel_ringbuffer *ringbuf =
> + params->ctx->engine[params->ring->id].ringbuf;
This should be using the request->ringbuf
> +
Missing ring_begin.
> + intel_logical_ring_emit(ringbuf, MI_NOOP);
> + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
> + intel_logical_ring_emit(ringbuf, HDC_CHICKEN0.reg);
intel_logical_ring_emit_reg(ringbuf, HDC_CHICKEN0);
> + intel_logical_ring_emit(ringbuf, force ?
> + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT) :
> + _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT));
> + intel_logical_ring_advance(ringbuf);
> + }
> +}
> +
> /**
> * execlists_submission() - submit a batchbuffer for execution, Execlists style
> * @dev: DRM device.
> @@ -959,6 +977,8 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
> dev_priv->relative_constants_mode = instp_mode;
> }
>
> + intel_lr_emit_force_non_coherent(params, args, true);
This is after cache invalidation. Do you need to send another cache
invalidate?
> +
> exec_start = params->batch_obj_vm_offset +
> args->batch_start_offset;
>
> @@ -966,6 +986,8 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
> if (ret)
> return ret;
>
> + intel_lr_emit_force_non_coherent(params, args, false);
You anticipate a context switching between modes between every batch?
This goes back to whether it is a context flag vs batch flag, and
whether we should cache the register value.
> +
> trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>
> i915_gem_execbuffer_move_to_active(vmas, params->request);
> @@ -1112,6 +1134,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct i915_workarounds *w = &dev_priv->workarounds;
> + struct intel_context *ctx = req->ctx;
> +
> + ctx->wa.WaForceEnableNonCoherent = 0;
>
> if (w->count == 0)
> return 0;
> @@ -1129,6 +1154,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> for (i = 0; i < w->count; i++) {
> intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr);
> intel_logical_ring_emit(ringbuf, w->reg[i].value);
> + ctx->wa.WaForceEnableNonCoherent |=
> + (w->reg[i].addr.reg == HDC_CHICKEN0.reg) &&
> + (w->reg[i].value & HDC_FORCE_NON_COHERENT);
I don't like this second guessing of what w/a are being applied. We
could just as easily explicitly list the w/a on the engine (i.e. a
synopsis of the wa reg list, plus those setup globally) and reference
that instead of copying that flag to every context.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list