[Intel-gfx] [PATCH v2 1/2] drm/i915: Initialize bdw workarounds in logical ring mode too
Siluvery, Arun
arun.siluvery at linux.intel.com
Wed Nov 5 10:54:18 CET 2014
On 04/11/2014 19:23, Rodrigo Vivi wrote:
> These patches got listed to -collector but got a huge conflict. If it
> is still relevant please rebase it.
>
This patch is currently not relevant; a rebased version has already been sent to
the list for review.
https://patchwork.kernel.org/patch/5178771/
regards
Arun
> Also, my bikeshed is to find better names to help differentiate
> them at least.
>
> On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
> <michel.thierry at intel.com> wrote:
>> Following the legacy ring submission example, update the
>> ring->init_context() hook to support the execlist submission mode.
>>
>> Workarounds are defined in bdw_emit_workarounds(), but the emit
>> now depends on the ring submission mode.
>>
>> v2: Updated after "Cleanup pre prod workarounds"
>>
>> For: VIZ-4092
>> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
>> drivers/gpu/drm/i915/intel_lrc.c | 66 +++++++++++++++++++++++++++++
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
>> drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
>> 4 files changed, 120 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 7b73b36..d1ed21a 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -657,7 +657,7 @@ done:
>>
>> if (uninitialized) {
>> if (ring->init_context) {
>> - ret = ring->init_context(ring);
>> + ret = ring->init_context(ring->buffer);
>> if (ret)
>> DRM_ERROR("ring init context: %d\n", ret);
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index d64d518..a0aa3f0 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1020,6 +1020,62 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>> return 0;
>> }
>>
>> +static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>> + u32 addr, u32 value)
>> +{
>> + struct intel_engine_cs *ring = ringbuf->ring;
>> + struct drm_device *dev = ring->dev;
>> + struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> + if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
>> + return;
>> +
>> + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
>> + intel_logical_ring_emit(ringbuf, addr);
>> + intel_logical_ring_emit(ringbuf, value);
>> +
>> + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
>> + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
>> + /* value is updated with the status of remaining bits of this
>> + * register when it is read from debugfs file
>> + */
>> + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
>> + dev_priv->num_wa_regs++;
>> +}
>> +
>> +static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> + int ret;
>> + struct intel_engine_cs *ring = ringbuf->ring;
>> + struct drm_device *dev = ring->dev;
>> + struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> + /*
>> + * workarounds applied in this fn are part of register state context,
>> + * they need to be re-initialized followed by gpu reset, suspend/resume,
>> + * module reload.
>> + */
>> + dev_priv->num_wa_regs = 0;
>> + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> + /*
>> + * update the number of dwords required based on the
>> + * actual number of workarounds applied
>> + */
>> + ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
>> + if (ret)
>> + return ret;
>> +
>> + bdw_emit_workarounds(ringbuf);
>> +
>> + intel_logical_ring_advance(ringbuf);
>> +
>> + DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
>> + dev_priv->num_wa_regs);
>> +
>> + return 0;
>> +}
>> +
>> static int gen8_init_common_ring(struct intel_engine_cs *ring)
>> {
>> struct drm_device *dev = ring->dev;
>> @@ -1315,6 +1371,10 @@ static int logical_render_ring_init(struct drm_device *dev)
>> if (HAS_L3_DPF(dev))
>> ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
>>
>> + if (IS_BROADWELL(dev))
>> + ring->init_context = bdw_init_logical_workarounds;
>> + ring->emit_wa = intel_logical_ring_emit_wa;
>> +
>> ring->init = gen8_init_render_ring;
>> ring->cleanup = intel_fini_pipe_control;
>> ring->get_seqno = gen8_get_seqno;
>> @@ -1802,6 +1862,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>> }
>>
>> if (ring->id == RCS && !ctx->rcs_initialized) {
>> + if (ring->init_context) {
>> + ret = ring->init_context(ringbuf);
>> + if (ret)
>> + DRM_ERROR("ring init context: %d\n", ret);
>> + }
>> +
>> ret = intel_lr_context_render_state_init(ring, ctx);
>> if (ret) {
>> DRM_ERROR("Init render state failed: %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 395f926..e6ac913 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -677,9 +677,10 @@ err:
>> return ret;
>> }
>>
>> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>> +static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>> u32 addr, u32 value)
>> {
>> + struct intel_engine_cs *ring = ringbuf->ring;
>> struct drm_device *dev = ring->dev;
>> struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -701,51 +702,33 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>> return;
>> }
>>
>> -static int bdw_init_workarounds(struct intel_engine_cs *ring)
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
>> {
>> - int ret;
>> - struct drm_device *dev = ring->dev;
>> - struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> - /*
>> - * workarounds applied in this fn are part of register state context,
>> - * they need to be re-initialized followed by gpu reset, suspend/resume,
>> - * module reload.
>> - */
>> - dev_priv->num_wa_regs = 0;
>> - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> -
>> - /*
>> - * update the number of dwords required based on the
>> - * actual number of workarounds applied
>> - */
>> - ret = intel_ring_begin(ring, 18);
>> - if (ret)
>> - return ret;
>> + struct intel_engine_cs *ring = ringbuf->ring;
>>
>> /* WaDisablePartialInstShootdown:bdw */
>> /* WaDisableThreadStallDopClockGating:bdw */
>> /* FIXME: Unclear whether we really need this on production bdw. */
>> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> + ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>> _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
>> | STALL_DOP_GATING_DISABLE));
>>
>> /* WaDisableDopClockGating:bdw May not be needed for production */
>> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> + ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>> _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> + ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>> _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>> /* Use Force Non-Coherent whenever executing a 3D context. This is a
>> * workaround for for a possible hang in the unlikely event a TLB
>> * invalidation occurs during a PSD flush.
>> */
>> - intel_ring_emit_wa(ring, HDC_CHICKEN0,
>> + ring->emit_wa(ringbuf, HDC_CHICKEN0,
>> _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>>
>> /* Wa4x4STCOptimizationDisable:bdw */
>> - intel_ring_emit_wa(ring, CACHE_MODE_1,
>> + ring->emit_wa(ringbuf, CACHE_MODE_1,
>> _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>
>> /*
>> @@ -756,8 +739,34 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>> * disable bit, which we don't touch here, but it's good
>> * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>> */
>> - intel_ring_emit_wa(ring, GEN7_GT_MODE,
>> + ring->emit_wa(ringbuf, GEN7_GT_MODE,
>> GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>> +}
>> +
>> +static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> + int ret;
>> + struct intel_engine_cs *ring = ringbuf->ring;
>> + struct drm_device *dev = ring->dev;
>> + struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> + /*
>> + * workarounds applied in this fn are part of register state context,
>> + * they need to be re-initialized followed by gpu reset, suspend/resume,
>> + * module reload.
>> + */
>> + dev_priv->num_wa_regs = 0;
>> + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> + /*
>> + * update the number of dwords required based on the
>> + * actual number of workarounds applied
>> + */
>> + ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
>> + if (ret)
>> + return ret;
>> +
>> + bdw_emit_workarounds(ringbuf);
>>
>> intel_ring_advance(ring);
>>
>> @@ -767,9 +776,10 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>> return 0;
>> }
>>
>> -static int chv_init_workarounds(struct intel_engine_cs *ring)
>> +static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
>> {
>> int ret;
>> + struct intel_engine_cs *ring = ringbuf->ring;
>> struct drm_device *dev = ring->dev;
>> struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -786,19 +796,19 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
>> return ret;
>>
>> /* WaDisablePartialInstShootdown:chv */
>> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> + intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>> _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>>
>> /* WaDisableThreadStallDopClockGating:chv */
>> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> + intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>> _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>>
>> /* WaDisableDopClockGating:chv (pre-production hw) */
>> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> + intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>> _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>> /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
>> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> + intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>> _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>> intel_ring_advance(ring);
>> @@ -2310,6 +2320,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>> ring->init_context = chv_init_workarounds;
>> else
>> ring->init_context = bdw_init_workarounds;
>> + ring->emit_wa = intel_ring_emit_wa;
>> ring->add_request = gen6_add_request;
>> ring->flush = gen8_render_ring_flush;
>> ring->irq_get = gen8_ring_get_irq;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 07f66d4..417aa09 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -12,6 +12,11 @@
>> */
>> #define CACHELINE_BYTES 64
>>
>> +/* Number of dwords required based on the
>> + * actual number of workarounds applied
>> + */
>> +#define BDW_WA_DWORDS_SIZE 18
>> +
>> /*
>> * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
>> * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
>> @@ -148,7 +153,10 @@ struct intel_engine_cs {
>>
>> int (*init)(struct intel_engine_cs *ring);
>>
>> - int (*init_context)(struct intel_engine_cs *ring);
>> + int (*init_context)(struct intel_ringbuffer *ringbuf);
>> +
>> + void (*emit_wa)(struct intel_ringbuffer *ringbuf,
>> + u32 addr, u32 value);
>>
>> void (*write_tail)(struct intel_engine_cs *ring,
>> u32 value);
>> @@ -427,6 +435,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev);
>>
>> u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
>> void intel_ring_setup_status_page(struct intel_engine_cs *ring);
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);
>>
>> static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>> {
>> --
>> 2.0.3
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
>
More information about the Intel-gfx
mailing list