[Intel-gfx] [PATCH v2 1/2] drm/i915: Initialize bdw workarounds in logical ring mode too

Siluvery, Arun arun.siluvery at linux.intel.com
Wed Nov 5 10:54:18 CET 2014


On 04/11/2014 19:23, Rodrigo Vivi wrote:
> These patches got listed to -collector but got a huge conflict. If it
> is still relevant please rebase it.
>
This patch is currently not relevant, rebased version is already sent to 
the list for review.

https://patchwork.kernel.org/patch/5178771/

regards
Arun

> Also my bikeshed is to findo better names to help on differentiate
> them at least.
>
> On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
> <michel.thierry at intel.com> wrote:
>> Following the legacy ring submission example, update the
>> ring->init_context() hook to support the execlist submission mode.
>>
>> Workarounds are defined in bdw_emit_workarounds(), but the emit
>> now depends on the ring submission mode.
>>
>> v2: Updated after "Cleanup pre prod workarounds"
>>
>> For: VIZ-4092
>> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>>   drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
>>   4 files changed, 120 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 7b73b36..d1ed21a 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -657,7 +657,7 @@ done:
>>
>>          if (uninitialized) {
>>                  if (ring->init_context) {
>> -                       ret = ring->init_context(ring);
>> +                       ret = ring->init_context(ring->buffer);
>>                          if (ret)
>>                                  DRM_ERROR("ring init context: %d\n", ret);
>>                  }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index d64d518..a0aa3f0 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1020,6 +1020,62 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>>          return 0;
>>   }
>>
>> +static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>> +                                      u32 addr, u32 value)
>> +{
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
>> +               return;
>> +
>> +       intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
>> +       intel_logical_ring_emit(ringbuf, addr);
>> +       intel_logical_ring_emit(ringbuf, value);
>> +
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
>> +       /* value is updated with the status of remaining bits of this
>> +        * register when it is read from debugfs file
>> +        */
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
>> +       dev_priv->num_wa_regs++;
>> +}
>> +
>> +static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> +       int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       /*
>> +        * workarounds applied in this fn are part of register state context,
>> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> +        * module reload.
>> +        */
>> +       dev_priv->num_wa_regs = 0;
>> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> +       /*
>> +        * update the number of dwords required based on the
>> +        * actual number of workarounds applied
>> +        */
>> +       ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
>> +       if (ret)
>> +               return ret;
>> +
>> +       bdw_emit_workarounds(ringbuf);
>> +
>> +       intel_logical_ring_advance(ringbuf);
>> +
>> +       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
>> +                        dev_priv->num_wa_regs);
>> +
>> +       return 0;
>> +}
>> +
>>   static int gen8_init_common_ring(struct intel_engine_cs *ring)
>>   {
>>          struct drm_device *dev = ring->dev;
>> @@ -1315,6 +1371,10 @@ static int logical_render_ring_init(struct drm_device *dev)
>>          if (HAS_L3_DPF(dev))
>>                  ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
>>
>> +       if (IS_BROADWELL(dev))
>> +               ring->init_context = bdw_init_logical_workarounds;
>> +       ring->emit_wa = intel_logical_ring_emit_wa;
>> +
>>          ring->init = gen8_init_render_ring;
>>          ring->cleanup = intel_fini_pipe_control;
>>          ring->get_seqno = gen8_get_seqno;
>> @@ -1802,6 +1862,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>>          }
>>
>>          if (ring->id == RCS && !ctx->rcs_initialized) {
>> +               if (ring->init_context) {
>> +                       ret = ring->init_context(ringbuf);
>> +                       if (ret)
>> +                               DRM_ERROR("ring init context: %d\n", ret);
>> +               }
>> +
>>                  ret = intel_lr_context_render_state_init(ring, ctx);
>>                  if (ret) {
>>                          DRM_ERROR("Init render state failed: %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 395f926..e6ac913 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -677,9 +677,10 @@ err:
>>          return ret;
>>   }
>>
>> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>> +static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>>                                         u32 addr, u32 value)
>>   {
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>          struct drm_device *dev = ring->dev;
>>          struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -701,51 +702,33 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>>          return;
>>   }
>>
>> -static int bdw_init_workarounds(struct intel_engine_cs *ring)
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
>>   {
>> -       int ret;
>> -       struct drm_device *dev = ring->dev;
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> -       /*
>> -        * workarounds applied in this fn are part of register state context,
>> -        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> -        * module reload.
>> -        */
>> -       dev_priv->num_wa_regs = 0;
>> -       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> -
>> -       /*
>> -        * update the number of dwords required based on the
>> -        * actual number of workarounds applied
>> -        */
>> -       ret = intel_ring_begin(ring, 18);
>> -       if (ret)
>> -               return ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>
>>          /* WaDisablePartialInstShootdown:bdw */
>>          /* WaDisableThreadStallDopClockGating:bdw */
>>          /* FIXME: Unclear whether we really need this on production bdw. */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
>>                                               | STALL_DOP_GATING_DISABLE));
>>
>>          /* WaDisableDopClockGating:bdw May not be needed for production */
>> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> +       ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>>                             _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> +       ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>>                             _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>>          /* Use Force Non-Coherent whenever executing a 3D context. This is a
>>           * workaround for for a possible hang in the unlikely event a TLB
>>           * invalidation occurs during a PSD flush.
>>           */
>> -       intel_ring_emit_wa(ring, HDC_CHICKEN0,
>> +       ring->emit_wa(ringbuf, HDC_CHICKEN0,
>>                             _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>>
>>          /* Wa4x4STCOptimizationDisable:bdw */
>> -       intel_ring_emit_wa(ring, CACHE_MODE_1,
>> +       ring->emit_wa(ringbuf, CACHE_MODE_1,
>>                             _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>
>>          /*
>> @@ -756,8 +739,34 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>>           * disable bit, which we don't touch here, but it's good
>>           * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>>           */
>> -       intel_ring_emit_wa(ring, GEN7_GT_MODE,
>> +       ring->emit_wa(ringbuf, GEN7_GT_MODE,
>>                             GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>> +}
>> +
>> +static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> +       int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       /*
>> +        * workarounds applied in this fn are part of register state context,
>> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> +        * module reload.
>> +        */
>> +       dev_priv->num_wa_regs = 0;
>> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> +       /*
>> +        * update the number of dwords required based on the
>> +        * actual number of workarounds applied
>> +        */
>> +       ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
>> +       if (ret)
>> +               return ret;
>> +
>> +       bdw_emit_workarounds(ringbuf);
>>
>>          intel_ring_advance(ring);
>>
>> @@ -767,9 +776,10 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>>          return 0;
>>   }
>>
>> -static int chv_init_workarounds(struct intel_engine_cs *ring)
>> +static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
>>   {
>>          int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>          struct drm_device *dev = ring->dev;
>>          struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -786,19 +796,19 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
>>                  return ret;
>>
>>          /* WaDisablePartialInstShootdown:chv */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>>
>>          /* WaDisableThreadStallDopClockGating:chv */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>>
>>          /* WaDisableDopClockGating:chv (pre-production hw) */
>> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> +       intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>>                             _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>>          /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
>> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> +       intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>>                             _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>>          intel_ring_advance(ring);
>> @@ -2310,6 +2320,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>>                          ring->init_context = chv_init_workarounds;
>>                  else
>>                          ring->init_context = bdw_init_workarounds;
>> +               ring->emit_wa = intel_ring_emit_wa;
>>                  ring->add_request = gen6_add_request;
>>                  ring->flush = gen8_render_ring_flush;
>>                  ring->irq_get = gen8_ring_get_irq;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 07f66d4..417aa09 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -12,6 +12,11 @@
>>    */
>>   #define CACHELINE_BYTES 64
>>
>> +/* Number of dwords required based on the
>> + * actual number of workarounds applied
>> + */
>> +#define BDW_WA_DWORDS_SIZE 18
>> +
>>   /*
>>    * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
>>    * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
>> @@ -148,7 +153,10 @@ struct  intel_engine_cs {
>>
>>          int             (*init)(struct intel_engine_cs *ring);
>>
>> -       int             (*init_context)(struct intel_engine_cs *ring);
>> +       int             (*init_context)(struct intel_ringbuffer *ringbuf);
>> +
>> +       void    (*emit_wa)(struct intel_ringbuffer *ringbuf,
>> +                      u32 addr, u32 value);
>>
>>          void            (*write_tail)(struct intel_engine_cs *ring,
>>                                        u32 value);
>> @@ -427,6 +435,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev);
>>
>>   u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
>>   void intel_ring_setup_status_page(struct intel_engine_cs *ring);
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);
>>
>>   static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>>   {
>> --
>> 2.0.3
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
>




More information about the Intel-gfx mailing list