[Intel-gfx] [PATCH 3/4] drm/i915: Build workaround list in ring initialization
Siluvery, Arun
arun.siluvery at linux.intel.com
Mon Oct 20 18:13:45 CEST 2014
On 07/10/2014 15:21, Mika Kuoppala wrote:
> If we build the workaround list in ring initialization
> and decouple it from the actual writing of values, we
> gain the ability to decide where and how we want to apply
> the values.
>
> The advantage of this will become more clear when
> we need to initialize workarounds on older gens where
> it is not possible to write all the registers through ring
> LRIs.
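Just to illustrate the point above: once the list is decoupled from the
emission, an MMIO-based apply path for older gens could look roughly like
the sketch below. This is not part of this patch; the helper name is made
up and a real version would also need the appropriate forcewake handling
around the register writes.

static void intel_ring_workarounds_apply_mmio(struct intel_engine_cs *ring)
{
        struct drm_i915_private *dev_priv = ring->dev->dev_private;
        struct i915_workarounds *w = &dev_priv->workarounds;
        int i;

        /* Write each queued workaround register directly instead of
         * emitting MI_LOAD_REGISTER_IMMs on the ring.
         */
        for (i = 0; i < w->count; i++)
                I915_WRITE(w->reg[i].addr, w->reg[i].value);
}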
>
> v2: rebase on newest bdw workarounds
>
> Cc: Arun Siluvery <arun.siluvery at linux.intel.com>
> Cc: Damien Lespiau <damien.lespiau at intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 20 ++--
> drivers/gpu/drm/i915/i915_drv.h | 28 ++---
> drivers/gpu/drm/i915/intel_ringbuffer.c | 185 ++++++++++++++++++--------------
> 3 files changed, 130 insertions(+), 103 deletions(-)
Hi Daniel,
Patches 3 and 4 in this series are independent of the first two.
Could you please pull in these patches?
regards
Arun
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index da4036d..87482f8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2655,18 +2655,20 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
>
> intel_runtime_pm_get(dev_priv);
>
> - seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
> - for (i = 0; i < dev_priv->num_wa_regs; ++i) {
> + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
> +
> + seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
> + for (i = 0; i < dev_priv->workarounds.count; ++i) {
> u32 addr, mask;
>
> - addr = dev_priv->intel_wa_regs[i].addr;
> - mask = dev_priv->intel_wa_regs[i].mask;
> - dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
> - if (dev_priv->intel_wa_regs[i].addr)
> + addr = dev_priv->workarounds.reg[i].addr;
> + mask = dev_priv->workarounds.reg[i].mask;
> + dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
> + if (dev_priv->workarounds.reg[i].addr)
> seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> - dev_priv->intel_wa_regs[i].addr,
> - dev_priv->intel_wa_regs[i].value,
> - dev_priv->intel_wa_regs[i].mask);
> + dev_priv->workarounds.reg[i].addr,
> + dev_priv->workarounds.reg[i].value,
> + dev_priv->workarounds.reg[i].mask);
> }
>
> intel_runtime_pm_put(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1e476b5..f7265bf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1448,6 +1448,20 @@ struct i915_frontbuffer_tracking {
> unsigned flip_bits;
> };
>
> +struct i915_wa_reg {
> + u32 addr;
> + u32 value;
> + /* bitmask representing WA bits */
> + u32 mask;
> +};
> +
> +#define I915_MAX_WA_REGS 16
> +
> +struct i915_workarounds {
> + struct i915_wa_reg reg[I915_MAX_WA_REGS];
> + u32 count;
> +};
> +
> struct drm_i915_private {
> struct drm_device *dev;
> struct kmem_cache *slab;
> @@ -1590,19 +1604,7 @@ struct drm_i915_private {
> struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
> int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>
> - /*
> - * workarounds are currently applied at different places and
> - * changes are being done to consolidate them so exact count is
> - * not clear at this point, use a max value for now.
> - */
> -#define I915_MAX_WA_REGS 16
> - struct {
> - u32 addr;
> - u32 value;
> - /* bitmask representing WA bits */
> - u32 mask;
> - } intel_wa_regs[I915_MAX_WA_REGS];
> - u32 num_wa_regs;
> + struct i915_workarounds workarounds;
>
> /* Reclocking support */
> bool render_reclock_avail;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 816a692..12a546f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -665,80 +665,107 @@ err:
> return ret;
> }
>
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> - u32 addr, u32 value)
> +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
> {
> + int ret, i;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> + struct i915_workarounds *w = &dev_priv->workarounds;
>
> - if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
> - return;
> + if (WARN_ON(w->count == 0))
> + return 0;
>
> - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> - intel_ring_emit(ring, addr);
> - intel_ring_emit(ring, value);
> + ring->gpu_caches_dirty = true;
> + ret = intel_ring_flush_all_caches(ring);
> + if (ret)
> + return ret;
>
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
> - /* value is updated with the status of remaining bits of this
> - * register when it is read from debugfs file
> - */
> - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> - dev_priv->num_wa_regs++;
> + ret = intel_ring_begin(ring, w->count * 3);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < w->count; i++) {
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, w->reg[i].addr);
> + intel_ring_emit(ring, w->reg[i].value);
> + }
> +
> + intel_ring_advance(ring);
> +
> + ring->gpu_caches_dirty = true;
> + ret = intel_ring_flush_all_caches(ring);
> + if (ret)
> + return ret;
> +
> + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
>
> - return;
> + return 0;
> +}
> +
> +static int wa_add(struct drm_i915_private *dev_priv,
> + const u32 addr, const u32 val, const u32 mask)
> +{
> + const u32 idx = dev_priv->workarounds.count;
> +
> + if (WARN_ON(idx >= I915_MAX_WA_REGS))
> + return -ENOSPC;
> +
> + dev_priv->workarounds.reg[idx].addr = addr;
> + dev_priv->workarounds.reg[idx].value = val;
> + dev_priv->workarounds.reg[idx].mask = mask;
> +
> + dev_priv->workarounds.count++;
> +
> + return 0;
> }
>
> +#define WA_REG(addr, val, mask) { \
> + const int r = wa_add(dev_priv, (addr), (val), (mask)); \
> + if (r) \
> + return r; \
> + }
> +
> +#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \
> + _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
> +
> +#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \
> + _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
> +
> +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
> +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
> +
> +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
> +
> static int bdw_init_workarounds(struct intel_engine_cs *ring)
> {
> - int ret;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> - /*
> - * workarounds applied in this fn are part of register state context,
> - * they need to be re-initialized followed by gpu reset, suspend/resume,
> - * module reload.
> - */
> - dev_priv->num_wa_regs = 0;
> - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> - /*
> - * update the number of dwords required based on the
> - * actual number of workarounds applied
> - */
> - ret = intel_ring_begin(ring, 18);
> - if (ret)
> - return ret;
> -
> /* WaDisablePartialInstShootdown:bdw */
> /* WaDisableThreadStallDopClockGating:bdw */
> - /* FIXME: Unclear whether we really need this on production bdw. */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
> - | STALL_DOP_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
> + STALL_DOP_GATING_DISABLE);
>
> /* WaDisableDopClockGating:bdw May not be needed for production */
> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> + DOP_CLOCK_GATING_DISABLE);
>
> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> + GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> /* Use Force Non-Coherent whenever executing a 3D context. This is a
> * workaround for a possible hang in the unlikely event a TLB
> * invalidation occurs during a PSD flush.
> */
> /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
> - intel_ring_emit_wa(ring, HDC_CHICKEN0,
> - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT |
> - (IS_BDW_GT3(dev) ?
> - HDC_FENCE_DEST_SLM_DISABLE : 0)
> - ));
> + WA_SET_BIT_MASKED(HDC_CHICKEN0,
> + HDC_FORCE_NON_COHERENT |
> + (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
>
> /* Wa4x4STCOptimizationDisable:bdw */
> - intel_ring_emit_wa(ring, CACHE_MODE_1,
> - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> + WA_SET_BIT_MASKED(CACHE_MODE_1,
> + GEN8_4x4_STC_OPTIMIZATION_DISABLE);
>
> /*
> * BSpec recommends 8x4 when MSAA is used,
> @@ -748,52 +775,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
> * disable bit, which we don't touch here, but it's good
> * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> */
> - intel_ring_emit_wa(ring, GEN7_GT_MODE,
> - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
> - intel_ring_advance(ring);
> -
> - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> - dev_priv->num_wa_regs);
> + WA_SET_BIT_MASKED(GEN7_GT_MODE,
> + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>
> return 0;
> }
>
> static int chv_init_workarounds(struct intel_engine_cs *ring)
> {
> - int ret;
> struct drm_device *dev = ring->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> - /*
> - * workarounds applied in this fn are part of register state context,
> - * they need to be re-initialized followed by gpu reset, suspend/resume,
> - * module reload.
> - */
> - dev_priv->num_wa_regs = 0;
> - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> - ret = intel_ring_begin(ring, 12);
> - if (ret)
> - return ret;
> -
> /* WaDisablePartialInstShootdown:chv */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>
> /* WaDisableThreadStallDopClockGating:chv */
> - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + STALL_DOP_GATING_DISABLE);
>
> /* WaDisableDopClockGating:chv (pre-production hw) */
> - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> + DOP_CLOCK_GATING_DISABLE);
>
> /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
> - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> + GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> - intel_ring_advance(ring);
> + return 0;
> +}
> +
> +static int init_workarounds_ring(struct intel_engine_cs *ring)
> +{
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + WARN_ON(ring->id != RCS);
> +
> + dev_priv->workarounds.count = 0;
> +
> + if (IS_BROADWELL(dev))
> + return bdw_init_workarounds(ring);
> +
> + if (IS_CHERRYVIEW(dev))
> + return chv_init_workarounds(ring);
>
> return 0;
> }
> @@ -853,7 +878,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
> if (HAS_L3_DPF(dev))
> I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
>
> - return ret;
> + return init_workarounds_ring(ring);
> }
>
> static void render_ring_cleanup(struct intel_engine_cs *ring)
> @@ -2299,10 +2324,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> dev_priv->semaphore_obj = obj;
> }
> }
> - if (IS_CHERRYVIEW(dev))
> - ring->init_context = chv_init_workarounds;
> - else
> - ring->init_context = bdw_init_workarounds;
> +
> + ring->init_context = intel_ring_workarounds_emit;
> ring->add_request = gen6_add_request;
> ring->flush = gen8_render_ring_flush;
> ring->irq_get = gen8_ring_get_irq;
>