[Intel-gfx] [PATCH 2/3] drm/i915: Move common engine and ring code into intel_engine_cs
Ville Syrjälä
ville.syrjala at linux.intel.com
Wed Feb 15 14:18:55 UTC 2017
On Wed, Feb 15, 2017 at 02:05:52PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> This leaves the ringbuffer submission code in intel_ringbuffer.c.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 834 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_ringbuffer.c | 834 --------------------------------
> 2 files changed, 834 insertions(+), 834 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 538d845d7251..afaedc3adc2e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -525,6 +525,840 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
> }
> }
>
> +static int wa_add(struct drm_i915_private *dev_priv,
> + i915_reg_t addr,
> + const u32 mask, const u32 val)
> +{
> + const u32 idx = dev_priv->workarounds.count;
> +
> + if (WARN_ON(idx >= I915_MAX_WA_REGS))
> + return -ENOSPC;
> +
> + dev_priv->workarounds.reg[idx].addr = addr;
> + dev_priv->workarounds.reg[idx].value = val;
> + dev_priv->workarounds.reg[idx].mask = mask;
> +
> + dev_priv->workarounds.count++;
> +
> + return 0;
> +}
> +
> +#define WA_REG(addr, mask, val) do { \
> + const int r = wa_add(dev_priv, (addr), (mask), (val)); \
> + if (r) \
> + return r; \
> + } while (0)
> +
> +#define WA_SET_BIT_MASKED(addr, mask) \
> + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
> +
> +#define WA_CLR_BIT_MASKED(addr, mask) \
> + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
> +
> +#define WA_SET_FIELD_MASKED(addr, mask, value) \
> + WA_REG(addr, mask, _MASKED_FIELD(mask, value))
> +
> +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
> +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
> +
> +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
> +
> +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
> + i915_reg_t reg)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> + struct i915_workarounds *wa = &dev_priv->workarounds;
> + const uint32_t index = wa->hw_whitelist_count[engine->id];
> +
> + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
> + return -EINVAL;
> +
> + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
> + i915_mmio_reg_offset(reg));
> + wa->hw_whitelist_count[engine->id]++;
> +
> + return 0;
> +}
> +
> +static int gen8_init_workarounds(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> +
> + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
> +
> + /* WaDisableAsyncFlipPerfMode:bdw,chv */
> + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
> +
> + /* WaDisablePartialInstShootdown:bdw,chv */
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
> +
> + /* Use Force Non-Coherent whenever executing a 3D context. This is a
> + * workaround for a possible hang in the unlikely event a TLB
> + * invalidation occurs during a PSD flush.
> + */
> + /* WaForceEnableNonCoherent:bdw,chv */
> + /* WaHdcDisableFetchWhenMasked:bdw,chv */
> + WA_SET_BIT_MASKED(HDC_CHICKEN0,
> + HDC_DONOT_FETCH_MEM_WHEN_MASKED |
> + HDC_FORCE_NON_COHERENT);
> +
> + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
> + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
> + * polygons in the same 8x4 pixel/sample area to be processed without
> + * stalling waiting for the earlier ones to write to Hierarchical Z
> + * buffer."
> + *
> + * This optimization is off by default for BDW and CHV; turn it on.
> + */
> + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
> +
> + /* Wa4x4STCOptimizationDisable:bdw,chv */
> + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
> +
> + /*
> + * BSpec recommends 8x4 when MSAA is used,
> + * however in practice 16x4 seems fastest.
> + *
> + * Note that PS/WM thread counts depend on the WIZ hashing
> + * disable bit, which we don't touch here, but it's good
> + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> + */
> + WA_SET_FIELD_MASKED(GEN7_GT_MODE,
> + GEN6_WIZ_HASHING_MASK,
> + GEN6_WIZ_HASHING_16x4);
> +
> + return 0;
> +}
> +
> +static int bdw_init_workarounds(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> + int ret;
> +
> + ret = gen8_init_workarounds(engine);
> + if (ret)
> + return ret;
> +
> + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
Hmm. We still have pre-prod workarounds for BDW?
Any volunteers to go through it all and clean things up? Or maybe
someone already did and the patches are in some kind of limbo?
--
Ville Syrjälä
Intel OTC
More information about the Intel-gfx
mailing list