[Intel-gfx] [PATCH v2] drm/i915: Rework workaround init functions for BDW and CHV
Arun Siluvery
arun.siluvery at linux.intel.com
Tue Sep 2 11:14:17 CEST 2014
This rework is based on suggestion from Chris.
Now w/a are organized in an array and all of them are emitted in
single fn instead of sending them individually. This approach is
clean and new w/a can be added with minimal changes.
The same array can be used when exporting them to debugfs and the
temporary array in the current implementation is not required.
v2: As suggested by Damien a new w/a reg definition is added.
This helps in initializing w/a that are unmasked registers.
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 158 ++++++++++++++------------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ++++++
2 files changed, 93 insertions(+), 91 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c5e4dc7..3ed0ad5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -650,87 +650,80 @@ err:
return ret;
}
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
- u32 addr, u32 value)
+static int i915_request_emit_lri(struct intel_engine_cs *ring,
+ int num_registers,
+ const struct intel_wa_reg *lri_list)
{
- struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
+ int i;
+ int ret;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
- if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
- return;
+ ret = intel_ring_begin(ring, (2 * num_registers + 1));
+ if (ret)
+ return ret;
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit(ring, addr);
- intel_ring_emit(ring, value);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers));
+ for (i = 0; i < num_registers; ++i) {
+ u32 value;
+ const struct intel_wa_reg *p = (lri_list + i);
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
- /* value is updated with the status of remaining bits of this
- * register when it is read from debugfs file
- */
- dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
- dev_priv->num_wa_regs++;
+ if (p->masked_register)
+ value = (p->mask << 16) | p->value;
+ else
+ value = (I915_READ(p->addr) & ~p->mask) | p->value;
+ intel_ring_emit(ring, p->addr);
+ intel_ring_emit(ring, value);
+ }
- return;
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+
+ return 0;
}
-static int bdw8_init_workarounds(struct intel_engine_cs *ring)
-{
- int ret;
- struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
+static const struct intel_wa_reg bdw_ring_init_context[] = {
/*
* workarounds applied in this fn are part of register state context,
* they need to be re-initialized followed by gpu reset, suspend/resume,
* module reload.
*/
- dev_priv->num_wa_regs = 0;
- memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
- /*
- * update the number of dwords required based on the
- * actual number of workarounds applied
- */
- ret = intel_ring_begin(ring, 24);
- if (ret)
- return ret;
/* WaDisablePartialInstShootdown:bdw */
/* WaDisableThreadStallDopClockGating:bdw */
/* FIXME: Unclear whether we really need this on production bdw. */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
- | STALL_DOP_GATING_DISABLE));
+ INIT_MASKED_WA(GEN8_ROW_CHICKEN,
+ _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+ _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)),
/* WaDisableDopClockGating:bdw May not be needed for production */
- intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ INIT_MASKED_WA(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)),
/*
* This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
* pre-production hardware
*/
- intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
- | GEN8_SAMPLER_POWER_BYPASS_DIS));
+ INIT_MASKED_WA(HALF_SLICE_CHICKEN3,
+ _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS) |
+ _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)),
- intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+ INIT_MASKED_WA(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)),
- intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
- _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+ INIT_MASKED_WA(COMMON_SLICE_CHICKEN2,
+ _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)),
/* Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
*/
- intel_ring_emit_wa(ring, HDC_CHICKEN0,
- _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+ INIT_MASKED_WA(HDC_CHICKEN0,
+ _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)),
/* Wa4x4STCOptimizationDisable:bdw */
- intel_ring_emit_wa(ring, CACHE_MODE_1,
- _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+ INIT_MASKED_WA(CACHE_MODE_1,
+ _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)),
/*
* BSpec recommends 8x4 when MSAA is used,
@@ -740,54 +733,40 @@ static int bdw8_init_workarounds(struct intel_engine_cs *ring)
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
- intel_ring_emit_wa(ring, GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
- intel_ring_advance(ring);
-
- DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
- dev_priv->num_wa_regs);
-
- return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *ring)
-{
- int ret;
- struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
-
- /*
- * workarounds applied in this fn are part of register state context,
- * they need to be re-initialized followed by gpu reset, suspend/resume,
- * module reload.
- */
- dev_priv->num_wa_regs = 0;
- memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
- ret = intel_ring_begin(ring, 12);
- if (ret)
- return ret;
+ INIT_MASKED_WA(GEN7_GT_MODE,
+ (GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4)),
+};
+static const struct intel_wa_reg chv_ring_init_context[] = {
/* WaDisablePartialInstShootdown:chv */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
/* WaDisableThreadStallDopClockGating:chv */
- intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+ INIT_MASKED_WA(GEN8_ROW_CHICKEN,
+ _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+ _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)),
/* WaDisableDopClockGating:chv (pre-production hw) */
- intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ INIT_MASKED_WA(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)),
/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
- intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+ INIT_MASKED_WA(HALF_SLICE_CHICKEN3,
+ _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)),
+};
- intel_ring_advance(ring);
+static int ring_init_context(struct intel_engine_cs *ring)
+{
+ int ret = -EINVAL;
- return 0;
+ if (IS_CHERRYVIEW(ring->dev))
+ ret = i915_request_emit_lri(ring,
+ ARRAY_SIZE(chv_ring_init_context),
+ chv_ring_init_context);
+ else
+ ret = i915_request_emit_lri(ring,
+ ARRAY_SIZE(bdw_ring_init_context),
+ bdw_ring_init_context);
+
+ return ret;
}
static int init_render_ring(struct intel_engine_cs *ring)
@@ -2275,10 +2254,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
dev_priv->semaphore_obj = obj;
}
}
- if (IS_CHERRYVIEW(dev))
- ring->init_context = chv_init_workarounds;
- else
- ring->init_context = bdw_init_workarounds;
+ ring->init_context = ring_init_context;
ring->add_request = gen6_add_request;
ring->flush = gen8_render_ring_flush;
ring->irq_get = gen8_ring_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 96479c8..c9ed06c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -317,6 +317,32 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
};
+/*
+ * Workaround register definition
+ * mask: bits correponding to the w/a
+ * value: represents reference values of w/a bits
+ */
+struct intel_wa_reg {
+ u32 masked_register : 1;
+ u32 addr;
+ u32 mask;
+ u32 value;
+};
+
+#define INIT_WA_REG(_masked, _addr, _mask, _value) \
+{ \
+ .masked_register = _masked, \
+ .addr = _addr, \
+ .mask = _mask, \
+ .value = _value,\
+}
+
+#define INIT_MASKED_WA(_addr, _value) \
+ INIT_WA_REG(1, _addr, ((_value) >> 16), ((_value) & 0xFFFF))
+
+#define INIT_UNMASKED_WA(_addr, _value) \
+ INIT_WA_REG(0, _addr, ~(_value), _value)
+
bool intel_ring_initialized(struct intel_engine_cs *ring);
static inline unsigned
--
2.0.4
More information about the Intel-gfx
mailing list