[Intel-gfx] [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn.

arun.siluvery at linux.intel.com arun.siluvery at linux.intel.com
Thu Aug 14 18:51:00 CEST 2014


From: Arun Siluvery <arun.siluvery at linux.intel.com>

The workarounds at the moment are initialized in init_clock_gating() but
they are lost during reset, suspend/resume; this patch moves workarounds
that are part of register state context to render ring init fn otherwise
default context ends up with incorrect values as they don't get initialized
until init_clock_gating fn. This should be ok as they are not related to
display clock gating in the first place.

v2: Add macro to generate w/a table (Daniel)

Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 38 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h         | 32 ++++++++++++++++
 drivers/gpu/drm/i915/intel_pm.c         | 50 -------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e737b7..fc28835 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2406,6 +2406,43 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int intel_wa_registers(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+	int ret;
+
+	if (!dev_priv->num_wa) {
+		DRM_DEBUG_DRIVER("Workaround table not available\n");
+		return -EINVAL;
+	}
+
+	if (dev_priv->num_wa > I915_MAX_WA_REGS)
+		dev_priv->num_wa = I915_MAX_WA_REGS;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	intel_runtime_pm_get(dev_priv);
+
+	seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa);
+	for (i = 0; i < dev_priv->num_wa; ++i) {
+		if (dev_priv->wa_regs[i].addr)
+			seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
+				   dev_priv->wa_regs[i].addr,
+				   I915_READ(dev_priv->wa_regs[i].addr),
+				   dev_priv->wa_regs[i].mask);
+	}
+
+	intel_runtime_pm_put(dev_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
 struct pipe_crc_info {
 	const char *name;
 	struct drm_device *dev;
@@ -3936,6 +3973,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
+	{"intel_wa_registers", intel_wa_registers, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18c9ad8..c4190a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1222,6 +1222,24 @@ struct i915_gpu_error {
 	unsigned int test_irq_rings;
 };
 
+/*
+ * workarounds are currently applied at different places and changes
+ * are being done to consolidate them at a single place hence the
+ * exact count is not clear at this point; replace this with accurate
+ * number based on gen later.
+ */
+#define I915_MAX_WA_REGS      16
+
+/*
+ * structure to verify workarounds status
+ * mask: represent w/a bits
+ */
+struct intel_wa {
+	u32 addr;
+	u32 val;
+	u32 mask;
+};
+
 enum modeset_restore {
 	MODESET_ON_LID_OPEN,
 	MODESET_DONE,
@@ -1422,6 +1440,9 @@ struct drm_i915_private {
 	struct drm_i915_gem_object *semaphore_obj;
 	uint32_t last_seqno, next_seqno;
 
+	uint32_t num_wa;
+	struct intel_wa wa_regs[I915_MAX_WA_REGS];
+
 	drm_dma_handle_t *status_page_dmah;
 	struct resource mch_res;
 
@@ -2803,6 +2824,17 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
+#define I915_WRITE_WA(reg, val) ({					\
+		if (dev_priv->num_wa >= 0				\
+		    && dev_priv->num_wa < I915_MAX_WA_REGS) {		\
+			dev_priv->wa_regs[dev_priv->num_wa].addr = reg;	\
+			dev_priv->wa_regs[dev_priv->num_wa].mask	\
+				= (val) & 0xFFFF;			\
+			dev_priv->num_wa++;				\
+			I915_WRITE(reg, val);				\
+		}							\
+	})
+
 /* "Broadcast RGB" property */
 #define INTEL_BROADCAST_RGB_AUTO 0
 #define INTEL_BROADCAST_RGB_FULL 1
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ddd4df..ab64b64 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
 	/* FIXME(BDW): Check all the w/a, some might only apply to
 	 * pre-production hw. */
 
-	/* WaDisablePartialInstShootdown:bdw */
-	I915_WRITE(GEN8_ROW_CHICKEN,
-		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
-	/* WaDisableThreadStallDopClockGating:bdw */
-	/* FIXME: Unclear whether we really need this on production bdw. */
-	I915_WRITE(GEN8_ROW_CHICKEN,
-		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
-
-	/*
-	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
-	 * pre-production hardware
-	 */
-	I915_WRITE(HALF_SLICE_CHICKEN3,
-		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
-	I915_WRITE(HALF_SLICE_CHICKEN3,
-		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
 	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
 
 	I915_WRITE(_3D_CHICKEN3,
 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
 
-	I915_WRITE(COMMON_SLICE_CHICKEN2,
-		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
-
-	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
-
-	/* WaDisableDopClockGating:bdw May not be needed for production */
-	I915_WRITE(GEN7_ROW_CHICKEN2,
-		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
 	/* WaSwitchSolVfFArbitrationPriority:bdw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
 			   BDW_DPRS_MASK_VBLANK_SRD);
 	}
 
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	I915_WRITE(HDC_CHICKEN0,
-		   I915_READ(HDC_CHICKEN0) |
-		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
-
 	/* WaVSRefCountFullforceMissDisable:bdw */
 	/* WaDSRefCountFullforceMissDisable:bdw */
 	I915_WRITE(GEN7_FF_THREAD_MODE,
 		   I915_READ(GEN7_FF_THREAD_MODE) &
 		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN7_GT_MODE,
-		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
 	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
 		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
 	/* WaDisableSDEUnitClockGating:bdw */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
-
-	/* Wa4x4STCOptimizationDisable:bdw */
-	I915_WRITE(CACHE_MODE_1,
-		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
 }
 
 static void haswell_init_clock_gating(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b3d8f76..abec00c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -598,6 +598,69 @@ err:
 	return ret;
 }
 
+static void bdw_init_workarounds(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * workarounds applied in this fn are part of register state context,
+	 * they need to be re-initialized followed by gpu reset, suspend/resume,
+	 * module reload.
+	 */
+	dev_priv->num_wa = 0;
+	memset(dev_priv->wa_regs, 0, sizeof(dev_priv->wa_regs));
+
+	/* WaDisablePartialInstShootdown:bdw */
+	/* WaDisableThreadStallDopClockGating:bdw */
+	/* FIXME: Unclear whether we really need this on production bdw. */
+
+	I915_WRITE_WA(GEN8_ROW_CHICKEN,
+		      _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
+					 | STALL_DOP_GATING_DISABLE));
+
+	/* WaDisableDopClockGating:bdw May not be needed for production */
+	I915_WRITE_WA(GEN7_ROW_CHICKEN2,
+		      _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+	/*
+	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
+	 * pre-production hardware
+	 */
+	I915_WRITE_WA(HALF_SLICE_CHICKEN3,
+		      _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
+					 | GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+	I915_WRITE_WA(GEN7_HALF_SLICE_CHICKEN1,
+		      _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+
+	/* Wa4x4STCOptimizationDisable:bdw */
+	I915_WRITE_WA(CACHE_MODE_1,
+		      _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	I915_WRITE_WA(GEN7_GT_MODE,
+		      GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
+	I915_WRITE_WA(COMMON_SLICE_CHICKEN2,
+		      _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+
+	/* Use Force Non-Coherent whenever executing a 3D context. This is a
+	 * workaround for for a possible hang in the unlikely event a TLB
+	 * invalidation occurs during a PSD flush.
+	 */
+	I915_WRITE_WA(HDC_CHICKEN0,
+		      _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+
+	DRM_DEBUG_DRIVER("No. of workarounds applied: %d\n", dev_priv->num_wa);
+}
+
 static int init_render_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -653,6 +716,9 @@ static int init_render_ring(struct intel_engine_cs *ring)
 	if (HAS_L3_DPF(dev))
 		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
 
+	if (IS_BROADWELL(dev))
+		bdw_init_workarounds(dev);
+
 	return ret;
 }
 
-- 
2.0.4




More information about the Intel-gfx mailing list