[Intel-gfx] [PATCH 02/21] drm/i915/gtt: Workaround for HW preload not flushing pdps

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri May 22 10:04:55 PDT 2015


On BDW/SKL, and only in the 32bit legacy addressing mode, the hardware
preloads the pdps. However, the TLB invalidation only takes effect on
the levels below the pdps. This means that if the pdps change, the
hardware might do accesses through a stale pdp entry.

To work around this problem, preallocate the top level pdps so that
the hardware sees them as immutable for each context.
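For reference, a rough standalone sketch (not part of this patch; the
EX_* shift values and masks below are only illustrative of the gen8
32b legacy layout) of why keeping all four top level pdps allocated
makes them effectively immutable:

	/* Illustrative only: in gen8 32b legacy mode a VA decomposes into
	 * pdpe (bits 31:30), pde (bits 29:21), pte (bits 20:12) and the
	 * page offset. Only the pdpe level is preloaded by the hw from the
	 * context image, so it must not change after init.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define EX_PDPE_SHIFT	30	/* illustrative shift values */
	#define EX_PDE_SHIFT	21
	#define EX_PTE_SHIFT	12

	int main(void)
	{
		uint64_t va = 0xdeadb000ULL;	/* made-up address */

		/* With all 4 pdps preallocated at init, the pdpe -> page
		 * directory mapping written into the context never changes;
		 * only the pde/pte levels, which the TLB invalidation does
		 * cover, are grown dynamically.
		 */
		printf("pdpe=%llu pde=%llu pte=%llu\n",
		       (unsigned long long)((va >> EX_PDPE_SHIFT) & 0x3),
		       (unsigned long long)((va >> EX_PDE_SHIFT) & 0x1ff),
		       (unsigned long long)((va >> EX_PTE_SHIFT) & 0x1ff));
		return 0;
	}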

Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
Cc: Rafael Barbalho <rafael.barbalho at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 50 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h     | 17 +++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c    | 15 +----------
 3 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0ffd459..1a5ad4c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -941,6 +941,48 @@ err_out:
 	return ret;
 }
 
+/* With some architectures and 32bit legacy mode, hardware pre-loads the
+ * top level pdps but the tlb invalidation only invalidates the lower levels.
+ * This might lead to the hw fetching with stale pdp entries if the top level
+ * structure changes, i.e. the va space grows with dynamic page tables.
+ */
+static bool hw_wont_flush_pdp_tlbs(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+
+	if (GEN8_CTX_ADDRESSING_MODE != LEGACY_32B_CONTEXT)
+		return false;
+
+	if (IS_BROADWELL(dev) || IS_SKYLAKE(dev))
+		return true;
+
+	return false;
+}
+
+static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
+{
+	unsigned long *new_page_dirs, **new_page_tables;
+	int ret;
+
+	/* We allocate temp bitmaps for the page tables for no gain
+	 * but as this is for init only, let's keep things simple.
+	 */
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+	if (ret)
+		return ret;
+
+	/* Allocate for all pdps regardless of how the ppgtt
+	 * was defined.
+	 */
+	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp,
+						0, 1ULL << 32,
+						new_page_dirs);
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+
+	return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -972,6 +1014,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->switch_mm = gen8_mm_switch;
 
+	if (hw_wont_flush_pdp_tlbs(ppgtt)) {
+		/* Avoid the tlb flush bug by preallocating
+		 * the whole top level pdp structure so it
+		 * stays static even if our va space grows.
+		 */
+		return gen8_preallocate_top_level_pdps(ppgtt);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6eeba63..334324b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2777,6 +2777,23 @@ enum skl_disp_power_wells {
 #define VLV_CLK_CTL2			0x101104
 #define   CLK_CTL2_CZCOUNT_30NS_SHIFT	28
 
+/* Context descriptor format bits */
+#define GEN8_CTX_VALID			(1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE	(1<<1)
+#define GEN8_CTX_FORCE_RESTORE		(1<<2)
+#define GEN8_CTX_L3LLC_COHERENT		(1<<5)
+#define GEN8_CTX_PRIVILEGE		(1<<8)
+
+enum {
+	ADVANCED_CONTEXT = 0,
+	LEGACY_32B_CONTEXT,
+	ADVANCED_AD_CONTEXT,
+	LEGACY_64B_CONTEXT
+};
+
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT	3
+#define GEN8_CTX_ADDRESSING_MODE	LEGACY_32B_CONTEXT
+
 /*
  * Overlay regs
  */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 96ae90a..d793d4e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -183,12 +183,6 @@
 #define CTX_R_PWR_CLK_STATE		0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
 
-#define GEN8_CTX_VALID (1<<0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
-#define GEN8_CTX_FORCE_RESTORE (1<<2)
-#define GEN8_CTX_L3LLC_COHERENT (1<<5)
-#define GEN8_CTX_PRIVILEGE (1<<8)
-
 #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
 	const u64 _addr = test_bit(n, ppgtt->pdp.used_pdpes) ? \
 		ppgtt->pdp.page_directory[n]->daddr : \
@@ -198,13 +192,6 @@
 }
 
 enum {
-	ADVANCED_CONTEXT = 0,
-	LEGACY_CONTEXT,
-	ADVANCED_AD_CONTEXT,
-	LEGACY_64B_CONTEXT
-};
-#define GEN8_CTX_MODE_SHIFT 3
-enum {
 	FAULT_AND_HANG = 0,
 	FAULT_AND_HALT, /* Debug only */
 	FAULT_AND_STREAM,
@@ -273,7 +260,7 @@ static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
 	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
 
 	desc = GEN8_CTX_VALID;
-	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	desc |= GEN8_CTX_ADDRESSING_MODE << GEN8_CTX_ADDRESSING_MODE_SHIFT;
 	if (IS_GEN8(ctx_obj->base.dev))
 		desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
-- 
1.9.1
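
As a quick sanity check of the consolidated definitions above, a small
standalone sketch (the lrca value is made up and the real code also
folds in a context id; the constants are copied from the i915_reg.h
hunk) of how the addressing mode ends up in the execlists context
descriptor:

	/* Illustrative only: mirrors the bit layout used by
	 * execlists_ctx_descriptor() after this patch.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define GEN8_CTX_VALID			(1 << 0)
	#define GEN8_CTX_L3LLC_COHERENT		(1 << 5)
	#define GEN8_CTX_PRIVILEGE		(1 << 8)
	#define GEN8_CTX_ADDRESSING_MODE_SHIFT	3
	#define LEGACY_32B_CONTEXT		1	/* from the enum in i915_reg.h */
	#define GEN8_CTX_ADDRESSING_MODE	LEGACY_32B_CONTEXT

	int main(void)
	{
		uint64_t lrca = 0x12345000ULL;	/* made-up, page aligned LRCA */
		uint64_t desc;

		desc = GEN8_CTX_VALID;
		desc |= GEN8_CTX_ADDRESSING_MODE << GEN8_CTX_ADDRESSING_MODE_SHIFT;
		desc |= GEN8_CTX_L3LLC_COHERENT;	/* gen8 only in the real code */
		desc |= GEN8_CTX_PRIVILEGE;
		desc |= lrca;

		printf("descriptor: 0x%016llx\n", (unsigned long long)desc);
		return 0;
	}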
