[PATCH 03/10] drm/i915: Fix and improve MCR selection logic

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Jul 16 09:44:23 UTC 2019


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

A couple issues were present in this code:

1.
fls() usage was incorrect causing off by one in subslice mask lookup,
which in other words means subslice mask of all zeroes is always used
(subslice mask of a slice which is not present, or even out of bounds
array access), rendering the checks in wa_init_mcr either futile or
random.

2.
Condition in WARN_ON was not correct. It is doing a bitwise and operation
between a positive (present subslices) and negative mask (disabled L3
banks).

This means that with corrected fls() usage the assert would always
incorrectly fail.

We could fix this by inverting the fuse bits in the check, but instead do
one better and improve the code so it not only asserts, but finds the
first common index between the two masks and only warns if no such index
can be found.

We also change fls() to ffs() since it is more logical to select the first
available instance respectively.

v2:
 * Simplify check for logic and redability.
 * Improve commentary explaining what is really happening ie. what the
   assert is really trying to check and why.

v3:
 * Use/add symbolic names for constants. (Stuart)

v4:
 * Find first common index instead of just asserting.
 * Switch to ffs() since it is more logical to use first available
   subslice or L3 bank.

v5:
 * Add some debug logging.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Fixes: fe864b76c2ab ("drm/i915: Implement WaProgramMgsrForL3BankSpecificMmioReads")
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk> # v1
Cc: MichaƂ Winiarski <michal.winiarski at intel.com>
Cc: Stuart Summers <stuart.summers at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   |  4 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 90 +++++++++++----------
 2 files changed, 51 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ab0f0c01723b..46068bfdd33a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -962,12 +962,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
 {
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	unsigned int slice = fls(sseu->slice_mask) - 1;
+	unsigned int slice = ffs(sseu->slice_mask) - 1;
 	unsigned int subslice;
 	u32 mcr_s_ss_select;
 
 	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
-	subslice = fls(sseu->subslice_mask[slice]);
+	subslice = ffs(sseu->subslice_mask[slice]);
 	GEM_BUG_ON(!subslice);
 	subslice--;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3b1fc7c8faa8..e133742fc4b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -762,7 +762,10 @@ static void
 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 {
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
-	u32 mcr_slice_subslice_mask;
+	unsigned int slice, subslice;
+	u32 l3_en, mcr, mcr_mask;
+
+	GEM_BUG_ON(INTEL_GEN(i915) < 10);
 
 	/*
 	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
@@ -770,42 +773,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	 * the case, we might need to program MCR select to a valid L3Bank
 	 * by default, to make sure we correctly read certain registers
 	 * later on (in the range 0xB100 - 0xB3FF).
-	 * This might be incompatible with
-	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
-	 * Fortunately, this should not happen in production hardware, so
-	 * we only assert that this is the case (instead of implementing
-	 * something more complex that requires checking the range of every
-	 * MMIO read).
-	 */
-	if (INTEL_GEN(i915) >= 10 &&
-	    is_power_of_2(sseu->slice_mask)) {
-		/*
-		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
-		 * enabled subslice, no need to redirect MCR packet
-		 */
-		u32 slice = fls(sseu->slice_mask);
-		u32 fuse3 =
-			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
-		u8 ss_mask = sseu->subslice_mask[slice];
-
-		u8 enabled_mask = (ss_mask | ss_mask >>
-				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
-		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
-
-		/*
-		 * Production silicon should have matched L3Bank and
-		 * subslice enabled
-		 */
-		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
-	}
-
-	if (INTEL_GEN(i915) >= 11)
-		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
-					  GEN11_MCR_SUBSLICE_MASK;
-	else
-		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
-					  GEN8_MCR_SUBSLICE_MASK;
-	/*
+	 *
 	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
 	 * Before any MMIO read into slice/subslice specific registers, MCR
 	 * packet control register needs to be programmed to point to any
@@ -815,11 +783,51 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	 * are consistent across s/ss in almost all cases. In the rare
 	 * occasions, such as INSTDONE, where this value is dependent
 	 * on s/ss combo, the read should be done with read_subslice_reg.
+	 *
+	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
+	 * to which subslice, or to which L3 bank, the respective mmio reads
+	 * will go, we have to find a common index which works for both
+	 * accesses.
+	 *
+	 * Case where we cannot find a common index fortunately should not
+	 * happen in production hardware, so we only emit a warning instead of
+	 * implementing something more complex that requires checking the range
+	 * of every MMIO read.
 	 */
-	wa_write_masked_or(wal,
-			   GEN8_MCR_SELECTOR,
-			   mcr_slice_subslice_mask,
-			   intel_calculate_mcr_s_ss_select(i915));
+
+	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
+		u32 l3_fuse =
+			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
+			GEN10_L3BANK_MASK;
+
+		DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
+	} else {
+		l3_en = ~0;
+	}
+
+	slice = ffs(sseu->slice_mask) - 1;
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+	subslice = ffs(l3_en & sseu->subslice_mask[slice]);
+	if (!subslice) {
+		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+			 sseu->subslice_mask[slice], l3_en);
+		subslice = ffs(l3_en);
+		WARN_ON(!subslice);
+	}
+	subslice--;
+
+	if (INTEL_GEN(i915) >= 11) {
+		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
+		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+	} else {
+		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+	}
+
+	DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+
+	wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
 }
 
 static void
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list