[Intel-gfx] [PATCH 3/3] drm/i915: Add support for explicit L3BANK steering

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Jun 15 10:14:08 UTC 2021


On 15/06/2021 04:34, Matt Roper wrote:
> Because Render Power Gating restricts us to just a single subslice as a
> valid steering target for reads of multicast registers in a SUBSLICE
> range, the default steering we setup at init may not lead to a suitable
> target for L3BANK multicast register.  In cases where it does not, use
> explicit runtime steering whenever an L3BANK multicast register is read.
> 
> While we're at it, let's simplify the function a little bit and drop its
> support for gen10/CNL since no such platforms ever materialized for real
> use.  Multicast register steering is already an area that causes enough
> confusion; no need to complicate it with what's effectively dead code.
> 
> Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_gt.c          | 18 +++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h    |  4 +
>   drivers/gpu/drm/i915/gt/intel_workarounds.c | 84 ++++++---------------
>   3 files changed, 46 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index f2bea1c20d56..2c9cc34b0cbd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -83,6 +83,11 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
>   	gt->ggtt = ggtt;
>   }
>   
> +static const struct intel_mmio_range icl_l3bank_steering_table[] = {
> +       { 0x00B100, 0x00B3FF },
> +       { 0xFFFFFF, 0xFFFFFF }, /* terminating entry */
> +};
> +
>   int intel_gt_init_mmio(struct intel_gt *gt)
>   {
>   	intel_gt_init_clock_frequency(gt);
> @@ -90,6 +95,13 @@ int intel_gt_init_mmio(struct intel_gt *gt)
>   	intel_uc_init_mmio(&gt->uc);
>   	intel_sseu_info_init(gt);
>   
> +	if (GRAPHICS_VER(gt->i915) >= 11) {
> +		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
> +		gt->info.l3bank_mask =
> +			intel_uncore_read(&gt->i915->uncore, GEN10_MIRROR_FUSE3) &

gt->uncore, unless there's a special reason not to.

Regards,

Tvrtko

> +			GEN10_L3BANK_MASK;
> +	}
> +
>   	return intel_engines_init_mmio(gt);
>   }
>   
> @@ -744,6 +756,12 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt,
>   					u8 *sliceid, u8 *subsliceid)
>   {
>   	switch (type) {
> +	case L3BANK:
> +		GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */
> +
> +		*sliceid = __ffs(gt->info.l3bank_mask);
> +		*subsliceid = 0;        /* unused */
> +		break;
>   	default:
>   		MISSING_CASE(type);
>   		*sliceid = 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 47957837c8c0..5ecad25de6ed 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -48,6 +48,8 @@ struct intel_mmio_range {
>    * need to explicitly re-steer reads of registers of the other type.
>    */
>   enum intel_steering_type {
> +	L3BANK,
> +
>   	NUM_STEERING_TYPES
>   };
>   
> @@ -174,6 +176,8 @@ struct intel_gt {
>   		/* Media engine access to SFC per instance */
>   		u8 vdbox_sfc_access;
>   
> +		u32 l3bank_mask;
> +
>   		/* Slice/subslice/EU info */
>   		struct sseu_dev_info sseu;
>   	} info;
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 689045d3752b..a0be3c09a7f9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -944,71 +944,37 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   }
>   
>   static void
> -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
> +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   {
>   	const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
>   	unsigned int slice, subslice;
> -	u32 l3_en, mcr, mcr_mask;
> +	u32 mcr, mcr_mask;
>   
> -	GEM_BUG_ON(GRAPHICS_VER(i915) < 10);
> +	GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
> +	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
> +	slice = 0;
>   
>   	/*
> -	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
> -	 * L3Banks could be fused off in single slice scenario. If that is
> -	 * the case, we might need to program MCR select to a valid L3Bank
> -	 * by default, to make sure we correctly read certain registers
> -	 * later on (in the range 0xB100 - 0xB3FF).
> -	 *
> -	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
> -	 * Before any MMIO read into slice/subslice specific registers, MCR
> -	 * packet control register needs to be programmed to point to any
> -	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
> -	 * This means each subsequent MMIO read will be forwarded to an
> -	 * specific s/ss combination, but this is OK since these registers
> -	 * are consistent across s/ss in almost all cases. In the rare
> -	 * occasions, such as INSTDONE, where this value is dependent
> -	 * on s/ss combo, the read should be done with read_subslice_reg.
> -	 *
> -	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
> -	 * to which subslice, or to which L3 bank, the respective mmio reads
> -	 * will go, we have to find a common index which works for both
> -	 * accesses.
> -	 *
> -	 * Case where we cannot find a common index fortunately should not
> -	 * happen in production hardware, so we only emit a warning instead of
> -	 * implementing something more complex that requires checking the range
> -	 * of every MMIO read.
> +	 * Although a platform may have subslices, we need to always steer
> +	 * reads to the lowest instance that isn't fused off.  When Render
> +	 * Power Gating is enabled, grabbing forcewake will only power up a
> +	 * single subslice (the "minconfig") if there isn't a real workload
> +	 * that needs to be run; this means that if we steer register reads to
> +	 * one of the higher subslices, we run the risk of reading back 0's or
> +	 * random garbage.
>   	 */
> +	subslice = __ffs(intel_sseu_get_subslices(sseu, slice));
>   
> -	if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
> -		u32 l3_fuse =
> -			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
> -			GEN10_L3BANK_MASK;
> -
> -		drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
> -		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
> -	} else {
> -		l3_en = ~0;
> -	}
> +	/*
> +	 * If the subslice we picked above also steers us to a valid L3 bank,
> +	 * then we can just rely on the default steering and won't need to
> +	 * worry about explicitly re-steering L3BANK reads later.
> +	 */
> +	if (i915->gt.info.l3bank_mask & BIT(subslice))
> +		i915->gt.steering_table[L3BANK] = NULL;
>   
> -	slice = fls(sseu->slice_mask) - 1;
> -	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
> -	if (!subslice) {
> -		drm_warn(&i915->drm,
> -			 "No common index found between subslice mask %x and L3 bank mask %x!\n",
> -			 intel_sseu_get_subslices(sseu, slice), l3_en);
> -		subslice = fls(l3_en);
> -		drm_WARN_ON(&i915->drm, !subslice);
> -	}
> -	subslice--;
> -
> -	if (GRAPHICS_VER(i915) >= 11) {
> -		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
> -		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
> -	} else {
> -		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> -		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
> -	}
> +	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
> +	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
>   
>   	drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
>   
> @@ -1018,8 +984,6 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   static void
>   cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   {
> -	wa_init_mcr(i915, wal);
> -
>   	/* WaInPlaceDecompressionHang:cnl */
>   	wa_write_or(wal,
>   		    GEN9_GAMT_ECO_REG_RW_IA,
> @@ -1029,7 +993,7 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   static void
>   icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   {
> -	wa_init_mcr(i915, wal);
> +	icl_wa_init_mcr(i915, wal);
>   
>   	/* WaInPlaceDecompressionHang:icl */
>   	wa_write_or(wal,
> @@ -1111,7 +1075,7 @@ static void
>   gen12_gt_workarounds_init(struct drm_i915_private *i915,
>   			  struct i915_wa_list *wal)
>   {
> -	wa_init_mcr(i915, wal);
> +	icl_wa_init_mcr(i915, wal);
>   
>   	/* Wa_14011060649:tgl,rkl,dg1,adls */
>   	wa_14011060649(i915, wal);
> 


More information about the Intel-gfx mailing list