[Intel-gfx] [PATCH v7 1/2] drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads
Oscar Mateo
oscar.mateo at intel.com
Mon Apr 16 22:09:58 UTC 2018
On 04/16/2018 02:22 PM, Yunwei Zhang wrote:
> WaProgramMgsrForCorrectSliceSpecificMmioReads dictates that before any MMIO
> read into Slice/Subslice specific registers, the MCR packet control
> register (0xFDC) needs to be programmed to point to any enabled
> slice/subslice pair. Otherwise, an incorrect value will be returned.
>
> However, that means each subsequent MMIO read will be forwarded to a
> specific slice/subslice combination, as reads are unicast. This is OK since
> slice/subslice specific register values are consistent in almost all cases
> across slice/subslice. There are rare cases, such as INSTDONE, where the
> value depends on the slice/subslice combo; in such cases, we need to
> program 0xFDC and restore it afterwards. This is already covered by
> read_subslice_reg.
>
> Also, 0xFDC will lose its information after TDR/engine reset/power state
> change.
>
> References: HSD#1405586840, BSID#0575
>
> v2:
> - use fls() instead of find_last_bit() (Chris)
> - added INTEL_SSEU to extract sseu from device info. (Chris)
> v3:
> - rebase on latest tip
> v5:
> - Added references (Mika)
> - Changed the order of passing arguments, etc. (Ursulin)
> v7:
> - Rebased.
>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Signed-off-by: Yunwei Zhang <yunwei.zhang at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 ++
> drivers/gpu/drm/i915/intel_engine_cs.c | 30 +++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/intel_workarounds.c | 12 ++++++++++++
> 3 files changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8e8667d..43498a47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
> int intel_engines_init(struct drm_i915_private *dev_priv);
>
> +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
> +
As a global function, this could use a better prefix (intel_something_).
Alternatively, make it local and store the calculation somewhere.
> /* intel_hotplug.c */
> void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
> u32 pin_mask, u32 long_mask);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1a83707..3b6bc5e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
> }
> }
>
> +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
> +{
> + const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
> + u32 slice = fls(sseu->slice_mask);
> + u32 subslice = fls(sseu->subslice_mask[slice]);
> +
> + mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
> + mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> +
> + return mcr;
> +}
> +
> static inline uint32_t
> read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
> int subslice, i915_reg_t reg)
> @@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
> intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>
> mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
> +
> /*
> * The HW expects the slice and sublice selectors to be reset to 0
> - * after reading out the registers.
> + * before GEN10 or to a enabled s/ss post GEN10 after reading out the
> + * registers.
> */
> - WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
> + WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
> + (mcr & mcr_slice_subslice_mask));
Advantage of storing the calculation: you can assert here for the
expected value, independently of the platform.
> mcr &= ~mcr_slice_subslice_mask;
> mcr |= mcr_slice_subslice_select;
> I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>
> ret = I915_READ_FW(reg);
>
> - mcr &= ~mcr_slice_subslice_mask;
> + /*
> + * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
> + * expects mcr to be programed to a enabled slice/subslice pair
> + * before any MMIO read into slice/subslice register
> + */
> + if (INTEL_GEN(dev_priv) < 10)
> + mcr &= ~mcr_slice_subslice_mask;
> + else
> + mcr = calculate_mcr(dev_priv, mcr);
Another advantage: no branching here either.
> +
> I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>
> intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index ec9d340..8a2354e 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -645,8 +645,20 @@ static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> }
>
> +static void wa_init_mcr(struct drm_i915_private *dev_priv)
> +{
> + u32 mcr;
> +
> + mcr = I915_READ(GEN8_MCR_SELECTOR);
> + mcr = calculate_mcr(dev_priv, mcr);
> + I915_WRITE(GEN8_MCR_SELECTOR, mcr);
> +}
> +
> static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> {
> + /* WaProgramMgsrForCorrectSliceSpecificMmioReads: cnl */
> + wa_init_mcr(dev_priv);
> +
> /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
> if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
> I915_WRITE(GAMT_CHKN_BIT_REG,
With one of the two above (appropriate prefix or store value), this is:
Reviewed-by: Oscar Mateo <oscar.mateo at intel.com>
And as a side note: this is also needed for Icelake.
More information about the Intel-gfx
mailing list