[Intel-gfx] [PATCH 01/22] drm/i915/icl: Introduce initial Icelake Workarounds
Oscar Mateo
oscar.mateo at intel.com
Mon Apr 30 17:12:03 UTC 2018
On 04/26/2018 08:01 AM, Mika Kuoppala wrote:
> Oscar Mateo <oscar.mateo at intel.com> writes:
>
>> Inherit workarounds from previous platforms that are still valid for
>> Icelake.
>>
>> v2: GEN7_ROW_CHICKEN2 is masked
>> v3:
>> - Since it has been fixed already in upstream, removed the TODO
>> comment about WA_SET_BIT for WaInPlaceDecompressionHang.
>> - Squashed with this patch:
>> drm/i915/icl: add icelake_init_clock_gating()
>> from Paulo Zanoni <paulo.r.zanoni at intel.com>
>> - Squashed with this patch:
>> drm/i915/icl: WaForceEnableNonCoherent
>> from Oscar Mateo <oscar.mateo at intel.com>
>> - WaPushConstantDereferenceHoldDisable is now Wa_1604370585 and
>> applies to B0 as well.
>> - WaPipeControlBefore3DStateSamplePattern WABB was being applied
>> to ICL incorrectly.
>> v4:
>> - Wrap the commit message
>> - s/dev_priv/p to please checkpatch
>> v5: Rebased on top of the WA refactoring
>> v6: Rebased on top of further whitelist registers refactoring (Michel)
>> v7: Added WaRsForcewakeAddDelayForAck
>>
>> Cc: Tomasz Lis <tomasz.lis at intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 9 +++++++
>> drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +--
>> drivers/gpu/drm/i915/i915_reg.h | 1 +
>> drivers/gpu/drm/i915/intel_lrc.c | 2 ++
>> drivers/gpu/drm/i915/intel_pm.c | 4 ++-
>> drivers/gpu/drm/i915/intel_uncore.c | 7 +++--
>> drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++++++++++
>> 7 files changed, 68 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0286911..1dc157f 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2462,6 +2462,15 @@ static inline unsigned int i915_sg_segment_size(void)
>> #define IS_CNL_REVID(p, since, until) \
>> (IS_CANNONLAKE(p) && IS_REVID(p, since, until))
>>
>> +#define ICL_REVID_A0 0x0
>> +#define ICL_REVID_A2 0x1
> Just noted that for some reason bspec puts A0 and A2 under
> same revid. Bspec err?
That's what I hope. I have opened a bug against the BSpec to be 100% sure.
>> +#define ICL_REVID_B0 0x3
>> +#define ICL_REVID_B2 0x4
>> +#define ICL_REVID_C0 0x5
>> +
>> +#define IS_ICL_REVID(p, since, until) \
>> + (IS_ICELAKE(p) && IS_REVID(p, since, until))
>> +
>> /*
>> * The genX designation typically refers to the render engine, so render
>> * capability related checks should use IS_GEN, while display and other checks
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 21d72f6..221b873 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -2140,12 +2140,12 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
>> * called on driver load and after a GPU reset, so you can place
>> * workarounds here even if they get overwritten by GPU reset.
>> */
>> - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
>> + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
>> if (IS_BROADWELL(dev_priv))
>> I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
>> else if (IS_CHERRYVIEW(dev_priv))
>> I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
>> - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
>> + else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv) || IS_GEN11(dev_priv))
>> I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
>> else if (IS_GEN9_LP(dev_priv))
>> I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index fb10602..f2ee225 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7203,6 +7203,7 @@ enum {
>> /* GEN8 chicken */
>> #define HDC_CHICKEN0 _MMIO(0x7300)
>> #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0)
>> +#define ICL_HDC_CHICKEN0 _MMIO(0xE5F4)
>> #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15)
>> #define HDC_FENCE_DEST_SLM_DISABLE (1<<14)
>> #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11)
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 029901a..2d6572a 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1636,6 +1636,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>> return -EINVAL;
>>
>> switch (INTEL_GEN(engine->i915)) {
>> + case 11:
>> + return 0;
>> case 10:
>> wa_bb_fn[0] = gen10_init_indirectctx_bb;
>> wa_bb_fn[1] = NULL;
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 4baab85..3b7d804 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -9123,7 +9123,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
>> */
>> void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
>> {
>> - if (IS_CANNONLAKE(dev_priv))
>> + if (IS_ICELAKE(dev_priv))
>> + dev_priv->display.init_clock_gating = nop_init_clock_gating;
>> + else if (IS_CANNONLAKE(dev_priv))
>> dev_priv->display.init_clock_gating = cnl_init_clock_gating;
>> else if (IS_COFFEELAKE(dev_priv))
>> dev_priv->display.init_clock_gating = cfl_init_clock_gating;
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index d6e20f0..448293e 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -139,7 +139,9 @@ enum ack_type {
>> * in the hope that the original ack will be delivered along with
>> * the fallback ack.
>> *
>> - * This workaround is described in HSDES #1604254524
>> + * This workaround is described in HSDES #1604254524 and it's known as:
>> + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl
>> + * although the name is a bit misleading.
> Just for the record:
>
> When I implemented this there was recommendation to do both, delaying
> for ack and then this method of using a reserver bit. My interpretation
> was that the delay was used as a first weapon to combat the issue. And
> then later, reserve bit method appeared.
>
> I did not use WaRsForcewakeAddDelayForAck as I thought that this will be
> named differently. And also I think this method is a superset,
> making delaying irrelevant. As we fallback to reserve is we miss ack
> so no need to delay before polling. And adding delay to hotpath should
> be the last resort anyways.
>
> I think this is the evolution of WaRsForcewakeAddDelayForAck
> (v2) and there is no better name, we should keep it.
Yes, I read the mailing lists comments about this. AFAICT, there is no
better name.
>> */
>>
>> pass = 1;
>> @@ -1394,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>> if (INTEL_GEN(dev_priv) >= 11) {
>> int i;
>>
>> - dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
>> + dev_priv->uncore.funcs.force_wake_get =
>> + fw_domains_get_with_fallback;
>> dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>> fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>> FORCEWAKE_RENDER_GEN9,
>> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
>> index ec9d340..3f00623 100644
>> --- a/drivers/gpu/drm/i915/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
>> @@ -441,6 +441,27 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>> return 0;
>> }
>>
>> +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>> +{
>> + /* Wa_1604370585:icl (pre-prod)
>> + * Formerly known as WaPushConstantDereferenceHoldDisable
>> + */
>> + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
>> + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
>> + PUSH_CONSTANT_DEREF_DISABLE);
> Inherited from CNL and had to check if we have that on cnl. We do.
>
>> +
>> + /* WaForceEnableNonCoherent:icl
>> + * This is not the same workaround as in early Gen9 platforms, where
>> + * lacking this could cause system hangs, but coherency performance
>> + * overhead is high and only a few compute workloads really need it
>> + * (the register is whitelisted in hardware now, so UMDs can opt in
>> + * for coherency if they have a good reason).
>> + */
>> + WA_SET_BIT_MASKED(ICL_HDC_CHICKEN0, HDC_FORCE_NON_COHERENT);
> Right, but the register name should be ICL_HDC_MODE.
ACK
>> +
>> + return 0;
>> +}
>> +
>> int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>> {
>> int err = 0;
>> @@ -465,6 +486,8 @@ int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>> err = cfl_ctx_workarounds_init(dev_priv);
>> else if (IS_CANNONLAKE(dev_priv))
>> err = cnl_ctx_workarounds_init(dev_priv);
>> + else if (IS_ICELAKE(dev_priv))
>> + err = icl_ctx_workarounds_init(dev_priv);
>> else
>> MISSING_CASE(INTEL_GEN(dev_priv));
>> if (err)
>> @@ -663,6 +686,21 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>> _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
>> }
>>
>> +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>> +{
>> + /* This is not an Wa. Enable for better image quality */
>> + I915_WRITE(_3D_CHICKEN3,
>> + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
>> +
>> + /* WaInPlaceDecompressionHang:icl */
>> + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
>> +
>> + /* WaPipelineFlushCoherentLines:icl */
>> + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
>> + GEN8_LQSC_FLUSH_COHERENT_LINES));
> Didn't find a HSDES entry for this. The workaround name and the reg/bit
> matches tho.
>
> But the real question in here is that do we need to set this through
> indirect bb like we do with gen[8,9].
>
> And just to note that cnl is missing this too. But that can be done
> as a followup when we first figure out that should we use the indirect
> bb for all >= gen8.
>
> -Mika
Hmmmm... looking at it more carefully: isn't that a slightly different
WA: WaPipelineFlushCoherentLines (not needed starting CNL)?
This is equivalent to a different WA: WaOCLCoherentLineFlush.
But indeed, it seems to be required for CNL as well. In fact, I'm pretty
sure I added it to this patch because I saw it in CNL. Did it get lost?
>> +}
>> +
>> void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>> {
>> if (INTEL_GEN(dev_priv) < 8)
>> @@ -683,6 +721,8 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>> cfl_gt_workarounds_apply(dev_priv);
>> else if (IS_CANNONLAKE(dev_priv))
>> cnl_gt_workarounds_apply(dev_priv);
>> + else if (IS_ICELAKE(dev_priv))
>> + icl_gt_workarounds_apply(dev_priv);
>> else
>> MISSING_CASE(INTEL_GEN(dev_priv));
>> }
>> @@ -761,6 +801,10 @@ static void cnl_whitelist_build(struct whitelist *w)
>> whitelist_reg(w, GEN8_CS_CHICKEN1);
>> }
>>
>> +static void icl_whitelist_build(struct whitelist *w)
>> +{
>> +}
>> +
>> static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>> struct whitelist *w)
>> {
>> @@ -789,6 +833,8 @@ static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>> cfl_whitelist_build(w);
>> else if (IS_CANNONLAKE(i915))
>> cnl_whitelist_build(w);
>> + else if (IS_ICELAKE(i915))
>> + icl_whitelist_build(w);
>> else
>> MISSING_CASE(INTEL_GEN(i915));
>>
>> --
>> 1.9.1
More information about the Intel-gfx
mailing list