[Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds
Matt Roper
matthew.d.roper at intel.com
Mon Oct 23 16:29:22 UTC 2023
On Mon, Oct 23, 2023 at 08:21:36AM -0700, Lucas De Marchi wrote:
> From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane at intel.com>
>
> Add the initial collection of gt/engine/lrc workarounds.
>
> Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane at intel.com>
> Signed-off-by: Shekhar Chauhan <shekhar.chauhan at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 ++++++++++
> drivers/gpu/drm/xe/xe_wa.c | 68 ++++++++++++++++++++++++++++
> 2 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cd1821d96a5d..8e01ae49ef21 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -94,7 +94,14 @@
> #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
> #define TBIMR_FAST_CLIP REG_BIT(5)
>
> +#define FF_MODE XE_REG(0x6210)
This register should be marked as MCR.
> +#define DIS_TE_AUTOSTRIP REG_BIT(31)
> +#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16)
> +#define DIS_MESH_AUTOSTRIP REG_BIT(15)
> +
> #define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
> +#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9)
> +#define DIS_AUTOSTRIP REG_BIT(6)
> #define DIS_OVER_FETCH_CACHE REG_BIT(1)
> #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
>
> @@ -111,6 +118,9 @@
> #define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
> #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
>
> +#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
> +#define FD_END_COLLECT REG_BIT(5)
> +
> #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED)
> #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
> #define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
> @@ -133,6 +143,9 @@
> #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED)
> #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
>
> +#define VF_SCRATCHPAD XE_REG(0x83a8, XE_REG_OPTION_MASKED)
> +#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE REG_BIT(13)
> +
> #define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED)
> #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
>
> @@ -225,6 +238,7 @@
> #define MSCUNIT_CLKGATE_DIS REG_BIT(10)
> #define RCCUNIT_CLKGATE_DIS REG_BIT(7)
> #define SARBUNIT_CLKGATE_DIS REG_BIT(5)
> +#define SBEUNIT_CLKGATE_DIS REG_BIT(4)
>
> #define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4)
> #define VSUNIT_CLKGATE2_DIS REG_BIT(19)
> @@ -276,6 +290,8 @@
> #define XEHP_L3SCQREG7 XE_REG_MCR(0xb188)
> #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
>
> +#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
> +
> #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
> #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
> #define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
> @@ -299,6 +315,9 @@
> #define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc)
> #define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc)
>
> +#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
> +#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
> +
> #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
> #define ENABLE_SMALLPL REG_BIT(15)
> #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
> @@ -328,6 +347,7 @@
> #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
> #define UGM_BACKUP_MODE REG_BIT(13)
> #define MDQ_ARBITRATION_MODE REG_BIT(12)
> +#define EARLY_EOT_DIS REG_BIT(1)
>
> #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
> #define DISABLE_READ_SUPPRESSION REG_BIT(15)
> @@ -345,11 +365,15 @@
>
> #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
> #define DISABLE_D8_D16_COASLESCE REG_BIT(30)
> +#define TGM_WRITE_EOM_FORCE REG_BIT(17)
> #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
> +#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13)
>
> #define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4)
> #define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
> #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
> +#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32)
> +#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32)
> #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
> #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
> #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
> index 1450af6cab34..b22c9f90b080 100644
> --- a/drivers/gpu/drm/xe/xe_wa.c
> +++ b/drivers/gpu/drm/xe/xe_wa.c
> @@ -245,6 +245,19 @@ static const struct xe_rtp_entry_sr gt_was[] = {
> XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
> },
>
> + /* Xe2_LPG */
> + { XE_RTP_NAME("16020975621"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
> + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
> + },
> + { XE_RTP_NAME("14018157293"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
> + XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
> + SET(XEHPC_L3CLOS_MASK(1), ~0),
> + SET(XEHPC_L3CLOS_MASK(2), ~0),
> + SET(XEHPC_L3CLOS_MASK(3), ~0))
> + },
> +
> {}
> };
>
> @@ -266,6 +279,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
> IS_INTEGRATED),
> XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
> },
> + { XE_RTP_NAME("18032247524"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
> + },
Any specific reason this one isn't down in the "Xe2_LPG" section below
with the others?
> { XE_RTP_NAME("1606931601"),
> XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
> XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
> @@ -539,6 +557,40 @@ static const struct xe_rtp_entry_sr engine_was[] = {
> XE_RTP_NOCHECK))
> },
>
> + /* Xe2_LPG */
> +
> + { XE_RTP_NAME("16018712365"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
> + },
> + { XE_RTP_NAME("14018957109"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
> + },
> + { XE_RTP_NAME("14019877138"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
According to bspec 65182, this register is part of the RCS engine's LRC
image, so this probably needs to move to lrc_was[] and become
RCS-specific.
> + },
> + { XE_RTP_NAME("16021540221"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
> + },
> + { XE_RTP_NAME("14019322943"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
> + },
> + { XE_RTP_NAME("14018471104"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
> + },
> + { XE_RTP_NAME("16018737384"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
> + },
> +
> {}
> };
>
> @@ -630,6 +682,22 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
> XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
> },
>
> + /* Xe2_LPG */
> + { XE_RTP_NAME("16020518922"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
We also need ENGINE_CLASS(RENDER) on this since this is specifically on
the RCS LRC.
Actually, all of our existing lrc_was[] entries look like they're missing
this too, but we can update those in a separate patch.
Matt
> + XE_RTP_ACTIONS(SET(FF_MODE,
> + DIS_TE_AUTOSTRIP |
> + DIS_MESH_PARTIAL_AUTOSTRIP |
> + DIS_MESH_AUTOSTRIP),
> + SET(VFLSKPD,
> + DIS_PARTIAL_AUTOSTRIP |
> + DIS_AUTOSTRIP))
> + },
> + { XE_RTP_NAME("14019386621"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
> + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
> + },
> +
> {}
> };
>
> --
> 2.40.1
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe mailing list