[PATCH 1/3] drm/xe/xe2: Extend performance tuning to media GT

Upadhyay, Tejas tejas.upadhyay at intel.com
Thu Sep 19 08:00:22 UTC 2024



> -----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Gustavo
> Sousa
> Sent: Thursday, September 19, 2024 2:17 AM
> To: intel-xe at lists.freedesktop.org
> Cc: Roper, Matthew D <matthew.d.roper at intel.com>
> Subject: [PATCH 1/3] drm/xe/xe2: Extend performance tuning to media GT
> 
> With the exception of "Tuning: L3 cache - media", we are currently applying
> recommended performance tuning settings only for the primary GT. Let's also
> implement them for the media GT when applicable.
> 
> According to our spec, media GT registers CCCHKNREG1 and L3SQCREG* exist
> only in Xe2_LPM and their offsets do not match their primary GT
> counterparts. Furthermore, the range where CCCHKNREG1 belongs is not
> listed as a multicast range on the media GT. As such, we need to have
> Xe2_LPM-specific definitions for those registers and apply the setting only for
> that specific IP.
> 
> Both Xe2_HPM and Xe2_LPM contain STATELESS_COMPRESSION_CTRL and
> the offset on the media GT matches the one on the primary one. However,
> the range that contains that register is not listed as a multicast range, so
> we need two different entries for media.
> 
> v2:
>   - Fix implementation with respect to multicast vs non-multicast
>     registers. (Matt)
>   - Add missing XE2LPM_CCCHKNREG1 on second action of "Tuning:
>     Compression Overfetch - media".
> 
> Bspec: 72161
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Gustavo Sousa <gustavo.sousa at intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_gt_regs.h |  7 +++++++
>  drivers/gpu/drm/xe/xe_tuning.c       | 24 ++++++++++++++++++++++++
>  2 files changed, 31 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cf21de3adca6..6ec2d2c11d77 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -80,6 +80,7 @@
>  #define   LE_CACHEABILITY_MASK			REG_GENMASK(1, 0)
>  #define   LE_CACHEABILITY(value)
> 	REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
> 
> +#define XELPMP_STATELESS_COMPRESSION_CTRL	XE_REG(0x4148)

Were you trying to say XE2LPM_ here? Also, this seems to be an MCR register.

>  #define STATELESS_COMPRESSION_CTRL
> 	XE_REG_MCR(0x4148)
>  #define   UNIFIED_COMPRESSION_FORMAT		REG_GENMASK(3, 0)
> 
> @@ -169,6 +170,8 @@
>  #define XEHP_SLICE_COMMON_ECO_CHICKEN1
> 	XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
>  #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
> 
> +#define XE2LPM_CCCHKNREG1			XE_REG(0x82a8)
> +
>  #define VF_PREEMPTION				XE_REG(0x83a4,
> XE_REG_OPTION_MASKED)
>  #define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
> 
> @@ -399,6 +402,10 @@
>  #define SCRATCH1LPFC				XE_REG(0xb474)
>  #define   EN_L3_RW_CCS_CACHE_FLUSH		REG_BIT(0)
> 
> +#define XE2LPM_L3SQCREG2			XE_REG_MCR(0xb604)
> +
> +#define XE2LPM_L3SQCREG3			XE_REG_MCR(0xb608)
> +

These are not marked as MCR in bspec. Is there something I missed?

>  #define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)
> 
>  #define XE2_TDF_CTRL				XE_REG(0xb418)
> diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
> index faa1bf42e50e..7a5b852af8d7 100644
> --- a/drivers/gpu/drm/xe/xe_tuning.c
> +++ b/drivers/gpu/drm/xe/xe_tuning.c
> @@ -42,20 +42,44 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
>  	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
>  			 SET(CCCHKNREG1, L3CMPCTRL))
>  	},
> +	{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
> +	  XE_RTP_RULES(MEDIA_VERSION(2000)),
> +	  XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
> +			 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
> +	},
>  	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch
> in L3"),
>  	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001,
> XE_RTP_END_VERSION_UNDEFINED)),
>  	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
>  	},
> +	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch
> in L3 - media"),
> +	  XE_RTP_RULES(MEDIA_VERSION(2000)),
> +	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3,
> COMPPWOVERFETCHEN))
> +	},
>  	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
>  	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001,
> XE_RTP_END_VERSION_UNDEFINED)),
>  	  XE_RTP_ACTIONS(SET(L3SQCREG2,
>  			     COMPMEMRD256BOVRFETCHEN))
>  	},
> +	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
> +	  XE_RTP_RULES(MEDIA_VERSION(2000)),
> +	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
> +			     COMPMEMRD256BOVRFETCHEN))
> +	},
>  	{ XE_RTP_NAME("Tuning: Stateless compression control"),
>  	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001,
> XE_RTP_END_VERSION_UNDEFINED)),
>  	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL,
> UNIFIED_COMPRESSION_FORMAT,
> 
> REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
>  	},
> +	{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
> +	  XE_RTP_RULES(MEDIA_VERSION(2000)),
> +	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL,
> UNIFIED_COMPRESSION_FORMAT,
> +
> REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
> +	},
> +	{ XE_RTP_NAME("Tuning: Stateless compression control - media
> (Xe2_HPM)"),
> +	  XE_RTP_RULES(MEDIA_VERSION(1301)),
> +
> XE_RTP_ACTIONS(FIELD_SET(XELPMP_STATELESS_COMPRESSION_CTRL,
> UNIFIED_COMPRESSION_FORMAT,
> +
> REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
> +	},
>  	{}
>  };
> 
> --
> 2.46.1



More information about the Intel-xe mailing list