[Intel-xe] [PATCH 4/4] drm/xe/xe2: Update MOCS fields in blitter instructions

Matt Roper matthew.d.roper at intel.com
Fri Sep 29 16:20:07 UTC 2023


On Thu, Sep 28, 2023 at 09:43:51PM -0700, Lucas De Marchi wrote:
> From: Haridhar Kalvala <haridhar.kalvala at intel.com>
> 
> Xe2 changes or adds bits for mocs in a few BLT instructions:
> XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET.
> Modify the code to deal with the new location.

We should also mention in the commit message that, unlike Xe1, the
documented "MOCS" field on Xe2 is only the MOCS index, and that the PXP
bit is now explicitly documented as a separate field.

Aside from that,

Reviewed-by: Matt Roper <matthew.d.roper at intel.com>

> 
> Bspec: 57567,57566,57565,57562
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Haridhar Kalvala <haridhar.kalvala at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  4 ++
>  drivers/gpu/drm/xe/xe_migrate.c           | 47 +++++++++++++++--------
>  2 files changed, 34 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> index cc7b56763f10..21738281bdd0 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> @@ -45,6 +45,7 @@
>  #define   CCS_SIZE_MASK			0x3FF
>  #define   CCS_SIZE_SHIFT		8
>  #define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
> +#define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
>  #define   NUM_CCS_BYTES_PER_BLOCK	256
>  #define   NUM_BYTES_PER_CCS_BYTE	256
>  #define   NUM_CCS_BLKS_PER_XFER		1024
> @@ -53,12 +54,14 @@
>  #define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
>  #define   XY_FAST_COLOR_BLT_DW		16
>  #define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 22)
> +#define   XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK	GENMASK(27, 24)
>  #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  
>  #define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
>  #define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
>  #define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
>  #define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
> +#define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)
>  
>  #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
>  #define   PVC_MEM_SET_CMD_LEN_DW	7
> @@ -66,6 +69,7 @@
>  #define   PVC_MEM_SET_DATA_FIELD	GENMASK(31, 24)
>  /* Bspec lists field as [6:0], but index alone is from [6:1] */
>  #define   PVC_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 1)
> +#define   XE2_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 3)
>  
>  #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
>  
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index b682d34bc1e5..1b4afa8c0560 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
>  			  u64 src_ofs, bool src_is_indirect,
>  			  u32 size)
>  {
> +	struct xe_device *xe = gt_to_xe(gt);
>  	u32 *cs = bb->cs + bb->len;
>  	u32 num_ccs_blks;
> -	u32 mocs = gt->mocs.uc_index;
> +	u32 mocs;
>  
>  	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
>  				    NUM_CCS_BYTES_PER_BLOCK);
>  	xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
> +
> +	if (GRAPHICS_VERx100(xe) >= 2000)
> +		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
> +	else
> +		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
> +
>  	*cs++ = XY_CTRL_SURF_COPY_BLT |
>  		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
>  		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
>  		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
>  	*cs++ = lower_32_bits(src_ofs);
> -	*cs++ = upper_32_bits(src_ofs) |
> -		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
> +	*cs++ = upper_32_bits(src_ofs) | mocs;
>  	*cs++ = lower_32_bits(dst_ofs);
> -	*cs++ = upper_32_bits(dst_ofs) |
> -		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
> +	*cs++ = upper_32_bits(dst_ofs) | mocs;
>  
>  	bb->len = cs - bb->cs;
>  }
> @@ -544,24 +549,26 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
>  		      unsigned int pitch)
>  {
>  	struct xe_device *xe = gt_to_xe(gt);
> +	u32 mocs = 0;
> +	u32 tile_y = 0;
>  
>  	xe_gt_assert(gt, size / pitch <= S16_MAX);
>  	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
>  	xe_gt_assert(gt, pitch <= U16_MAX);
>  
> +	if (GRAPHICS_VERx100(xe) >= 2000) {
> +		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
> +		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
> +	}
> +
>  	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
> -	if (GRAPHICS_VER(xe) >= 20)
> -		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
> -				    XY_FAST_COPY_BLT_D1_SRC_TILE4 |
> -				    XY_FAST_COPY_BLT_D1_DST_TILE4;
> -	else
> -		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
> +	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
>  	bb->cs[bb->len++] = 0;
>  	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
>  	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
>  	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
>  	bb->cs[bb->len++] = 0;
> -	bb->cs[bb->len++] = pitch;
> +	bb->cs[bb->len++] = pitch | mocs;
>  	bb->cs[bb->len++] = lower_32_bits(src_ofs);
>  	bb->cs[bb->len++] = upper_32_bits(src_ofs);
>  }
> @@ -812,8 +819,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>  static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>  				 u32 size, u32 pitch)
>  {
> +	struct xe_device *xe = gt_to_xe(gt);
>  	u32 *cs = bb->cs + bb->len;
> -	u32 mocs = gt->mocs.uc_index;
>  	u32 len = PVC_MEM_SET_CMD_LEN_DW;
>  
>  	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
> @@ -822,7 +829,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
>  	*cs++ = pitch - 1;
>  	*cs++ = lower_32_bits(src_ofs);
>  	*cs++ = upper_32_bits(src_ofs);
> -	*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
> +	if (GRAPHICS_VERx100(xe) >= 2000)
> +		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
> +	else
> +		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
>  
>  	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
>  
> @@ -835,15 +845,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
>  	struct xe_device *xe = gt_to_xe(gt);
>  	u32 *cs = bb->cs + bb->len;
>  	u32 len = XY_FAST_COLOR_BLT_DW;
> -	u32 mocs = gt->mocs.uc_index;
>  
>  	if (GRAPHICS_VERx100(xe) < 1250)
>  		len = 11;
>  
>  	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
>  		(len - 2);
> -	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
> -		(pitch - 1);
> +	if (GRAPHICS_VERx100(xe) >= 2000)
> +		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
> +			(pitch - 1);
> +	else
> +		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
> +			(pitch - 1);
>  	*cs++ = 0;
>  	*cs++ = (size / pitch) << 16 | pitch / 4;
>  	*cs++ = lower_32_bits(src_ofs);
> -- 
> 2.40.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list