[Intel-xe] [PATCH v2] drm/xe: Skip XY_FAST_COLOR instruction on link copy engines

Balasubramani Vivekanandan balasubramani.vivekanandan at intel.com
Wed Mar 8 07:23:42 UTC 2023


On 07.03.2023 17:17, Maarten Lankhorst wrote:
> 
> On 2023-03-07 09:09, Balasubramani Vivekanandan wrote:
> > Link copy engines doesn't support the XY_FAST_COLOR instruction.
> > Currently this instruction is used only at one place to clear a ttm
> > resource while migrating a BO.
> > A new device_info member is created to know if a platform has link copy
> > engine. If it supports, then instead of using XY_FAST_COLOR instruction,
> > MEM_SET is used which is available both in main and link copy engines.
> > 
> > BSpec: 68433
> > 
> > Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
> > ---
> >   drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  9 ++++
> >   drivers/gpu/drm/xe/xe_device_types.h      |  2 +
> >   drivers/gpu/drm/xe/xe_migrate.c           | 65 ++++++++++++++++-------
> >   drivers/gpu/drm/xe/xe_pci.c               |  4 ++
> >   4 files changed, 60 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > index 288576035ce3..df9ed4fbf2bf 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
> > @@ -6,6 +6,8 @@
> >   #ifndef _XE_GPU_COMMANDS_H_
> >   #define _XE_GPU_COMMANDS_H_
> > +#include "regs/xe_reg_defs.h"
> > +
> >   #define INSTR_CLIENT_SHIFT      29
> >   #define   INSTR_MI_CLIENT       0x0
> >   #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
> > @@ -56,6 +58,13 @@
> >   #define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
> >   #define   BLT_DEPTH_32			(3<<24)
> > +#define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
> > +#define   PVC_MEM_SET_CMD_LEN_DW	7
> > +#define   PVC_MS_MATRIX			REG_BIT(17)
> > +/* Bspec lists field as [6:0], but index alone is from [6:1] */
> > +#define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
> > +#define   PVC_MS_DATA_FIELD		GENMASK(31, 24)
> > +
> >   #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> >   #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
> >   #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 199bd37fce9a..a73c5e1d7503 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -95,6 +95,8 @@ struct xe_device {
> >   		bool has_4tile;
> >   		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
> >   		bool has_range_tlb_invalidation;
> > +		/** @has_link_copy_engines: Whether the platform has link copy engines */
> > +		bool has_link_copy_engine;
> >   		/** @enable_display: display enabled */
> >   		bool enable_display;
> > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > index bc69ec17d5ad..59fd588a1faf 100644
> > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > @@ -750,32 +750,57 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> >   		      u32 size, u32 pitch, u32 value, bool is_vram)
> >   {
> >   	u32 *cs = bb->cs + bb->len;
> > -	u32 len = XY_FAST_COLOR_BLT_DW;
> > +	u32 len;
> >   	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
> > +	struct xe_device *xe = gt_to_xe(gt);
> > -	if (GRAPHICS_VERx100(gt->xe) < 1250)
> > -		len = 11;
> > -
> > -	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
> > -		(len - 2);
> > -	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
> > -		(pitch - 1);
> > -	*cs++ = 0;
> > -	*cs++ = (size / pitch) << 16 | pitch / 4;
> > -	*cs++ = lower_32_bits(src_ofs);
> > -	*cs++ = upper_32_bits(src_ofs);
> > -	*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
> > -	*cs++ = value;
> > -	*cs++ = 0;
> > -	*cs++ = 0;
> > -	*cs++ = 0;
> > -
> > -	if (len > 11) {
> > -		*cs++ = 0;
> > +	if (xe->info.has_link_copy_engine) {
> > +		/* MEM_SET command supports setting only 8-bit value.
> > +		 * This function is currently used only to clear the address
> > +		 * range.  So the value agrument is not really used. Need to
> > +		 * have a better handling when there is a need to actually set
> > +		 * a value. Print a warning if a value bigger than 8-bit is
> > +		 * passed
> > +		 */
> > +		XE_WARN_ON(value > U8_MAX);
> 
> I would change it to if (value); 0xffffffff would likely work for example,
> but is bigger than U8_MAX.

I gave a thought about changing to `if (value)` but dropped it because
it would then restrict the function emit_clear() to only clear and not
set any value. Though emit_clear() is currently used in the driver to
only clear the memory region, there are some kunit test which try
setting different values. 

0xFFFFFFFF was caught by the warning. Confirmed through testing.

Regards,
Bala

> 
> ~Maarten
> 
> > +
> > +		len = PVC_MEM_SET_CMD_LEN_DW;
> > +
> > +		*cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX |
> > +				(PVC_MEM_SET_CMD_LEN_DW - 2);
> > +		*cs++ = pitch - 1;
> > +		*cs++ = (size / pitch) - 1;
> > +		*cs++ = pitch - 1;
> > +		*cs++ = lower_32_bits(src_ofs);
> > +		*cs++ = upper_32_bits(src_ofs);
> > +		*cs++ = FIELD_PREP(PVC_MS_DATA_FIELD, value) |
> > +				FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
> > +	} else {
> > +		len = XY_FAST_COLOR_BLT_DW;
> > +		if (GRAPHICS_VERx100(gt->xe) < 1250)
> > +			len = 11;
> > +
> > +		*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
> > +			(len - 2);
> > +		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
> > +			(pitch - 1);
> >   		*cs++ = 0;
> > +		*cs++ = (size / pitch) << 16 | pitch / 4;
> > +		*cs++ = lower_32_bits(src_ofs);
> > +		*cs++ = upper_32_bits(src_ofs);
> > +		*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
> > +		*cs++ = value;
> >   		*cs++ = 0;
> >   		*cs++ = 0;
> >   		*cs++ = 0;
> > +
> > +		if (len > 11) {
> > +			*cs++ = 0;
> > +			*cs++ = 0;
> > +			*cs++ = 0;
> > +			*cs++ = 0;
> > +			*cs++ = 0;
> > +		}
> >   	}
> >   	XE_BUG_ON(cs - bb->cs != len + bb->len);
> > diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> > index c4d9fd2e7b2b..e555f13395ab 100644
> > --- a/drivers/gpu/drm/xe/xe_pci.c
> > +++ b/drivers/gpu/drm/xe/xe_pci.c
> > @@ -72,6 +72,8 @@ struct xe_device_desc {
> >   	bool has_4tile;
> >   	bool has_range_tlb_invalidation;
> >   	bool has_asid;
> > +
> > +	bool has_link_copy_engine;
> >   };
> >   __diag_push();
> > @@ -224,6 +226,7 @@ static const struct xe_device_desc pvc_desc = {
> >   	.vm_max_level = 4,
> >   	.supports_usm = true,
> >   	.has_asid = true,
> > +	.has_link_copy_engine = true,
> >   };
> >   #define MTL_MEDIA_ENGINES \
> > @@ -413,6 +416,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> >   	xe->info.has_flat_ccs = desc->has_flat_ccs;
> >   	xe->info.has_4tile = desc->has_4tile;
> >   	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
> > +	xe->info.has_link_copy_engine = desc->has_link_copy_engine;
> >   	spd = subplatform_get(xe, desc);
> >   	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;


More information about the Intel-xe mailing list