[Intel-xe] [PATCH v5 2/2] drm/xe: Skip XY_FAST_COLOR instruction on link copy engines

Lucas De Marchi lucas.demarchi at intel.com
Fri Mar 17 16:13:17 UTC 2023


On Fri, Mar 17, 2023 at 09:05:31PM +0530, Balasubramani Vivekanandan wrote:
>Link copy engines doesn't support the XY_FAST_COLOR instruction.
>Currently this instruction is used only at one place to clear a ttm
>resource while migrating a BO.
>A new device_info member is created to know if a platform has link copy
>engine. If it supports, then instead of using XY_FAST_COLOR instruction,
>MEM_SET is used which is available both in main and link copy engines.
>
>BSpec: 68433
>
>Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
>---
> drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  9 ++++
> drivers/gpu/drm/xe/xe_device_types.h      |  2 +
> drivers/gpu/drm/xe/xe_migrate.c           | 57 +++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_pci.c               |  4 ++
> 4 files changed, 67 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>index 288576035ce3..e60372a82723 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
>@@ -6,6 +6,8 @@
> #ifndef _XE_GPU_COMMANDS_H_
> #define _XE_GPU_COMMANDS_H_
>
>+#include "regs/xe_reg_defs.h"
>+
> #define INSTR_CLIENT_SHIFT      29
> #define   INSTR_MI_CLIENT       0x0
> #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
>@@ -56,6 +58,13 @@
> #define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
> #define   BLT_DEPTH_32			(3<<24)
>
>+#define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
>+#define   PVC_MEM_SET_CMD_LEN_DW	7
>+#define   PVC_MS_MATRIX			REG_BIT(17)
>+#define   PVC_MS_DATA_FIELD		GENMASK(31, 24)
>+/* Bspec lists field as [6:0], but index alone is from [6:1] */
>+#define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
>+
> #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
> #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
>diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>index 034e0956f4ea..46cf37224090 100644
>--- a/drivers/gpu/drm/xe/xe_device_types.h
>+++ b/drivers/gpu/drm/xe/xe_device_types.h
>@@ -95,6 +95,8 @@ struct xe_device {
> 		bool has_4tile;
> 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
> 		bool has_range_tlb_invalidation;
>+		/** @has_link_copy_engines: Whether the platform has link copy engines */
>+		bool has_link_copy_engine;
> 		/** @enable_display: display enabled */
> 		bool enable_display;
>
>diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>index fdcbab31a418..1316614bff20 100644
>--- a/drivers/gpu/drm/xe/xe_migrate.c
>+++ b/drivers/gpu/drm/xe/xe_migrate.c
>@@ -746,14 +746,36 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
> 	return fence;
> }
>
>-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>-		      u32 size, u32 pitch, bool is_vram)
>+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>+				 u32 size, u32 pitch)
>+{
>+	u32 *cs = bb->cs + bb->len;
>+	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
>+	u32 len = PVC_MEM_SET_CMD_LEN_DW;
>+
>+	*cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
>+	*cs++ = pitch - 1;
>+	*cs++ = (size / pitch) - 1;
>+	*cs++ = pitch - 1;
>+	*cs++ = lower_32_bits(src_ofs);
>+	*cs++ = upper_32_bits(src_ofs);
>+	*cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
>+
>+	XE_BUG_ON(cs - bb->cs != len + bb->len);
>+
>+	bb->len += len;
>+}
>+
>+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
>+				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
> {
> 	u32 *cs = bb->cs + bb->len;
>-	u32 len = XY_FAST_COLOR_BLT_DW;
>+	u32 len;
> 	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
>+	struct xe_device *xe = gt_to_xe(gt);

nit: this is usually the first line in a function so other things can use it.

>
>-	if (GRAPHICS_VERx100(gt->xe) < 1250)
>+	len = XY_FAST_COLOR_BLT_DW;
>+	if (GRAPHICS_VERx100(xe) < 1250)
> 		len = 11;

there is no behavior change in this function, or am I missing anything?
Was it done just to match the style from another function?  I don't mind
the change.

>
> 	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
>@@ -779,7 +801,31 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
> 	}
>
> 	XE_BUG_ON(cs - bb->cs != len + bb->len);
>+
> 	bb->len += len;
>+}
>+
>+static u32 emit_clear_cmd_len(struct xe_device *xe)
>+{
>+	if (xe->info.has_link_copy_engine)
>+		return PVC_MEM_SET_CMD_LEN_DW;
>+	else
>+		return XY_FAST_COLOR_BLT_DW;
>+}
>+
>+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>+		      u32 size, u32 pitch, bool is_vram)
>+{
>+	struct xe_device *xe = gt_to_xe(gt);
>+
>+	if (xe->info.has_link_copy_engine) {
>+		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
>+
>+	} else {
>+		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
>+				     is_vram);
>+	}
>+

nit: stray newline

>
> 	return 0;
> }
>@@ -834,7 +880,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> 		batch_size = 2 +
> 			pte_update_size(m, clear_vram, &src_it,
> 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
>-					XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
>+					emit_clear_cmd_len(xe), 0,
>+					NUM_PT_PER_BLIT);
> 		if (xe_device_has_flat_ccs(xe) && clear_vram)
> 			batch_size += EMIT_COPY_CCS_DW;
>
>diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>index c4d9fd2e7b2b..e555f13395ab 100644
>--- a/drivers/gpu/drm/xe/xe_pci.c
>+++ b/drivers/gpu/drm/xe/xe_pci.c
>@@ -72,6 +72,8 @@ struct xe_device_desc {
> 	bool has_4tile;
> 	bool has_range_tlb_invalidation;
> 	bool has_asid;
>+

nit: stray newline

>+	bool has_link_copy_engine;
> };
>
> __diag_push();
>@@ -224,6 +226,7 @@ static const struct xe_device_desc pvc_desc = {
> 	.vm_max_level = 4,
> 	.supports_usm = true,
> 	.has_asid = true,
>+	.has_link_copy_engine = true,
> };
>
> #define MTL_MEDIA_ENGINES \
>@@ -413,6 +416,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> 	xe->info.has_flat_ccs = desc->has_flat_ccs;
> 	xe->info.has_4tile = desc->has_4tile;
> 	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
>+	xe->info.has_link_copy_engine = desc->has_link_copy_engine;

mostly cosmetic issues. Feel free to add my

	
	Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>

when fixing those

thanks
Lucas De Marchi

>
> 	spd = subplatform_get(xe, desc);
> 	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
>-- 
>2.25.1
>


More information about the Intel-xe mailing list