[Intel-xe] [PATCH v5 2/2] drm/xe: Skip XY_FAST_COLOR instruction on link copy engines
Balasubramani Vivekanandan
balasubramani.vivekanandan at intel.com
Fri Mar 17 15:35:31 UTC 2023
Link copy engines doesn't support the XY_FAST_COLOR instruction.
Currently this instruction is used only at one place to clear a ttm
resource while migrating a BO.
A new device_info member is created to know if a platform has link copy
engine. If it supports, then instead of using XY_FAST_COLOR instruction,
MEM_SET is used which is available both in main and link copy engines.
BSpec: 68433
Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
---
drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 9 ++++
drivers/gpu/drm/xe/xe_device_types.h | 2 +
drivers/gpu/drm/xe/xe_migrate.c | 57 +++++++++++++++++++++--
drivers/gpu/drm/xe/xe_pci.c | 4 ++
4 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 288576035ce3..e60372a82723 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -6,6 +6,8 @@
#ifndef _XE_GPU_COMMANDS_H_
#define _XE_GPU_COMMANDS_H_
+#include "regs/xe_reg_defs.h"
+
#define INSTR_CLIENT_SHIFT 29
#define INSTR_MI_CLIENT 0x0
#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
@@ -56,6 +58,13 @@
#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
#define BLT_DEPTH_32 (3<<24)
+#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
+#define PVC_MEM_SET_CMD_LEN_DW 7
+#define PVC_MS_MATRIX REG_BIT(17)
+#define PVC_MS_DATA_FIELD GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1)
+
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28)
#define PIPE_CONTROL_AMFS_FLUSH (1<<25)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 034e0956f4ea..46cf37224090 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -95,6 +95,8 @@ struct xe_device {
bool has_4tile;
/** @has_range_tlb_invalidation: Has range based TLB invalidations */
bool has_range_tlb_invalidation;
+ /** @has_link_copy_engines: Whether the platform has link copy engines */
+ bool has_link_copy_engine;
/** @enable_display: display enabled */
bool enable_display;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fdcbab31a418..1316614bff20 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -746,14 +746,36 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
return fence;
}
-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
- u32 size, u32 pitch, bool is_vram)
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch)
+{
+ u32 *cs = bb->cs + bb->len;
+ u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+ u32 len = PVC_MEM_SET_CMD_LEN_DW;
+
+ *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
+ *cs++ = pitch - 1;
+ *cs++ = (size / pitch) - 1;
+ *cs++ = pitch - 1;
+ *cs++ = lower_32_bits(src_ofs);
+ *cs++ = upper_32_bits(src_ofs);
+ *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
+
+ XE_BUG_ON(cs - bb->cs != len + bb->len);
+
+ bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u32 size, u32 pitch, bool is_vram)
{
u32 *cs = bb->cs + bb->len;
- u32 len = XY_FAST_COLOR_BLT_DW;
+ u32 len;
u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+ struct xe_device *xe = gt_to_xe(gt);
- if (GRAPHICS_VERx100(gt->xe) < 1250)
+ len = XY_FAST_COLOR_BLT_DW;
+ if (GRAPHICS_VERx100(xe) < 1250)
len = 11;
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
@@ -779,7 +801,31 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
}
XE_BUG_ON(cs - bb->cs != len + bb->len);
+
bb->len += len;
+}
+
+static u32 emit_clear_cmd_len(struct xe_device *xe)
+{
+ if (xe->info.has_link_copy_engine)
+ return PVC_MEM_SET_CMD_LEN_DW;
+ else
+ return XY_FAST_COLOR_BLT_DW;
+}
+
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch, bool is_vram)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.has_link_copy_engine) {
+ emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+
+ } else {
+ emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+ is_vram);
+ }
+
return 0;
}
@@ -834,7 +880,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
batch_size = 2 +
pte_update_size(m, clear_vram, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
- XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+ emit_clear_cmd_len(xe), 0,
+ NUM_PT_PER_BLIT);
if (xe_device_has_flat_ccs(xe) && clear_vram)
batch_size += EMIT_COPY_CCS_DW;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c4d9fd2e7b2b..e555f13395ab 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -72,6 +72,8 @@ struct xe_device_desc {
bool has_4tile;
bool has_range_tlb_invalidation;
bool has_asid;
+
+ bool has_link_copy_engine;
};
__diag_push();
@@ -224,6 +226,7 @@ static const struct xe_device_desc pvc_desc = {
.vm_max_level = 4,
.supports_usm = true,
.has_asid = true,
+ .has_link_copy_engine = true,
};
#define MTL_MEDIA_ENGINES \
@@ -413,6 +416,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
xe->info.has_flat_ccs = desc->has_flat_ccs;
xe->info.has_4tile = desc->has_4tile;
xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
+ xe->info.has_link_copy_engine = desc->has_link_copy_engine;
spd = subplatform_get(xe, desc);
xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
--
2.25.1
More information about the Intel-xe
mailing list