[Intel-xe] [PATCH] drm/xe/mtl: Increase prefetch size

Matthew Brost matthew.brost at intel.com
Fri Jul 21 19:10:27 UTC 2023


MTL requires a 2K prefetch for the RCS hw engine; fix the prefetch size
accordingly. Also wire the hwe class into this calculation so that space
is not wasted in the common case of using a BCS hw engine.

Bspec: 45718

Cc: Matt Roper <matthew.d.roper at intel.com>
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  3 ++-
 drivers/gpu/drm/xe/xe_bb.c            | 21 ++++++++++++---------
 drivers/gpu/drm/xe/xe_bb.h            |  4 +++-
 drivers/gpu/drm/xe/xe_gt.c            |  4 ++--
 drivers/gpu/drm/xe/xe_migrate.c       |  7 ++++---
 5 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 30e5fdf6ca63..71a127efe99a 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -286,7 +286,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_pt;
 	}
 
-	bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm);
+	bb = xe_bb_new(tile->primary_gt, m->eng->hwe->class, 32,
+		       xe->info.supports_usm);
 	if (IS_ERR(bb)) {
 		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
 			   PTR_ERR(bb));
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index f9b6b7adf99f..15e616fcccef 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -14,21 +14,23 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
-static int bb_prefetch(struct xe_gt *gt)
+static int bb_prefetch(struct xe_gt *gt, enum xe_engine_class class)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
-		/*
-		 * RCS and CCS require 1K, although other engines would be
-		 * okay with 512.
-		 */
+	if (xe->info.platform == XE_METEORLAKE &&
+	    class == XE_ENGINE_CLASS_RENDER)
+		return SZ_2K;
+	else if (GRAPHICS_VERx100(xe) >= 1250 &&
+		 (class == XE_ENGINE_CLASS_RENDER ||
+		  class == XE_ENGINE_CLASS_COMPUTE))
 		return SZ_1K;
 	else
 		return SZ_512;
 }
 
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+struct xe_bb *xe_bb_new(struct xe_gt *gt, enum xe_engine_class class,
+			u32 dwords, bool usm)
 {
 	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
@@ -44,7 +46,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	 * requirements.
 	 */
 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
-			      4 * (dwords + 1) + bb_prefetch(gt));
+			      4 * (dwords + 1) + bb_prefetch(gt, class));
 	if (IS_ERR(bb->bo)) {
 		err = PTR_ERR(bb->bo);
 		goto err;
@@ -66,7 +68,8 @@ __xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
 
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 
-	WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size);
+	WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt,
+					  kernel_eng->hwe->class) > size);
 
 	xe_sa_bo_flush_write(bb->bo);
 
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index 0cc9260c9634..ad6170b0b23a 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -8,13 +8,15 @@
 
 #include "xe_bb_types.h"
 
+enum xe_engine_class;
 struct dma_fence;
 
 struct xe_gt;
 struct xe_engine;
 struct xe_sched_job;
 
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_new(struct xe_gt *gt, enum xe_engine_class class, u32 size,
+			bool usm);
 struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
 				      struct xe_bb *bb);
 struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3e32d38aeeea..d928b161c480 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -88,7 +88,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	u64 batch_ofs;
 	long timeout;
 
-	bb = xe_bb_new(gt, 4, false);
+	bb = xe_bb_new(gt, e->hwe->class, 4, false);
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
@@ -126,7 +126,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 	long timeout;
 	int count = 0;
 
-	bb = xe_bb_new(gt, SZ_4K, false);	/* Just pick a large BB size */
+	bb = xe_bb_new(gt, e->hwe->class, SZ_4K, false);	/* Just pick a large BB size */
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index bc7dac4e2086..649ba38bca81 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -685,7 +685,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		batch_size += EMIT_COPY_DW +
 			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
 
-		bb = xe_bb_new(gt, batch_size, usm);
+		bb = xe_bb_new(gt, m->eng->hwe->class, batch_size, usm);
 		if (IS_ERR(bb)) {
 			err = PTR_ERR(bb);
 			goto err_sync;
@@ -914,7 +914,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		if (WARN_ON_ONCE(!clear_L0))
 			break;
 
-		bb = xe_bb_new(gt, batch_size, usm);
+		bb = xe_bb_new(gt, m->eng->hwe->class, batch_size, usm);
 		if (IS_ERR(bb)) {
 			err = PTR_ERR(bb);
 			goto err_sync;
@@ -1196,7 +1196,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 */
 	XE_BUG_ON(batch_size >= SZ_128K);
 
-	bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
+	bb = xe_bb_new(gt, eng_override->hwe->class, batch_size,
+		       !eng && xe->info.supports_usm);
 	if (IS_ERR(bb))
 		return ERR_CAST(bb);
 
-- 
2.34.1



More information about the Intel-xe mailing list