[Intel-xe] [PATCH] drm/xe/mtl: Increase prefetch size
Matthew Brost
matthew.brost at intel.com
Fri Jul 21 19:10:27 UTC 2023
MTL requires a 2K prefetch for the RCS hw engine; fix this. Also pass the
hw engine class into the prefetch calculation so that space is not wasted
in the common case of using a BCS hw engine.
Bspec: 45718
Cc: Matt Roper <matthew.d.roper at intel.com>
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/tests/xe_migrate.c | 3 ++-
drivers/gpu/drm/xe/xe_bb.c | 21 ++++++++++++---------
drivers/gpu/drm/xe/xe_bb.h | 4 +++-
drivers/gpu/drm/xe/xe_gt.c | 4 ++--
drivers/gpu/drm/xe/xe_migrate.c | 7 ++++---
5 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 30e5fdf6ca63..71a127efe99a 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -286,7 +286,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
goto free_pt;
}
- bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm);
+ bb = xe_bb_new(tile->primary_gt, m->eng->hwe->class, 32,
+ xe->info.supports_usm);
if (IS_ERR(bb)) {
KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
PTR_ERR(bb));
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index f9b6b7adf99f..15e616fcccef 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -14,21 +14,23 @@
#include "xe_sched_job.h"
#include "xe_vm_types.h"
-static int bb_prefetch(struct xe_gt *gt)
+static int bb_prefetch(struct xe_gt *gt, enum xe_engine_class class)
{
struct xe_device *xe = gt_to_xe(gt);
- if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
- /*
- * RCS and CCS require 1K, although other engines would be
- * okay with 512.
- */
+ if (xe->info.platform == XE_METEORLAKE &&
+ class == XE_ENGINE_CLASS_RENDER)
+ return SZ_2K;
+ else if (GRAPHICS_VERx100(xe) >= 1250 &&
+ (class == XE_ENGINE_CLASS_RENDER ||
+ class == XE_ENGINE_CLASS_COMPUTE))
return SZ_1K;
else
return SZ_512;
}
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+struct xe_bb *xe_bb_new(struct xe_gt *gt, enum xe_engine_class class,
+ u32 dwords, bool usm)
{
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
@@ -44,7 +46,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
* requirements.
*/
bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
- 4 * (dwords + 1) + bb_prefetch(gt));
+ 4 * (dwords + 1) + bb_prefetch(gt, class));
if (IS_ERR(bb->bo)) {
err = PTR_ERR(bb->bo);
goto err;
@@ -66,7 +68,8 @@ __xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
- WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size);
+ WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt,
+ kernel_eng->hwe->class) > size);
xe_sa_bo_flush_write(bb->bo);
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index 0cc9260c9634..ad6170b0b23a 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -8,13 +8,15 @@
#include "xe_bb_types.h"
+enum xe_engine_class;
struct dma_fence;
struct xe_gt;
struct xe_engine;
struct xe_sched_job;
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_new(struct xe_gt *gt, enum xe_engine_class class, u32 size,
+ bool usm);
struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
struct xe_bb *bb);
struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3e32d38aeeea..d928b161c480 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -88,7 +88,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
u64 batch_ofs;
long timeout;
- bb = xe_bb_new(gt, 4, false);
+ bb = xe_bb_new(gt, e->hwe->class, 4, false);
if (IS_ERR(bb))
return PTR_ERR(bb);
@@ -126,7 +126,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
long timeout;
int count = 0;
- bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */
+ bb = xe_bb_new(gt, e->hwe->class, SZ_4K, false); /* Just pick a large BB size */
if (IS_ERR(bb))
return PTR_ERR(bb);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index bc7dac4e2086..649ba38bca81 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -685,7 +685,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
batch_size += EMIT_COPY_DW +
(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
- bb = xe_bb_new(gt, batch_size, usm);
+ bb = xe_bb_new(gt, m->eng->hwe->class, batch_size, usm);
if (IS_ERR(bb)) {
err = PTR_ERR(bb);
goto err_sync;
@@ -914,7 +914,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (WARN_ON_ONCE(!clear_L0))
break;
- bb = xe_bb_new(gt, batch_size, usm);
+ bb = xe_bb_new(gt, m->eng->hwe->class, batch_size, usm);
if (IS_ERR(bb)) {
err = PTR_ERR(bb);
goto err_sync;
@@ -1196,7 +1196,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
*/
XE_BUG_ON(batch_size >= SZ_128K);
- bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
+ bb = xe_bb_new(gt, eng_override->hwe->class, batch_size,
+ !eng && xe->info.supports_usm);
if (IS_ERR(bb))
return ERR_CAST(bb);
--
2.34.1
More information about the Intel-xe
mailing list