[CI 11/14] drm/xe: Add GGTT updates to migration engine
Maarten Lankhorst
dev at lankhorst.se
Mon Apr 7 13:04:46 UTC 2025
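Add xe_migrate_update_gtt(), which writes GGTT PTEs from the migration
engine using MI_UPDATE_GTT commands. This allows GGTT updates to be
pipelined, as pinning huge buffers to the GGTT might otherwise end up
being really slow.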
Signed-off-by: Maarten Lankhorst <dev at lankhorst.se>
---
.../gpu/drm/xe/instructions/xe_mi_commands.h | 2 +
drivers/gpu/drm/xe/tests/xe_migrate.c | 74 ++++++++++++++++
drivers/gpu/drm/xe/xe_migrate.c | 87 +++++++++++++++++++
drivers/gpu/drm/xe/xe_migrate.h | 12 +++
4 files changed, 175 insertions(+)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index eba582058d550..11477ef8fd241 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -48,6 +48,8 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+#define MI_UPDATE_GTT __MI_INSTR(0x23) /* emitted by xe_migrate_update_gtt(); the PTE payload length in dwords is OR'ed into the header */
+
#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d5fe0ea889ad8..d6770ed4126c1 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -365,6 +365,79 @@ static void xe_migrate_sanity_kunit(struct kunit *test)
migrate_test_run_device(xe);
}
+/*
+ * xe_migrate_update_gtt_cb used by the GGTT test: fill @num_pte entries of
+ * @pte with a fixed marker pattern OR'ed with the running @local_offset,
+ * which advances by XE_PAGE_SIZE per entry. @arg and @ggtt_offset are unused.
+ */
+static void update_gtt(void *arg, u32 ggtt_offset, u32 local_offset, u64 *pte, u32 num_pte)
+{
+	u32 idx;
+
+	for (idx = 0; idx < num_pte; idx++, local_offset += XE_PAGE_SIZE)
+		pte[idx] = 0x1234567890000abcULL | local_offset;
+}
+
+/*
+ * Reserve a 32MiB GGTT range, fill its PTEs through xe_migrate_update_gtt()
+ * using the update_gtt() callback, then read back one PTE per MiB and compare
+ * it against the value update_gtt() is expected to have produced.
+ *
+ * The caller holds the migrate VM lock and a runtime PM reference. Failures
+ * are therefore reported with KUNIT_EXPECT_*() instead of KUNIT_ASSERT_*():
+ * an assertion would abort the test case on the spot, skipping the cleanup
+ * here and the unlock / runtime PM put in the caller.
+ */
+static void xe_migrate_test_ggtt(struct kunit *test,
+				 struct xe_migrate *m,
+				 struct xe_ggtt *ggtt)
+{
+	struct xe_ggtt_node *node = xe_ggtt_node_init(ggtt);
+	struct dma_fence *fence;
+	u32 i;
+	int ret;
+
+	KUNIT_EXPECT_NOT_ERR_OR_NULL(test, node);
+	if (IS_ERR_OR_NULL(node))
+		return;
+
+	ret = xe_ggtt_node_insert(node, SZ_32M, XE_PAGE_SIZE);
+	KUNIT_EXPECT_EQ(test, ret, 0);
+	if (ret) {
+		/*
+		 * The node was never inserted, so release the bare struct with
+		 * xe_ggtt_node_fini() rather than xe_ggtt_node_remove().
+		 */
+		xe_ggtt_node_fini(node);
+		return;
+	}
+
+	fence = xe_migrate_update_gtt(m, update_gtt, NULL, node->base.start,
+				      node->base.size / XE_PAGE_SIZE);
+	KUNIT_EXPECT_NOT_ERR_OR_NULL(test, fence);
+	if (IS_ERR_OR_NULL(fence))
+		goto out;
+
+	dma_fence_wait(fence, false);
+
+	/* update_gtt() ORs the byte offset within the range into every PTE. */
+	for (i = 0; i < node->base.size; i += SZ_1M) {
+		u64 pte = xe_ggtt_read_pte(ggtt, node->base.start + i);
+		u64 expected = 0x1234567890000abcULL | i;
+
+		check(pte, expected, "GGTT update doesn't match expected update", test);
+	}
+	dma_fence_put(fence);
+
+out:
+	xe_ggtt_node_remove(node, false);
+}
+
+/*
+ * Run the GGTT update test once per tile of @xe. A runtime PM reference is
+ * held for the whole run and each tile's test executes with the VM of that
+ * tile's migrate queue locked.
+ */
+static void xe_migrate_test_ggtt_device(struct kunit *test, struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	int tile_id;
+
+	xe_pm_runtime_get(xe);
+
+	for_each_tile(tile, xe, tile_id) {
+		struct xe_migrate *migrate = tile->migrate;
+
+		kunit_info(test, "Testing tile id %d.\n", tile_id);
+
+		xe_vm_lock(migrate->q->vm, false);
+		xe_migrate_test_ggtt(test, migrate, tile->mem.ggtt);
+		xe_vm_unlock(migrate->q->vm);
+	}
+
+	xe_pm_runtime_put(xe);
+}
+
+/* KUnit entry point: test->priv carries the xe device under test. */
+static void xe_migrate_ggtt_kunit(struct kunit *test)
+{
+	xe_migrate_test_ggtt_device(test, test->priv);
+}
+
static struct dma_fence *blt_copy(struct xe_tile *tile,
struct xe_bo *src_bo, struct xe_bo *dst_bo,
bool copy_only_ccs, const char *str, struct kunit *test)
@@ -773,6 +846,7 @@ static void xe_validate_ccs_kunit(struct kunit *test)
static struct kunit_case xe_migrate_tests[] = {
KUNIT_CASE_PARAM(xe_migrate_sanity_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_migrate_ggtt_kunit, xe_pci_live_device_gen_param), /* GGTT PTE updates through the migration engine */
KUNIT_CASE_PARAM(xe_validate_ccs_kunit, xe_pci_live_device_gen_param),
{}
};
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index c1277d599a11d..cf35ba0487d51 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1723,6 +1723,93 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
#endif
+/**
+ * xe_migrate_update_gtt() - Write GGTT PTEs from the migration queue.
+ * @m: The migrate context whose queue executes the update.
+ * @set_ptes_cb: Callback that fills in the PTE values; called once per emitted
+ *               MI_UPDATE_GTT command with at most 511 PTEs.
+ * @arg: Opaque pointer handed to @set_ptes_cb.
+ * @ggtt_offset: GGTT offset of the first PTE to update.
+ * @num_pte: Number of PTEs to update.
+ *
+ * The update is split into jobs of up to 33 MI_UPDATE_GTT commands each. Every
+ * job is pushed to @m's queue and its finished fence becomes @m->fence.
+ *
+ * Return: A reference to the finished fence of the last job pushed, NULL if
+ * @num_pte is zero, or an ERR_PTR() on failure. On failure any job that was
+ * already pushed is waited upon before returning.
+ */
+struct dma_fence *xe_migrate_update_gtt(struct xe_migrate *m,
+					xe_migrate_update_gtt_cb set_ptes_cb,
+					void *arg,
+					u32 ggtt_offset, u32 num_pte)
+{
+	/* Largest number of PTEs emitted behind a single MI_UPDATE_GTT header. */
+	const u32 max_ptes_per_cmd = 511;
+	struct xe_gt *gt = m->tile->primary_gt;
+	const bool usm = gt_to_xe(gt)->info.has_usm;
+	struct dma_fence *fence = NULL;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	u32 local_offset = 0;
+	int err;
+
+	while (num_pte) {
+		/*
+		 * Up to 33 commands per job; a full job is 33796 dwords. Per
+		 * the original sizing note this covers a 64MiB 3840x2160x8
+		 * buffer in one job.
+		 */
+		u32 chunk = min(num_pte, 33 * max_ptes_per_cmd);
+		u32 num_cmds = DIV_ROUND_UP(chunk, max_ptes_per_cmd);
+		/* 2 * MI_BATCH_BUFFER_END + align + #PTEs + MI_UPDATE_GTT headers */
+		u32 batch_size = 4 + 2 * chunk + 2 * num_cmds;
+		u32 update_idx;
+
+		num_pte -= chunk;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		/* The first batch is empty; all the work is in the second one. */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		/* Skip one dword so the u64 PTE payloads below stay u64 aligned. */
+		bb->len++;
+		update_idx = bb->len;
+
+		while (chunk) {
+			u32 count = min(chunk, max_ptes_per_cmd);
+
+			/* Header: opcode | payload length in dwords, then the GGTT offset. */
+			bb->cs[bb->len++] = MI_UPDATE_GTT | (2 * count);
+			bb->cs[bb->len++] = ggtt_offset + local_offset;
+
+			/* The callback writes the PTEs straight into the batch. */
+			set_ptes_cb(arg, ggtt_offset, local_offset,
+				    (u64 *)&bb->cs[bb->len], count);
+			bb->len += 2 * count;
+
+			local_offset += XE_PAGE_SIZE * count;
+			chunk -= count;
+		}
+
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err_free_bb;
+		}
+
+		xe_sched_job_add_migrate_flush(job, MI_FLUSH_DW_CCS);
+
+		mutex_lock(&m->job_mutex);
+		xe_sched_job_arm(job);
+		/* Only the most recent job's fence is kept for the caller. */
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+	}
+
+	return fence;
+
+err_free_bb:
+	xe_bb_free(bb, NULL);
+err_sync:
+	/* Sync partial updates if any. FIXME: job_mutex? */
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
+	return ERR_PTR(err);
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 6ff9a963425c1..e03f28b7a021d 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -130,4 +130,16 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
void xe_migrate_wait(struct xe_migrate *m);
struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile);
+/**
+ * typedef xe_migrate_update_gtt_cb - Callback that fills in GGTT PTE values
+ * for xe_migrate_update_gtt().
+ * @arg: Opaque pointer passed through unchanged from xe_migrate_update_gtt().
+ * @ggtt_offset: The ggtt_offset given to xe_migrate_update_gtt().
+ * @local_offset: Byte offset, relative to @ggtt_offset, of the first PTE of
+ *                this call; it advances by XE_PAGE_SIZE per PTE between calls.
+ * @pte: Where to store the PTE values (points into the batch being built).
+ * @num_pte: Number of PTEs to store at @pte.
+ */
+typedef void (*xe_migrate_update_gtt_cb)(void *arg,
+ u32 ggtt_offset,
+ u32 local_offset,
+ u64 *pte,
+ u32 num_pte);
+/*
+ * Update @num_pte GGTT PTEs starting at @ggtt_offset using MI_UPDATE_GTT
+ * commands on the migrate queue of @m; @set_ptes_cb supplies the PTE values.
+ * Returns the finished fence of the last job (NULL when @num_pte is zero) or
+ * an ERR_PTR() on failure.
+ */
+struct dma_fence *xe_migrate_update_gtt(struct xe_migrate *m,
+ xe_migrate_update_gtt_cb set_ptes_cb,
+ void *arg,
+ u32 ggtt_offset, u32 num_pte);
+
#endif
--
2.45.2
More information about the Intel-xe
mailing list