[CI 13/14] drm/xe: Allow for optimization of xe_ggtt_map_bo

Maarten Lankhorst <dev@lankhorst.se>
Mon Apr 7 13:04:48 UTC 2025


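Rework xe_ggtt_map_bo() to build the GGTT PTEs through a small
callback, so that the update can optionally be offloaded to the
migrate engine. When acceleration is allowed and a migrate context is
available, xe_migrate_update_gtt() is used to write the PTEs;
otherwise, or if the migrate job fails, the PTEs are written with the
CPU as before. Only the xe_ggtt_node_insert_transform() path allows
the accelerated update for now.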
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  1 +
 drivers/gpu/drm/xe/xe_ggtt.c          | 75 ++++++++++++++++++---------
 2 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d6770ed4126c1..772b6db3784d9 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -9,6 +9,7 @@
 #include "tests/xe_kunit_helpers.h"
 #include "tests/xe_pci_test.h"
 
+#include "xe_ggtt.h"
 #include "xe_pci.h"
 #include "xe_pm.h"
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index c7b6369a88a9e..1f46d299b5149 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -26,6 +26,7 @@
 #include "xe_gt_sriov_vf.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
+#include "xe_migrate.h"
 #include "xe_mmio.h"
 #include "xe_pm.h"
 #include "xe_res_cursor.h"
@@ -607,6 +608,25 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
 	return drm_mm_node_allocated(&node->base);
 }
 
+struct xe_ggtt_cb_data {
+	struct xe_res_cursor cur;
+	u64 pte_flags;
+	bool sysmem;
+};
+
+static void xe_ggtt_map_bo_cb(void *args, u32 ggtt_offset, u32 local_offset, u64 *ptes, u32 num_ptes)
+{
+	struct xe_ggtt_cb_data *data = args;
+
+	while (num_ptes--) {
+		u64 addr = data->sysmem ? xe_res_dma(&data->cur) : data->cur.start;
+
+		*ptes++ = data->pte_flags + addr;
+
+		xe_res_next(&data->cur, XE_PAGE_SIZE);
+	}
+}
+
 /**
  * xe_ggtt_map_bo - Map the BO into GGTT
  * @ggtt: the &xe_ggtt where node will be mapped
@@ -615,39 +635,44 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
  * @pat_index: Which pat_index to use.
+ * @allow_accel: Whether the migrate engine may be used to update the PTEs.
  */
 static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
-			   struct xe_bo *bo, u16 pat_index)
+			   struct xe_bo *bo, u16 pat_index, bool allow_accel)
 {
-	u64 start, pte;
-	struct xe_res_cursor cur;
+	struct xe_ggtt_cb_data data;
+	struct dma_fence *fence = NULL;
 
 	if (XE_WARN_ON(!node))
 		return;
 
-	start = node->base.start;
-
-	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
-	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+	data.pte_flags = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
+	data.sysmem = !xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo);
+	if (data.sysmem) {
 		xe_assert(xe_bo_device(bo), bo->ttm.ttm);
 
-		for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur);
-		     cur.remaining;
-		     xe_res_next(&cur, XE_PAGE_SIZE)) {
-			u64 addr = xe_res_dma(&cur);
-
-			ggtt->pt_ops->ggtt_set_pte(ggtt, start + cur.start, addr | pte);
-		}
+		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &data.cur);
 	} else {
-		u64 end = start + bo->size;
-
 		/* Prepend GPU offset */
-		pte |= vram_region_gpu_offset(bo->ttm.resource);
+		data.pte_flags |= vram_region_gpu_offset(bo->ttm.resource);
+
+		xe_res_first(bo->ttm.resource, 0, bo->size, &data.cur);
+	}
+
+	if (allow_accel && node->base.size >= SZ_4K && ggtt->tile->migrate)
+		fence = xe_migrate_update_gtt(ggtt->tile->migrate, xe_ggtt_map_bo_cb, &data,
+					      node->base.start,
+					      node->base.size / XE_PAGE_SIZE);
+
+	if (!IS_ERR_OR_NULL(fence)) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	} else {
+		/* Migrate path skipped or failed, map with the CPU */
 
-		for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
-		     cur.remaining;
-		     xe_res_next(&cur, XE_PAGE_SIZE)) {
+		for (u32 local_offset = 0; local_offset < node->base.size; local_offset += XE_PAGE_SIZE) {
+			u64 pte;
+			xe_ggtt_map_bo_cb(&data, node->base.start, local_offset, &pte, 1);
 
-			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
-						   pte + cur.start);
+			ggtt->pt_ops->ggtt_set_pte(ggtt, node->base.start + local_offset, pte);
 		}
 	}
 }
@@ -665,7 +689,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
 
 	mutex_lock(&ggtt->lock);
-	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index);
+	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index, false);
 	mutex_unlock(&ggtt->lock);
 }
 
@@ -709,7 +733,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt,
 		u64 pte_flags = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
 		transform(ggtt, node, pte_flags, ggtt->pt_ops->ggtt_set_pte, arg);
 	} else {
-		xe_ggtt_map_bo(ggtt, node, bo, pat_index);
+		xe_ggtt_map_bo(ggtt, node, bo, pat_index, true);
 	}
 
 
@@ -762,7 +786,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 		u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
 		u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
 
-		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index);
+		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index, false);
 	}
 	mutex_unlock(&ggtt->lock);
 
-- 
2.45.2
