[CI 13/14] drm/xe: Allow for optimization of xe_ggtt_map_bo
Maarten Lankhorst
dev at lankhorst.se
Mon Apr 7 13:04:48 UTC 2025
Move PTE generation in xe_ggtt_map_bo() into a callback that walks a
struct xe_res_cursor, and add an allow_accel argument. When the caller
allows it and a migrate engine is available, the GGTT update is
submitted through xe_migrate_update_gtt() and the returned fence is
waited upon; if that path is unavailable or fails, the same callback is
used to write the PTEs from the CPU, one page at a time.

Signed-off-by: Maarten Lankhorst <dev at lankhorst.se>
---
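Not part of the patch, just a note for reviewers who want the shape of the
new callback spelled out: the change splits PTE *generation* (a callback
walking a res cursor) from PTE *writing* (either ggtt_set_pte() on the CPU,
or a batched update job on the migrate engine). The standalone userspace
sketch below only illustrates that calling pattern; the toy_* names and the
flat cursor are made up for the example and are not the driver's real API.

/* pte_callback_demo.c - build with: gcc -Wall -o demo pte_callback_demo.c */
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE 4096u

/* Simplified stand-in for struct xe_res_cursor: a flat, contiguous range. */
struct toy_cursor {
	uint64_t start;
	uint64_t remaining;
};

struct toy_cb_data {
	struct toy_cursor cur;
	uint64_t pte_flags;
};

/*
 * Same shape as xe_ggtt_map_bo_cb(): fill num_ptes entries, advancing the
 * cursor by one page per PTE. The offset arguments are unused here, as in
 * the patch.
 */
static void toy_fill_ptes(void *arg, uint32_t ggtt_offset, uint32_t local_offset,
			  uint64_t *ptes, uint32_t num_ptes)
{
	struct toy_cb_data *data = arg;

	(void)ggtt_offset;
	(void)local_offset;

	while (num_ptes--) {
		*ptes++ = data->pte_flags | data->cur.start;
		data->cur.start += TOY_PAGE_SIZE;
		data->cur.remaining -= TOY_PAGE_SIZE;
	}
}

int main(void)
{
	struct toy_cb_data data = {
		.cur = { .start = 0x100000, .remaining = 4 * TOY_PAGE_SIZE },
		.pte_flags = 0x3,	/* pretend "present | writable" bits */
	};
	uint64_t ptes[4];
	unsigned int i;

	/* Batched use, as a migrate-engine job would: all PTEs in one call. */
	toy_fill_ptes(&data, 0, 0, ptes, 4);

	for (i = 0; i < 4; i++)
		printf("pte[%u] = 0x%llx\n", i, (unsigned long long)ptes[i]);

	return 0;
}

The CPU fallback in the patch is the same callback driven one PTE at a time
through ggtt_set_pte(), which is why an error from the accelerated path can
simply be eaten and the mapping done synchronously instead.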
 drivers/gpu/drm/xe/tests/xe_migrate.c |  1 +
 drivers/gpu/drm/xe/xe_ggtt.c          | 74 ++++++++++++++++++---------
 2 files changed, 50 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d6770ed4126c1..772b6db3784d9 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -9,6 +9,7 @@
 #include "tests/xe_kunit_helpers.h"
 #include "tests/xe_pci_test.h"
 
+#include "xe_ggtt.h"
 #include "xe_pci.h"
 #include "xe_pm.h"
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index c7b6369a88a9e..1f46d299b5149 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -26,6 +26,7 @@
 #include "xe_gt_sriov_vf.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
+#include "xe_migrate.h"
 #include "xe_mmio.h"
 #include "xe_pm.h"
 #include "xe_res_cursor.h"
@@ -607,6 +608,25 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
 	return drm_mm_node_allocated(&node->base);
 }
 
+struct xe_ggtt_cb_data {
+	struct xe_res_cursor cur;
+	u64 pte_flags;
+	bool sysmem;
+};
+
+static void xe_ggtt_map_bo_cb(void *args, u32 ggtt_offset, u32 local_offset, u64 *ptes, u32 num_ptes)
+{
+	struct xe_ggtt_cb_data *data = args;
+
+	while (num_ptes--) {
+		u64 addr = data->sysmem ? xe_res_dma(&data->cur) : data->cur.start;
+
+		*ptes++ = data->pte_flags + addr;
+
+		xe_res_next(&data->cur, XE_PAGE_SIZE);
+	}
+}
+
 /**
  * xe_ggtt_map_bo - Map the BO into GGTT
  * @ggtt: the &xe_ggtt where node will be mapped
@@ -615,39 +635,43 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
  * @pat_index: Which pat_index to use.
+ * @allow_accel: Whether the GGTT update may be done by the migrate engine.
  */
 static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
-			   struct xe_bo *bo, u16 pat_index)
+			   struct xe_bo *bo, u16 pat_index, bool allow_accel)
 {
-	u64 start, pte;
-	struct xe_res_cursor cur;
+	struct xe_ggtt_cb_data data;
+	struct dma_fence *fence = NULL;
 
 	if (XE_WARN_ON(!node))
 		return;
 
-	start = node->base.start;
-
-	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
-	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+	data.pte_flags = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
+	data.sysmem = !xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo);
+	if (data.sysmem) {
 		xe_assert(xe_bo_device(bo), bo->ttm.ttm);
-		for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur);
-		     cur.remaining;
-		     xe_res_next(&cur, XE_PAGE_SIZE)) {
-			u64 addr = xe_res_dma(&cur);
-
-			ggtt->pt_ops->ggtt_set_pte(ggtt, start + cur.start, addr | pte);
-		}
+		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &data.cur);
 	} else {
-		u64 end = start + bo->size;
-
 		/* Prepend GPU offset */
-		pte |= vram_region_gpu_offset(bo->ttm.resource);
-
-		for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
-		     cur.remaining;
-		     xe_res_next(&cur, XE_PAGE_SIZE)) {
-			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
-						   pte + cur.start);
+		data.pte_flags |= vram_region_gpu_offset(bo->ttm.resource);
+
+		xe_res_first(bo->ttm.resource, 0, bo->size, &data.cur);
+	}
+
+	if (allow_accel && node->base.size >= SZ_4K && ggtt->tile->migrate)
+		fence = xe_migrate_update_gtt(ggtt->tile->migrate, xe_ggtt_map_bo_cb, &data,
+					      node->base.start,
+					      node->base.size / XE_PAGE_SIZE);
+
+	if (!IS_ERR_OR_NULL(fence)) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	} else {
+		/* Eat error, force map */
+		for (u32 local_offset = 0; local_offset < node->base.size; local_offset += XE_PAGE_SIZE) {
+			u64 pte;
+
+			xe_ggtt_map_bo_cb(&data, node->base.start, local_offset, &pte, 1);
+			ggtt->pt_ops->ggtt_set_pte(ggtt, node->base.start + local_offset, pte);
 		}
 	}
 }
@@ -665,7 +689,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
 
 	mutex_lock(&ggtt->lock);
-	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index);
+	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index, false);
 	mutex_unlock(&ggtt->lock);
 }
@@ -709,7 +733,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt,
 		u64 pte_flags = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
 
 		transform(ggtt, node, pte_flags, ggtt->pt_ops->ggtt_set_pte, arg);
 	} else {
-		xe_ggtt_map_bo(ggtt, node, bo, pat_index);
+		xe_ggtt_map_bo(ggtt, node, bo, pat_index, true);
 	}
@@ -762,7 +786,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 		u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
 		u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
 
-		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index);
+		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index, false);
 	}
 	mutex_unlock(&ggtt->lock);
--
2.45.2