[PATCH 2/7] drm/xe: split pinned save/restore into phases
Matthew Auld
matthew.auld at intel.com
Mon Dec 16 16:29:43 UTC 2024
In the early phase we can only use memcpy, since we need it to bootstrap
enough of the system to get working submission. In the late phase we can
switch over to using the GPU blitter to do the copy. As part of this,
move the user pinned objects into the late phase. In the future we can
move more of the kernel pinned objects into the late phase as well.
Signed-off-by: Matthew Auld <matthew.auld at intel.com>
Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
---
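A rough sketch of the resume ordering this split is aiming for
(illustrative only; example_resume_flow() is not a real driver function,
the real call sites are in the xe_pm.c hunks below, and the GT bring-up
step stands in for the existing resume code):

    static int example_resume_flow(struct xe_device *xe)
    {
            int err;

            /* Early: no working submission yet, restore with memcpy only. */
            err = xe_bo_restore_pinned_early(xe);
            if (err)
                    return err;

            /* ... bring up the GT/GuC so migration jobs can be submitted ... */

            /* Late: submission works, the rest can use the GPU blitter. */
            return xe_bo_restore_pinned_late(xe);
    }
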
drivers/gpu/drm/xe/tests/xe_bo.c | 8 +-
drivers/gpu/drm/xe/xe_bo.c | 7 +-
drivers/gpu/drm/xe/xe_bo.h | 1 +
drivers/gpu/drm/xe/xe_bo_evict.c | 220 ++++++++++++---------------
drivers/gpu/drm/xe/xe_bo_evict.h | 4 +-
drivers/gpu/drm/xe/xe_device.c | 3 +-
drivers/gpu/drm/xe/xe_device_types.h | 6 +-
drivers/gpu/drm/xe/xe_pm.c | 16 +-
8 files changed, 122 insertions(+), 143 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index c9ec7a313c6b..7619eba2ef1f 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for_each_gt(__gt, xe, id)
xe_gt_sanitize(__gt);
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_pinned_early(xe);
/*
* Snapshotting the CTB and copying back a potentially old
* version seems risky, depending on what might have been
@@ -269,14 +269,14 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
flush_work(&__gt->reset.worker);
}
if (err) {
- KUNIT_FAIL(test, "restore kernel err=%pe\n",
+ KUNIT_FAIL(test, "restore early err=%pe\n",
ERR_PTR(err));
goto cleanup_all;
}
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_pinned_late(xe);
if (err) {
- KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+ KUNIT_FAIL(test, "restore late err=%pe\n", ERR_PTR(err));
goto cleanup_all;
}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 6f073d12359a..e95909963e06 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -124,7 +124,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
}
-static bool xe_bo_is_user(struct xe_bo *bo)
+bool xe_bo_is_user(struct xe_bo *bo)
{
return bo->flags & XE_BO_FLAG_USER;
}
@@ -1859,8 +1859,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
if (xe_bo_is_vram(bo)) {
spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link,
- &xe->pinned.external_vram);
+ list_add_tail(&bo->pinned_link, &xe->pinned.present);
spin_unlock(&xe->pinned.lock);
}
}
@@ -1904,7 +1903,7 @@ int xe_bo_pin(struct xe_bo *bo)
if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ list_add_tail(&bo->pinned_link, &xe->pinned.present);
spin_unlock(&xe->pinned.lock);
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index d9386ab03140..a0ac2b9c903d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -235,6 +235,7 @@ bool xe_bo_is_vram(struct xe_bo *bo);
bool xe_bo_is_stolen(struct xe_bo *bo);
bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
bool xe_bo_has_single_placement(struct xe_bo *bo);
+bool xe_bo_is_user(struct xe_bo *bo);
uint64_t vram_region_gpu_offset(struct ttm_resource *res);
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 96764d8169f4..a3c0d852af60 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -10,6 +10,49 @@
#include "xe_ggtt.h"
#include "xe_tile.h"
+static int xe_evict_pinned(struct xe_device *xe, bool memcpy_only)
+{
+ struct xe_tile *tile;
+ LIST_HEAD(skipped);
+ struct xe_bo *bo;
+ u8 id;
+ int ret = 0;
+
+ spin_lock(&xe->pinned.lock);
+ for (;;) {
+ bo = list_first_entry_or_null(&xe->pinned.present, typeof(*bo), pinned_link);
+ if (!bo)
+ break;
+
+ if (memcpy_only && xe_bo_is_user(bo)) {
+ list_move_tail(&bo->pinned_link, &skipped);
+ continue;
+ }
+
+ xe_bo_get(bo);
+ list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+ spin_unlock(&xe->pinned.lock);
+
+ ret = xe_bo_evict_pinned(bo);
+ xe_bo_put(bo);
+ spin_lock(&xe->pinned.lock);
+ if (ret) {
+ list_move_tail(&bo->pinned_link, &xe->pinned.present);
+ break;
+ }
+ }
+
+ list_splice_tail(&skipped, &xe->pinned.evicted);
+ spin_unlock(&xe->pinned.lock);
+
+ if (!memcpy_only) {
+ for_each_tile(tile, xe, id)
+ xe_tile_migrate_wait(tile);
+ }
+
+ return ret;
+}
+
/**
* xe_bo_evict_all - evict all BOs from VRAM
*
@@ -27,11 +70,7 @@
int xe_bo_evict_all(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
- struct xe_bo *bo;
- struct xe_tile *tile;
- struct list_head still_in_list;
u32 mem_type;
- u8 id;
int ret;
/* User memory */
@@ -57,92 +96,57 @@ int xe_bo_evict_all(struct xe_device *xe)
}
}
- /* Pinned user memory in VRAM */
- INIT_LIST_HEAD(&still_in_list);
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.external_vram,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &still_in_list);
- spin_unlock(&xe->pinned.lock);
-
- ret = xe_bo_evict_pinned(bo);
- xe_bo_put(bo);
- if (ret) {
- spin_lock(&xe->pinned.lock);
- list_splice_tail(&still_in_list,
- &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- return ret;
- }
-
- spin_lock(&xe->pinned.lock);
- }
- list_splice_tail(&still_in_list, &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
-
- /*
- * Wait for all user BO to be evicted as those evictions depend on the
- * memory moved below.
- */
- for_each_tile(tile, xe, id)
- xe_tile_migrate_wait(tile);
-
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
- spin_unlock(&xe->pinned.lock);
-
- ret = xe_bo_evict_pinned(bo);
- xe_bo_put(bo);
- if (ret)
- return ret;
-
- spin_lock(&xe->pinned.lock);
- }
- spin_unlock(&xe->pinned.lock);
+ /* Pinned user memory in VRAM and anything else that can be blitted */
+ ret = xe_evict_pinned(xe, false);
+ if (ret)
+ return ret;
- return 0;
+ /* Pinned kernel memory that must use memcpy only */
+ return xe_evict_pinned(xe, true);
}
/**
- * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ * xe_restore_pinned - restore pinned BOs to VRAM
*
* @xe: xe device
+ * @memcpy_only: skip objects that don't need memcpy and can use the blitter later
*
- * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * Move BOs from temporary (typically system) memory to VRAM. All
* moves done via TTM calls.
*
- * This function should be called early, before trying to init the GT, on device
- * resume.
+ * If this function is called early on device resume, before trying to init
+ * the GT, then @memcpy_only should be set.
*/
-int xe_bo_restore_kernel(struct xe_device *xe)
+static int xe_restore_pinned(struct xe_device *xe, bool memcpy_only)
{
+ struct xe_tile *tile;
+ LIST_HEAD(skipped);
struct xe_bo *bo;
- int ret;
+ u8 id;
+ int ret = 0;
spin_lock(&xe->pinned.lock);
for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.evicted,
- typeof(*bo), pinned_link);
+ bo = list_first_entry_or_null(&xe->pinned.evicted, typeof(*bo),
+ pinned_link);
if (!bo)
break;
+
+ if (memcpy_only && xe_bo_is_user(bo)) {
+ list_move_tail(&bo->pinned_link, &skipped);
+ continue;
+ }
+
xe_bo_get(bo);
- list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ list_move_tail(&bo->pinned_link, &xe->pinned.present);
spin_unlock(&xe->pinned.lock);
ret = xe_bo_restore_pinned(bo);
if (ret) {
+ spin_lock(&xe->pinned.lock);
+ list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
xe_bo_put(bo);
- return ret;
+ break;
}
if (bo->flags & XE_BO_FLAG_GGTT) {
@@ -159,72 +163,50 @@ int xe_bo_restore_kernel(struct xe_device *xe)
}
}
- /*
- * We expect validate to trigger a move VRAM and our move code
- * should setup the iosys map.
- */
- xe_assert(xe, !iosys_map_is_null(&bo->vmap));
-
xe_bo_put(bo);
spin_lock(&xe->pinned.lock);
}
+
+ list_splice_tail(&skipped, &xe->pinned.evicted);
spin_unlock(&xe->pinned.lock);
- return 0;
+ if (!memcpy_only) {
+ for_each_tile(tile, xe, id)
+ xe_tile_migrate_wait(tile);
+ }
+
+ return ret;
}
/**
- * xe_bo_restore_user - restore pinned user BOs to VRAM
+ * xe_bo_restore_pinned_early - restore kernel BOs to VRAM during early resume,
+ * before we have working migration/GuC, using memcpy
*
* @xe: xe device
*
- * Move pinned user BOs from temporary (typically system) memory to VRAM via
- * CPU. All moves done via TTM calls.
+ * Copy kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls.
*
- * This function should be called late, after GT init, on device resume.
+ * This function should be called early, before trying to init the GT, on device
+ * resume.
*/
-int xe_bo_restore_user(struct xe_device *xe)
+int xe_bo_restore_pinned_early(struct xe_device *xe)
{
- struct xe_bo *bo;
- struct xe_tile *tile;
- struct list_head still_in_list;
- u8 id;
- int ret;
-
- if (!IS_DGFX(xe))
- return 0;
-
- /* Pinned user memory in VRAM should be validated on resume */
- INIT_LIST_HEAD(&still_in_list);
- spin_lock(&xe->pinned.lock);
- for (;;) {
- bo = list_first_entry_or_null(&xe->pinned.external_vram,
- typeof(*bo), pinned_link);
- if (!bo)
- break;
- list_move_tail(&bo->pinned_link, &still_in_list);
- xe_bo_get(bo);
- spin_unlock(&xe->pinned.lock);
-
- ret = xe_bo_restore_pinned(bo);
- xe_bo_put(bo);
- if (ret) {
- spin_lock(&xe->pinned.lock);
- list_splice_tail(&still_in_list,
- &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
- return ret;
- }
-
- spin_lock(&xe->pinned.lock);
- }
- list_splice_tail(&still_in_list, &xe->pinned.external_vram);
- spin_unlock(&xe->pinned.lock);
-
- /* Wait for restore to complete */
- for_each_tile(tile, xe, id)
- xe_tile_migrate_wait(tile);
+ return xe_restore_pinned(xe, true);
+}
- return 0;
+/**
+ * xe_bo_restore_pinned_late - restore pinned BOs to VRAM using GPU copy
+ *
+ * @xe: xe device
+ *
+ * Copy pinned BOs from temporary (typically system) memory to VRAM via the
+ * GPU blitter. All moves done via TTM calls.
+ *
+ * This function should be called late, once submission is working.
+ */
+int xe_bo_restore_pinned_late(struct xe_device *xe)
+{
+ return xe_restore_pinned(xe, false);
}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
index 746894798852..094f69e69cdb 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.h
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -9,7 +9,7 @@
struct xe_device;
int xe_bo_evict_all(struct xe_device *xe);
-int xe_bo_restore_kernel(struct xe_device *xe);
-int xe_bo_restore_user(struct xe_device *xe);
+int xe_bo_restore_pinned_early(struct xe_device *xe);
+int xe_bo_restore_pinned_late(struct xe_device *xe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index bf36e4fb4679..14b691753ac8 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -349,8 +349,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
}
spin_lock_init(&xe->pinned.lock);
- INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
- INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.present);
INIT_LIST_HEAD(&xe->pinned.evicted);
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 8a7b15972413..aacf2eb37ac3 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -405,12 +405,10 @@ struct xe_device {
struct {
/** @pinned.lock: protected pinned BO list state */
spinlock_t lock;
- /** @pinned.kernel_bo_present: pinned kernel BO that are present */
- struct list_head kernel_bo_present;
+ /** @pinned.present: pinned BO that are present */
+ struct list_head present;
/** @pinned.evicted: pinned BO that have been evicted */
struct list_head evicted;
- /** @pinned.external_vram: pinned external BO in vram*/
- struct list_head external_vram;
} pinned;
/** @ufence_wq: user fence wait queue */
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index a6761cb769b2..5a3c3436c491 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -179,7 +179,7 @@ int xe_pm_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory required for the
* GT(s) to resume.
*/
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_pinned_early(xe);
if (err)
goto err;
@@ -188,12 +188,12 @@ int xe_pm_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
- xe_display_pm_resume(xe);
-
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_pinned_late(xe);
if (err)
goto err;
+ xe_display_pm_resume(xe);
+
drm_dbg(&xe->drm, "Device resumed\n");
return 0;
err:
@@ -453,7 +453,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory
* required for the GT(s) to resume.
*/
- err = xe_bo_restore_kernel(xe);
+ err = xe_bo_restore_pinned_early(xe);
if (err)
goto out;
}
@@ -463,14 +463,14 @@ int xe_pm_runtime_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
- xe_display_pm_runtime_resume(xe);
-
if (xe->d3cold.allowed) {
- err = xe_bo_restore_user(xe);
+ err = xe_bo_restore_pinned_late(xe);
if (err)
goto out;
}
+ xe_display_pm_runtime_resume(xe);
+
out:
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
--
2.47.1