[PATCH 3/3] drm/xe: Add XE_BO_FLAG_PINNED_NEED_LOAD

Matthew Brost matthew.brost at intel.com
Tue Oct 29 00:32:24 UTC 2024


Not all pinned BOs need a memcpy to be restored; rather, only those which
are required for resume to complete do. Add XE_BO_FLAG_PINNED_NEED_LOAD,
which indicates a BO needs to be restored via a memcpy prior to loading the
GuC. This should speed up resume / d3cold exit slightly, as the GPU can be
used to copy some of the pinned BOs.

Marking most kernel BOs and migration LRC with
XE_BO_FLAG_PINNED_NEED_LOAD to be safe. This could be trimmed down in
future.

Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c                 |  10 +-
 drivers/gpu/drm/xe/xe_bo.h                 |   1 +
 drivers/gpu/drm/xe/xe_bo_evict.c           | 117 ++++++++++++++++-----
 drivers/gpu/drm/xe/xe_ggtt.c               |   2 +-
 drivers/gpu/drm/xe/xe_gsc.c                |   1 +
 drivers/gpu/drm/xe/xe_gsc_proxy.c          |   1 +
 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c |   1 +
 drivers/gpu/drm/xe/xe_huc.c                |   1 +
 drivers/gpu/drm/xe/xe_hw_engine.c          |   1 +
 drivers/gpu/drm/xe/xe_lmtt.c               |   1 +
 drivers/gpu/drm/xe/xe_lrc.c                |   3 +
 drivers/gpu/drm/xe/xe_memirq.c             |   1 +
 drivers/gpu/drm/xe/xe_migrate.c            |   1 +
 drivers/gpu/drm/xe/xe_uc_fw.c              |   1 +
 14 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 6c8fd5ced2a2..4c48e7ef1e1f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -773,15 +773,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 	if (bo->flags & XE_BO_FLAG_PINNED_WONTNEED) {
 		ttm_bo_move_null(&bo->ttm, new_mem);
-	} else if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+	} else if (xe_bo_is_pinned(bo) && bo->flags & XE_BO_FLAG_PINNED_NEED_LOAD) {
 		/*
 		 * Kernel memory that is pinned should only be moved on suspend
 		 * / resume, some of the pinned memory is required for the
 		 * device to resume / use the GPU to move other evicted memory
-		 * (user memory) around. This likely could be optimized a bit
-		 * futher where we find the minimum set of pinned memory
-		 * required for resume but for simplity doing a memcpy for all
-		 * pinned memory.
+		 * (user memory, pinned kernel not required for load) around.
 		 */
 		ret = xe_bo_vmap(bo);
 		if (!ret) {
@@ -1706,7 +1703,8 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
 
 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
-				      XE_BO_FLAG_PINNED_WONTNEED);
+				      XE_BO_FLAG_PINNED_WONTNEED |
+				      XE_BO_FLAG_PINNED_NEED_LOAD);
 
 	xe_assert(xe, IS_DGFX(xe));
 	xe_assert(xe, !(*src)->vmap.is_iomem);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 854ab8624d7a..cbda820c9d79 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -40,6 +40,7 @@
 #define XE_BO_FLAG_NEEDS_2M		BIT(16)
 #define XE_BO_FLAG_GGTT_INVALIDATE	BIT(17)
 #define XE_BO_FLAG_PINNED_WONTNEED	BIT(18)
+#define XE_BO_FLAG_PINNED_NEED_LOAD	BIT(19)
 /* this one is trigger internally only */
 #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
 #define XE_BO_FLAG_INTERNAL_64K		BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 32043e1e5a86..049868610e0b 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -78,6 +78,37 @@ int xe_bo_evict_all(struct xe_device *xe)
 	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
 	spin_unlock(&xe->pinned.lock);
 
+	/* Kernel memory with GPU copy */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+
+		if (bo->flags & (XE_BO_FLAG_PINNED_NEED_LOAD |
+				 XE_BO_FLAG_PINNED_WONTNEED)) {
+			list_move_tail(&bo->pinned_link, &still_in_list);
+			continue;
+		}
+
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_evict_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret)
+			return ret;
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.kernel_bo_present);
+	spin_unlock(&xe->pinned.lock);
+
 	/*
 	 * Wait for all user BO to be evicted as those evictions depend on the
 	 * memory moved below.
@@ -109,6 +140,43 @@ int xe_bo_evict_all(struct xe_device *xe)
 	return 0;
 }
 
+static int do_restore_kernel(struct xe_device *xe, struct xe_bo *bo)
+{
+	int ret;
+
+	xe_bo_get(bo);
+	list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+	spin_unlock(&xe->pinned.lock);
+
+	xe_bo_lock(bo, false);
+	ret = xe_bo_restore_pinned(bo);
+	xe_bo_unlock(bo);
+	if (ret) {
+		xe_bo_put(bo);
+		return ret;
+	}
+
+	if (bo->flags & XE_BO_FLAG_GGTT) {
+		struct xe_tile *tile = bo->tile;
+
+		mutex_lock(&tile->mem.ggtt->lock);
+		xe_ggtt_map_bo(tile->mem.ggtt, bo);
+		mutex_unlock(&tile->mem.ggtt->lock);
+	}
+
+	/*
+	 * We expect validate to trigger a move VRAM and our move code
+	 * should setup the iosys map.
+	 */
+	xe_assert(xe, !iosys_map_is_null(&bo->vmap));
+
+	xe_bo_put(bo);
+
+	spin_lock(&xe->pinned.lock);
+
+	return 0;
+}
+
 /**
  * xe_bo_restore_kernel - restore kernel BOs to VRAM
  *
@@ -122,48 +190,32 @@ int xe_bo_evict_all(struct xe_device *xe)
  */
 int xe_bo_restore_kernel(struct xe_device *xe)
 {
+	struct list_head still_in_list;
 	struct xe_bo *bo;
 	int ret;
 
 	if (!IS_DGFX(xe))
 		return 0;
 
+	INIT_LIST_HEAD(&still_in_list);
 	spin_lock(&xe->pinned.lock);
 	for (;;) {
 		bo = list_first_entry_or_null(&xe->pinned.evicted,
 					      typeof(*bo), pinned_link);
 		if (!bo)
 			break;
-		xe_bo_get(bo);
-		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
-		spin_unlock(&xe->pinned.lock);
 
-		xe_bo_lock(bo, false);
-		ret = xe_bo_restore_pinned(bo);
-		xe_bo_unlock(bo);
-		if (ret) {
-			xe_bo_put(bo);
-			return ret;
+		if (!(bo->flags & (XE_BO_FLAG_PINNED_NEED_LOAD |
+				   XE_BO_FLAG_PINNED_WONTNEED))) {
+			list_move_tail(&bo->pinned_link, &still_in_list);
+			continue;
 		}
 
-		if (bo->flags & XE_BO_FLAG_GGTT) {
-			struct xe_tile *tile = bo->tile;
-
-			mutex_lock(&tile->mem.ggtt->lock);
-			xe_ggtt_map_bo(tile->mem.ggtt, bo);
-			mutex_unlock(&tile->mem.ggtt->lock);
-		}
-
-		/*
-		 * We expect validate to trigger a move VRAM and our move code
-		 * should setup the iosys map.
-		 */
-		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
-
-		xe_bo_put(bo);
-
-		spin_lock(&xe->pinned.lock);
+		ret = do_restore_kernel(xe, bo);
+		if (ret)
+			return ret;
 	}
+	list_splice_tail(&still_in_list, &xe->pinned.evicted);
 	spin_unlock(&xe->pinned.lock);
 
 	return 0;
@@ -190,9 +242,20 @@ int xe_bo_restore_user(struct xe_device *xe)
 	if (!IS_DGFX(xe))
 		return 0;
 
-	/* Pinned user memory in VRAM should be validated on resume */
 	INIT_LIST_HEAD(&still_in_list);
 	spin_lock(&xe->pinned.lock);
+	/* Pinned kernel memory with GPU copy */
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.evicted,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+
+		ret = do_restore_kernel(xe, bo);
+		if (ret)
+			return ret;
+	}
+	/* Pinned user memory in VRAM should be validated on resume */
 	for (;;) {
 		bo = list_first_entry_or_null(&xe->pinned.external_vram,
 					      typeof(*bo), pinned_link);
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0124ad120c04..d42dbda983c1 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -357,7 +357,7 @@ void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate)
 int xe_ggtt_init(struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
-	unsigned int flags;
+	unsigned int flags = XE_BO_FLAG_PINNED_NEED_LOAD;
 	int err;
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 1eb791ddc375..bbb68575a207 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -474,6 +474,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
 		return -ENODEV;
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M,
+					  XE_BO_FLAG_PINNED_NEED_LOAD |
 					  XE_BO_FLAG_STOLEN |
 					  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo))
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index fc64b45d324b..df271f9b657d 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -384,6 +384,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
 		return -ENOMEM;
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE,
+					  XE_BO_FLAG_PINNED_NEED_LOAD |
 					  XE_BO_FLAG_SYSTEM |
 					  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo))
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 062a0c2fd2cd..75b225cbddf8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1405,6 +1405,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
 	bo = xe_bo_create_pin_map(xe, tile, NULL,
 				  ALIGN(size, PAGE_SIZE),
 				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_PINNED_NEED_LOAD |
 				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				  XE_BO_FLAG_NEEDS_2M |
 				  XE_BO_FLAG_PINNED);
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 6a846e4cb221..1008626b517e 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -53,6 +53,7 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
 	/* we use a single object for both input and output */
 	bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt),
 					  PXP43_HUC_AUTH_INOUT_SIZE * 2,
+					  XE_BO_FLAG_PINNED_NEED_LOAD |
 					  XE_BO_FLAG_SYSTEM |
 					  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo))
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1557acee3523..46e4342cc9cd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -577,6 +577,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_sr_apply_whitelist(hwe);
 
 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
+						 XE_BO_FLAG_PINNED_NEED_LOAD |
 						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 						 XE_BO_FLAG_GGTT |
 						 XE_BO_FLAG_GGTT_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index a60ceae4c6dd..c3e76ddf24be 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -70,6 +70,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
 				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
 					     lmtt->ops->lmtt_pte_num(level)),
 				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_PINNED_NEED_LOAD |
 				  XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
 				  XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED);
 	if (IS_ERR(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 4f64c7f4e68d..fec5aa700ce1 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -884,6 +884,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	void *init_data = NULL;
 	u32 arb_enable;
 	u32 lrc_size;
+	unsigned long restore_flags = vm && vm->flags & XE_VM_FLAG_MIGRATION ?
+		XE_BO_FLAG_PINNED_NEED_LOAD : 0;
 	int err;
 
 	kref_init(&lrc->refcount);
@@ -898,6 +900,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	 */
 	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
 				       ttm_bo_type_kernel,
+				       restore_flags |
 				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				       XE_BO_FLAG_GGTT |
 				       XE_BO_FLAG_GGTT_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index f833da88150a..6e63a7c4206e 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -187,6 +187,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
 	/* XXX: convert to managed bo */
 	bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
 				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_PINNED_NEED_LOAD |
 				  XE_BO_FLAG_SYSTEM |
 				  XE_BO_FLAG_GGTT |
 				  XE_BO_FLAG_GGTT_INVALIDATE |
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index cfd31ae49cc1..cf8ad0c47dfb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -208,6 +208,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
 				  num_entries * XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_PINNED_NEED_LOAD |
 				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				  XE_BO_FLAG_PINNED);
 	if (IS_ERR(bo))
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index b0deb082ab2b..984b0e0a6241 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -793,6 +793,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 		return 0;
 
 	err = uc_fw_copy(uc_fw, fw->data, fw->size,
+			 XE_BO_FLAG_PINNED_NEED_LOAD |
 			 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT |
 			 XE_BO_FLAG_GGTT_INVALIDATE);
 
-- 
2.34.1



More information about the Intel-xe mailing list