[Intel-xe] [PATCH] drm/xe: fix suspend-resume for dgfx

Matthew Auld matthew.auld at intel.com
Thu Apr 6 15:18:45 UTC 2023


This stopped working now that TTM, in the general case, treats moving a
pinned object through ttm_bo_validate() as an error. Add some new
routines to handle the special casing needed for suspend-resume, where
pinned VRAM objects must be moved to system memory and back without
being unpinned.
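
For reference, the TTM-side check that now rejects this looks roughly
like the following (paraphrased from ttm_bo_validate(); it is not
touched by this patch):

	/* Moving of pinned BOs is forbidden */
	if (bo->pin_count)
		return -EINVAL;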

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/244
Signed-off-by: Matthew Auld <matthew.auld at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
---
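
A rough sketch of how the new helpers (documented below in xe_bo.c) are
meant to be driven by the suspend-resume paths in xe_bo_evict.c. The
wrapper functions here are purely illustrative and do not exist in the
driver; error handling and the pinned-list iteration are trimmed, see
the real loops in the diff:

	/* Suspend: move one pinned VRAM object out to system memory. */
	static int suspend_one_pinned(struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int ret;

		xe_bo_lock(bo, &ww, 0, false);
		ret = xe_bo_evict_pinned(bo); /* blocks until the move completes */
		xe_bo_unlock(bo, &ww);

		return ret;
	}

	/* Resume: move the object back to its original VRAM placement. */
	static int resume_one_pinned(struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int ret;

		xe_bo_lock(bo, &ww, 0, false);
		ret = xe_bo_restore_pinned(bo); /* likewise blocking */
		xe_bo_unlock(bo, &ww);

		return ret;
	}
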
 drivers/gpu/drm/xe/xe_bo.c       | 129 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_bo.h       |   3 +
 drivers/gpu/drm/xe/xe_bo_evict.c |   8 +-
 3 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5460e6fe3c1f..002250209116 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -696,6 +696,135 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 }
 
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for the special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+	struct ttm_place place = {
+		.mem_type = XE_PL_TT,
+	};
+	struct ttm_placement placement = {
+		.placement = &place,
+		.num_placement = 1,
+	};
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_vram(bo)))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	if (!bo->ttm.ttm) {
+		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
+		if (!bo->ttm.ttm) {
+			ret = -ENOMEM;
+			goto err_res_free;
+		}
+	}
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+/**
+ * xe_bo_restore_pinned() - Restore a pinned VRAM object
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved back to VRAM.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed for the special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_restore_pinned(struct xe_bo *bo)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 				       unsigned long page_offset)
 {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8f5a7ad10d09..3a6c6852be86 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -230,6 +230,9 @@ bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
 int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
 
+int xe_bo_evict_pinned(struct xe_bo *bo);
+int xe_bo_restore_pinned(struct xe_bo *bo);
+
 extern struct ttm_device_funcs xe_ttm_funcs;
 
 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index bbf89a58cdf5..6642c5f52009 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -63,7 +63,7 @@ int xe_bo_evict_all(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_evict(bo, true);
+		ret = xe_bo_evict_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret) {
@@ -97,7 +97,7 @@ int xe_bo_evict_all(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_evict(bo, true);
+		ret = xe_bo_evict_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret)
@@ -141,7 +141,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_validate(bo, NULL, false);
+		ret = xe_bo_restore_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		if (ret) {
 			xe_bo_put(bo);
@@ -205,7 +205,7 @@ int xe_bo_restore_user(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_validate(bo, NULL, false);
+		ret = xe_bo_restore_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret) {
-- 
2.39.2


