[Intel-xe] [PATCH] drm/xe: fix suspend-resume for dgfx

Rodrigo Vivi rodrigo.vivi at intel.com
Fri Apr 7 11:26:52 UTC 2023


On Thu, Apr 06, 2023 at 04:18:45PM +0100, Matthew Auld wrote:
> This stopped working now that TTM treats moving a pinned object through
> ttm_bo_validate() as an error, for the general case. Add some new
> routines to handle the new special casing needed for suspend-resume.
> 
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/244
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

Neat!

Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
Tested-by: Rodrigo Vivi <rodrigo.vivi at intel.com>

rdvivi at DG2 igt-gpu-tools$ sudo ./build/tests/xe_pm --r s2idle-exec-after
IGT-Version: 1.27.1-g41be8b4ab (x86_64) (Linux: 6.3.0-rc4+ x86_64)
Starting subtest: s2idle-exec-after
[cmd] rtcwake: wakeup from "freeze" using /dev/rtc0 at Fri Apr  7 07:22:10 2023
Subtest s2idle-exec-after: SUCCESS (3.327s)

rdvivi at DG2 igt-gpu-tools$ sudo ./build/tests/xe_pm --r s3-exec-after
IGT-Version: 1.27.1-g41be8b4ab (x86_64) (Linux: 6.3.0-rc4+ x86_64)
Starting subtest: s3-exec-after
[cmd] rtcwake: wakeup from "mem" using /dev/rtc0 at Fri Apr  7 07:22:45 2023
Subtest s3-exec-after: SUCCESS (2.417s)

> ---
>  drivers/gpu/drm/xe/xe_bo.c       | 129 +++++++++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_bo.h       |   3 +
>  drivers/gpu/drm/xe/xe_bo_evict.c |   8 +-
>  3 files changed, 136 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 5460e6fe3c1f..002250209116 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -696,6 +696,135 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>  
>  }
>  
> +/**
> + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
> + * @bo: The buffer object to move.
> + *
> + * On successful completion, the object memory will be moved to system memory.
> + * This function blocks until the object has been fully moved.
> + *
> + * This is needed for special handling of pinned VRAM objects during
> + * suspend-resume.
> + *
> + * Return: 0 on success. Negative error code on failure.
> + */
> +int xe_bo_evict_pinned(struct xe_bo *bo)
> +{
> +	struct ttm_place place = {
> +		.mem_type = XE_PL_TT,
> +	};
> +	struct ttm_placement placement = {
> +		.placement = &place,
> +		.num_placement = 1,
> +	};
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = false,
> +	};
> +	struct ttm_resource *new_mem;
> +	int ret;
> +
> +	xe_bo_assert_held(bo);
> +
> +	if (WARN_ON(!bo->ttm.resource))
> +		return -EINVAL;
> +
> +	if (WARN_ON(!xe_bo_is_pinned(bo)))
> +		return -EINVAL;
> +
> +	if (WARN_ON(!xe_bo_is_vram(bo)))
> +		return -EINVAL;
> +
> +	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
> +	if (ret)
> +		return ret;
> +
> +	if (!bo->ttm.ttm) {
> +		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
> +		if (!bo->ttm.ttm) {
> +			ret = -ENOMEM;
> +			goto err_res_free;
> +		}
> +	}
> +
> +	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
> +	if (ret)
> +		goto err_res_free;
> +
> +	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> +	if (ret)
> +		goto err_res_free;
> +
> +	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> +	if (ret)
> +		goto err_res_free;
> +
> +	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
> +			      false, MAX_SCHEDULE_TIMEOUT);
> +
> +	return 0;
> +
> +err_res_free:
> +	ttm_resource_free(&bo->ttm, &new_mem);
> +	return ret;
> +}
> +
> +/**
> + * xe_bo_restore_pinned() - Restore a pinned VRAM object
> + * @bo: The buffer object to move.
> + *
> + * On successful completion, the object memory will be moved back to VRAM.
> + * This function blocks until the object has been fully moved.
> + *
> + * This is needed for special handling of pinned VRAM objects during
> + * suspend-resume.
> + *
> + * Return: 0 on success. Negative error code on failure.
> + */
> +int xe_bo_restore_pinned(struct xe_bo *bo)
> +{
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = false,
> +	};
> +	struct ttm_resource *new_mem;
> +	int ret;
> +
> +	xe_bo_assert_held(bo);
> +
> +	if (WARN_ON(!bo->ttm.resource))
> +		return -EINVAL;
> +
> +	if (WARN_ON(!xe_bo_is_pinned(bo)))
> +		return -EINVAL;
> +
> +	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
> +		return -EINVAL;
> +
> +	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
> +	if (ret)
> +		return ret;
> +
> +	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
> +	if (ret)
> +		goto err_res_free;
> +
> +	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> +	if (ret)
> +		goto err_res_free;
> +
> +	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> +	if (ret)
> +		goto err_res_free;
> +
> +	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
> +			      false, MAX_SCHEDULE_TIMEOUT);
> +
> +	return 0;
> +
> +err_res_free:
> +	ttm_resource_free(&bo->ttm, &new_mem);
> +	return ret;
> +}
> +
>  static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
>  				       unsigned long page_offset)
>  {
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 8f5a7ad10d09..3a6c6852be86 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -230,6 +230,9 @@ bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
>  int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
>  int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
>  
> +int xe_bo_evict_pinned(struct xe_bo *bo);
> +int xe_bo_restore_pinned(struct xe_bo *bo);
> +
>  extern struct ttm_device_funcs xe_ttm_funcs;
>  
>  int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
> index bbf89a58cdf5..6642c5f52009 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.c
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
> @@ -63,7 +63,7 @@ int xe_bo_evict_all(struct xe_device *xe)
>  		spin_unlock(&xe->pinned.lock);
>  
>  		xe_bo_lock(bo, &ww, 0, false);
> -		ret = xe_bo_evict(bo, true);
> +		ret = xe_bo_evict_pinned(bo);
>  		xe_bo_unlock(bo, &ww);
>  		xe_bo_put(bo);
>  		if (ret) {
> @@ -97,7 +97,7 @@ int xe_bo_evict_all(struct xe_device *xe)
>  		spin_unlock(&xe->pinned.lock);
>  
>  		xe_bo_lock(bo, &ww, 0, false);
> -		ret = xe_bo_evict(bo, true);
> +		ret = xe_bo_evict_pinned(bo);
>  		xe_bo_unlock(bo, &ww);
>  		xe_bo_put(bo);
>  		if (ret)
> @@ -141,7 +141,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
>  		spin_unlock(&xe->pinned.lock);
>  
>  		xe_bo_lock(bo, &ww, 0, false);
> -		ret = xe_bo_validate(bo, NULL, false);
> +		ret = xe_bo_restore_pinned(bo);
>  		xe_bo_unlock(bo, &ww);
>  		if (ret) {
>  			xe_bo_put(bo);
> @@ -205,7 +205,7 @@ int xe_bo_restore_user(struct xe_device *xe)
>  		spin_unlock(&xe->pinned.lock);
>  
>  		xe_bo_lock(bo, &ww, 0, false);
> -		ret = xe_bo_validate(bo, NULL, false);
> +		ret = xe_bo_restore_pinned(bo);
>  		xe_bo_unlock(bo, &ww);
>  		xe_bo_put(bo);
>  		if (ret) {
> -- 
> 2.39.2
> 


More information about the Intel-xe mailing list