[PATCH 1/2] drm/xe: Avoid the OOM killer on buffer object memory allocation

Matthew Brost matthew.brost at intel.com
Thu Oct 17 14:55:16 UTC 2024


On Thu, Oct 17, 2024 at 04:44:06PM +0200, Thomas Hellström wrote:
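> Rather than invoking the OOM killer on buffer object memory
> allocations and validations, have the allocations fail and
> pass the error to user-space if applicable. This is done by
> setting gfp_retry_mayfail in the ttm_operation_ctx used on the
> eviction, creation, validation and migration paths; the
> restore-pinned and swap-notify paths explicitly keep it false.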
> 
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>

Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2701
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index c74c121ea7bb..d060f4f019a2 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -876,6 +876,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
>  	};
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = false,
> +		.gfp_retry_mayfail = true,
>  	};
>  	struct ttm_resource *new_mem;
>  	int ret;
> @@ -937,6 +938,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
>  {
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = false,
> +		.gfp_retry_mayfail = false,
>  	};
>  	struct ttm_resource *new_mem;
>  	int ret;
> @@ -1099,7 +1101,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati
>  static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
>  {
>  	struct ttm_operation_ctx ctx = {
> -		.interruptible = false
> +		.interruptible = false,
> +		.gfp_retry_mayfail = false,
>  	};
>  
>  	if (ttm_bo->ttm) {
> @@ -1294,6 +1297,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = true,
>  		.no_wait_gpu = false,
> +		.gfp_retry_mayfail = true,
>  	};
>  	struct ttm_placement *placement;
>  	uint32_t alignment;
> @@ -1880,6 +1884,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = true,
>  		.no_wait_gpu = false,
> +		.gfp_retry_mayfail = true,
>  	};
>  
>  	if (vm) {
> @@ -2223,6 +2228,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = true,
>  		.no_wait_gpu = false,
> +		.gfp_retry_mayfail = true,
>  	};
>  	struct ttm_placement placement;
>  	struct ttm_place requested;
> @@ -2273,6 +2279,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
>  		.interruptible = false,
>  		.no_wait_gpu = false,
>  		.force_alloc = force_alloc,
> +		.gfp_retry_mayfail = true,
>  	};
>  	struct ttm_placement placement;
>  	int ret;
> -- 
> 2.46.0
> 


More information about the Intel-xe mailing list