[PATCH] drm/amdgpu: Remove bo_create_kernel_at path from virt page

Christian König christian.koenig at amd.com
Wed Mar 13 09:03:28 UTC 2024


Am 12.03.24 um 18:50 schrieb Victor Skvortsov:
> Use amdgpu_vram_mgr to reserve bad page ranges.
> Reserved ranges will be freed by amdgpu_vram_mgr_fini().
> Delete the amdgpu_bo_create_kernel_at() path as it is redundant.
>
> Suggested-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Victor Skvortsov <victor.skvortsov at amd.com>

Acked-by: Christian König <christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++----------------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 -
>   2 files changed, 3 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 7a4eae36778a..2a20714b9c16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -244,7 +244,6 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
>   	 */
>   	unsigned int align_space = 512;
>   	void *bps = NULL;
> -	struct amdgpu_bo **bps_bo = NULL;
>   
>   	*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
>   	if (!*data)
> @@ -254,12 +253,7 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
>   	if (!bps)
>   		goto bps_failure;
>   
> -	bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
> -	if (!bps_bo)
> -		goto bps_bo_failure;
> -
>   	(*data)->bps = bps;
> -	(*data)->bps_bo = bps_bo;
>   	(*data)->count = 0;
>   	(*data)->last_reserved = 0;
>   
> @@ -267,34 +261,12 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
>   
>   	return 0;
>   
> -bps_bo_failure:
> -	kfree(bps);
>   bps_failure:
>   	kfree(*data);
>   data_failure:
>   	return -ENOMEM;
>   }
>   
> -static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
> -{
> -	struct amdgpu_virt *virt = &adev->virt;
> -	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
> -	struct amdgpu_bo *bo;
> -	int i;
> -
> -	if (!data)
> -		return;
> -
> -	for (i = data->last_reserved - 1; i >= 0; i--) {
> -		bo = data->bps_bo[i];
> -		if (bo) {
> -			amdgpu_bo_free_kernel(&bo, NULL, NULL);
> -			data->bps_bo[i] = bo;
> -		}
> -		data->last_reserved = i;
> -	}
> -}
> -
>   void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
>   {
>   	struct amdgpu_virt *virt = &adev->virt;
> @@ -305,10 +277,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
>   	if (!data)
>   		return;
>   
> -	amdgpu_virt_ras_release_bp(adev);
> -
>   	kfree(data->bps);
> -	kfree(data->bps_bo);
>   	kfree(data);
>   	virt->virt_eh_data = NULL;
>   }
> @@ -330,9 +299,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
>   {
>   	struct amdgpu_virt *virt = &adev->virt;
>   	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
> -	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
> -	struct ttm_resource_manager *man = &mgr->manager;
> -	struct amdgpu_bo *bo = NULL;
>   	uint64_t bp;
>   	int i;
>   
> @@ -341,26 +307,11 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
>   
>   	for (i = data->last_reserved; i < data->count; i++) {
>   		bp = data->bps[i].retired_page;
> +		if (amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
> +			bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE))
> +			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
>   
> -		/* There are two cases of reserve error should be ignored:
> -		 * 1) a ras bad page has been allocated (used by someone);
> -		 * 2) a ras bad page has been reserved (duplicate error injection
> -		 *    for one page);
> -		 */
> -		if  (ttm_resource_manager_used(man)) {
> -			amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
> -				bp << AMDGPU_GPU_PAGE_SHIFT,
> -				AMDGPU_GPU_PAGE_SIZE);
> -			data->bps_bo[i] = NULL;
> -		} else {
> -			if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> -							AMDGPU_GPU_PAGE_SIZE,
> -							&bo, NULL))
> -				DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> -			data->bps_bo[i] = bo;
> -		}
>   		data->last_reserved = i + 1;
> -		bo = NULL;
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 3f59b7b5523f..15599951e7b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -224,8 +224,6 @@ struct amdgim_vf2pf_info_v2 {
>   struct amdgpu_virt_ras_err_handler_data {
>   	/* point to bad page records array */
>   	struct eeprom_table_record *bps;
> -	/* point to reserved bo array */
> -	struct amdgpu_bo **bps_bo;
>   	/* the count of entries */
>   	int count;
>   	/* last reserved entry's index + 1 */



More information about the amd-gfx mailing list