[PATCH v4 3/3] drm/amdgpu: recover gart table at resume

Nirmoy nirmodas at amd.com
Fri Oct 22 10:59:55 UTC 2021


On 10/22/21 12:54 PM, Nirmoy Das wrote:
> Get rid of pin/unpin of the gart BO at resume/suspend and
> instead pin only once and try to recover the gart content
> at resume time. This is much more stable in case there
> is an OOM situation at the 2nd call to amdgpu_device_evict_resources()
> while evicting the GART table.
>
> v4: return amdgpu_bo_create_kernel() directly without checking
> its return value.
> v3: remove gart recovery from other places
> v2: pin gart at amdgpu_gart_table_vram_alloc()
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   | 82 ++--------------------
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c     |  3 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c      |  3 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c      |  3 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      |  3 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  3 +-
>   7 files changed, 11 insertions(+), 97 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2b53d86aebac..f0c70e9d37fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3935,16 +3935,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
>   	if (!adev->in_s0ix)
>   		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
>
> -	/* First evict vram memory */
>   	amdgpu_device_evict_resources(adev);
>
>   	amdgpu_fence_driver_hw_fini(adev);
>
>   	amdgpu_device_ip_suspend_phase2(adev);
> -	/* This second call to evict device resources is to evict
> -	 * the gart page table using the CPU.
> -	 */
> -	amdgpu_device_evict_resources(adev);
>
>   	return 0;
>   }
> @@ -4286,8 +4281,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
>   		goto error;
>
>   	amdgpu_virt_init_data_exchange(adev);
> -	/* we need recover gart prior to run SMC/CP/SDMA resume */
> -	amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>
>   	r = amdgpu_device_fw_loading(adev);
>   	if (r)
> @@ -4604,10 +4597,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>   					amdgpu_inc_vram_lost(tmp_adev);
>   				}
>
> -				r = amdgpu_gtt_mgr_recover(&tmp_adev->mman.gtt_mgr);
> -				if (r)
> -					goto out;
> -
>   				r = amdgpu_device_fw_loading(tmp_adev);
>   				if (r)
>   					return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index d3e4203f6217..3525f87dc1af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -116,78 +116,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
>   {
>   	int r;


I forgot to remove this unused r. I have now resent this patch [1], but 
forgot to add the In-Reply-To header.


[1] https://patchwork.freedesktop.org/patch/460939/


Regards,

Nirmoy


> -	if (adev->gart.bo == NULL) {
> -		struct amdgpu_bo_param bp;
> -
> -		memset(&bp, 0, sizeof(bp));
> -		bp.size = adev->gart.table_size;
> -		bp.byte_align = PAGE_SIZE;
> -		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
> -		bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
> -			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
> -		bp.type = ttm_bo_type_kernel;
> -		bp.resv = NULL;
> -		bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> -
> -		r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
> -		if (r) {
> -			return r;
> -		}
> -	}
> -	return 0;
> -}
> -
> -/**
> - * amdgpu_gart_table_vram_pin - pin gart page table in vram
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Pin the GART page table in vram so it will not be moved
> - * by the memory manager (pcie r4xx, r5xx+).  These asics require the
> - * gart table to be in video memory.
> - * Returns 0 for success, error for failure.
> - */
> -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
> -{
> -	int r;
> -
> -	r = amdgpu_bo_reserve(adev->gart.bo, false);
> -	if (unlikely(r != 0))
> -		return r;
> -	r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
> -	if (r) {
> -		amdgpu_bo_unreserve(adev->gart.bo);
> -		return r;
> -	}
> -	r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
> -	if (r)
> -		amdgpu_bo_unpin(adev->gart.bo);
> -	amdgpu_bo_unreserve(adev->gart.bo);
> -	return r;
> -}
> -
> -/**
> - * amdgpu_gart_table_vram_unpin - unpin gart page table in vram
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Unpin the GART page table in vram (pcie r4xx, r5xx+).
> - * These asics require the gart table to be in video memory.
> - */
> -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
> -{
> -	int r;
> +	if (adev->gart.bo != NULL)
> +		return 0;
>
> -	if (adev->gart.bo == NULL) {
> -		return;
> -	}
> -	r = amdgpu_bo_reserve(adev->gart.bo, true);
> -	if (likely(r == 0)) {
> -		amdgpu_bo_kunmap(adev->gart.bo);
> -		amdgpu_bo_unpin(adev->gart.bo);
> -		amdgpu_bo_unreserve(adev->gart.bo);
> -		adev->gart.ptr = NULL;
> -	}
> +	return amdgpu_bo_create_kernel(adev,  adev->gart.table_size, PAGE_SIZE,
> +				       AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
> +				       NULL, (void *)&adev->gart.ptr);
>   }
>
>   /**
> @@ -201,11 +135,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
>    */
>   void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
>   {
> -	if (adev->gart.bo == NULL) {
> -		return;
> -	}
> -	amdgpu_bo_unref(&adev->gart.bo);
> -	adev->gart.ptr = NULL;
> +	amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
>   }
>
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3ec5ff5a6dbe..2440278a3b4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -992,7 +992,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
>   		return -EINVAL;
>   	}
>
> -	r = amdgpu_gart_table_vram_pin(adev);
> +	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>   	if (r)
>   		return r;
>
> @@ -1062,7 +1062,6 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
>   {
>   	adev->gfxhub.funcs->gart_disable(adev);
>   	adev->mmhub.funcs->gart_disable(adev);
> -	amdgpu_gart_table_vram_unpin(adev);
>   }
>
>   static int gmc_v10_0_hw_fini(void *handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index 0e81e03e9b49..3a8c9d4ee06a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -476,7 +476,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
>   		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>   		return -EINVAL;
>   	}
> -	r = amdgpu_gart_table_vram_pin(adev);
> +	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>   	if (r)
>   		return r;
>
> @@ -608,7 +608,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
>   	WREG32(mmVM_L2_CNTL3,
>   	       VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
>   	       (0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
> -	amdgpu_gart_table_vram_unpin(adev);
>   }
>
>   static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 0a50fdaced7e..ab8adbff9e2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -620,7 +620,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
>   		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>   		return -EINVAL;
>   	}
> -	r = amdgpu_gart_table_vram_pin(adev);
> +	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>   	if (r)
>   		return r;
>
> @@ -758,7 +758,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>   	WREG32(mmVM_L2_CNTL, tmp);
>   	WREG32(mmVM_L2_CNTL2, 0);
> -	amdgpu_gart_table_vram_unpin(adev);
>   }
>
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 492ebed2915b..cd3a4bd8fed9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -837,7 +837,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
>   		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
>   		return -EINVAL;
>   	}
> -	r = amdgpu_gart_table_vram_pin(adev);
> +	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>   	if (r)
>   		return r;
>
> @@ -992,7 +992,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
>   	WREG32(mmVM_L2_CNTL, tmp);
>   	WREG32(mmVM_L2_CNTL2, 0);
> -	amdgpu_gart_table_vram_unpin(adev);
>   }
>
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index cb82404df534..8732da11ad98 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1714,7 +1714,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
>   		return -EINVAL;
>   	}
>
> -	r = amdgpu_gart_table_vram_pin(adev);
> +	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
>   	if (r)
>   		return r;
>
> @@ -1793,7 +1793,6 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
>   {
>   	adev->gfxhub.funcs->gart_disable(adev);
>   	adev->mmhub.funcs->gart_disable(adev);
> -	amdgpu_gart_table_vram_unpin(adev);
>   }
>
>   static int gmc_v9_0_hw_fini(void *handle)
> --
> 2.32.0
>


More information about the amd-gfx mailing list