[PATCH V2] drm/amdgpu: move GTT allocation from gmc_sw_init to gmc_hw_init(V2)

Christian König ckoenig.leichtzumerken at gmail.com
Thu Jan 27 09:12:28 UTC 2022


Am 27.01.22 um 08:52 schrieb Aaron Liu:
> The below patch causes system hang for harvested ASICs.
> d015e9861e55 drm/amdgpu: improve debug VRAM access performance using sdma
>
> The root cause is that GTT buffer should be allocated after GC SA harvest
> programming completed.

That doesn't make much sense. We intentionally modified the code so that 
this is possible.

What is the rational that we need to program the HW before allocating 
system memory?

Regards,
Christian.

>
> For harvested AISC, the GC SA harvest process(see utcl2_harvest) is
> programmed in gmc_v10_0_hw_init function. This is a hardware programming.
> Therefore should be located in hw init. Hence need to move GTT allocation
> from gmc_v*_0_sw_init to gmc_v*_0_hw_init.
>
> V2: expand to all gmc_v*_0_hw_init functions.
>
> Signed-off-by: Aaron Liu <aaron.liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 +++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  3 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  8 --------
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  4 +++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   | 13 +++++++++----
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 13 +++++++++----
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 13 +++++++++----
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 13 +++++++++----
>   8 files changed, 63 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index d426de48d299..ac75bde8ac61 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -876,3 +876,24 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
>   
>   	return 0;
>   }
> +
> +int amdgpu_gmc_allocate_sdma_access_gtt(struct amdgpu_device *adev)
> +{
> +	int ret;
> +
> +	ret = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> +					AMDGPU_GEM_DOMAIN_GTT,
> +					&adev->mman.sdma_access_bo, NULL,
> +					&adev->mman.sdma_access_ptr);
> +
> +	if (ret)
> +		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
> +
> +	return ret;
> +}
> +
> +void amdgpu_gmc_free_sdma_access_gtt(struct amdgpu_device *adev)
> +{
> +	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
> +					&adev->mman.sdma_access_ptr);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 93505bb0a36c..b8ba16de5e1c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -338,4 +338,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
>   uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
>   uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
>   int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
> +int amdgpu_gmc_allocate_sdma_access_gtt(struct amdgpu_device *adev);
> +void amdgpu_gmc_free_sdma_access_gtt(struct amdgpu_device *adev);
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3d8a20956b74..7ce0478b2908 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1823,12 +1823,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   		return r;
>   	}
>   
> -	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> -				AMDGPU_GEM_DOMAIN_GTT,
> -				&adev->mman.sdma_access_bo, NULL,
> -				&adev->mman.sdma_access_ptr))
> -		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
> -
>   	return 0;
>   }
>   
> @@ -1850,8 +1844,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>   	if (adev->mman.stolen_reserved_size)
>   		amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
>   				      NULL, NULL);
> -	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
> -					&adev->mman.sdma_access_ptr);
>   	amdgpu_ttm_fw_reserve_vram_fini(adev);
>   
>   	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 73ab0eebe4e2..d923e4127c87 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -1062,7 +1062,7 @@ static int gmc_v10_0_hw_init(void *handle)
>   	if (adev->umc.funcs && adev->umc.funcs->init_registers)
>   		adev->umc.funcs->init_registers(adev);
>   
> -	return 0;
> +	return amdgpu_gmc_allocate_sdma_access_gtt(adev);
>   }
>   
>   /**
> @@ -1082,6 +1082,8 @@ static int gmc_v10_0_hw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_gmc_free_sdma_access_gtt(adev);
> +
>   	gmc_v10_0_gart_disable(adev);
>   
>   	if (amdgpu_sriov_vf(adev)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index ec291d28edff..ca397d4d4aa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -919,16 +919,21 @@ static int gmc_v6_0_hw_init(void *handle)
>   	if (r)
>   		return r;
>   
> -	if (amdgpu_emu_mode == 1)
> -		return amdgpu_gmc_vram_checking(adev);
> -	else
> -		return r;
> +	if (amdgpu_emu_mode == 1) {
> +		r = amdgpu_gmc_vram_checking(adev);
> +		if (r)
> +			return r;
> +	}
> +
> +	return amdgpu_gmc_allocate_sdma_access_gtt(adev);
>   }
>   
>   static int gmc_v6_0_hw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_gmc_free_sdma_access_gtt(adev);
> +
>   	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
>   	gmc_v6_0_gart_disable(adev);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 344d819b4c1b..11f87f8adae8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -1107,16 +1107,21 @@ static int gmc_v7_0_hw_init(void *handle)
>   	if (r)
>   		return r;
>   
> -	if (amdgpu_emu_mode == 1)
> -		return amdgpu_gmc_vram_checking(adev);
> -	else
> -		return r;
> +	if (amdgpu_emu_mode == 1) {
> +		r = amdgpu_gmc_vram_checking(adev);
> +		if (r)
> +			return r;
> +	}
> +
> +	return amdgpu_gmc_allocate_sdma_access_gtt(adev);
>   }
>   
>   static int gmc_v7_0_hw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_gmc_free_sdma_access_gtt(adev);
> +
>   	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
>   	gmc_v7_0_gart_disable(adev);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index ca9841d5669f..c90f9016d093 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -1238,16 +1238,21 @@ static int gmc_v8_0_hw_init(void *handle)
>   	if (r)
>   		return r;
>   
> -	if (amdgpu_emu_mode == 1)
> -		return amdgpu_gmc_vram_checking(adev);
> -	else
> -		return r;
> +	if (amdgpu_emu_mode == 1) {
> +		r = amdgpu_gmc_vram_checking(adev);
> +		if (r)
> +			return r;
> +	}
> +
> +	return amdgpu_gmc_allocate_sdma_access_gtt(adev);
>   }
>   
>   static int gmc_v8_0_hw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_gmc_free_sdma_access_gtt(adev);
> +
>   	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
>   	gmc_v8_0_gart_disable(adev);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 4595027a8c63..9b4b1d9a0769 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1852,10 +1852,13 @@ static int gmc_v9_0_hw_init(void *handle)
>   	if (r)
>   		return r;
>   
> -	if (amdgpu_emu_mode == 1)
> -		return amdgpu_gmc_vram_checking(adev);
> -	else
> -		return r;
> +	if (amdgpu_emu_mode == 1) {
> +		r = amdgpu_gmc_vram_checking(adev);
> +		if (r)
> +			return r;
> +	}
> +
> +	return amdgpu_gmc_allocate_sdma_access_gtt(adev);
>   }
>   
>   /**
> @@ -1875,6 +1878,8 @@ static int gmc_v9_0_hw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_gmc_free_sdma_access_gtt(adev);
> +
>   	gmc_v9_0_gart_disable(adev);
>   
>   	if (amdgpu_sriov_vf(adev)) {



More information about the amd-gfx mailing list