[PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()

Christian König christian.koenig at amd.com
Thu Jul 27 11:23:58 UTC 2023


Am 27.07.23 um 10:15 schrieb Lang Yu:
> On 07/27/ , Christian König wrote:
>> Am 27.07.23 um 09:56 schrieb Lang Yu:
>>> amdgpu_bo_create_kernel_at() is used to create a physical
>>> contiguous VRAM BO at the specific offset. It calls
>>> amdgpu_bo_create_reserved() to create a VRAM BO first,
>>> then frees its old memory and allocates new memory at
>>> the specific offset. But amdgpu_bo_create_reserved() would
>>> fail if requested VRAM BO size is too large(>128MB) on
>>> APU which usually has a small default VRAM size(512MB).
>>>
>>> That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
>>> is not natively supported by amdgpu_bo_create().
>>>
>>> The approach is using amdgpu_bo_create_reserved() to
>>> create a BO in CPU domain first, it will always succeed.
>>> Then use amdgpu_bo_pin_restricted() to move the BO to
>>> requested domain and location.
>> That won't work like that.
>>
>> The amdgpu_bo_create_kernel_at() function is used to take over specific
>> memory areas from the BIOS and *not* to create a large VRAM BO.
>   
>    Literally, it is creating a VRAM BO.

No, it doesn't.

amdgpu_bo_create_kernel_at() creates a BO for a predefined offset in VRAM.

E.g. the BIOS says to us you can find the table at offsets 0x12345678000 
in VRAM and during driver load we create a buffer object for this so 
that we can map or copy it away.

>
>> Allocating the initial dummy in the CPU domain is a good idea to avoid
>> overlap, but you are messing this up quite a bit here and will probably
>> break tons of stuff.
>    I don't see it breaks something.amdgpu_bo_create() doesn't support
>    DRM_BUDDY_RANGE_ALLOCATION. Actually it works, this approach is just using
>    DRM_BUDDY_RANGE_ALLOCATION to satify such allocation request.

The big difference between amdgpu_bo_create_kernel_at() and 
amdgpu_bo_create_kernel() is that the allocated VRAM is not initialized 
to zero.

E.g. we keep the original content the BIOS has placed there for us. With 
your modifications that content would be overwritten.

Regards,
Christian.

>
>    Reagrds,
>    Lang
>    
>> Regards,
>> Christian.
>>
>>
>>> Signed-off-by: Lang Yu <Lang.Yu at amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 ++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 15 +++++------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  2 +-
>>>    4 files changed, 21 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index ff73cc11d47e..331daa47a444 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>>     * @offset: offset of the BO
>>>     * @size: size of the BO
>>>     * @bo_ptr:  used to initialize BOs in structures
>>> + * @gpu_addr: GPU addr of the pinned BO
>>>     * @cpu_addr: optional CPU address mapping
>>>     *
>>>     * Creates a kernel BO at a specific offset in VRAM.
>>> @@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>>     */
>>>    int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
>>>    			       uint64_t offset, uint64_t size,
>>> -			       struct amdgpu_bo **bo_ptr, void **cpu_addr)
>>> +			       struct amdgpu_bo **bo_ptr,
>>> +			       u64 *gpu_addr, void **cpu_addr)
>>>    {
>>> -	struct ttm_operation_ctx ctx = { false, false };
>>> -	unsigned int i;
>>>    	int r;
>>>    	offset &= PAGE_MASK;
>>>    	size = ALIGN(size, PAGE_SIZE);
>>>    	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
>>> -				      AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
>>> -				      cpu_addr);
>>> +				      AMDGPU_GEM_DOMAIN_CPU,
>>> +				      bo_ptr, NULL, cpu_addr);
>>>    	if (r)
>>>    		return r;
>>>    	if ((*bo_ptr) == NULL)
>>>    		return 0;
>>> -	/*
>>> -	 * Remove the original mem node and create a new one at the request
>>> -	 * position.
>>> -	 */
>>> -	if (cpu_addr)
>>> -		amdgpu_bo_kunmap(*bo_ptr);
>>> -
>>> -	ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
>>> +	amdgpu_bo_unpin(*bo_ptr);
>>> -	for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
>>> -		(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
>>> -		(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
>>> -	}
>>> -	r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
>>> -			     &(*bo_ptr)->tbo.resource, &ctx);
>>> +	r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
>>> +				     offset, offset + size);
>>>    	if (r)
>>>    		goto error;
>>> +	if (gpu_addr)
>>> +		*gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
>>> +
>>>    	if (cpu_addr) {
>>>    		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
>>>    		if (r)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 5d3440d719e4..8f5b5664a1b6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>>    			    u64 *gpu_addr, void **cpu_addr);
>>>    int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
>>>    			       uint64_t offset, uint64_t size,
>>> -			       struct amdgpu_bo **bo_ptr, void **cpu_addr);
>>> +			       struct amdgpu_bo **bo_ptr,
>>> +			       u64 *gpu_addr, void **cpu_addr);
>>>    int amdgpu_bo_create_user(struct amdgpu_device *adev,
>>>    			  struct amdgpu_bo_param *bp,
>>>    			  struct amdgpu_bo_user **ubo_ptr);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 7c6dd3de1867..a210c243dac0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
>>>    					  adev->mman.fw_vram_usage_start_offset,
>>>    					  adev->mman.fw_vram_usage_size,
>>>    					  &adev->mman.fw_vram_usage_reserved_bo,
>>> -					  &adev->mman.fw_vram_usage_va);
>>> +					  NULL, &adev->mman.fw_vram_usage_va);
>>>    }
>>>    /**
>>> @@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
>>>    					  adev->mman.drv_vram_usage_start_offset,
>>>    					  adev->mman.drv_vram_usage_size,
>>>    					  &adev->mman.drv_vram_usage_reserved_bo,
>>> -					  &adev->mman.drv_vram_usage_va);
>>> +					  NULL, &adev->mman.drv_vram_usage_va);
>>>    }
>>>    /*
>>> @@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
>>>    		ret = amdgpu_bo_create_kernel_at(adev,
>>>    						 ctx->c2p_train_data_offset,
>>>    						 ctx->train_data_size,
>>> -						 &ctx->c2p_bo,
>>> -						 NULL);
>>> +						 &ctx->c2p_bo, NULL, NULL);
>>>    		if (ret) {
>>>    			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
>>>    			amdgpu_ttm_training_reserve_vram_fini(adev);
>>> @@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
>>>    	if (!adev->gmc.is_app_apu) {
>>>    		ret = amdgpu_bo_create_kernel_at(
>>>    			adev, adev->gmc.real_vram_size - reserve_size,
>>> -			reserve_size, &adev->mman.fw_reserved_memory, NULL);
>>> +			reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
>>>    		if (ret) {
>>>    			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
>>>    			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
>>> @@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>    		r = amdgpu_bo_create_kernel_at(adev, 0,
>>>    					       adev->mman.stolen_vga_size,
>>>    					       &adev->mman.stolen_vga_memory,
>>> -					       NULL);
>>> +					       NULL, NULL);
>>>    		if (r)
>>>    			return r;
>>>    		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
>>>    					       adev->mman.stolen_extended_size,
>>>    					       &adev->mman.stolen_extended_memory,
>>> -					       NULL);
>>> +					       NULL, NULL);
>>>    		if (r)
>>>    			return r;
>>> @@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>    					       adev->mman.stolen_reserved_offset,
>>>    					       adev->mman.stolen_reserved_size,
>>>    					       &adev->mman.stolen_reserved_memory,
>>> -					       NULL);
>>> +					       NULL, NULL);
>>>    		if (r)
>>>    			return r;
>>>    	} else {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> index 41aa853a07d2..b93b42b916ce 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> @@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
>>>    		 */
>>>    		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
>>>    					       AMDGPU_GPU_PAGE_SIZE,
>>> -					       &bo, NULL))
>>> +					       &bo, NULL, NULL))
>>>    			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
>>>    		data->bps_bo[i] = bo;



More information about the amd-gfx mailing list