[PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers
Christian König
christian.koenig at amd.com
Wed Jul 17 14:50:34 UTC 2024
As far as I know, yes.
Regards,
Christian.
Am 17.07.24 um 16:38 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
> Can we use the below combination flags to kick in hardware workaround
> while pinning BO's for this specific hw generation.
>
> if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
> (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
> amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
> }
>
> Regards,
> Arun.
>
> On 7/17/2024 2:38 PM, Christian König wrote:
>> Well that approach was discussed before and seemed to be to complicated.
>>
>> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a
>> bad idea. This isn't anything userspace should need to specify in the
>> first place.
>>
>> All we need is a hardware workaround which kicks in all the time
>> while pinning BOs for this specific hw generation and texture channel
>> configuration.
>>
>> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible
>> or specify why it is actually necessary?
>>
>> Regards,
>> Christian.
>>
>> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations,
>>> and almost all of them are not displayable. Shouldn't we use a
>>> different way to indicate that we need a non-power-of-two alignment,
>>> such as looking at the alignment field directly?
>>>
>>> Marek
>>>
>>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam at amd.com> wrote:
>>>
>>> Add address alignment support to the DCC VRAM buffers.
>>>
>>> v2:
>>> - adjust size based on the max_texture_channel_caches values
>>> only for GFX12 DCC buffers.
>>> - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>>> for DCC buffers.
>>> - roundup non power of two DCC buffer adjusted size to nearest
>>> power of two number as the buddy allocator does not support non
>>> power of two alignments. This applies only to the contiguous
>>> DCC buffers.
>>>
>>> v3:(Alex)
>>> - rewrite the max texture channel caches comparison code in an
>>> algorithmic way to determine the alignment size.
>>>
>>> v4:(Alex)
>>> - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>>> gmc_v12_0.c
>>> and add a new gmc func callback for dcc alignment. If the
>>> callback
>>> is non-NULL, call it to get the alignment, otherwise, use
>>> the default.
>>>
>>> v5:(Alex)
>>> - Set the Alignment to a default value if the callback doesn't
>>> exist.
>>> - Add the callback to amdgpu_gmc_funcs.
>>>
>>> v6:
>>> - Fix checkpatch error reported by Intel CI.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam at amd.com>
>>> Acked-by: Alex Deucher <alexander.deucher at amd.com>
>>> Acked-by: Christian König <christian.koenig at amd.com>
>>> Reviewed-by: Frank Min <Frank.Min at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>>> ++++++++++++++++++--
>>> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++
>>> 3 files changed, 55 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> index febca3130497..654d0548a3f8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>> @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>>> uint64_t addr, uint64_t
>>> *flags);
>>> /* get the amount of memory used by the vbios for pre-OS
>>> console */
>>> unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>>> *adev);
>>> + /* get the DCC buffer alignment */
>>> + u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>>
>>> enum amdgpu_memory_partition (*query_mem_partition_mode)(
>>> struct amdgpu_device *adev);
>>> @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>>> (adev)->gmc.gmc_funcs->override_vm_pte_flags \
>>> ((adev), (vm), (addr), (pte_flags))
>>> #define amdgpu_gmc_get_vbios_fb_size(adev)
>>> (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>>> +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \
>>> + typeof(_adev) (adev) = (_adev); \
>>> + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \
>>> +})
>>>
>>> /**
>>> * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>>> through the BAR
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index f91cc149d06c..aa9dca12371c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>
>>> remaining_size = (u64)vres->base.size;
>>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>> + u64 adjust_size;
>>> +
>>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>> + adjust_size =
>>> amdgpu_gmc_get_dcc_alignment(adev);
>>> + remaining_size =
>>> roundup_pow_of_two(remaining_size + adjust_size);
>>> + vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>>> + }
>>> + }
>>>
>>> mutex_lock(&mgr->lock);
>>> while (remaining_size) {
>>> @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> min_block_size = mgr->default_page_size;
>>>
>>> size = remaining_size;
>>> - if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>>> - !(size & (((u64)pages_per_block <<
>>> PAGE_SHIFT) - 1)))
>>> +
>>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>>> + min_block_size = size;
>>> + else if ((size >= (u64)pages_per_block <<
>>> PAGE_SHIFT) &&
>>> + !(size & (((u64)pages_per_block <<
>>> PAGE_SHIFT) - 1)))
>>> min_block_size = (u64)pages_per_block <<
>>> PAGE_SHIFT;
>>>
>>> BUG_ON(min_block_size < mm->chunk_size);
>>> @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> }
>>> mutex_unlock(&mgr->lock);
>>>
>>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>> + struct drm_buddy_block *dcc_block;
>>> + u64 dcc_start, alignment;
>>> +
>>> + dcc_block =
>>> amdgpu_vram_mgr_first_block(&vres->blocks);
>>> + dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>>> +
>>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>> + alignment =
>>> amdgpu_gmc_get_dcc_alignment(adev);
>>> + /* Adjust the start address for DCC
>>> buffers only */
>>> + dcc_start = roundup(dcc_start, alignment);
>>> + drm_buddy_block_trim(mm, &dcc_start,
>>> + (u64)vres->base.size,
>>> + &vres->blocks);
>>> + }
>>> + }
>>> +
>>> vres->base.start = 0;
>>> size = max_t(u64,
>>> amdgpu_vram_mgr_blocks_size(&vres->blocks),
>>> vres->base.size);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> index fd3ac483760e..4259edcdec8a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>> @@ -542,6 +542,20 @@ static unsigned
>>> gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>>> return 0;
>>> }
>>>
>>> +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>>> +{
>>> + u64 max_tex_channel_caches, alignment;
>>> +
>>> + max_tex_channel_caches =
>>> adev->gfx.config.max_texture_channel_caches;
>>> + if (is_power_of_2(max_tex_channel_caches))
>>> + alignment = (max_tex_channel_caches / SZ_4) *
>>> max_tex_channel_caches;
>>> + else
>>> + alignment =
>>> roundup_pow_of_two(max_tex_channel_caches) *
>>> + max_tex_channel_caches;
>>> +
>>> + return (u64)alignment * SZ_1K;
>>> +}
>>> +
>>> static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>>> .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>>> .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>>> @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>>> gmc_v12_0_gmc_funcs = {
>>> .get_vm_pde = gmc_v12_0_get_vm_pde,
>>> .get_vm_pte = gmc_v12_0_get_vm_pte,
>>> .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>>> + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>>> };
>>>
>>> static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>>> --
>>> 2.25.1
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20240717/8f5ad04f/attachment-0001.htm>
More information about the dri-devel
mailing list