[PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers

Christian König christian.koenig at amd.com
Wed Jul 17 14:50:34 UTC 2024


As far as I know, yes.

Regards,
Christian.

Am 17.07.24 um 16:38 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
> Can we use the below combination flags to kick in hardware workaround 
> while pinning BO's for this specific hw generation.
>
> if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
> (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
> amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
> }
>
> Regards,
> Arun.
>
> On 7/17/2024 2:38 PM, Christian König wrote:
>> Well that approach was discussed before and seemed to be to complicated.
>>
>> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a 
>> bad idea. This isn't anything userspace should need to specify in the 
>> first place.
>>
>> All we need is a hardware workaround which kicks in all the time 
>> while pinning BOs for this specific hw generation and texture channel 
>> configuration.
>>
>> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible 
>> or specify why it is actually necessary?
>>
>> Regards,
>> Christian.
>>
>> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
>>> and almost all of them are not displayable. Shouldn't we use a 
>>> different way to indicate that we need a non-power-of-two alignment, 
>>> such as looking at the alignment field directly?
>>>
>>> Marek
>>>
>>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam at amd.com> wrote:
>>>
>>>     Add address alignment support to the DCC VRAM buffers.
>>>
>>>     v2:
>>>       - adjust size based on the max_texture_channel_caches values
>>>         only for GFX12 DCC buffers.
>>>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>>>         for DCC buffers.
>>>       - roundup non power of two DCC buffer adjusted size to nearest
>>>         power of two number as the buddy allocator does not support non
>>>         power of two alignments. This applies only to the contiguous
>>>         DCC buffers.
>>>
>>>     v3:(Alex)
>>>       - rewrite the max texture channel caches comparison code in an
>>>         algorithmic way to determine the alignment size.
>>>
>>>     v4:(Alex)
>>>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>>>     gmc_v12_0.c
>>>         and add a new gmc func callback for dcc alignment. If the
>>>     callback
>>>         is non-NULL, call it to get the alignment, otherwise, use
>>>     the default.
>>>
>>>     v5:(Alex)
>>>       - Set the Alignment to a default value if the callback doesn't
>>>     exist.
>>>       - Add the callback to amdgpu_gmc_funcs.
>>>
>>>     v6:
>>>       - Fix checkpatch error reported by Intel CI.
>>>
>>>     Signed-off-by: Arunpravin Paneer Selvam
>>>     <Arunpravin.PaneerSelvam at amd.com>
>>>     Acked-by: Alex Deucher <alexander.deucher at amd.com>
>>>     Acked-by: Christian König <christian.koenig at amd.com>
>>>     Reviewed-by: Frank Min <Frank.Min at amd.com>
>>>     ---
>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>>>     ++++++++++++++++++--
>>>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>>>      3 files changed, 55 insertions(+), 2 deletions(-)
>>>
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     index febca3130497..654d0548a3f8 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>>>                                           uint64_t addr, uint64_t
>>>     *flags);
>>>             /* get the amount of memory used by the vbios for pre-OS
>>>     console */
>>>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>>>     *adev);
>>>     +       /* get the DCC buffer alignment */
>>>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>>
>>>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>>>                     struct amdgpu_device *adev);
>>>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>>>     (adev)->gmc.gmc_funcs->override_vm_pte_flags                 \
>>>                     ((adev), (vm), (addr), (pte_flags))
>>>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>>>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>>>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({          \
>>>     +       typeof(_adev) (adev) = (_adev);          \
>>>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>>>     +})
>>>
>>>      /**
>>>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>>>     through the BAR
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     index f91cc149d06c..aa9dca12371c 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>
>>>             remaining_size = (u64)vres->base.size;
>>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>>     +               u64 adjust_size;
>>>     +
>>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>>     +                       adjust_size =
>>>     amdgpu_gmc_get_dcc_alignment(adev);
>>>     +                       remaining_size =
>>>     roundup_pow_of_two(remaining_size + adjust_size);
>>>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>>>     +               }
>>>     +       }
>>>
>>>             mutex_lock(&mgr->lock);
>>>             while (remaining_size) {
>>>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>                             min_block_size = mgr->default_page_size;
>>>
>>>                     size = remaining_size;
>>>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>>>     -                   !(size & (((u64)pages_per_block <<
>>>     PAGE_SHIFT) - 1)))
>>>     +
>>>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>>>     +                       min_block_size = size;
>>>     +               else if ((size >= (u64)pages_per_block <<
>>>     PAGE_SHIFT) &&
>>>     +                        !(size & (((u64)pages_per_block <<
>>>     PAGE_SHIFT) - 1)))
>>>                             min_block_size = (u64)pages_per_block <<
>>>     PAGE_SHIFT;
>>>
>>>                     BUG_ON(min_block_size < mm->chunk_size);
>>>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>             }
>>>             mutex_unlock(&mgr->lock);
>>>
>>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>>     +               struct drm_buddy_block *dcc_block;
>>>     +               u64 dcc_start, alignment;
>>>     +
>>>     +               dcc_block =
>>>     amdgpu_vram_mgr_first_block(&vres->blocks);
>>>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>>>     +
>>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>>     +                       alignment =
>>>     amdgpu_gmc_get_dcc_alignment(adev);
>>>     +                       /* Adjust the start address for DCC
>>>     buffers only */
>>>     +                       dcc_start = roundup(dcc_start, alignment);
>>>     +                       drm_buddy_block_trim(mm, &dcc_start,
>>>     + (u64)vres->base.size,
>>>     + &vres->blocks);
>>>     +               }
>>>     +       }
>>>     +
>>>             vres->base.start = 0;
>>>             size = max_t(u64,
>>>     amdgpu_vram_mgr_blocks_size(&vres->blocks),
>>>                          vres->base.size);
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     index fd3ac483760e..4259edcdec8a 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     @@ -542,6 +542,20 @@ static unsigned
>>>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>>>             return 0;
>>>      }
>>>
>>>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>>>     +{
>>>     +       u64 max_tex_channel_caches, alignment;
>>>     +
>>>     +       max_tex_channel_caches =
>>>     adev->gfx.config.max_texture_channel_caches;
>>>     +       if (is_power_of_2(max_tex_channel_caches))
>>>     +               alignment = (max_tex_channel_caches / SZ_4) *
>>>     max_tex_channel_caches;
>>>     +       else
>>>     +               alignment =
>>>     roundup_pow_of_two(max_tex_channel_caches) *
>>>     +  max_tex_channel_caches;
>>>     +
>>>     +       return (u64)alignment * SZ_1K;
>>>     +}
>>>     +
>>>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>>>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>>>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>>>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>>>     gmc_v12_0_gmc_funcs = {
>>>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>>>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>>>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>>>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>>>      };
>>>
>>>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>>>     -- 
>>>     2.25.1
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20240717/8f5ad04f/attachment-0001.htm>


More information about the amd-gfx mailing list