[PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers

Wed Jul 17 14:38:33 UTC 2024

Hi Christian,

Can we use the below combination flags to kick in hardware workaround 
while pinning BO's for this specific hw generation.

if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
}

Regards,
Arun.

On 7/17/2024 2:38 PM, Christian König wrote:
> Well that approach was discussed before and seemed to be to complicated.
>
> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad 
> idea. This isn't anything userspace should need to specify in the 
> first place.
>
> All we need is a hardware workaround which kicks in all the time while 
> pinning BOs for this specific hw generation and texture channel 
> configuration.
>
> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible 
> or specify why it is actually necessary?
>
> Regards,
> Christian.
>
> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
>> and almost all of them are not displayable. Shouldn't we use a 
>> different way to indicate that we need a non-power-of-two alignment, 
>> such as looking at the alignment field directly?
>>
>> Marek
>>
>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam at amd.com> wrote:
>>
>>     Add address alignment support to the DCC VRAM buffers.
>>
>>     v2:
>>       - adjust size based on the max_texture_channel_caches values
>>         only for GFX12 DCC buffers.
>>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>>         for DCC buffers.
>>       - roundup non power of two DCC buffer adjusted size to nearest
>>         power of two number as the buddy allocator does not support non
>>         power of two alignments. This applies only to the contiguous
>>         DCC buffers.
>>
>>     v3:(Alex)
>>       - rewrite the max texture channel caches comparison code in an
>>         algorithmic way to determine the alignment size.
>>
>>     v4:(Alex)
>>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>>     gmc_v12_0.c
>>         and add a new gmc func callback for dcc alignment. If the
>>     callback
>>         is non-NULL, call it to get the alignment, otherwise, use the
>>     default.
>>
>>     v5:(Alex)
>>       - Set the Alignment to a default value if the callback doesn't
>>     exist.
>>       - Add the callback to amdgpu_gmc_funcs.
>>
>>     v6:
>>       - Fix checkpatch error reported by Intel CI.
>>
>>     Signed-off-by: Arunpravin Paneer Selvam
>>     <Arunpravin.PaneerSelvam at amd.com>
>>     Acked-by: Alex Deucher <alexander.deucher at amd.com>
>>     Acked-by: Christian König <christian.koenig at amd.com>
>>     Reviewed-by: Frank Min <Frank.Min at amd.com>
>>     ---
>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>>     ++++++++++++++++++--
>>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>>      3 files changed, 55 insertions(+), 2 deletions(-)
>>
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     index febca3130497..654d0548a3f8 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>>                                           uint64_t addr, uint64_t
>>     *flags);
>>             /* get the amount of memory used by the vbios for pre-OS
>>     console */
>>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>>     *adev);
>>     +       /* get the DCC buffer alignment */
>>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>
>>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>>                     struct amdgpu_device *adev);
>>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>>     (adev)->gmc.gmc_funcs->override_vm_pte_flags               \
>>                     ((adev), (vm), (addr), (pte_flags))
>>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({        \
>>     +       typeof(_adev) (adev) = (_adev);        \
>>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>>     +})
>>
>>      /**
>>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>>     through the BAR
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     index f91cc149d06c..aa9dca12371c 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>
>>             remaining_size = (u64)vres->base.size;
>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>     +               u64 adjust_size;
>>     +
>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>     +                       adjust_size =
>>     amdgpu_gmc_get_dcc_alignment(adev);
>>     +                       remaining_size =
>>     roundup_pow_of_two(remaining_size + adjust_size);
>>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>>     +               }
>>     +       }
>>
>>             mutex_lock(&mgr->lock);
>>             while (remaining_size) {
>>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>                             min_block_size = mgr->default_page_size;
>>
>>                     size = remaining_size;
>>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>>     -                   !(size & (((u64)pages_per_block <<
>>     PAGE_SHIFT) - 1)))
>>     +
>>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>>     +                       min_block_size = size;
>>     +               else if ((size >= (u64)pages_per_block <<
>>     PAGE_SHIFT) &&
>>     +                        !(size & (((u64)pages_per_block <<
>>     PAGE_SHIFT) - 1)))
>>                             min_block_size = (u64)pages_per_block <<
>>     PAGE_SHIFT;
>>
>>                     BUG_ON(min_block_size < mm->chunk_size);
>>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>             }
>>             mutex_unlock(&mgr->lock);
>>
>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>     +               struct drm_buddy_block *dcc_block;
>>     +               u64 dcc_start, alignment;
>>     +
>>     +               dcc_block =
>>     amdgpu_vram_mgr_first_block(&vres->blocks);
>>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>>     +
>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>     +                       alignment =
>>     amdgpu_gmc_get_dcc_alignment(adev);
>>     +                       /* Adjust the start address for DCC
>>     buffers only */
>>     +                       dcc_start = roundup(dcc_start, alignment);
>>     +                       drm_buddy_block_trim(mm, &dcc_start,
>>     + (u64)vres->base.size,
>>     + &vres->blocks);
>>     +               }
>>     +       }
>>     +
>>             vres->base.start = 0;
>>             size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>>                          vres->base.size);
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     index fd3ac483760e..4259edcdec8a 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     @@ -542,6 +542,20 @@ static unsigned
>>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>>             return 0;
>>      }
>>
>>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>>     +{
>>     +       u64 max_tex_channel_caches, alignment;
>>     +
>>     +       max_tex_channel_caches =
>>     adev->gfx.config.max_texture_channel_caches;
>>     +       if (is_power_of_2(max_tex_channel_caches))
>>     +               alignment = (max_tex_channel_caches / SZ_4) *
>>     max_tex_channel_caches;
>>     +       else
>>     +               alignment =
>>     roundup_pow_of_two(max_tex_channel_caches) *
>>     +                               max_tex_channel_caches;
>>     +
>>     +       return (u64)alignment * SZ_1K;
>>     +}
>>     +
>>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>>     gmc_v12_0_gmc_funcs = {
>>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>>      };
>>
>>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>>     -- 
>>     2.25.1
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20240717/4489b368/attachment-0001.htm>