[PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers

Wed Jul 17 09:08:36 UTC 2024

Well that approach was discussed before and seemed to be to complicated.

But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad 
idea. This isn't anything userspace should need to specify in the first 
place.

All we need is a hardware workaround which kicks in all the time while 
pinning BOs for this specific hw generation and texture channel 
configuration.

Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible or 
specify why it is actually necessary?

Regards,
Christian.

Am 17.07.24 um 05:44 schrieb Marek Olšák:
> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
> and almost all of them are not displayable. Shouldn't we use a 
> different way to indicate that we need a non-power-of-two alignment, 
> such as looking at the alignment field directly?
>
> Marek
>
> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
> <Arunpravin.PaneerSelvam at amd.com> wrote:
>
>     Add address alignment support to the DCC VRAM buffers.
>
>     v2:
>       - adjust size based on the max_texture_channel_caches values
>         only for GFX12 DCC buffers.
>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>         for DCC buffers.
>       - roundup non power of two DCC buffer adjusted size to nearest
>         power of two number as the buddy allocator does not support non
>         power of two alignments. This applies only to the contiguous
>         DCC buffers.
>
>     v3:(Alex)
>       - rewrite the max texture channel caches comparison code in an
>         algorithmic way to determine the alignment size.
>
>     v4:(Alex)
>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c
>         and add a new gmc func callback for dcc alignment. If the callback
>         is non-NULL, call it to get the alignment, otherwise, use the
>     default.
>
>     v5:(Alex)
>       - Set the Alignment to a default value if the callback doesn't
>     exist.
>       - Add the callback to amdgpu_gmc_funcs.
>
>     v6:
>       - Fix checkpatch error reported by Intel CI.
>
>     Signed-off-by: Arunpravin Paneer Selvam
>     <Arunpravin.PaneerSelvam at amd.com>
>     Acked-by: Alex Deucher <alexander.deucher at amd.com>
>     Acked-by: Christian König <christian.koenig at amd.com>
>     Reviewed-by: Frank Min <Frank.Min at amd.com>
>     ---
>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>     ++++++++++++++++++--
>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>      3 files changed, 55 insertions(+), 2 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     index febca3130497..654d0548a3f8 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>                                           uint64_t addr, uint64_t *flags);
>             /* get the amount of memory used by the vbios for pre-OS
>     console */
>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
>     +       /* get the DCC buffer alignment */
>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>
>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>                     struct amdgpu_device *adev);
>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>     (adev)->gmc.gmc_funcs->override_vm_pte_flags             \
>                     ((adev), (vm), (addr), (pte_flags))
>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({      \
>     +       typeof(_adev) (adev) = (_adev);      \
>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>     +})
>
>      /**
>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>     through the BAR
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     index f91cc149d06c..aa9dca12371c 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
>             remaining_size = (u64)vres->base.size;
>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>     +               u64 adjust_size;
>     +
>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>     +                       adjust_size =
>     amdgpu_gmc_get_dcc_alignment(adev);
>     +                       remaining_size =
>     roundup_pow_of_two(remaining_size + adjust_size);
>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>     +               }
>     +       }
>
>             mutex_lock(&mgr->lock);
>             while (remaining_size) {
>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>                             min_block_size = mgr->default_page_size;
>
>                     size = remaining_size;
>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>     -                   !(size & (((u64)pages_per_block << PAGE_SHIFT)
>     - 1)))
>     +
>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>     +                       min_block_size = size;
>     +               else if ((size >= (u64)pages_per_block <<
>     PAGE_SHIFT) &&
>     +                        !(size & (((u64)pages_per_block <<
>     PAGE_SHIFT) - 1)))
>                             min_block_size = (u64)pages_per_block <<
>     PAGE_SHIFT;
>
>                     BUG_ON(min_block_size < mm->chunk_size);
>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>             }
>             mutex_unlock(&mgr->lock);
>
>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>     +               struct drm_buddy_block *dcc_block;
>     +               u64 dcc_start, alignment;
>     +
>     +               dcc_block =
>     amdgpu_vram_mgr_first_block(&vres->blocks);
>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>     +
>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>     +                       alignment =
>     amdgpu_gmc_get_dcc_alignment(adev);
>     +                       /* Adjust the start address for DCC
>     buffers only */
>     +                       dcc_start = roundup(dcc_start, alignment);
>     +                       drm_buddy_block_trim(mm, &dcc_start,
>     + (u64)vres->base.size,
>     + &vres->blocks);
>     +               }
>     +       }
>     +
>             vres->base.start = 0;
>             size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>                          vres->base.size);
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     index fd3ac483760e..4259edcdec8a 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     @@ -542,6 +542,20 @@ static unsigned
>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>             return 0;
>      }
>
>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>     +{
>     +       u64 max_tex_channel_caches, alignment;
>     +
>     +       max_tex_channel_caches =
>     adev->gfx.config.max_texture_channel_caches;
>     +       if (is_power_of_2(max_tex_channel_caches))
>     +               alignment = (max_tex_channel_caches / SZ_4) *
>     max_tex_channel_caches;
>     +       else
>     +               alignment =
>     roundup_pow_of_two(max_tex_channel_caches) *
>     +                               max_tex_channel_caches;
>     +
>     +       return (u64)alignment * SZ_1K;
>     +}
>     +
>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>     gmc_v12_0_gmc_funcs = {
>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>      };
>
>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>     -- 
>     2.25.1
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20240717/af31fc5f/attachment-0001.htm>