<div dir="auto"><div>AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, and almost all of them are not displayable. Shouldn't we use a different way to indicate that we need a non-power-of-two alignment, such as looking at the alignment field directly? </div><div dir="auto"><br></div><div dir="auto">Marek<br><br><div class="gmail_quote" dir="auto"><div dir="ltr" class="gmail_attr">On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam &lt;<a href="mailto:Arunpravin.PaneerSelvam@amd.com">Arunpravin.PaneerSelvam@amd.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Add address alignment support to the DCC VRAM buffers.<br>
<br>
v2:<br>
  - adjust size based on the max_texture_channel_caches values<br>
    only for GFX12 DCC buffers.<br>
  - use the AMDGPU_GEM_CREATE_GFX12_DCC flag to apply the change only<br>
    to DCC buffers.<br>
  - round up a non-power-of-two adjusted DCC buffer size to the next<br>
    power of two, as the buddy allocator does not support<br>
    non-power-of-two alignments. This applies only to contiguous<br>
    DCC buffers.<br>
<br>
v3:(Alex)<br>
  - rewrite the max texture channel caches comparison code in an<br>
    algorithmic way to determine the alignment size.<br>
<br>
v4:(Alex)<br>
  - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c<br>
    and add a new gmc func callback for dcc alignment. If the callback<br>
    is non-NULL, call it to get the alignment, otherwise, use the default.<br>
<br>
v5:(Alex)<br>
  - Set the alignment to a default value if the callback doesn't exist.<br>
  - Add the callback to amdgpu_gmc_funcs.<br>
<br>
v6:<br>
  - Fix checkpatch error reported by Intel CI.<br>
<br>
Signed-off-by: Arunpravin Paneer Selvam &lt;<a href="mailto:Arunpravin.PaneerSelvam@amd.com" target="_blank" rel="noreferrer">Arunpravin.PaneerSelvam@amd.com</a>&gt;<br>
Acked-by: Alex Deucher &lt;<a href="mailto:alexander.deucher@amd.com" target="_blank" rel="noreferrer">alexander.deucher@amd.com</a>&gt;<br>
Acked-by: Christian König &lt;<a href="mailto:christian.koenig@amd.com" target="_blank" rel="noreferrer">christian.koenig@amd.com</a>&gt;<br>
Reviewed-by: Frank Min &lt;<a href="mailto:Frank.Min@amd.com" target="_blank" rel="noreferrer">Frank.Min@amd.com</a>&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 ++++++++++++++++++--<br>
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++<br>
 3 files changed, 55 insertions(+), 2 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
index febca3130497..654d0548a3f8 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
@@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {<br>
                                      uint64_t addr, uint64_t *flags);<br>
        /* get the amount of memory used by the vbios for pre-OS console */<br>
        unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);<br>
+       /* get the DCC buffer alignment */<br>
+       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);<br>
<br>
        enum amdgpu_memory_partition (*query_mem_partition_mode)(<br>
                struct amdgpu_device *adev);<br>
@@ -363,6 +365,10 @@ struct amdgpu_gmc {<br>
        (adev)->gmc.gmc_funcs->override_vm_pte_flags                    \<br>
                ((adev), (vm), (addr), (pte_flags))<br>
 #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))<br>
+#define amdgpu_gmc_get_dcc_alignment(_adev) ({                 \<br>
+       typeof(_adev) (adev) = (_adev);                         \<br>
+       ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));     \<br>
+})<br>
<br>
 /**<br>
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
index f91cc149d06c..aa9dca12371c 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
@@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
                vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;<br>
<br>
        remaining_size = (u64)vres->base.size;<br>
+       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {<br>
+               u64 adjust_size;<br>
+<br>
+               if (adev->gmc.gmc_funcs->get_dcc_alignment) {<br>
+                       adjust_size = amdgpu_gmc_get_dcc_alignment(adev);<br>
+                       remaining_size = roundup_pow_of_two(remaining_size + adjust_size);<br>
+                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;<br>
+               }<br>
+       }<br>
<br>
        mutex_lock(&mgr->lock);<br>
        while (remaining_size) {<br>
@@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
                        min_block_size = mgr->default_page_size;<br>
<br>
                size = remaining_size;<br>
-               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&<br>
-                   !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))<br>
+<br>
+               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)<br>
+                       min_block_size = size;<br>
+               else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&<br>
+                        !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))<br>
                        min_block_size = (u64)pages_per_block << PAGE_SHIFT;<br>
<br>
                BUG_ON(min_block_size < mm->chunk_size);<br>
@@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
        }<br>
        mutex_unlock(&mgr->lock);<br>
<br>
+       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {<br>
+               struct drm_buddy_block *dcc_block;<br>
+               u64 dcc_start, alignment;<br>
+<br>
+               dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);<br>
+               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);<br>
+<br>
+               if (adev->gmc.gmc_funcs->get_dcc_alignment) {<br>
+                       alignment = amdgpu_gmc_get_dcc_alignment(adev);<br>
+                       /* Adjust the start address for DCC buffers only */<br>
+                       dcc_start = roundup(dcc_start, alignment);<br>
+                       drm_buddy_block_trim(mm, &dcc_start,<br>
+                                            (u64)vres->base.size,<br>
+                                            &vres->blocks);<br>
+               }<br>
+       }<br>
+<br>
        vres->base.start = 0;<br>
        size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),<br>
                     vres->base.size);<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
index fd3ac483760e..4259edcdec8a 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
@@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)<br>
        return 0;<br>
 }<br>
<br>
+static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)<br>
+{<br>
+       u64 max_tex_channel_caches, alignment;<br>
+<br>
+       max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;<br>
+       if (is_power_of_2(max_tex_channel_caches))<br>
+               alignment = (max_tex_channel_caches / SZ_4) * max_tex_channel_caches;<br>
+       else<br>
+               alignment = roundup_pow_of_two(max_tex_channel_caches) *<br>
+                               max_tex_channel_caches;<br>
+<br>
+       return (u64)alignment * SZ_1K;<br>
+}<br>
+<br>
 static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {<br>
        .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,<br>
        .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,<br>
@@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {<br>
        .get_vm_pde = gmc_v12_0_get_vm_pde,<br>
        .get_vm_pte = gmc_v12_0_get_vm_pte,<br>
        .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,<br>
+       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,<br>
 };<br>
<br>
 static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)<br>
-- <br>
2.25.1<br>
<br>
</blockquote></div></div></div>