<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
Hi Christian,<br>
<br>
Can we use the combination of flags below to trigger the hardware
workaround while pinning BOs for this specific hw generation?<br>
<br>
if ((place->flags & TTM_PL_FLAG_CONTIGUOUS) &&<br>
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||<br>
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {<br>
}<br>
<br>
Regards,<br>
Arun.<br>
<br>
<div class="moz-cite-prefix">On 7/17/2024 2:38 PM, Christian König
wrote:<br>
</div>
<blockquote type="cite" cite="mid:2a2ea485-50e0-48cb-bb02-736a03368ab7@amd.com">
Well that approach was discussed before and seemed to be too
complicated.<br>
<br>
But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a
bad idea. This isn't anything userspace should need to specify in
the first place.<br>
<br>
All we need is a hardware workaround which kicks in all the time
while pinning BOs for this specific hw generation and texture
channel configuration.<br>
<br>
Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if
possible, or explain why it is actually necessary.<br>
<br>
Regards,<br>
Christian.<br>
<br>
<div class="moz-cite-prefix">Am 17.07.24 um 05:44 schrieb Marek
Olšák:<br>
</div>
<blockquote type="cite" cite="mid:CAAxE2A6gpvLcq7zDhTDgV1BiBZdbK6t7147ESpq0ufYqW-6gzQ@mail.gmail.com">
<div dir="auto">
<div>AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory
allocations, and almost all of them are not displayable.
Shouldn't we use a different way to indicate that we need a
non-power-of-two alignment, such as looking at the alignment
field directly? </div>
<div dir="auto"><br>
</div>
<div dir="auto">Marek<br>
<br>
<div class="gmail_quote" dir="auto">
<div dir="ltr" class="gmail_attr">On Tue, Jul 16, 2024,
11:45 Arunpravin Paneer Selvam <<a href="mailto:Arunpravin.PaneerSelvam@amd.com" moz-do-not-send="true" class="moz-txt-link-freetext">Arunpravin.PaneerSelvam@amd.com</a>>
wrote:<br>
</div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Add
address alignment support to the DCC VRAM buffers.<br>
<br>
v2:<br>
- adjust size based on the max_texture_channel_caches
values<br>
only for GFX12 DCC buffers.<br>
- used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply
change only<br>
for DCC buffers.<br>
- roundup non power of two DCC buffer adjusted size to
nearest<br>
power of two number as the buddy allocator does not
support non<br>
power of two alignments. This applies only to the
contiguous<br>
DCC buffers.<br>
<br>
v3:(Alex)<br>
- rewrite the max texture channel caches comparison
code in an<br>
algorithmic way to determine the alignment size.<br>
<br>
v4:(Alex)<br>
- Move the logic from amdgpu_vram_mgr_dcc_alignment()
to gmc_v12_0.c<br>
and add a new gmc func callback for dcc alignment.
If the callback<br>
is non-NULL, call it to get the alignment,
otherwise, use the default.<br>
<br>
v5:(Alex)<br>
- Set the Alignment to a default value if the callback
doesn't exist.<br>
- Add the callback to amdgpu_gmc_funcs.<br>
<br>
v6:<br>
- Fix checkpatch error reported by Intel CI.<br>
<br>
Signed-off-by: Arunpravin Paneer Selvam <<a href="mailto:Arunpravin.PaneerSelvam@amd.com" target="_blank" rel="noreferrer" moz-do-not-send="true" class="moz-txt-link-freetext">Arunpravin.PaneerSelvam@amd.com</a>><br>
Acked-by: Alex Deucher <<a href="mailto:alexander.deucher@amd.com" target="_blank" rel="noreferrer" moz-do-not-send="true" class="moz-txt-link-freetext">alexander.deucher@amd.com</a>><br>
Acked-by: Christian König <<a href="mailto:christian.koenig@amd.com" target="_blank" rel="noreferrer" moz-do-not-send="true" class="moz-txt-link-freetext">christian.koenig@amd.com</a>><br>
Reviewed-by: Frank Min <<a href="mailto:Frank.Min@amd.com" target="_blank" rel="noreferrer" moz-do-not-send="true" class="moz-txt-link-freetext">Frank.Min@amd.com</a>><br>
---<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
++++++++++++++++++--<br>
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15
++++++++<br>
3 files changed, 55 insertions(+), 2 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
index febca3130497..654d0548a3f8 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
@@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {<br>
uint64_t addr,
uint64_t *flags);<br>
/* get the amount of memory used by the vbios
for pre-OS console */<br>
unsigned int (*get_vbios_fb_size)(struct
amdgpu_device *adev);<br>
+ /* get the DCC buffer alignment */<br>
+ u64 (*get_dcc_alignment)(struct amdgpu_device
*adev);<br>
<br>
enum amdgpu_memory_partition
(*query_mem_partition_mode)(<br>
struct amdgpu_device *adev);<br>
@@ -363,6 +365,10 @@ struct amdgpu_gmc {<br>
(adev)->gmc.gmc_funcs->override_vm_pte_flags
\<br>
((adev), (vm), (addr), (pte_flags))<br>
#define amdgpu_gmc_get_vbios_fb_size(adev)
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))<br>
+#define amdgpu_gmc_get_dcc_alignment(_adev) ({
\<br>
+ typeof(_adev) (adev) = (_adev);
\<br>
+
((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));
\<br>
+})<br>
<br>
/**<br>
* amdgpu_gmc_vram_full_visible - Check if full VRAM is
visible through the BAR<br>
diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
index f91cc149d06c..aa9dca12371c 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c<br>
@@ -512,6 +512,16 @@ static int
amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
vres->flags |=
DRM_BUDDY_RANGE_ALLOCATION;<br>
<br>
remaining_size = (u64)vres->base.size;<br>
+ if (bo->flags &
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+ bo->flags &
AMDGPU_GEM_CREATE_GFX12_DCC) {<br>
+ u64 adjust_size;<br>
+<br>
+ if
(adev->gmc.gmc_funcs->get_dcc_alignment) {<br>
+ adjust_size =
amdgpu_gmc_get_dcc_alignment(adev);<br>
+ remaining_size =
roundup_pow_of_two(remaining_size + adjust_size);<br>
+ vres->flags |=
DRM_BUDDY_TRIM_DISABLE;<br>
+ }<br>
+ }<br>
<br>
mutex_lock(&mgr->lock);<br>
while (remaining_size) {<br>
@@ -521,8 +531,12 @@ static int
amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
min_block_size =
mgr->default_page_size;<br>
<br>
size = remaining_size;<br>
- if ((size >= (u64)pages_per_block
<< PAGE_SHIFT) &&<br>
- !(size & (((u64)pages_per_block
<< PAGE_SHIFT) - 1)))<br>
+<br>
+ if (bo->flags &
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+ bo->flags &
AMDGPU_GEM_CREATE_GFX12_DCC)<br>
+ min_block_size = size;<br>
+ else if ((size >=
(u64)pages_per_block << PAGE_SHIFT) &&<br>
+ !(size &
(((u64)pages_per_block << PAGE_SHIFT) - 1)))<br>
min_block_size =
(u64)pages_per_block << PAGE_SHIFT;<br>
<br>
BUG_ON(min_block_size <
mm->chunk_size);<br>
@@ -553,6 +567,24 @@ static int
amdgpu_vram_mgr_new(struct ttm_resource_manager *man,<br>
}<br>
mutex_unlock(&mgr->lock);<br>
<br>
+ if (bo->flags &
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&<br>
+ bo->flags &
AMDGPU_GEM_CREATE_GFX12_DCC) {<br>
+ struct drm_buddy_block *dcc_block;<br>
+ u64 dcc_start, alignment;<br>
+<br>
+ dcc_block =
amdgpu_vram_mgr_first_block(&vres->blocks);<br>
+ dcc_start =
amdgpu_vram_mgr_block_start(dcc_block);<br>
+<br>
+ if
(adev->gmc.gmc_funcs->get_dcc_alignment) {<br>
+ alignment =
amdgpu_gmc_get_dcc_alignment(adev);<br>
+ /* Adjust the start address for
DCC buffers only */<br>
+ dcc_start = roundup(dcc_start,
alignment);<br>
+ drm_buddy_block_trim(mm,
&dcc_start,<br>
+
(u64)vres->base.size,<br>
+
&vres->blocks);<br>
+ }<br>
+ }<br>
+<br>
vres->base.start = 0;<br>
size = max_t(u64,
amdgpu_vram_mgr_blocks_size(&vres->blocks),<br>
vres->base.size);<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
index fd3ac483760e..4259edcdec8a 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c<br>
@@ -542,6 +542,20 @@ static unsigned
gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)<br>
return 0;<br>
}<br>
<br>
+static u64 gmc_v12_0_get_dcc_alignment(struct
amdgpu_device *adev)<br>
+{<br>
+ u64 max_tex_channel_caches, alignment;<br>
+<br>
+ max_tex_channel_caches =
adev->gfx.config.max_texture_channel_caches;<br>
+ if (is_power_of_2(max_tex_channel_caches))<br>
+ alignment = (max_tex_channel_caches /
SZ_4) * max_tex_channel_caches;<br>
+ else<br>
+ alignment =
roundup_pow_of_two(max_tex_channel_caches) *<br>
+ max_tex_channel_caches;<br>
+<br>
+ return (u64)alignment * SZ_1K;<br>
+}<br>
+<br>
static const struct amdgpu_gmc_funcs
gmc_v12_0_gmc_funcs = {<br>
.flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,<br>
.flush_gpu_tlb_pasid =
gmc_v12_0_flush_gpu_tlb_pasid,<br>
@@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
gmc_v12_0_gmc_funcs = {<br>
.get_vm_pde = gmc_v12_0_get_vm_pde,<br>
.get_vm_pte = gmc_v12_0_get_vm_pte,<br>
.get_vbios_fb_size =
gmc_v12_0_get_vbios_fb_size,<br>
+ .get_dcc_alignment =
gmc_v12_0_get_dcc_alignment,<br>
};<br>
<br>
static void gmc_v12_0_set_gmc_funcs(struct
amdgpu_device *adev)<br>
-- <br>
2.25.1<br>
<br>
</blockquote>
</div>
</div>
</div>
</blockquote>
<br>
</blockquote>
<br>
</body>
</html>