[PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers
Paneer Selvam, Arunpravin
arunpravin.paneerselvam at amd.com
Wed Jul 17 14:38:33 UTC 2024
Hi Christian,
Can we use the below combination flags to kick in hardware workaround
while pinning BO's for this specific hw generation.
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
}
Regards,
Arun.
On 7/17/2024 2:38 PM, Christian König wrote:
> Well that approach was discussed before and seemed to be to complicated.
>
> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad
> idea. This isn't anything userspace should need to specify in the
> first place.
>
> All we need is a hardware workaround which kicks in all the time while
> pinning BOs for this specific hw generation and texture channel
> configuration.
>
> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible
> or specify why it is actually necessary?
>
> Regards,
> Christian.
>
> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations,
>> and almost all of them are not displayable. Shouldn't we use a
>> different way to indicate that we need a non-power-of-two alignment,
>> such as looking at the alignment field directly?
>>
>> Marek
>>
>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam at amd.com> wrote:
>>
>> Add address alignment support to the DCC VRAM buffers.
>>
>> v2:
>> - adjust size based on the max_texture_channel_caches values
>> only for GFX12 DCC buffers.
>> - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>> for DCC buffers.
>> - roundup non power of two DCC buffer adjusted size to nearest
>> power of two number as the buddy allocator does not support non
>> power of two alignments. This applies only to the contiguous
>> DCC buffers.
>>
>> v3:(Alex)
>> - rewrite the max texture channel caches comparison code in an
>> algorithmic way to determine the alignment size.
>>
>> v4:(Alex)
>> - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>> gmc_v12_0.c
>> and add a new gmc func callback for dcc alignment. If the
>> callback
>> is non-NULL, call it to get the alignment, otherwise, use the
>> default.
>>
>> v5:(Alex)
>> - Set the Alignment to a default value if the callback doesn't
>> exist.
>> - Add the callback to amdgpu_gmc_funcs.
>>
>> v6:
>> - Fix checkpatch error reported by Intel CI.
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam at amd.com>
>> Acked-by: Alex Deucher <alexander.deucher at amd.com>
>> Acked-by: Christian König <christian.koenig at amd.com>
>> Reviewed-by: Frank Min <Frank.Min at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>> ++++++++++++++++++--
>> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++
>> 3 files changed, 55 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index febca3130497..654d0548a3f8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>> uint64_t addr, uint64_t
>> *flags);
>> /* get the amount of memory used by the vbios for pre-OS
>> console */
>> unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>> *adev);
>> + /* get the DCC buffer alignment */
>> + u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>
>> enum amdgpu_memory_partition (*query_mem_partition_mode)(
>> struct amdgpu_device *adev);
>> @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>> (adev)->gmc.gmc_funcs->override_vm_pte_flags \
>> ((adev), (vm), (addr), (pte_flags))
>> #define amdgpu_gmc_get_vbios_fb_size(adev)
>> (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>> +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \
>> + typeof(_adev) (adev) = (_adev); \
>> + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \
>> +})
>>
>> /**
>> * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>> through the BAR
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index f91cc149d06c..aa9dca12371c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>
>> remaining_size = (u64)vres->base.size;
>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>> + u64 adjust_size;
>> +
>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>> + adjust_size =
>> amdgpu_gmc_get_dcc_alignment(adev);
>> + remaining_size =
>> roundup_pow_of_two(remaining_size + adjust_size);
>> + vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>> + }
>> + }
>>
>> mutex_lock(&mgr->lock);
>> while (remaining_size) {
>> @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> min_block_size = mgr->default_page_size;
>>
>> size = remaining_size;
>> - if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>> - !(size & (((u64)pages_per_block <<
>> PAGE_SHIFT) - 1)))
>> +
>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>> + min_block_size = size;
>> + else if ((size >= (u64)pages_per_block <<
>> PAGE_SHIFT) &&
>> + !(size & (((u64)pages_per_block <<
>> PAGE_SHIFT) - 1)))
>> min_block_size = (u64)pages_per_block <<
>> PAGE_SHIFT;
>>
>> BUG_ON(min_block_size < mm->chunk_size);
>> @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> }
>> mutex_unlock(&mgr->lock);
>>
>> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>> + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>> + struct drm_buddy_block *dcc_block;
>> + u64 dcc_start, alignment;
>> +
>> + dcc_block =
>> amdgpu_vram_mgr_first_block(&vres->blocks);
>> + dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>> +
>> + if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>> + alignment =
>> amdgpu_gmc_get_dcc_alignment(adev);
>> + /* Adjust the start address for DCC
>> buffers only */
>> + dcc_start = roundup(dcc_start, alignment);
>> + drm_buddy_block_trim(mm, &dcc_start,
>> + (u64)vres->base.size,
>> + &vres->blocks);
>> + }
>> + }
>> +
>> vres->base.start = 0;
>> size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>> vres->base.size);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>> index fd3ac483760e..4259edcdec8a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>> @@ -542,6 +542,20 @@ static unsigned
>> gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>> return 0;
>> }
>>
>> +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>> +{
>> + u64 max_tex_channel_caches, alignment;
>> +
>> + max_tex_channel_caches =
>> adev->gfx.config.max_texture_channel_caches;
>> + if (is_power_of_2(max_tex_channel_caches))
>> + alignment = (max_tex_channel_caches / SZ_4) *
>> max_tex_channel_caches;
>> + else
>> + alignment =
>> roundup_pow_of_two(max_tex_channel_caches) *
>> + max_tex_channel_caches;
>> +
>> + return (u64)alignment * SZ_1K;
>> +}
>> +
>> static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>> .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>> .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>> @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>> gmc_v12_0_gmc_funcs = {
>> .get_vm_pde = gmc_v12_0_get_vm_pde,
>> .get_vm_pte = gmc_v12_0_get_vm_pte,
>> .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>> + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>> };
>>
>> static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>> --
>> 2.25.1
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20240717/4489b368/attachment-0001.htm>
More information about the Intel-gfx
mailing list