[PATCH v2 2/2] drm/amdgpu: Use ttm_pages_limit to override vram reporting

Bhardwaj, Rajneesh rajneesh.bhardwaj at amd.com
Tue Oct 3 18:55:56 UTC 2023


On 10/3/2023 2:07 PM, Felix Kuehling wrote:
>
> On 2023-10-02 16:21, Rajneesh Bhardwaj wrote:
>> On GFXIP9.4.3 APUs, report memory according to the ttm pages limit
>> in NPS1 mode.
>>
>> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 ++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  9 +--------
>>   2 files changed, 17 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 38b5457baded..131e150d8a93 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -28,6 +28,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_gfx.h"
>>   #include "amdgpu_dma_buf.h"
>> +#include <drm/ttm/ttm_tt.h>
>>   #include <linux/module.h>
>>   #include <linux/dma-buf.h>
>>   #include "amdgpu_xgmi.h"
>> @@ -806,10 +807,24 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
>>   u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
>>   {
>>       u64 tmp;
>> +    int num_nodes;
>>       s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
>>
>>       if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
>> -        tmp = adev->gmc.mem_partitions[mem_id].size;
>> +        if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
>> +            num_nodes = num_online_nodes();
>> +            /* In NPS1 mode, restrict the VRAM reporting to the TTM
>> +             * pages limit, which by default is 1/2 of system memory.
>> +             * In the other partition modes, the HBM is already
>> +             * divided uniformly across the reported NUMA nodes. Users
>> +             * who want to go beyond the default TTM limit and
>> +             * maximize their ROCm allocations can raise it up to the
>> +             * TTM and system memory limits.
>> +             */
>> +
>> +            tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_nodes;
>
> I don't know why you need a local variable for num_nodes. Just divide 
> by num_online_nodes(). Other than that, the series is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


Thanks for the review. I will amend this and push it to
amd-staging-drm-next with your Reviewed-by tag.
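
For reference, a quick sketch of the amended hunk with the num_nodes
local dropped (untested as written here, but it is what I intend to
push):

    if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
        /* NPS1: report VRAM as per the TTM pages limit, split
         * evenly across the online NUMA nodes.
         */
        tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
    } else {
        tmp = adev->gmc.mem_partitions[mem_id].size;
    }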

>
>
>> +        } else {
>> +            tmp = adev->gmc.mem_partitions[mem_id].size;
>> +        }
>>           do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
>>           return ALIGN_DOWN(tmp, PAGE_SIZE);
>>       } else {
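
To make the numbers concrete, a worked example with illustrative values
(not taken from any particular system):

    system memory             = 512 GiB
    ttm_tt_pages_limit()      = pages worth 256 GiB  (default: 1/2 of sysmem)
    num_online_nodes()        = 4
    tmp                       = 256 GiB / 4 = 64 GiB per partition
    value returned            = 64 GiB / num_xcp_per_mem_partition,
                                aligned down to PAGE_SIZE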
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 268ee533e7c1..b090cd42f81f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -1896,15 +1896,14 @@ static void
>>   gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
>>                     struct amdgpu_mem_partition_info *mem_ranges)
>>   {
>> -    int num_ranges = 0, ret, mem_groups;
>>       struct amdgpu_numa_info numa_info;
>>       int node_ids[MAX_MEM_RANGES];
>> +    int num_ranges = 0, ret;
>>       int num_xcc, xcc_id;
>>       uint32_t xcc_mask;
>>
>>       num_xcc = NUM_XCC(adev->gfx.xcc_mask);
>>       xcc_mask = (1U << num_xcc) - 1;
>> -    mem_groups = hweight32(adev->aid_mask);
>>
>>       for_each_inst(xcc_id, xcc_mask) {
>>           ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
>> @@ -1929,12 +1928,6 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
>>       }
>>
>>       adev->gmc.num_mem_partitions = num_ranges;
>> -
>> -    /* If there is only partition, don't use entire size */
>> -    if (adev->gmc.num_mem_partitions == 1) {
>> -        mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
>> -        do_div(mem_ranges[0].size, mem_groups);
>> -    }
>>   }
>>
>>   static void
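
One note on the gmc_v9_0 hunk above for future reference: the removed
code made NPS1 report only (mem_groups - 1)/mem_groups of the single
range, e.g. 3/4 of it when hweight32(adev->aid_mask) is 4 (illustrative
value). With this patch, the NPS1 size is instead derived from
ttm_tt_pages_limit() in amdgpu_amdkfd_xcp_memory_size(), so that ad-hoc
reservation is no longer needed here.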

