[PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
Eric Huang
jinhuieric.huang at amd.com
Mon May 3 13:52:21 UTC 2021
Thanks Felix for your review. I will send another patch.
Eric
On 2021-04-30 7:42 p.m., Felix Kuehling wrote:
> Am 2021-04-28 um 11:11 a.m. schrieb Eric Huang:
>> With an NPS4 BIOS configuration, we need to find the closest NUMA
>> node when creating the topology IO link between CPU and GPU, if the
>> PCI driver doesn't set the device's NUMA node.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>> 1 file changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index 38d45711675f..57518136c7d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>> return 0;
>> }
>>
>> +#ifdef CONFIG_ACPI
>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
>> + int *numa_node)
>> +{
>> + struct acpi_table_header *table_header = NULL;
>> + struct acpi_subtable_header *sub_header = NULL;
>> + unsigned long table_end, subtable_len;
>> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>> + pci_dev_id(kdev->pdev);
>> + u32 bdf;
>> + acpi_status status;
>> + struct acpi_srat_cpu_affinity *cpu;
>> + struct acpi_srat_generic_affinity *gpu;
>> + int pxm = 0, max_pxm = 0;
>> + bool found = false;
>> +
>> + /* Fetch the SRAT table from ACPI */
>> + status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>> + if (status == AE_NOT_FOUND) {
>> + pr_warn("SRAT table not found\n");
>> + return;
>> + } else if (ACPI_FAILURE(status)) {
>> + const char *err = acpi_format_exception(status);
>> + pr_err("SRAT table error: %s\n", err);
>> + return;
>> + }
> After a successful call to acpi_get_table you need to call
> acpi_put_table before this function returns to avoid leaking memory.
>
>
>> +
>> + table_end = (unsigned long)table_header + table_header->length;
>> +
>> + /* Parse all entries looking for a match. */
>> +
>> + sub_header = (struct acpi_subtable_header *)
>> + ((unsigned long)table_header +
>> + sizeof(struct acpi_table_srat));
>> + subtable_len = sub_header->length;
>> +
>> + while (((unsigned long)sub_header) + subtable_len < table_end) {
>> + /*
>> + * If length is 0, break from this loop to avoid
>> + * infinite loop.
>> + */
>> + if (subtable_len == 0) {
>> + pr_err("SRAT invalid zero length\n");
>> + break;
>> + }
>> +
>> + switch (sub_header->type) {
>> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
>> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>> + cpu->proximity_domain_lo;
>> + if (pxm > max_pxm)
>> + max_pxm = pxm;
>> + break;
>> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
>> + bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>> + *((u16 *)(&gpu->device_handle[2]));
>> + if (bdf == pci_id) {
>> + found = true;
>> + *numa_node = pxm_to_node(gpu->proximity_domain);
>> + }
>> + break;
>> + default:
>> + break;
>> + }
>> +
>> + if (found)
>> + break;
>> +
>> + sub_header = (struct acpi_subtable_header *)
>> + ((unsigned long)sub_header + subtable_len);
>> + subtable_len = sub_header->length;
>> + }
>> +
>> + /* workaround bad cpu-gpu binding case */
>> + if (found && (*numa_node < 0 || *numa_node > max_pxm))
>> + *numa_node = 0;
> A suggestion: If you find a sensible NUMA node, call set_dev_node here.
> That simplifies the caller. See below.
>
>
>> +}
>> +#endif
>> +
>> /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>> * to its NUMA node
>> * @avail_size: Available size in the memory
>> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>> uint32_t proximity_domain)
>> {
>> struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
>> +#ifdef CONFIG_NUMA
>> + int numa_node = 0;
> Should this be NUMA_NO_NODE?
>
>
>> +#endif
>>
>> *avail_size -= sizeof(struct crat_subtype_iolink);
>> if (*avail_size < 0)
>> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>
>> sub_type_hdr->proximity_domain_from = proximity_domain;
>> #ifdef CONFIG_NUMA
>> - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>> - sub_type_hdr->proximity_domain_to = 0;
>> - else
>> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>> +#ifdef CONFIG_ACPI
>> + kfd_find_numa_node_in_srat(kdev, &numa_node);
>> +#endif
>> + sub_type_hdr->proximity_domain_to = numa_node;
>> + set_dev_node(&kdev->pdev->dev, numa_node);
>> + } else
>> sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
> It's better style to use braces on all if/else branches, if one branch
> needs them.
>
> But with my suggestion above this would become simpler:
>
> +#ifdef CONFIG_ACPI
> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> + kfd_find_numa_node_in_srat(kdev);
> +#endif
> if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> sub_type_hdr->proximity_domain_to = 0;
> else
> sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>
> Regards,
> Felix
>
>
>> #else
>> sub_type_hdr->proximity_domain_to = 0;
More information about the amd-gfx
mailing list