[PATCH 3/5] drm/amdgpu: Refactor amdgpu_xgmi_add_device

Grodzovsky, Andrey Andrey.Grodzovsky at amd.com
Wed Nov 21 19:36:13 UTC 2018



On 11/21/2018 02:29 PM, Alex Deucher wrote:
> On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky
> <andrey.grodzovsky at amd.com> wrote:
>> This is prep work for updating each PSP FW in hive after
>> GPU reset.
>> Split into build topology SW state and update each PSP FW in the hive.
>> Save topology and count of XGMI devices for reuse.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  5 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++-------------
>>   2 files changed, 38 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 2c80453..3e5bede 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
>>   /*
>>    * functions used by amdgpu_xgmi.c
>>    */
>> +
>> +struct amdgpu_hive_info;
>> +
>> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
>> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive);
>>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> We should move these to their own header, amdgpu_xgmi.h, rather than
> dumping them in amdgpu.h
>
>>   /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> index 909216a..23e4e16 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex);
>>   struct amdgpu_hive_info {
>>          uint64_t                hive_id;
>>          struct list_head        device_list;
>> +       struct psp_xgmi_topology_info   topology_info;
>> +       int number_devices;
>>   };
>>
>>   static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
>>   static unsigned hive_count = 0;
>>
>> -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> Any reason to make this public?

It is used in Patch 5.

Andrey

>
>>   {
>>          int i;
>>          struct amdgpu_hive_info *tmp;
>> @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>>          return tmp;
>>   }
>>
>> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
>> +{
>> +       int ret = -EINVAL;
>> +
>> +       /* Each psp need to set the latest topology */
>> +               ret = psp_xgmi_set_topology_info(&adev->psp,
>> +                                                hive->number_devices,
>> +                                                &hive->topology_info);
>> +               if (ret)
>> +                       dev_err(adev->dev,
>> +                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
>> +                               adev->gmc.xgmi.node_id,
>> +                               adev->gmc.xgmi.hive_id, ret);
>> +               else
>> +                       dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
>> +                                adev->gmc.xgmi.physical_node_id,
>> +                                adev->gmc.xgmi.hive_id);
>> +
>> +       return ret;
>> +}
> Indentation in this function looks wrong.
>
>> +
>>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>>   {
>> -       struct psp_xgmi_topology_info *tmp_topology;
>> +       struct psp_xgmi_topology_info *hive_topology;
>>          struct amdgpu_hive_info *hive;
>>          struct amdgpu_xgmi      *entry;
>> -       struct amdgpu_device    *tmp_adev;
>> +       struct amdgpu_device *tmp_adev = NULL;
>>
>>          int count = 0, ret = -EINVAL;
>>
>> @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>>          adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
>>          adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
>>
>> -       tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
>> -       if (!tmp_topology)
>> -               return -ENOMEM;
>>          mutex_lock(&xgmi_mutex);
>>          hive = amdgpu_get_xgmi_hive(adev);
>>          if (!hive)
>>                  goto exit;
>>
>> +       hive_topology = &hive->topology_info;
>> +
>>          list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
>>          list_for_each_entry(entry, &hive->device_list, head)
>> -               tmp_topology->nodes[count++].node_id = entry->node_id;
>> +               hive_topology->nodes[count++].node_id = entry->node_id;
>> +       hive->number_devices = count;
>>
>>          /* Each psp need to get the latest topology */
>>          list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
>> -               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
>> +               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
>>                  if (ret) {
>>                          dev_err(tmp_adev->dev,
>>                                  "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
>> @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>>                  }
>>          }
>>
>> -       /* Each psp need to set the latest topology */
>>          list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
>> -               ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
>> -               if (ret) {
>> -                       dev_err(tmp_adev->dev,
>> -                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
>> -                               tmp_adev->gmc.xgmi.node_id,
>> -                               tmp_adev->gmc.xgmi.hive_id, ret);
>> -                       /* To do : continue with some  node failed or disable the  whole  hive */
>> +               ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
>> +               if (ret)
>>                          break;
>> -               }
>>          }
>> -       if (!ret)
>> -               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
>> -                       adev->gmc.xgmi.physical_node_id,
>> -                       adev->gmc.xgmi.hive_id);
>>
>>   exit:
>>          mutex_unlock(&xgmi_mutex);
>> -       kfree(tmp_topology);
>>          return ret;
>>   }
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



More information about the amd-gfx mailing list