[PATCH 3/5] drm/amdgpu: Refactor amdgpu_xgmi_add_device

Alex Deucher alexdeucher at gmail.com
Wed Nov 21 19:38:41 UTC 2018


On Wed, Nov 21, 2018 at 2:36 PM Grodzovsky, Andrey
<Andrey.Grodzovsky at amd.com> wrote:
>
>
>
> On 11/21/2018 02:29 PM, Alex Deucher wrote:
> > On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky
> > <andrey.grodzovsky at amd.com> wrote:
> >> This is prep work for updating each PSP FW in hive after
> >> GPU reset.
> >> Split into build topology SW state and update each PSP FW in the hive.
> >> Save topology and count of XGMI devices for reuse.
> >>
> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  5 +++
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++-------------
> >>   2 files changed, 38 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 2c80453..3e5bede 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
> >>   /*
> >>    * functions used by amdgpu_xgmi.c
> >>    */
> >> +
> >> +struct amdgpu_hive_info;
> >> +
> >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
> >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive);
> >>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> > We should move these to their own header, amdgpu_xgmi.h, rather than
> > dumping them in amdgpu.h
> >
> >>   /*
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> index 909216a..23e4e16 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex);
> >>   struct amdgpu_hive_info {
> >>          uint64_t                hive_id;
> >>          struct list_head        device_list;
> >> +       struct psp_xgmi_topology_info   topology_info;
> >> +       int number_devices;
> >>   };
> >>
> >>   static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
> >>   static unsigned hive_count = 0;
> >>
> >> -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> > Any reason to make this public?
>
> It is used in Patch 5.

Ok. thanks.

Alex

>
> Andrey
>
> >
> >>   {
> >>          int i;
> >>          struct amdgpu_hive_info *tmp;
> >> @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> >>          return tmp;
> >>   }
> >>
> >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
> >> +{
> >> +       int ret = -EINVAL;
> >> +
> >> +       /* Each psp need to set the latest topology */
> >> +               ret = psp_xgmi_set_topology_info(&adev->psp,
> >> +                                                hive->number_devices,
> >> +                                                &hive->topology_info);
> >> +               if (ret)
> >> +                       dev_err(adev->dev,
> >> +                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> >> +                               adev->gmc.xgmi.node_id,
> >> +                               adev->gmc.xgmi.hive_id, ret);
> >> +               else
> >> +                       dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> >> +                                adev->gmc.xgmi.physical_node_id,
> >> +                                adev->gmc.xgmi.hive_id);
> >> +
> >> +       return ret;
> >> +}
> > Indentation in this function looks wrong.
> >
> >> +
> >>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >>   {
> >> -       struct psp_xgmi_topology_info *tmp_topology;
> >> +       struct psp_xgmi_topology_info *hive_topology;
> >>          struct amdgpu_hive_info *hive;
> >>          struct amdgpu_xgmi      *entry;
> >> -       struct amdgpu_device    *tmp_adev;
> >> +       struct amdgpu_device *tmp_adev = NULL;
> >>
> >>          int count = 0, ret = -EINVAL;
> >>
> >> @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >>          adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
> >>          adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
> >>
> >> -       tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
> >> -       if (!tmp_topology)
> >> -               return -ENOMEM;
> >>          mutex_lock(&xgmi_mutex);
> >>          hive = amdgpu_get_xgmi_hive(adev);
> >>          if (!hive)
> >>                  goto exit;
> >>
> >> +       hive_topology = &hive->topology_info;
> >> +
> >>          list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
> >>          list_for_each_entry(entry, &hive->device_list, head)
> >> -               tmp_topology->nodes[count++].node_id = entry->node_id;
> >> +               hive_topology->nodes[count++].node_id = entry->node_id;
> >> +       hive->number_devices = count;
> >>
> >>          /* Each psp need to get the latest topology */
> >>          list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> >> -               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
> >> +               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
> >>                  if (ret) {
> >>                          dev_err(tmp_adev->dev,
> >>                                  "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
> >> @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >>                  }
> >>          }
> >>
> >> -       /* Each psp need to set the latest topology */
> >>          list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> >> -               ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
> >> -               if (ret) {
> >> -                       dev_err(tmp_adev->dev,
> >> -                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> >> -                               tmp_adev->gmc.xgmi.node_id,
> >> -                               tmp_adev->gmc.xgmi.hive_id, ret);
> >> -                       /* To do : continue with some  node failed or disable the  whole  hive */
> >> +               ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
> >> +               if (ret)
> >>                          break;
> >> -               }
> >>          }
> >> -       if (!ret)
> >> -               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> >> -                       adev->gmc.xgmi.physical_node_id,
> >> -                       adev->gmc.xgmi.hive_id);
> >>
> >>   exit:
> >>          mutex_unlock(&xgmi_mutex);
> >> -       kfree(tmp_topology);
> >>          return ret;
> >>   }
> >> --
> >> 2.7.4
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx at lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


More information about the amd-gfx mailing list