[PATCH 3/5] drm/amdgpu: Refactor amdgpu_xgmi_add_device

Alex Deucher alexdeucher at gmail.com
Wed Nov 21 19:29:17 UTC 2018


On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky
<andrey.grodzovsky at amd.com> wrote:
>
> This is prep work for updating each PSP FW in hive after
> GPU reset.
> Split into build topology SW state and update each PSP FW in the hive.
> Save topology and count of XGMI devices for reuse.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  5 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++-------------
>  2 files changed, 38 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 2c80453..3e5bede 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
>  /*
>   * functions used by amdgpu_xgmi.c
>   */
> +
> +struct amdgpu_hive_info;
> +
> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive);
>  int amdgpu_xgmi_add_device(struct amdgpu_device *adev);

We should move these to their own header, amdgpu_xgmi.h, rather than
dumping them in amdgpu.h

>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 909216a..23e4e16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex);
>  struct amdgpu_hive_info {
>         uint64_t                hive_id;
>         struct list_head        device_list;
> +       struct psp_xgmi_topology_info   topology_info;
> +       int number_devices;
>  };
>
>  static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
>  static unsigned hive_count = 0;
>
> -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)

Any reason to make this public?

>  {
>         int i;
>         struct amdgpu_hive_info *tmp;
> @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
>         return tmp;
>  }
>
> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
> +{
> +       int ret = -EINVAL;
> +
> +       /* Each psp need to set the latest topology */
> +               ret = psp_xgmi_set_topology_info(&adev->psp,
> +                                                hive->number_devices,
> +                                                &hive->topology_info);
> +               if (ret)
> +                       dev_err(adev->dev,
> +                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> +                               adev->gmc.xgmi.node_id,
> +                               adev->gmc.xgmi.hive_id, ret);
> +               else
> +                       dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> +                                adev->gmc.xgmi.physical_node_id,
> +                                adev->gmc.xgmi.hive_id);
> +
> +       return ret;
> +}

Indentation in this function looks wrong.

> +
>  int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>  {
> -       struct psp_xgmi_topology_info *tmp_topology;
> +       struct psp_xgmi_topology_info *hive_topology;
>         struct amdgpu_hive_info *hive;
>         struct amdgpu_xgmi      *entry;
> -       struct amdgpu_device    *tmp_adev;
> +       struct amdgpu_device *tmp_adev = NULL;
>
>         int count = 0, ret = -EINVAL;
>
> @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>         adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
>         adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
>
> -       tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
> -       if (!tmp_topology)
> -               return -ENOMEM;
>         mutex_lock(&xgmi_mutex);
>         hive = amdgpu_get_xgmi_hive(adev);
>         if (!hive)
>                 goto exit;
>
> +       hive_topology = &hive->topology_info;
> +
>         list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
>         list_for_each_entry(entry, &hive->device_list, head)
> -               tmp_topology->nodes[count++].node_id = entry->node_id;
> +               hive_topology->nodes[count++].node_id = entry->node_id;
> +       hive->number_devices = count;
>
>         /* Each psp need to get the latest topology */
>         list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> -               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
> +               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
>                 if (ret) {
>                         dev_err(tmp_adev->dev,
>                                 "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
> @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>                 }
>         }
>
> -       /* Each psp need to set the latest topology */
>         list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> -               ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
> -               if (ret) {
> -                       dev_err(tmp_adev->dev,
> -                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> -                               tmp_adev->gmc.xgmi.node_id,
> -                               tmp_adev->gmc.xgmi.hive_id, ret);
> -                       /* To do : continue with some  node failed or disable the  whole  hive */
> +               ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
> +               if (ret)
>                         break;
> -               }
>         }
> -       if (!ret)
> -               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> -                       adev->gmc.xgmi.physical_node_id,
> -                       adev->gmc.xgmi.hive_id);
>
>  exit:
>         mutex_unlock(&xgmi_mutex);
> -       kfree(tmp_topology);
>         return ret;
>  }
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list