[PATCH] drm/amdgpu: xgmi_fill_topology_info

Kim, Jonathan Jonathan.Kim at amd.com
Fri Dec 8 22:08:38 UTC 2023


[Public]

> -----Original Message-----
> From: Chander, Vignesh <Vignesh.Chander at amd.com>
> Sent: Thursday, December 7, 2023 7:42 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Lazar, Lijo <Lijo.Lazar at amd.com>; Luo, Zhigang
> <Zhigang.Luo at amd.com>; Kim, Jonathan <Jonathan.Kim at amd.com>;
> Chander, Vignesh <Vignesh.Chander at amd.com>
> Subject: [PATCH] drm/amdgpu: xgmi_fill_topology_info
>
> 1. Use the mirrored topology info to fill links for VF.
> The new solution is required to simplify and optimize host driver logic.
> Only use the new solution for VFs that support full duplex and
> extended_peer_link_info; otherwise the info would be incomplete.
>
> 2. Avoid calling extended_link_info on VF, as it's not supported.
>
> Signed-off-by: Vignesh Chander <Vignesh.Chander at amd.com>

Reviewed-by: Jonathan Kim <jonathan.kim at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58
> ++++++++++++++++++++----
>  2 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index a21045d018f2..1bf975b8d083 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct
> psp_context *psp,
>                        get_extended_data) ||
>                       amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
>                               IP_VERSION(13, 0, 6);
> -             bool ta_port_num_support = psp-
> >xgmi_context.xgmi_ta_caps &
> -
>       EXTEND_PEER_LINK_INFO_CMD_FLAG;
> +             bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ?
> 0 :
> +                             psp->xgmi_context.xgmi_ta_caps &
> EXTEND_PEER_LINK_INFO_CMD_FLAG;
>
>               /* popluate the shared output buffer rather than the cmd
> input buffer
>                * with node_ids as the input for GET_PEER_LINKS command
> execution.
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 44d8c1a11e1b..dd82d73daed6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -823,6 +823,28 @@ static int
> amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
>       return 0;
>  }
>
> +void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
> +     struct amdgpu_device *peer_adev)
> +{
> +     struct psp_xgmi_topology_info *top_info = &adev-
> >psp.xgmi_context.top_info;
> +     struct psp_xgmi_topology_info *peer_info = &peer_adev-
> >psp.xgmi_context.top_info;
> +
> +     for (int i = 0; i < peer_info->num_nodes; i++) {
> +             if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id)
> {
> +                     for (int j = 0; j < top_info->num_nodes; j++) {
> +                             if (top_info->nodes[j].node_id == peer_adev-
> >gmc.xgmi.node_id) {
> +                                     peer_info->nodes[i].num_hops =
> top_info->nodes[j].num_hops;
> +                                     peer_info-
> >nodes[i].is_sharing_enabled =
> +                                                     top_info-
> >nodes[j].is_sharing_enabled;
> +                                     peer_info->nodes[i].num_links =
> +                                                     top_info-
> >nodes[j].num_links;
> +                                     return;
> +                             }
> +                     }
> +             }
> +     }
> +}
> +
>  int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>  {
>       struct psp_xgmi_topology_info *top_info;
> @@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct
> amdgpu_device *adev)
>                               goto exit_unlock;
>               }
>
> -             /* get latest topology info for each device from psp */
> -             list_for_each_entry(tmp_adev, &hive->device_list,
> gmc.xgmi.head) {
> -                     ret = psp_xgmi_get_topology_info(&tmp_adev->psp,
> count,
> -                                     &tmp_adev-
> >psp.xgmi_context.top_info, false);
> +             if (amdgpu_sriov_vf(adev) &&
> +                     psp->xgmi_context.xgmi_ta_caps &
> EXTEND_PEER_LINK_INFO_CMD_FLAG) {
> +                     /* only get topology for VF being init if it can
> support full duplex */
> +                     ret = psp_xgmi_get_topology_info(&adev->psp,
> count,
> +                                             &adev-
> >psp.xgmi_context.top_info, false);
>                       if (ret) {
> -                             dev_err(tmp_adev->dev,
> +                             dev_err(adev->dev,
>                                       "XGMI: Get topology failure on
> device %llx, hive %llx, ret %d",
> -                                     tmp_adev->gmc.xgmi.node_id,
> -                                     tmp_adev->gmc.xgmi.hive_id, ret);
> -                             /* To do : continue with some node failed or
> disable the whole hive */
> +                                     adev->gmc.xgmi.node_id,
> +                                     adev->gmc.xgmi.hive_id, ret);
> +                             /* To do: continue with some node failed or
> disable the whole hive*/
>                               goto exit_unlock;
>                       }
> +
> +                     /* fill the topology info for peers instead of getting
> from PSP */
> +                     list_for_each_entry(tmp_adev, &hive->device_list,
> gmc.xgmi.head) {
> +                             amdgpu_xgmi_fill_topology_info(adev,
> tmp_adev);
> +                     }
> +             } else {
> +                     /* get latest topology info for each device from psp
> */
> +                     list_for_each_entry(tmp_adev, &hive->device_list,
> gmc.xgmi.head) {
> +                             ret =
> psp_xgmi_get_topology_info(&tmp_adev->psp, count,
> +                                     &tmp_adev-
> >psp.xgmi_context.top_info, false);
> +                             if (ret) {
> +                                     dev_err(tmp_adev->dev,
> +                                             "XGMI: Get topology failure
> on device %llx, hive %llx, ret %d",
> +                                             tmp_adev-
> >gmc.xgmi.node_id,
> +                                             tmp_adev-
> >gmc.xgmi.hive_id, ret);
> +                                     /* To do : continue with some node
> failed or disable the whole hive */
> +                                     goto exit_unlock;
> +                             }
> +                     }
>               }
>
>               /* get topology again for hives that support extended data
> */
>
> base-commit: 44cb338138f7670ce2e1f8b9ef14e32c6ace282c
> --
> 2.25.1



More information about the amd-gfx mailing list