[PATCH 1/2] drm/amdgpu: Implement get num of hops between two xgmi device
Kuehling, Felix
Felix.Kuehling at amd.com
Tue Apr 23 18:09:01 UTC 2019
See inline.
On 2019-04-17 2:58 p.m., Liu, Shaoyun wrote:
> KFD need to provide the info for upper level to determine the data path
>
> Change-Id: Idc809e8f3381b9222dd7be96539522d440f3ee7d
> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 21 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 ++
> 4 files changed, 39 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index acf8ae0..3fe9a38 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -27,6 +27,7 @@
> #include "amdgpu_gfx.h"
> #include <linux/module.h>
> #include <linux/dma-buf.h>
> +#include "amdgpu_xgmi.h"
>
> static const unsigned int compute_vmid_bitmap = 0xFF00;
>
> @@ -481,6 +482,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
>
> return adev->gmc.xgmi.hive_id;
> }
> +uint32_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
> +{
> + struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
> + struct amdgpu_device *adev = (struct amdgpu_device *)dst;
> + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
> +
> + if (ret < 0) {
> + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
> + adev->gmc.xgmi.physical_node_id,
> + peer_adev->gmc.xgmi.physical_node_id, ret);
> + ret = 0;
> + }
> + return (uint32_t)ret;
> +}
>
> int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
> uint32_t vmid, uint64_t gpu_addr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index e6a5037..8cc8a5a1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -154,6 +154,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
> uint32_t *flags);
> uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
> uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
> +uint32_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
>
> #define read_user_wptr(mmptr, wptr, dst) \
> ({ \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index a48c84c..eaebd47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -248,6 +248,27 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> return ret;
> }
>
> +
> +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> + struct amdgpu_device *peer_adev)
> +{
> + struct psp_xgmi_topology_info *top;
> + int ret = 0;
> +
> + top = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
Where does this get freed? Looks like you have a memory leak.
Also, instead of allocating a new copy and querying the info from the
PSP many times, could the psp_xgmi_topology_info structure be persistent
somewhere in adev. I think after the driver initialization it won't
change any more.
Regards,
Felix
> + if (!top)
> + return -ENOMEM;
> + top->num_nodes = 1;
> + top->nodes[0].node_id = peer_adev->gmc.xgmi.node_id;
> + ret = psp_xgmi_get_topology_info(&adev->psp, 1, top);
> + if (ret) {
> + dev_err(adev->dev,
> + "XGMI: Failed to get topology info\n");
> + return ret;
> + }
> + return top->nodes[0].num_hops;
> +}
> +
> int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> {
> struct psp_xgmi_topology_info *hive_topology;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 3e9c91e..8a945bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -41,6 +41,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
> int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
> int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
> +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
> + struct amdgpu_device *peer_adev);
>
> static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
> struct amdgpu_device *bo_adev)
More information about the amd-gfx
mailing list