[PATCH 2/4] drm/amdkfd: report num xgmi links between direct peers to the kfd

Felix Kuehling felix.kuehling at amd.com
Sat Jul 10 00:43:52 UTC 2021


On 2021-06-21 3:23 p.m., Jonathan Kim wrote:
> Since Min/Max bandwidth was never used, it will be repurposed to report
> the number of xgmi links between direct peers to the KFD topology.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 12 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c      | 11 +++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.h      |  4 ++--
>   6 files changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index bfab2f9fdd17..c84989eda8eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -553,6 +553,21 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
>   	return  (uint8_t)ret;
>   }
>   
> +uint8_t amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev *dst, struct kgd_dev *src)
> +{
> +	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
> +	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
> +	int ret = amdgpu_xgmi_get_num_links(adev, peer_adev);
> +
> +	if (ret < 0) {
> +		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
> +			adev->gmc.xgmi.physical_node_id,
> +			peer_adev->gmc.xgmi.physical_node_id, ret);
> +		ret = 0;
> +	}
> +	return  (uint8_t)ret;
> +}
> +
>   uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index fabc68eec36a..20e4bfce62be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
>   uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
>   int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
>   uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
> +uint8_t amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev *dst, struct kgd_dev *src);
>   
>   /* Read user wptr from a specified user address space with page fault
>    * disabled. The memory must be pinned and mapped to the hardware when
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 8567d5d77346..258cf86b32f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
>   	return	-EINVAL;
>   }
>   
> +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> +		struct amdgpu_device *peer_adev)
> +{
> +	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> +	int i;
> +
> +	for (i = 0 ; i < top->num_nodes; ++i)
> +		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> +			return top->nodes[i].num_links;
> +	return	-EINVAL;
> +}
> +
>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>   {
>   	struct psp_xgmi_topology_info *top_info;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 12969c0830d5..d2189bf7d428 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
>   int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
>   int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
>   		struct amdgpu_device *peer_adev);
> +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> +		struct amdgpu_device *peer_adev);
>   uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
>   					   uint64_t addr);
>   static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index c6b02aee4993..75047b77649b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1034,8 +1034,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
>   
>   			props->min_latency = iolink->minimum_latency;
>   			props->max_latency = iolink->maximum_latency;
> -			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
> -			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
> +			props->min_bandwidth = iolink->minimum_bandwidth;
> +			props->max_bandwidth = iolink->maximum_bandwidth;
>   			props->rec_transfer_size =
>   					iolink->recommended_transfer_size;
>   
> @@ -1989,6 +1989,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
>   		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
>   		sub_type_hdr->num_hops_xgmi = 1;
> +		sub_type_hdr->minimum_bandwidth = 1;
> +		sub_type_hdr->maximum_bandwidth = 1;
>   	} else {
>   		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
>   	}
> @@ -2033,6 +2035,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
>   	sub_type_hdr->proximity_domain_to = proximity_domain_to;
>   	sub_type_hdr->num_hops_xgmi =
>   		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
> +	sub_type_hdr->maximum_bandwidth =
> +		amdgpu_amdkfd_get_xgmi_num_links(kdev->kgd, peer_kdev->kgd);
> +	sub_type_hdr->minimum_bandwidth =
> +		sub_type_hdr->maximum_bandwidth ? 1 : 0;

Reporting the number of XGMI links directly was not my intention. We 
should use it to calculate the actual bandwidth, which depends on the 
number of links and the clocks. You can use the minimum and maximum 
clocks to calculate the min and max bandwidth. I don't think the number 
of parallel links changes dynamically, so the minimum bandwidth should 
use the same number of links.

The CRAT definition seems to use MB/s as the unit.
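For example, something along these lines (just a sketch; the
xgmi_link_speed_mbs() helper is made up here and would have to come
from the SMU/clock code, returning the per-link speed in MB/s at the
minimum or maximum XGMI clock):

	uint32_t num_links =
		amdgpu_amdkfd_get_xgmi_num_links(kdev->kgd, peer_kdev->kgd);

	/* Hypothetical per-link speeds in MB/s at min and max XGMI clock */
	uint32_t min_speed_mbs = xgmi_link_speed_mbs(kdev, false /* min clock */);
	uint32_t max_speed_mbs = xgmi_link_speed_mbs(kdev, true /* max clock */);

	/* Same number of parallel links for both bounds; only the clock
	 * differs.
	 */
	sub_type_hdr->minimum_bandwidth_mbs = num_links * min_speed_mbs;
	sub_type_hdr->maximum_bandwidth_mbs = num_links * max_speed_mbs;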


> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> index d54ceebd346b..d1f6de5edfb9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> @@ -271,8 +271,8 @@ struct crat_subtype_iolink {
>   	uint16_t	version_minor;
>   	uint32_t	minimum_latency;
>   	uint32_t	maximum_latency;
> -	uint32_t	minimum_bandwidth_mbs;
> -	uint32_t	maximum_bandwidth_mbs;
> +	uint32_t	minimum_bandwidth;
> +	uint32_t	maximum_bandwidth;

I don't think we should change the CRAT definition.
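That is, the struct can keep the _mbs field names, and the parser in
kfd_parse_subtype_iolink() keeps reading them as it does today:

	props->min_bandwidth = iolink->minimum_bandwidth_mbs;
	props->max_bandwidth = iolink->maximum_bandwidth_mbs;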

Regards,
   Felix


>   	uint32_t	recommended_transfer_size;
>   	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
>   	uint8_t		num_hops_xgmi;

