[PATCH] drm/amdkfd: correct sdma queue number in kfd device init

Sider, Graham Graham.Sider at amd.com
Fri Dec 17 15:05:52 UTC 2021


[Public]

> -----Original Message-----
> From: Chen, Guchun <Guchun.Chen at amd.com>
> Sent: Friday, December 17, 2021 9:31 AM
> To: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>; Sider, Graham
> <Graham.Sider at amd.com>; Kuehling, Felix <Felix.Kuehling at amd.com>;
> Kim, Jonathan <Jonathan.Kim at amd.com>
> Cc: Chen, Guchun <Guchun.Chen at amd.com>
> Subject: [PATCH] drm/amdkfd: correct sdma queue number in kfd device init
> 
> sdma queue number is not correct like on vega20, this patch promises the
> setting keeps the same after code refactor.
> Additionally, improve code to use switch case to list IP version to complete
> kfd device_info structure filling.
> This keeps consistency with the IP parse code in amdgpu_discovery.c.
> 
> Fixes: a9e2c4dc6cc4("drm/amdkfd: add kfd_device_info_init function")
> Signed-off-by: Guchun Chen <guchun.chen at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 74
> ++++++++++++++++++++++---
>  1 file changed, 65 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index facc28f58c1f..e50bf992f298 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -59,11 +59,72 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
> 
>  static int kfd_resume(struct kfd_dev *kfd);
> 
> +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) {
> +	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
> +
> +	switch (sdma_version) {
> +		case IP_VERSION(4, 0, 0):/* VEGA10 */
> +		case IP_VERSION(4, 0, 1):/* VEGA12 */
> +		case IP_VERSION(4, 1, 0):/* RAVEN */
> +		case IP_VERSION(4, 1, 1):/* RAVEN */
> +		case IP_VERSION(4, 1, 2):/* RENIOR */
> +		case IP_VERSION(5, 2, 1):/* VANGOGH */
> +		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
> +			kfd->device_info.num_sdma_queues_per_engine =
> 2;
> +			break;
> +		case IP_VERSION(4, 2, 0):/* VEGA20 */

Thanks for spotting this Guchun. My previous patch should have used a "<" instead of a "<=" on IP_VERSION(4, 2, 0).

> +		case IP_VERSION(4, 2, 2):/* ARCTUTUS */
> +		case IP_VERSION(4, 4, 0):/* ALDEBARAN */
> +		case IP_VERSION(5, 0, 0):/* NAVI10 */
> +		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
> +		case IP_VERSION(5, 0, 2):/* NAVI14 */
> +		case IP_VERSION(5, 0, 5):/* NAVI12 */
> +		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
> +		case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
> +		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
> +			kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +			break;
> +		default:
> +			dev_err(kfd_device,
> +				"Failed to find sdma ip
> blocks(SDMA_HWIP:0x%x) in %s\n",
> +                                sdma_version, __func__);
> +	}
> +}
> +
> +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev
> +*kfd) {
> +	uint32_t gc_version = KFD_GC_VERSION(kfd);
> +
> +	switch (gc_version) {
> +	case IP_VERSION(9, 0, 1): /* VEGA10 */
> +	case IP_VERSION(9, 2, 1): /* VEGA12 */
> +	case IP_VERSION(9, 3, 0): /* RENOIR */
> +	case IP_VERSION(9, 4, 0): /* VEGA20 */
> +	case IP_VERSION(9, 4, 1): /* ARCTURUS */
> +	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
> +	case IP_VERSION(10, 3, 1): /* VANGOGH */
> +	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
> +	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
> +	case IP_VERSION(10, 1, 10): /* NAVI10 */
> +	case IP_VERSION(10, 1, 2): /* NAVI12 */
> +	case IP_VERSION(10, 1, 1): /* NAVI14 */
> +	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
> +	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
> +	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
> +	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
> +		kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
> +		break;
> +	default:
> +		dev_err(kfd_device, "Failed to find gc ip
> blocks(GC_HWIP:0x%x) in %s\n",
> +				gc_version, __func__);
> +	}
> +}

I understand the appeal of moving to a switch for the SDMA queue num above since its setting isn't very linear w.r.t. the SDMA versioning. That said I don't know if I understand moving to a switch for the event interrupt class here. To clarify, originally when I set all SOC15 to event_interrupt_class_v9 it was to follow what was in the device_info structs in drm-staging-next at that time--that said if the plan is in a following patch to change this such that gfx10 are set to to event_interrupt_class_v10, what's the reasoning not to format it along the lines of:

if (gc_version >= IP_VERSION(10, 1, 1)
	kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
else
	kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;

(Assuming this is done in the SOC15 case, and of course cases would need to be added here eventually for e.g. event_interrupt_class_v11, but not necessarily for every asic).

Best,
Graham

> +
>  static void kfd_device_info_init(struct kfd_dev *kfd,
>  				 bool vf, uint32_t gfx_target_version)  {
>  	uint32_t gc_version = KFD_GC_VERSION(kfd);
> -	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
>  	uint32_t asic_type = kfd->adev->asic_type;
> 
>  	kfd->device_info.max_pasid_bits = 16;
> @@ -75,16 +136,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
>  	if (KFD_IS_SOC15(kfd)) {
>  		kfd->device_info.doorbell_size = 8;
>  		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
> -		kfd->device_info.event_interrupt_class =
> &event_interrupt_class_v9;
>  		kfd->device_info.supports_cwsr = true;
> 
> -		if ((sdma_version >= IP_VERSION(4, 0, 0)  &&
> -		     sdma_version <= IP_VERSION(4, 2, 0)) ||
> -		     sdma_version == IP_VERSION(5, 2, 1)  ||
> -		     sdma_version == IP_VERSION(5, 2, 3))
> -			kfd->device_info.num_sdma_queues_per_engine =
> 2;
> -		else
> -			kfd->device_info.num_sdma_queues_per_engine =
> 8;
> +		kfd_device_info_set_sdma_queue_num(kfd);
> +
> +		kfd_device_info_set_event_interrupt_class(kfd);
> 
>  		/* Raven */
>  		if (gc_version == IP_VERSION(9, 1, 0) ||
> --
> 2.17.1


More information about the amd-gfx mailing list