[PATCH v2 08/12] drm/amdgpu: use doorbell manager for kfd kernel doorbells

Christian König christian.koenig at amd.com
Thu Apr 13 11:02:52 UTC 2023


Am 12.04.23 um 18:25 schrieb Shashank Sharma:
> This patch:
> - adds a doorbell bo in kfd device structure.
> - creates doorbell page for kfd kernel usages.
> - updates the get_kernel_doorbell and free_kernel_doorbell functions
>    accordingly
>
> V2: Do not use wrapper API, use direct amdgpu_create_kernel(Alex)
>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
>   drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 110 ++++++----------------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |   6 ++
>   3 files changed, 36 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index b8936340742b..49e3c4e021af 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -435,8 +435,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
>   	atomic_set(&kfd->compute_profile, 0);
>   
>   	mutex_init(&kfd->doorbell_mutex);
> -	memset(&kfd->doorbell_available_index, 0,
> -		sizeof(kfd->doorbell_available_index));
>   
>   	atomic_set(&kfd->sram_ecc_flag, 0);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index cd4e61bf0493..82b4a56b0afc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -61,81 +61,39 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
>   /* Doorbell calculations for device init. */
>   int kfd_doorbell_init(struct kfd_dev *kfd)
>   {
> -	size_t doorbell_start_offset;
> -	size_t doorbell_aperture_size;
> -	size_t doorbell_process_limit;
> +	int r;
> +	int size = PAGE_SIZE;

It's usually good practice to declare variables like "r" and "i" last. 
Some upstream maintainers even require reverse xmas tree here.

>   
> -	/*
> -	 * With MES enabled, just set the doorbell base as it is needed
> -	 * to calculate doorbell physical address.
> -	 */
> -	if (kfd->shared_resources.enable_mes) {
> -		kfd->doorbell_base =
> -			kfd->shared_resources.doorbell_physical_address;
> -		return 0;
> -	}
> -
> -	/*
> -	 * We start with calculations in bytes because the input data might
> -	 * only be byte-aligned.
> -	 * Only after we have done the rounding can we assume any alignment.
> -	 */
> -
> -	doorbell_start_offset =
> -			roundup(kfd->shared_resources.doorbell_start_offset,
> -					kfd_doorbell_process_slice(kfd));
> -
> -	doorbell_aperture_size =
> -			rounddown(kfd->shared_resources.doorbell_aperture_size,
> -					kfd_doorbell_process_slice(kfd));
> -
> -	if (doorbell_aperture_size > doorbell_start_offset)
> -		doorbell_process_limit =
> -			(doorbell_aperture_size - doorbell_start_offset) /
> -						kfd_doorbell_process_slice(kfd);
> -	else
> -		return -ENOSPC;
> -
> -	if (!kfd->max_doorbell_slices ||
> -	    doorbell_process_limit < kfd->max_doorbell_slices)
> -		kfd->max_doorbell_slices = doorbell_process_limit;
> -
> -	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
> -				doorbell_start_offset;
> -
> -	kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
> -
> -	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
> -					   kfd_doorbell_process_slice(kfd));
> -
> -	if (!kfd->doorbell_kernel_ptr)
> +	/* Bitmap to dynamically allocate doorbells from kernel page */
> +	kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
> +	if (!kfd->doorbell_bitmap) {
> +		DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
>   		return -ENOMEM;
> +	}
>   
> -	pr_debug("Doorbell initialization:\n");
> -	pr_debug("doorbell base           == 0x%08lX\n",
> -			(uintptr_t)kfd->doorbell_base);
> -
> -	pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
> -			kfd->doorbell_base_dw_offset);
> -
> -	pr_debug("doorbell_process_limit  == 0x%08lX\n",
> -			doorbell_process_limit);
> -
> -	pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
> -			(uintptr_t)kfd->doorbell_base);
> -
> -	pr_debug("doorbell aperture size  == 0x%08lX\n",
> -			kfd->shared_resources.doorbell_aperture_size);
> -
> -	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
> +	/* Alloc a doorbell page for KFD kernel usages */
> +	r = amdgpu_bo_create_kernel(kfd->adev,
> +				    size,
> +				    PAGE_SIZE,
> +				    AMDGPU_GEM_DOMAIN_DOORBELL,
> +				    &kfd->doorbells,
> +				    NULL,
> +				    (void **)&kfd->doorbell_kernel_ptr);
> +	if (r) {
> +		pr_err("failed to allocate kernel doorbells\n");
> +		bitmap_free(kfd->doorbell_bitmap);
> +		return r;
> +	}
>   
> +	pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
>   	return 0;
>   }
>   
>   void kfd_doorbell_fini(struct kfd_dev *kfd)
>   {
> -	if (kfd->doorbell_kernel_ptr)
> -		iounmap(kfd->doorbell_kernel_ptr);
> +	bitmap_free(kfd->doorbell_bitmap);
> +	amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
> +			     (void **)&kfd->doorbell_kernel_ptr);
>   }
>   
>   int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> @@ -188,22 +146,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>   	u32 inx;
>   
>   	mutex_lock(&kfd->doorbell_mutex);
> -	inx = find_first_zero_bit(kfd->doorbell_available_index,
> -					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
> +	inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
>   
> -	__set_bit(inx, kfd->doorbell_available_index);
> +	__set_bit(inx, kfd->doorbell_bitmap);
>   	mutex_unlock(&kfd->doorbell_mutex);
>   
>   	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
>   		return NULL;
>   
> -	inx *= kfd->device_info.doorbell_size / sizeof(u32);
> -
> -	/*
> -	 * Calculating the kernel doorbell offset using the first
> -	 * doorbell page.
> -	 */
> -	*doorbell_off = kfd->doorbell_base_dw_offset + inx;
> +	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
>   
>   	pr_debug("Get kernel queue doorbell\n"
>   			"     doorbell offset   == 0x%08X\n"
> @@ -217,11 +168,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
>   {
>   	unsigned int inx;
>   
> -	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
> -		* sizeof(u32) / kfd->device_info.doorbell_size;
> +	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
>   
>   	mutex_lock(&kfd->doorbell_mutex);
> -	__clear_bit(inx, kfd->doorbell_available_index);
> +	__clear_bit(inx, kfd->doorbell_bitmap);
>   	mutex_unlock(&kfd->doorbell_mutex);
>   }
>   
> @@ -280,7 +230,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
>   	if (!pdd->doorbell_index) {
>   		int r = kfd_alloc_process_doorbells(pdd->dev,
>   						    &pdd->doorbell_index);
> -		if (r)
> +		if (r < 0)
>   			return 0;
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 552c3ac85a13..0b199eb6dc88 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -346,6 +346,12 @@ struct kfd_dev {
>   
>   	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
>   	struct dev_pagemap pgmap;
> +
> +	/* Kernel doorbells for KFD device */
> +	struct amdgpu_bo *doorbells;
> +
> +	/* bitmap for dynamic doorbell allocation from this object */
> +	unsigned long *doorbell_bitmap;

When doorbell_available_index is now not longer used you should probably 
remove it as well.

Christian.

>   };
>   
>   enum kfd_mempool {



More information about the amd-gfx mailing list