[PATCH 12/16] drm/amdgpu: use doorbell manager for kfd kernel doorbells
Alex Deucher
alexdeucher at gmail.com
Thu Mar 30 20:46:18 UTC 2023
On Wed, Mar 29, 2023 at 11:48 AM Shashank Sharma
<shashank.sharma at amd.com> wrote:
>
> This patch:
> - adds a doorbell manager structure in kfd device structure.
> - plugs in doorbell manager APIs for the KFD kernel doorbell allocation
> and free functions.
> - removes the doorbell bitmap and uses the one in the doorbell manager
> structure for all the allocations.
> - updates the get_kernel_doorbell and free_kernel_doorbell functions
> accordingly
>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +-
> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 109 ++++++----------------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +
> 3 files changed, 35 insertions(+), 81 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index b8936340742b..a2e4cbddba26 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -435,8 +435,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
> atomic_set(&kfd->compute_profile, 0);
>
> mutex_init(&kfd->doorbell_mutex);
> - memset(&kfd->doorbell_available_index, 0,
> - sizeof(kfd->doorbell_available_index));
> + memset(kfd->kernel_doorbells.doorbell_bitmap, 0,
> + kfd->kernel_doorbells.size / BITS_PER_LONG);
>
> atomic_set(&kfd->sram_ecc_flag, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index cd4e61bf0493..df259f2cc58a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -61,81 +61,37 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
> /* Doorbell calculations for device init. */
> int kfd_doorbell_init(struct kfd_dev *kfd)
> {
> - size_t doorbell_start_offset;
> - size_t doorbell_aperture_size;
> - size_t doorbell_process_limit;
> + int r;
> + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>
> - /*
> - * With MES enabled, just set the doorbell base as it is needed
> - * to calculate doorbell physical address.
> - */
> - if (kfd->shared_resources.enable_mes) {
> - kfd->doorbell_base =
> - kfd->shared_resources.doorbell_physical_address;
> - return 0;
> - }
> -
> - /*
> - * We start with calculations in bytes because the input data might
> - * only be byte-aligned.
> - * Only after we have done the rounding can we assume any alignment.
> - */
> -
> - doorbell_start_offset =
> - roundup(kfd->shared_resources.doorbell_start_offset,
> - kfd_doorbell_process_slice(kfd));
> -
> - doorbell_aperture_size =
> - rounddown(kfd->shared_resources.doorbell_aperture_size,
> - kfd_doorbell_process_slice(kfd));
> -
> - if (doorbell_aperture_size > doorbell_start_offset)
> - doorbell_process_limit =
> - (doorbell_aperture_size - doorbell_start_offset) /
> - kfd_doorbell_process_slice(kfd);
> - else
> - return -ENOSPC;
> -
> - if (!kfd->max_doorbell_slices ||
> - doorbell_process_limit < kfd->max_doorbell_slices)
> - kfd->max_doorbell_slices = doorbell_process_limit;
> -
> - kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
> - doorbell_start_offset;
> -
> - kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
> -
> - kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
> - kfd_doorbell_process_slice(kfd));
> -
> - if (!kfd->doorbell_kernel_ptr)
> + /* Bitmap to dynamically allocate doorbells from kernel page */
> + kernel_doorbells->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!kernel_doorbells->doorbell_bitmap) {
> + DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
> return -ENOMEM;
> + }
>
> - pr_debug("Doorbell initialization:\n");
> - pr_debug("doorbell base == 0x%08lX\n",
> - (uintptr_t)kfd->doorbell_base);
> -
> - pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
> - kfd->doorbell_base_dw_offset);
> -
> - pr_debug("doorbell_process_limit == 0x%08lX\n",
> - doorbell_process_limit);
> -
> - pr_debug("doorbell_kernel_offset == 0x%08lX\n",
> - (uintptr_t)kfd->doorbell_base);
> -
> - pr_debug("doorbell aperture size == 0x%08lX\n",
> - kfd->shared_resources.doorbell_aperture_size);
> + /* Alloc and reserve doorbells for KFD kernel usages */
> + kernel_doorbells->size = PAGE_SIZE;
> + r = amdgpu_doorbell_alloc_page(kfd->adev, kernel_doorbells);
Just do something like:
r = amdgpu_bo_create_kernel(kfd->adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_DOORBELL,
&kfd->doorbell_kernel_bo, NULL, &kfd->doorbell_kernel_ptr);
Then KFD has a pointer to its doorbell memory and there is no need to
track the ranges.
Alex
> + if (r) {
> + pr_err("failed to allocate kernel doorbells\n");
> + bitmap_free(kernel_doorbells->doorbell_bitmap);
> + return r;
> + }
>
> - pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
> + kfd->doorbell_kernel_ptr = kernel_doorbells->cpu_addr;
> + pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
>
> return 0;
> }
>
> void kfd_doorbell_fini(struct kfd_dev *kfd)
> {
> - if (kfd->doorbell_kernel_ptr)
> - iounmap(kfd->doorbell_kernel_ptr);
> + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
> +
> + bitmap_free(kernel_doorbells->doorbell_bitmap);
> + amdgpu_doorbell_free_page(kfd->adev, kernel_doorbells);
> }
>
> int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> @@ -186,24 +142,19 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
> unsigned int *doorbell_off)
> {
> u32 inx;
> + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>
> mutex_lock(&kfd->doorbell_mutex);
> - inx = find_first_zero_bit(kfd->doorbell_available_index,
> - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
> + inx = find_first_zero_bit(kernel_doorbells->doorbell_bitmap,
> + kernel_doorbells->size);
>
> - __set_bit(inx, kfd->doorbell_available_index);
> + __set_bit(inx, kernel_doorbells->doorbell_bitmap);
> mutex_unlock(&kfd->doorbell_mutex);
>
> if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
> return NULL;
>
> - inx *= kfd->device_info.doorbell_size / sizeof(u32);
> -
> - /*
> - * Calculating the kernel doorbell offset using the first
> - * doorbell page.
> - */
> - *doorbell_off = kfd->doorbell_base_dw_offset + inx;
> + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kernel_doorbells->bo, inx);
>
> pr_debug("Get kernel queue doorbell\n"
> " doorbell offset == 0x%08X\n"
> @@ -216,12 +167,12 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
> void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
> {
> unsigned int inx;
> + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>
> - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
> - * sizeof(u32) / kfd->device_info.doorbell_size;
> + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
>
> mutex_lock(&kfd->doorbell_mutex);
> - __clear_bit(inx, kfd->doorbell_available_index);
> + __clear_bit(inx, kernel_doorbells->doorbell_bitmap);
> mutex_unlock(&kfd->doorbell_mutex);
> }
>
> @@ -280,7 +231,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
> if (!pdd->doorbell_index) {
> int r = kfd_alloc_process_doorbells(pdd->dev,
> &pdd->doorbell_index);
> - if (r)
> + if (r < 0)
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 552c3ac85a13..0ed33416c35f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -346,6 +346,9 @@ struct kfd_dev {
>
> /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
> struct dev_pagemap pgmap;
> +
> + /* Kernel doorbells for KFD device */
> + struct amdgpu_doorbell_obj kernel_doorbells;
> };
>
> enum kfd_mempool {
> --
> 2.40.0
>
More information about the amd-gfx
mailing list