[PATCH 12/16] drm/amdgpu: use doorbell manager for kfd kernel doorbells

Shashank Sharma shashank.sharma at amd.com
Fri Mar 31 08:27:35 UTC 2023


On 30/03/2023 22:46, Alex Deucher wrote:
> On Wed, Mar 29, 2023 at 11:48 AM Shashank Sharma
> <shashank.sharma at amd.com> wrote:
>> This patch:
>> - adds a doorbell manager structure in kfd device structure.
>> - plugs-in doorbell manager APIs for KFD kernel doorbell allocations
>>    and free functions.
>> - removes the doorbell bitmap, uses the one in the doorbell manager
>>    structure for all the allocations.
>> - updates the get_kernel_doorbell and free_kernel_doorbell functions
>>    accordingly
>>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Christian Koenig <christian.koenig at amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   4 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 109 ++++++----------------
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |   3 +
>>   3 files changed, 35 insertions(+), 81 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index b8936340742b..a2e4cbddba26 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -435,8 +435,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
>>          atomic_set(&kfd->compute_profile, 0);
>>
>>          mutex_init(&kfd->doorbell_mutex);
>> -       memset(&kfd->doorbell_available_index, 0,
>> -               sizeof(kfd->doorbell_available_index));
>> +       memset(kfd->kernel_doorbells.doorbell_bitmap, 0,
>> +              kfd->kernel_doorbells.size / BITS_PER_LONG);
>>
>>          atomic_set(&kfd->sram_ecc_flag, 0);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index cd4e61bf0493..df259f2cc58a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -61,81 +61,37 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
>>   /* Doorbell calculations for device init. */
>>   int kfd_doorbell_init(struct kfd_dev *kfd)
>>   {
>> -       size_t doorbell_start_offset;
>> -       size_t doorbell_aperture_size;
>> -       size_t doorbell_process_limit;
>> +       int r;
>> +       struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>>
>> -       /*
>> -        * With MES enabled, just set the doorbell base as it is needed
>> -        * to calculate doorbell physical address.
>> -        */
>> -       if (kfd->shared_resources.enable_mes) {
>> -               kfd->doorbell_base =
>> -                       kfd->shared_resources.doorbell_physical_address;
>> -               return 0;
>> -       }
>> -
>> -       /*
>> -        * We start with calculations in bytes because the input data might
>> -        * only be byte-aligned.
>> -        * Only after we have done the rounding can we assume any alignment.
>> -        */
>> -
>> -       doorbell_start_offset =
>> -                       roundup(kfd->shared_resources.doorbell_start_offset,
>> -                                       kfd_doorbell_process_slice(kfd));
>> -
>> -       doorbell_aperture_size =
>> -                       rounddown(kfd->shared_resources.doorbell_aperture_size,
>> -                                       kfd_doorbell_process_slice(kfd));
>> -
>> -       if (doorbell_aperture_size > doorbell_start_offset)
>> -               doorbell_process_limit =
>> -                       (doorbell_aperture_size - doorbell_start_offset) /
>> -                                               kfd_doorbell_process_slice(kfd);
>> -       else
>> -               return -ENOSPC;
>> -
>> -       if (!kfd->max_doorbell_slices ||
>> -           doorbell_process_limit < kfd->max_doorbell_slices)
>> -               kfd->max_doorbell_slices = doorbell_process_limit;
>> -
>> -       kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
>> -                               doorbell_start_offset;
>> -
>> -       kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
>> -
>> -       kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
>> -                                          kfd_doorbell_process_slice(kfd));
>> -
>> -       if (!kfd->doorbell_kernel_ptr)
>> +       /* Bitmap to dynamically allocate doorbells from kernel page */
>> +       kernel_doorbells->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE, GFP_KERNEL);
>> +       if (!kernel_doorbells->doorbell_bitmap) {
>> +               DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
>>                  return -ENOMEM;
>> +       }
>>
>> -       pr_debug("Doorbell initialization:\n");
>> -       pr_debug("doorbell base           == 0x%08lX\n",
>> -                       (uintptr_t)kfd->doorbell_base);
>> -
>> -       pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
>> -                       kfd->doorbell_base_dw_offset);
>> -
>> -       pr_debug("doorbell_process_limit  == 0x%08lX\n",
>> -                       doorbell_process_limit);
>> -
>> -       pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
>> -                       (uintptr_t)kfd->doorbell_base);
>> -
>> -       pr_debug("doorbell aperture size  == 0x%08lX\n",
>> -                       kfd->shared_resources.doorbell_aperture_size);
>> +       /* Alloc and reserve doorbells for KFD kernel usages */
>> +       kernel_doorbells->size = PAGE_SIZE;
>> +       r = amdgpu_doorbell_alloc_page(kfd->adev, kernel_doorbells);
>
> Just do something like:
> r = amdgpu_bo_create_kernel(kfd->adev, PAGE_SIZE, PAGE_SIZE,
>                             AMDGPU_GEM_DOMAIN_DOORBELL,
>                             &kfd->doorbell_kernel_bo, NULL,
>                             &kfd->doorbell_kernel_ptr);
>
> Then you have your KFD pointer to its doorbell memory and no need to
> track the ranges.

Noted,

- Shashank

>
> Alex
>
>
>> +       if (r) {
>> +               pr_err("failed to allocate kernel doorbells\n");
>> +               bitmap_free(kernel_doorbells->doorbell_bitmap);
>> +               return r;
>> +       }
>>
>> -       pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
>> +       kfd->doorbell_kernel_ptr = kernel_doorbells->cpu_addr;
>> +       pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
>>
>>          return 0;
>>   }
>>
>>   void kfd_doorbell_fini(struct kfd_dev *kfd)
>>   {
>> -       if (kfd->doorbell_kernel_ptr)
>> -               iounmap(kfd->doorbell_kernel_ptr);
>> +       struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>> +
>> +       bitmap_free(kernel_doorbells->doorbell_bitmap);
>> +       amdgpu_doorbell_free_page(kfd->adev, kernel_doorbells);
>>   }
>>
>>   int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
>> @@ -186,24 +142,19 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>>                                          unsigned int *doorbell_off)
>>   {
>>          u32 inx;
>> +       struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>>
>>          mutex_lock(&kfd->doorbell_mutex);
>> -       inx = find_first_zero_bit(kfd->doorbell_available_index,
>> -                                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
>> +       inx = find_first_zero_bit(kernel_doorbells->doorbell_bitmap,
>> +                                 kernel_doorbells->size);
>>
>> -       __set_bit(inx, kfd->doorbell_available_index);
>> +       __set_bit(inx, kernel_doorbells->doorbell_bitmap);
>>          mutex_unlock(&kfd->doorbell_mutex);
>>
>>          if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
>>                  return NULL;
>>
>> -       inx *= kfd->device_info.doorbell_size / sizeof(u32);
>> -
>> -       /*
>> -        * Calculating the kernel doorbell offset using the first
>> -        * doorbell page.
>> -        */
>> -       *doorbell_off = kfd->doorbell_base_dw_offset + inx;
>> +       *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kernel_doorbells->bo, inx);
>>
>>          pr_debug("Get kernel queue doorbell\n"
>>                          "     doorbell offset   == 0x%08X\n"
>> @@ -216,12 +167,12 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
>>   void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
>>   {
>>          unsigned int inx;
>> +       struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells;
>>
>> -       inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
>> -               * sizeof(u32) / kfd->device_info.doorbell_size;
>> +       inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
>>
>>          mutex_lock(&kfd->doorbell_mutex);
>> -       __clear_bit(inx, kfd->doorbell_available_index);
>> +       __clear_bit(inx, kernel_doorbells->doorbell_bitmap);
>>          mutex_unlock(&kfd->doorbell_mutex);
>>   }
>>
>> @@ -280,7 +231,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
>>          if (!pdd->doorbell_index) {
>>                  int r = kfd_alloc_process_doorbells(pdd->dev,
>>                                                      &pdd->doorbell_index);
>> -               if (r)
>> +               if (r < 0)
>>                          return 0;
>>          }
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 552c3ac85a13..0ed33416c35f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -346,6 +346,9 @@ struct kfd_dev {
>>
>>          /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
>>          struct dev_pagemap pgmap;
>> +
>> +       /* Kernel doorbells for KFD device */
>> +       struct amdgpu_doorbell_obj kernel_doorbells;
>>   };
>>
>>   enum kfd_mempool {
>> --
>> 2.40.0
>>


More information about the amd-gfx mailing list