[PATCH v2 08/12] drm/amdgpu: use doorbell manager for kfd kernel doorbells

Shashank Sharma shashank.sharma at amd.com
Thu Apr 13 13:08:44 UTC 2023


Hey Christian,

On 13/04/2023 13:02, Christian König wrote:
> Am 12.04.23 um 18:25 schrieb Shashank Sharma:
>> This patch:
>> - adds a doorbell bo in kfd device structure.
>> - creates doorbell page for kfd kernel usages.
>> - updates the get_kernel_doorbell and free_kernel_doorbell functions
>>    accordingly
>>
>> V2: Do not use wrapper API, use direct amdgpu_create_kernel(Alex)
>>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Christian Koenig <christian.koenig at amd.com>
>> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
>>   drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 110 ++++++----------------
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |   6 ++
>>   3 files changed, 36 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index b8936340742b..49e3c4e021af 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -435,8 +435,6 @@ struct kfd_dev *kgd2kfd_probe(struct 
>> amdgpu_device *adev, bool vf)
>>       atomic_set(&kfd->compute_profile, 0);
>>         mutex_init(&kfd->doorbell_mutex);
>> -    memset(&kfd->doorbell_available_index, 0,
>> -        sizeof(kfd->doorbell_available_index));
>>         atomic_set(&kfd->sram_ecc_flag, 0);
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index cd4e61bf0493..82b4a56b0afc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -61,81 +61,39 @@ size_t kfd_doorbell_process_slice(struct kfd_dev 
>> *kfd)
>>   /* Doorbell calculations for device init. */
>>   int kfd_doorbell_init(struct kfd_dev *kfd)
>>   {
>> -    size_t doorbell_start_offset;
>> -    size_t doorbell_aperture_size;
>> -    size_t doorbell_process_limit;
>> +    int r;
>> +    int size = PAGE_SIZE;
>
> It's usually good practice to declare variables like "r" and "i" last. 
> Some upstream maintainers even require reverse xmas tree here.
Noted, will change it.
>
>>   -    /*
>> -     * With MES enabled, just set the doorbell base as it is needed
>> -     * to calculate doorbell physical address.
>> -     */
>> -    if (kfd->shared_resources.enable_mes) {
>> -        kfd->doorbell_base =
>> -            kfd->shared_resources.doorbell_physical_address;
>> -        return 0;
>> -    }
>> -
>> -    /*
>> -     * We start with calculations in bytes because the input data might
>> -     * only be byte-aligned.
>> -     * Only after we have done the rounding can we assume any 
>> alignment.
>> -     */
>> -
>> -    doorbell_start_offset =
>> - roundup(kfd->shared_resources.doorbell_start_offset,
>> -                    kfd_doorbell_process_slice(kfd));
>> -
>> -    doorbell_aperture_size =
>> - rounddown(kfd->shared_resources.doorbell_aperture_size,
>> -                    kfd_doorbell_process_slice(kfd));
>> -
>> -    if (doorbell_aperture_size > doorbell_start_offset)
>> -        doorbell_process_limit =
>> -            (doorbell_aperture_size - doorbell_start_offset) /
>> -                        kfd_doorbell_process_slice(kfd);
>> -    else
>> -        return -ENOSPC;
>> -
>> -    if (!kfd->max_doorbell_slices ||
>> -        doorbell_process_limit < kfd->max_doorbell_slices)
>> -        kfd->max_doorbell_slices = doorbell_process_limit;
>> -
>> -    kfd->doorbell_base = 
>> kfd->shared_resources.doorbell_physical_address +
>> -                doorbell_start_offset;
>> -
>> -    kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
>> -
>> -    kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
>> -                       kfd_doorbell_process_slice(kfd));
>> -
>> -    if (!kfd->doorbell_kernel_ptr)
>> +    /* Bitmap to dynamically allocate doorbells from kernel page */
>> +    kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), 
>> GFP_KERNEL);
>> +    if (!kfd->doorbell_bitmap) {
>> +        DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
>>           return -ENOMEM;
>> +    }
>>   -    pr_debug("Doorbell initialization:\n");
>> -    pr_debug("doorbell base           == 0x%08lX\n",
>> -            (uintptr_t)kfd->doorbell_base);
>> -
>> -    pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
>> -            kfd->doorbell_base_dw_offset);
>> -
>> -    pr_debug("doorbell_process_limit  == 0x%08lX\n",
>> -            doorbell_process_limit);
>> -
>> -    pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
>> -            (uintptr_t)kfd->doorbell_base);
>> -
>> -    pr_debug("doorbell aperture size  == 0x%08lX\n",
>> -            kfd->shared_resources.doorbell_aperture_size);
>> -
>> -    pr_debug("doorbell kernel address == %p\n", 
>> kfd->doorbell_kernel_ptr);
>> +    /* Alloc a doorbell page for KFD kernel usages */
>> +    r = amdgpu_bo_create_kernel(kfd->adev,
>> +                    size,
>> +                    PAGE_SIZE,
>> +                    AMDGPU_GEM_DOMAIN_DOORBELL,
>> +                    &kfd->doorbells,
>> +                    NULL,
>> +                    (void **)&kfd->doorbell_kernel_ptr);
>> +    if (r) {
>> +        pr_err("failed to allocate kernel doorbells\n");
>> +        bitmap_free(kfd->doorbell_bitmap);
>> +        return r;
>> +    }
>>   +    pr_debug("Doorbell kernel address == %p\n", 
>> kfd->doorbell_kernel_ptr);
>>       return 0;
>>   }
>>     void kfd_doorbell_fini(struct kfd_dev *kfd)
>>   {
>> -    if (kfd->doorbell_kernel_ptr)
>> -        iounmap(kfd->doorbell_kernel_ptr);
>> +    bitmap_free(kfd->doorbell_bitmap);
>> +    amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
>> +                 (void **)&kfd->doorbell_kernel_ptr);
>>   }
>>     int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process 
>> *process,
>> @@ -188,22 +146,15 @@ void __iomem *kfd_get_kernel_doorbell(struct 
>> kfd_dev *kfd,
>>       u32 inx;
>>         mutex_lock(&kfd->doorbell_mutex);
>> -    inx = find_first_zero_bit(kfd->doorbell_available_index,
>> -                    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
>> +    inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / 
>> sizeof(u32));
>>   -    __set_bit(inx, kfd->doorbell_available_index);
>> +    __set_bit(inx, kfd->doorbell_bitmap);
>>       mutex_unlock(&kfd->doorbell_mutex);
>>         if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
>>           return NULL;
>>   -    inx *= kfd->device_info.doorbell_size / sizeof(u32);
>> -
>> -    /*
>> -     * Calculating the kernel doorbell offset using the first
>> -     * doorbell page.
>> -     */
>> -    *doorbell_off = kfd->doorbell_base_dw_offset + inx;
>> +    *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, 
>> kfd->doorbells, inx);
>>         pr_debug("Get kernel queue doorbell\n"
>>               "     doorbell offset   == 0x%08X\n"
>> @@ -217,11 +168,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev 
>> *kfd, u32 __iomem *db_addr)
>>   {
>>       unsigned int inx;
>>   -    inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
>> -        * sizeof(u32) / kfd->device_info.doorbell_size;
>> +    inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
>>         mutex_lock(&kfd->doorbell_mutex);
>> -    __clear_bit(inx, kfd->doorbell_available_index);
>> +    __clear_bit(inx, kfd->doorbell_bitmap);
>>       mutex_unlock(&kfd->doorbell_mutex);
>>   }
>>   @@ -280,7 +230,7 @@ phys_addr_t kfd_get_process_doorbells(struct 
>> kfd_process_device *pdd)
>>       if (!pdd->doorbell_index) {
>>           int r = kfd_alloc_process_doorbells(pdd->dev,
>>                               &pdd->doorbell_index);
>> -        if (r)
>> +        if (r < 0)
>>               return 0;
>>       }
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 552c3ac85a13..0b199eb6dc88 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -346,6 +346,12 @@ struct kfd_dev {
>>         /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
>>       struct dev_pagemap pgmap;
>> +
>> +    /* Kernel doorbells for KFD device */
>> +    struct amdgpu_bo *doorbells;
>> +
>> +    /* bitmap for dynamic doorbell allocation from this object */
>> +    unsigned long *doorbell_bitmap;
>
> When doorbell_available_index is now not longer used you should 
> probably remove it as well.

Actually, KFD used two levels of bitmaps to allocate doorbell:

- first level of bitmap to assign a pool of 1024 doorbells to a client 
from big doorbell pool,

- second level of bitmap to allocate one doorbell to the queue from the 
1024 doorbells of the clients.

In the new design, we are allocating and saving a page of doorbells for 
KFD kernel, but we still need one bitmap to dynamically allocate and 
assign single doorbell from the page, both at process and kernel level. 
So doorbell_bitmap is replacement for doorbell_available_index in kfd, 
to make code symmetrical everywhere (kfd kernel, mes kernel and kfd 
process).

- Shashank

>
>>   };
>>     enum kfd_mempool {
>


More information about the amd-gfx mailing list