[PATCH v2 09/12] drm/amdgpu: use doorbell manager for kfd process doorbells
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Apr 13 11:07:16 UTC 2023
Am 12.04.23 um 18:25 schrieb Shashank Sharma:
> This patch:
> - adds a doorbell object in kfd pdd structure.
> - allocates doorbells for a process while creating its pdd.
> - frees the doorbells with pdd destroy.
> - removes previous calls to allocate process doorbells as
> its not required anymore.
>
> PS: This patch ensures that we don't break the existing KFD
> functionality, but now KFD userspace library should also
> create doorbell pages as AMDGPU GEM objects using libdrm
> functions in userspace. The reference code for the same
> is available with AMDGPU Usermode queue libdrm MR. Once
> this is done, we will not need to create process doorbells
> in kernel.
>
> V2: - Do not use doorbell wrapper API, use amdgpu_bo_create_kernel
> instead (Alex).
> - Do not use custom doorbell structure, instead use separate
> variables for bo and doorbell_bitmap (Alex)
>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
Felix needs to take a look at this, I only understand halve of what's
going on here.
Feel free to add an Acked-by: Christian König <christian.koenig at amd.com>
cause the halve I understand looks good.
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 ----
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +--
> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 65 ++++++++++---------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++-
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 +++---
> .../amd/amdkfd/kfd_process_queue_manager.c | 22 +++----
> 6 files changed, 64 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 6d291aa6386b..0e40756417e5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -327,12 +327,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
> goto err_bind_process;
> }
>
> - if (!pdd->doorbell_index &&
> - kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
> - err = -ENOMEM;
> - goto err_alloc_doorbells;
> - }
> -
> /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
> * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
> */
> @@ -410,7 +404,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
> if (wptr_bo)
> amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
> err_wptr_map_gart:
> -err_alloc_doorbells:
> err_bind_process:
> err_pdd:
> mutex_unlock(&p->mutex);
> @@ -2163,12 +2156,6 @@ static int criu_restore_devices(struct kfd_process *p,
> ret = PTR_ERR(pdd);
> goto exit;
> }
> -
> - if (!pdd->doorbell_index &&
> - kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
> - ret = -ENOMEM;
> - goto exit;
> - }
> }
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index ecb4c3abc629..855bf8ce3f16 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -371,7 +371,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
> unsigned int found;
>
> found = find_first_zero_bit(qpd->doorbell_bitmap,
> - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
> + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
> if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
> pr_debug("No doorbells available");
> return -EBUSY;
> @@ -381,9 +381,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
> }
> }
>
> - q->properties.doorbell_off =
> - kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
> - q->doorbell_id);
> + q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
> + qpd->proc_doorbells,
> + q->doorbell_id);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 82b4a56b0afc..718cfe9cb4f5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -227,46 +227,47 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
>
> phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
> {
> - if (!pdd->doorbell_index) {
> - int r = kfd_alloc_process_doorbells(pdd->dev,
> - &pdd->doorbell_index);
> - if (r < 0)
> - return 0;
> - }
> + struct amdgpu_device *adev = pdd->dev->adev;
> + uint32_t first_db_index;
>
> - return pdd->dev->doorbell_base +
> - pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
> + first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
> + return adev->doorbell.base + first_db_index * sizeof(uint32_t);
> }
>
> -int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
> {
> - int r = 0;
> -
> - if (!kfd->shared_resources.enable_mes)
> - r = ida_simple_get(&kfd->doorbell_ida, 1,
> - kfd->max_doorbell_slices, GFP_KERNEL);
> - else
> - r = amdgpu_mes_alloc_process_doorbells(
> - (struct amdgpu_device *)kfd->adev,
> - doorbell_index);
> + int r;
> + struct qcm_process_device *qpd = &pdd->qpd;
>
> - if (r > 0)
> - *doorbell_index = r;
> + /* Allocate bitmap for dynamic doorbell allocation */
> + qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
> + GFP_KERNEL);
> + if (!qpd->doorbell_bitmap) {
> + DRM_ERROR("Failed to allocate process doorbell bitmap\n");
> + return -ENOMEM;
> + }
>
> - if (r < 0)
> - pr_err("Failed to allocate process doorbells\n");
> + /* Allocate doorbells for this process */
> + r = amdgpu_bo_create_kernel(kfd->adev,
> + kfd_doorbell_process_slice(kfd),
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_DOORBELL,
> + &qpd->proc_doorbells,
> + NULL,
> + NULL);
> + if (r) {
> + DRM_ERROR("Failed to allocate process doorbells\n");
> + bitmap_free(qpd->doorbell_bitmap);
> + return r;
> + }
>
> - return r;
> + return 0;
> }
>
> -void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
> +void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
> {
> - if (doorbell_index) {
> - if (!kfd->shared_resources.enable_mes)
> - ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
> - else
> - amdgpu_mes_free_process_doorbells(
> - (struct amdgpu_device *)kfd->adev,
> - doorbell_index);
> - }
> + struct qcm_process_device *qpd = &pdd->qpd;
> +
> + bitmap_free(qpd->doorbell_bitmap);
> + amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 0b199eb6dc88..dfff77379acb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -661,7 +661,10 @@ struct qcm_process_device {
> uint64_t ib_base;
> void *ib_kaddr;
>
> - /* doorbell resources per process per device */
> + /* doorbells for kfd process */
> + struct amdgpu_bo *proc_doorbells;
> +
> + /* bitmap for dynamic doorbell allocation from the bo */
> unsigned long *doorbell_bitmap;
> };
>
> @@ -1009,9 +1012,9 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> unsigned int doorbell_id);
> phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
> int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
> - unsigned int *doorbell_index);
> + struct kfd_process_device *pdd);
> void kfd_free_process_doorbells(struct kfd_dev *kfd,
> - unsigned int doorbell_index);
> + struct kfd_process_device *pdd);
> /* GTT Sub-Allocator */
>
> int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 51b1683ac5c1..217ff72a97b0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1037,10 +1037,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
> free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
> get_order(KFD_CWSR_TBA_TMA_SIZE));
>
> - bitmap_free(pdd->qpd.doorbell_bitmap);
> idr_destroy(&pdd->alloc_idr);
>
> - kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
> + kfd_free_process_doorbells(pdd->dev, pdd);
>
> if (pdd->dev->shared_resources.enable_mes)
> amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
> @@ -1453,11 +1452,6 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
> if (!KFD_IS_SOC15(dev))
> return 0;
>
> - qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
> - GFP_KERNEL);
> - if (!qpd->doorbell_bitmap)
> - return -ENOMEM;
> -
> /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
> pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
> pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
> @@ -1499,9 +1493,15 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> if (!pdd)
> return NULL;
>
> + retval = kfd_alloc_process_doorbells(dev, pdd);
> + if (retval) {
> + pr_err("failed to allocate process doorbells\n");
> + goto err_free_pdd;
> + }
> +
> if (init_doorbell_bitmap(&pdd->qpd, dev)) {
> pr_err("Failed to init doorbell for process\n");
> - goto err_free_pdd;
> + goto err_free_db;
> }
>
> pdd->dev = dev;
> @@ -1529,7 +1529,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> false);
> if (retval) {
> pr_err("failed to allocate process context bo\n");
> - goto err_free_pdd;
> + goto err_free_db;
> }
> memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
> }
> @@ -1541,6 +1541,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>
> return pdd;
>
> +err_free_db:
> + kfd_free_process_doorbells(pdd->dev, pdd);
> +
> err_free_pdd:
> kfree(pdd);
> return NULL;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 5137476ec18e..6c95586f08a6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -344,17 +344,19 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> goto err_create_queue;
> }
>
> - if (q && p_doorbell_offset_in_process)
> + if (q && p_doorbell_offset_in_process) {
> /* Return the doorbell offset within the doorbell page
> * to the caller so it can be passed up to user mode
> * (in bytes).
> - * There are always 1024 doorbells per process, so in case
> - * of 8-byte doorbells, there are two doorbell pages per
> - * process.
> + * relative doorbell index = Absolute doorbell index -
> + * absolute index of first doorbell in the page.
> */
> - *p_doorbell_offset_in_process =
> - (q->properties.doorbell_off * sizeof(uint32_t)) &
> - (kfd_doorbell_process_slice(dev) - 1);
> + uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
> + pdd->qpd.proc_doorbells, 0);
> +
> + *p_doorbell_offset_in_process = (q->properties.doorbell_off
> + - first_db_index) * sizeof(uint32_t);
> + }
>
> pr_debug("PQM After DQM create queue\n");
>
> @@ -858,12 +860,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
> goto exit;
> }
>
> - if (!pdd->doorbell_index &&
> - kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
> - ret = -ENOMEM;
> - goto exit;
> - }
> -
> /* data stored in this order: mqd, ctl_stack */
> mqd = q_extra_data;
> ctl_stack = mqd + q_data->mqd_size;
More information about the amd-gfx
mailing list