[PATCH] drm/amdkfd: Move process doorbell allocation into kfd device
Felix Kuehling
felix.kuehling at amd.com
Tue Sep 15 05:03:29 UTC 2020
Am 2020-09-01 um 8:21 p.m. schrieb Mukul Joshi:
> Move doorbell allocation for a process into kfd device and
> allocate doorbell space in each PDD during process creation.
> Currently, KFD manages its own doorbell space but for some
> devices, amdgpu would allocate the complete doorbell
> space instead of leaving a chunk of doorbell space for KFD to
> manage. In a system with mix of such devices, KFD would need
> to request process doorbell space based on the type of device,
> either from amdgpu or from its own doorbell space.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
Two nit-picks inline. With those fixed, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 +++++++++------
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +-
> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 37 ++++++++++---------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++++++---
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 ++++++-----
> 6 files changed, 64 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b7b16adb0615..b23caa78328b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1290,18 +1290,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> return -EINVAL;
> }
>
> - if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> - if (args->size != kfd_doorbell_process_slice(dev))
> - return -EINVAL;
> - offset = kfd_get_process_doorbells(dev, p);
> - } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> - if (args->size != PAGE_SIZE)
> - return -EINVAL;
> - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> - if (!offset)
> - return -ENOMEM;
> - }
> -
> mutex_lock(&p->mutex);
>
> pdd = kfd_bind_process_to_device(dev, p);
> @@ -1310,6 +1298,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> goto err_unlock;
> }
>
> + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> + if (args->size != kfd_doorbell_process_slice(dev)) {
> + err = -EINVAL;
> + goto err_unlock;
> + }
> + offset = kfd_get_process_doorbells(dev, pdd);
> + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> + if (args->size != PAGE_SIZE) {
> + err = -EINVAL;
> + goto err_unlock;
> + }
> + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> + if (!offset) {
> + err = -ENOMEM;
> + goto err_unlock;
> + }
> + }
> +
> err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> dev->kgd, args->va_addr, args->size,
> pdd->vm, (struct kgd_mem **) &mem, &offset,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 0e71a0543f98..a857282f3d09 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>
> atomic_set(&kfd->sram_ecc_flag, 0);
>
> + ida_init(&kfd->doorbell_ida);
> +
> return kfd;
> }
>
> @@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
> kfd_interrupt_exit(kfd);
> kfd_topology_remove_device(kfd);
> kfd_doorbell_fini(kfd);
> + ida_destroy(&kfd->doorbell_ida);
> kfd_gtt_sa_fini(kfd);
> amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
> if (kfd->gws)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 560adc57a050..b9d1359c6fe0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
> }
>
> q->properties.doorbell_off =
> - kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
> + kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
> q->doorbell_id);
> -
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 8e0c00b9555e..5946bfb6b75c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -31,9 +31,6 @@
> * kernel queues using the first doorbell page reserved for the kernel.
> */
>
> -static DEFINE_IDA(doorbell_ida);
> -static unsigned int max_doorbell_slices;
> -
> /*
> * Each device exposes a doorbell aperture, a PCI MMIO aperture that
> * receives 32-bit writes that are passed to queues as wptr values.
> @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
> else
> return -ENOSPC;
>
> - if (!max_doorbell_slices ||
> - doorbell_process_limit < max_doorbell_slices)
> - max_doorbell_slices = doorbell_process_limit;
> + if (!kfd->max_doorbell_slices ||
> + doorbell_process_limit < kfd->max_doorbell_slices)
> + kfd->max_doorbell_slices = doorbell_process_limit;
>
> kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
> doorbell_start_offset;
> @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> struct vm_area_struct *vma)
> {
> phys_addr_t address;
> + struct kfd_process_device *pdd;
>
> /*
> * For simplicitly we only allow mapping of the entire doorbell
> @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
> return -EINVAL;
>
> - /* Calculate physical address of doorbell */
> - address = kfd_get_process_doorbells(dev, process);
> + pdd = kfd_get_process_device_data(dev, process);
> + if (!pdd)
> + return -EINVAL;
>
> + /* Calculate physical address of doorbell */
> + address = kfd_get_process_doorbells(dev, pdd);
> vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
> VM_DONTDUMP | VM_PFNMAP;
>
> @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
> }
>
> unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> - struct kfd_process *process,
> + struct kfd_process_device *pdd,
> unsigned int doorbell_id)
> {
> /*
> @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> * units regardless of the ASIC-dependent doorbell size.
> */
> return kfd->doorbell_base_dw_offset +
> - process->doorbell_index
> + pdd->doorbell_index
> * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
> doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
> }
> @@ -252,24 +253,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
> }
>
> phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> - struct kfd_process *process)
> + struct kfd_process_device *pdd)
> {
> return dev->doorbell_base +
> - process->doorbell_index * kfd_doorbell_process_slice(dev);
> + pdd->doorbell_index * kfd_doorbell_process_slice(dev);
If you already have a pdd parameter, the dev parameter is redundant.
Just use pdd->dev.
> }
>
> -int kfd_alloc_process_doorbells(struct kfd_process *process)
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
> {
> - int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
> + int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
> GFP_KERNEL);
> if (r > 0)
> - process->doorbell_index = r;
> + *doorbell_index = r;
>
> return r;
> }
>
> -void kfd_free_process_doorbells(struct kfd_process *process)
> +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
Doorbell index doesn't need to be a pointer here, because it's an input
parameter. Just pass the index by value.
Regards,
Felix
> {
> - if (process->doorbell_index)
> - ida_simple_remove(&doorbell_ida, process->doorbell_index);
> + if (*doorbell_index)
> + ida_simple_remove(&kfd->doorbell_ida, *doorbell_index);
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 023629f28495..93b51de14aa0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -314,6 +314,9 @@ struct kfd_dev {
> spinlock_t smi_lock;
>
> uint32_t reset_seq_num;
> +
> + struct ida doorbell_ida;
> + unsigned int max_doorbell_slices;
> };
>
> enum kfd_mempool {
> @@ -692,6 +695,8 @@ struct kfd_process_device {
> uint64_t sdma_past_activity_counter;
> struct attribute attr_sdma;
> char sdma_filename[MAX_SYSFS_FILENAME_LEN];
> +
> + unsigned int doorbell_index;
> };
>
> #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -729,7 +734,6 @@ struct kfd_process {
> struct mmu_notifier mmu_notifier;
>
> uint16_t pasid;
> - unsigned int doorbell_index;
>
> /*
> * List of kfd_process_device structures,
> @@ -862,13 +866,14 @@ u32 read_kernel_doorbell(u32 __iomem *db);
> void write_kernel_doorbell(void __iomem *db, u32 value);
> void write_kernel_doorbell64(void __iomem *db, u64 value);
> unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> - struct kfd_process *process,
> + struct kfd_process_device *pdd,
> unsigned int doorbell_id);
> phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> - struct kfd_process *process);
> -int kfd_alloc_process_doorbells(struct kfd_process *process);
> -void kfd_free_process_doorbells(struct kfd_process *process);
> -
> + struct kfd_process_device *pdd);
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
> + unsigned int *doorbell_index);
> +void kfd_free_process_doorbells(struct kfd_dev *kfd,
> + unsigned int *doorbell_index);
> /* GTT Sub-Allocator */
>
> int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index a0e12a79ab7d..28bbdebb4703 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -781,6 +781,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
> kfree(pdd->qpd.doorbell_bitmap);
> idr_destroy(&pdd->alloc_idr);
>
> + kfd_free_process_doorbells(pdd->dev, &pdd->doorbell_index);
> +
> /*
> * before destroying pdd, make sure to report availability
> * for auto suspend
> @@ -833,8 +835,6 @@ static void kfd_process_wq_release(struct work_struct *work)
> kfd_event_free_process(p);
>
> kfd_pasid_free(p->pasid);
> - kfd_free_process_doorbells(p);
> -
> mutex_destroy(&p->mutex);
>
> put_task_struct(p->lead_thread);
> @@ -1012,9 +1012,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
> if (process->pasid == 0)
> goto err_alloc_pasid;
>
> - if (kfd_alloc_process_doorbells(process) < 0)
> - goto err_alloc_doorbells;
> -
> err = pqm_init(&process->pqm, process);
> if (err != 0)
> goto err_process_pqm_init;
> @@ -1042,8 +1039,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
> err_init_apertures:
> pqm_uninit(&process->pqm);
> err_process_pqm_init:
> - kfd_free_process_doorbells(process);
> -err_alloc_doorbells:
> kfd_pasid_free(process->pasid);
> err_alloc_pasid:
> mutex_destroy(&process->mutex);
> @@ -1106,10 +1101,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> if (!pdd)
> return NULL;
>
> + if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
> + pr_err("Failed to alloc doorbell for pdd\n");
> + goto err_free_pdd;
> + }
> +
> if (init_doorbell_bitmap(&pdd->qpd, dev)) {
> pr_err("Failed to init doorbell for process\n");
> - kfree(pdd);
> - return NULL;
> + goto err_free_pdd;
> }
>
> pdd->dev = dev;
> @@ -1131,6 +1130,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> idr_init(&pdd->alloc_idr);
>
> return pdd;
> +
> +err_free_pdd:
> + kfree(pdd);
> + return NULL;
> }
>
> /**
More information about the amd-gfx
mailing list