[PATCH] drm/amdkfd: Move process doorbell allocation into kfd device

Felix Kuehling felix.kuehling at amd.com
Tue Sep 15 05:03:29 UTC 2020


Am 2020-09-01 um 8:21 p.m. schrieb Mukul Joshi:
> Move doorbell allocation for a process into kfd device and
> allocate doorbell space in each PDD during process creation.
> Currently, KFD manages its own doorbell space but for some
> devices, amdgpu would allocate the complete doorbell
> space instead of leaving a chunk of doorbell space for KFD to
> manage. In a system with mix of such devices, KFD would need
> to request process doorbell space based on the type of device,
> either from amdgpu or from its own doorbell space.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>

Two nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 30 +++++++++------
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c       |  3 ++
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c     | 37 ++++++++++---------
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         | 17 ++++++---
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 21 ++++++-----
>  6 files changed, 64 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b7b16adb0615..b23caa78328b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1290,18 +1290,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>  		return -EINVAL;
>  	}
>  
> -	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> -		if (args->size != kfd_doorbell_process_slice(dev))
> -			return -EINVAL;
> -		offset = kfd_get_process_doorbells(dev, p);
> -	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> -		if (args->size != PAGE_SIZE)
> -			return -EINVAL;
> -		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> -		if (!offset)
> -			return -ENOMEM;
> -	}
> -
>  	mutex_lock(&p->mutex);
>  
>  	pdd = kfd_bind_process_to_device(dev, p);
> @@ -1310,6 +1298,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>  		goto err_unlock;
>  	}
>  
> +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
> +		if (args->size != kfd_doorbell_process_slice(dev)) {
> +			err = -EINVAL;
> +			goto err_unlock;
> +		}
> +		offset = kfd_get_process_doorbells(dev, pdd);
> +	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
> +		if (args->size != PAGE_SIZE) {
> +			err = -EINVAL;
> +			goto err_unlock;
> +		}
> +		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
> +		if (!offset) {
> +			err = -ENOMEM;
> +			goto err_unlock;
> +		}
> +	}
> +
>  	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>  		dev->kgd, args->va_addr, args->size,
>  		pdd->vm, (struct kgd_mem **) &mem, &offset,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 0e71a0543f98..a857282f3d09 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>  
>  	atomic_set(&kfd->sram_ecc_flag, 0);
>  
> +	ida_init(&kfd->doorbell_ida);
> +
>  	return kfd;
>  }
>  
> @@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
>  		kfd_interrupt_exit(kfd);
>  		kfd_topology_remove_device(kfd);
>  		kfd_doorbell_fini(kfd);
> +		ida_destroy(&kfd->doorbell_ida);
>  		kfd_gtt_sa_fini(kfd);
>  		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
>  		if (kfd->gws)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 560adc57a050..b9d1359c6fe0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
>  	}
>  
>  	q->properties.doorbell_off =
> -		kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
> +		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
>  					  q->doorbell_id);
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 8e0c00b9555e..5946bfb6b75c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -31,9 +31,6 @@
>   * kernel queues using the first doorbell page reserved for the kernel.
>   */
>  
> -static DEFINE_IDA(doorbell_ida);
> -static unsigned int max_doorbell_slices;
> -
>  /*
>   * Each device exposes a doorbell aperture, a PCI MMIO aperture that
>   * receives 32-bit writes that are passed to queues as wptr values.
> @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
>  	else
>  		return -ENOSPC;
>  
> -	if (!max_doorbell_slices ||
> -	    doorbell_process_limit < max_doorbell_slices)
> -		max_doorbell_slices = doorbell_process_limit;
> +	if (!kfd->max_doorbell_slices ||
> +	    doorbell_process_limit < kfd->max_doorbell_slices)
> +		kfd->max_doorbell_slices = doorbell_process_limit;
>  
>  	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
>  				doorbell_start_offset;
> @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
>  		      struct vm_area_struct *vma)
>  {
>  	phys_addr_t address;
> +	struct kfd_process_device *pdd;
>  
>  	/*
>  	 * For simplicitly we only allow mapping of the entire doorbell
> @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
>  	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
>  		return -EINVAL;
>  
> -	/* Calculate physical address of doorbell */
> -	address = kfd_get_process_doorbells(dev, process);
> +	pdd = kfd_get_process_device_data(dev, process);
> +	if (!pdd)
> +		return -EINVAL;
>  
> +	/* Calculate physical address of doorbell */
> +	address = kfd_get_process_doorbells(dev, pdd);
>  	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
>  				VM_DONTDUMP | VM_PFNMAP;
>  
> @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
>  }
>  
>  unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> -					struct kfd_process *process,
> +					struct kfd_process_device *pdd,
>  					unsigned int doorbell_id)
>  {
>  	/*
> @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
>  	 * units regardless of the ASIC-dependent doorbell size.
>  	 */
>  	return kfd->doorbell_base_dw_offset +
> -		process->doorbell_index
> +		pdd->doorbell_index
>  		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
>  		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
>  }
> @@ -252,24 +253,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
>  }
>  
>  phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> -					struct kfd_process *process)
> +				      struct kfd_process_device *pdd)
>  {
>  	return dev->doorbell_base +
> -		process->doorbell_index * kfd_doorbell_process_slice(dev);
> +		pdd->doorbell_index * kfd_doorbell_process_slice(dev);

If you already have a pdd parameter, the dev parameter is redundant.
Just use pdd->dev.


>  }
>  
> -int kfd_alloc_process_doorbells(struct kfd_process *process)
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
>  {
> -	int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
> +	int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
>  				GFP_KERNEL);
>  	if (r > 0)
> -		process->doorbell_index = r;
> +		*doorbell_index = r;
>  
>  	return r;
>  }
>  
> -void kfd_free_process_doorbells(struct kfd_process *process)
> +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)

Doorbell index doesn't need to be a pointer here, because it's an input
parameter. Just pass the index by value.

Regards,
  Felix


>  {
> -	if (process->doorbell_index)
> -		ida_simple_remove(&doorbell_ida, process->doorbell_index);
> +	if (*doorbell_index)
> +		ida_simple_remove(&kfd->doorbell_ida, *doorbell_index);
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 023629f28495..93b51de14aa0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -314,6 +314,9 @@ struct kfd_dev {
>  	spinlock_t smi_lock;
>  
>  	uint32_t reset_seq_num;
> +
> +	struct ida doorbell_ida;
> +	unsigned int max_doorbell_slices;
>  };
>  
>  enum kfd_mempool {
> @@ -692,6 +695,8 @@ struct kfd_process_device {
>  	uint64_t sdma_past_activity_counter;
>  	struct attribute attr_sdma;
>  	char sdma_filename[MAX_SYSFS_FILENAME_LEN];
> +
> +	unsigned int doorbell_index;
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -729,7 +734,6 @@ struct kfd_process {
>  	struct mmu_notifier mmu_notifier;
>  
>  	uint16_t pasid;
> -	unsigned int doorbell_index;
>  
>  	/*
>  	 * List of kfd_process_device structures,
> @@ -862,13 +866,14 @@ u32 read_kernel_doorbell(u32 __iomem *db);
>  void write_kernel_doorbell(void __iomem *db, u32 value);
>  void write_kernel_doorbell64(void __iomem *db, u64 value);
>  unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
> -					struct kfd_process *process,
> +					struct kfd_process_device *pdd,
>  					unsigned int doorbell_id);
>  phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
> -					struct kfd_process *process);
> -int kfd_alloc_process_doorbells(struct kfd_process *process);
> -void kfd_free_process_doorbells(struct kfd_process *process);
> -
> +				      struct kfd_process_device *pdd);
> +int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
> +				unsigned int *doorbell_index);
> +void kfd_free_process_doorbells(struct kfd_dev *kfd,
> +				unsigned int *doorbell_index);
>  /* GTT Sub-Allocator */
>  
>  int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index a0e12a79ab7d..28bbdebb4703 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -781,6 +781,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
>  		kfree(pdd->qpd.doorbell_bitmap);
>  		idr_destroy(&pdd->alloc_idr);
>  
> +		kfd_free_process_doorbells(pdd->dev, &pdd->doorbell_index);
> +
>  		/*
>  		 * before destroying pdd, make sure to report availability
>  		 * for auto suspend
> @@ -833,8 +835,6 @@ static void kfd_process_wq_release(struct work_struct *work)
>  	kfd_event_free_process(p);
>  
>  	kfd_pasid_free(p->pasid);
> -	kfd_free_process_doorbells(p);
> -
>  	mutex_destroy(&p->mutex);
>  
>  	put_task_struct(p->lead_thread);
> @@ -1012,9 +1012,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
>  	if (process->pasid == 0)
>  		goto err_alloc_pasid;
>  
> -	if (kfd_alloc_process_doorbells(process) < 0)
> -		goto err_alloc_doorbells;
> -
>  	err = pqm_init(&process->pqm, process);
>  	if (err != 0)
>  		goto err_process_pqm_init;
> @@ -1042,8 +1039,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
>  err_init_apertures:
>  	pqm_uninit(&process->pqm);
>  err_process_pqm_init:
> -	kfd_free_process_doorbells(process);
> -err_alloc_doorbells:
>  	kfd_pasid_free(process->pasid);
>  err_alloc_pasid:
>  	mutex_destroy(&process->mutex);
> @@ -1106,10 +1101,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	if (!pdd)
>  		return NULL;
>  
> +	if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
> +		pr_err("Failed to alloc doorbell for pdd\n");
> +		goto err_free_pdd;
> +	}
> +
>  	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
>  		pr_err("Failed to init doorbell for process\n");
> -		kfree(pdd);
> -		return NULL;
> +		goto err_free_pdd;
>  	}
>  
>  	pdd->dev = dev;
> @@ -1131,6 +1130,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	idr_init(&pdd->alloc_idr);
>  
>  	return pdd;
> +
> +err_free_pdd:
> +	kfree(pdd);
> +	return NULL;
>  }
>  
>  /**


More information about the amd-gfx mailing list