[PATCH 07/21] drm/amdkfd: Clean up KFD_MMAP_ offset handling
Oded Gabbay
oded.gabbay at gmail.com
Fri May 11 08:52:50 UTC 2018
On Wed, Apr 11, 2018 at 12:33 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> From: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
>
> Use bit-rotate for better clarity and remove _MASK from the #defines as
> these represent mmap types.
>
> Centralize all the parsing of the mmap offset in kfd_mmap and add device
> parameter to doorbell and reserved_mem map functions.
>
> Encode gpu_id into upper bits of vm_pgoff. This frees up the lower bits
> for encoding the doorbell ID on Vega10.
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 35 ++++++++++++++++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 9 ++------
> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 38 ++++++++++++++++++++++++-------
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++----
> 5 files changed, 59 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b5e5f0e..f6b35f4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -292,7 +292,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
>
>
> /* Return gpu_id as doorbell offset for mmap usage */
> - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
> + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
> + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
> args->doorbell_offset <<= PAGE_SHIFT;
>
> mutex_unlock(&p->mutex);
> @@ -1645,23 +1646,33 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
> {
> struct kfd_process *process;
> + struct kfd_dev *dev = NULL;
> + unsigned long vm_pgoff;
> + unsigned int gpu_id;
>
> process = kfd_get_process(current);
> if (IS_ERR(process))
> return PTR_ERR(process);
>
> - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
> - KFD_MMAP_DOORBELL_MASK) {
> - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
> - return kfd_doorbell_mmap(process, vma);
> - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
> - KFD_MMAP_EVENTS_MASK) {
> - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
> + vm_pgoff = vma->vm_pgoff;
> + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
> + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
> + if (gpu_id)
> + dev = kfd_device_by_id(gpu_id);
> +
> + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
> + case KFD_MMAP_TYPE_DOORBELL:
> + if (!dev)
> + return -ENODEV;
> + return kfd_doorbell_mmap(dev, process, vma);
> +
> + case KFD_MMAP_TYPE_EVENTS:
> return kfd_event_mmap(process, vma);
> - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
> - KFD_MMAP_RESERVED_MEM_MASK) {
> - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
> - return kfd_reserved_mem_mmap(process, vma);
> +
> + case KFD_MMAP_TYPE_RESERVED_MEM:
> + if (!dev)
> + return -ENODEV;
> + return kfd_reserved_mem_mmap(dev, process, vma);
> }
>
> return -EFAULT;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 4840314..efc59de 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -126,15 +126,10 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
> iounmap(kfd->doorbell_kernel_ptr);
> }
>
> -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
> +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> + struct vm_area_struct *vma)
> {
> phys_addr_t address;
> - struct kfd_dev *dev;
> -
> - /* Find kfd device according to gpu id */
> - dev = kfd_device_by_id(vma->vm_pgoff);
> - if (!dev)
> - return -EINVAL;
>
> /*
> * For simplicitly we only allow mapping of the entire doorbell
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 4890a90..bccf2f7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
> case KFD_EVENT_TYPE_DEBUG:
> ret = create_signal_event(devkfd, p, ev);
> if (!ret) {
> - *event_page_offset = KFD_MMAP_EVENTS_MASK;
> + *event_page_offset = KFD_MMAP_TYPE_EVENTS;
> *event_page_offset <<= PAGE_SHIFT;
> *event_slot_index = ev->event_id;
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index d9c0fe12..2d575c0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -41,9 +41,33 @@
>
> #define KFD_SYSFS_FILE_MODE 0444
>
> -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
> -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
> -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
> +/* GPU ID hash width in bits */
> +#define KFD_GPU_ID_HASH_WIDTH 16
> +
> +/* Use upper bits of mmap offset to store KFD driver specific information.
> + * BITS[63:62] - Encode MMAP type
> + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
> + * BITS[45:0] - MMAP offset value
> + *
> + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
> + * defines are w.r.t to PAGE_SIZE
> + */
> +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
> +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
> +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
> +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
> +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
Doesn't this new definition break the existing user-space library (kfd thunk)?
If that is the case, we have a problem here.
Oded
> +
> +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
> +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
> + << KFD_MMAP_GPU_ID_SHIFT)
> +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
> + & KFD_MMAP_GPU_ID_MASK)
> +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
> + >> KFD_MMAP_GPU_ID_SHIFT)
> +
> +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
> +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
>
> /*
> * When working with cp scheduler we should assign the HIQ manually or via
> @@ -55,9 +79,6 @@
> #define KFD_CIK_HIQ_PIPE 4
> #define KFD_CIK_HIQ_QUEUE 0
>
> -/* GPU ID hash width in bits */
> -#define KFD_GPU_ID_HASH_WIDTH 16
> -
> /* Macro for allocating structures */
> #define kfd_alloc_struct(ptr_to_struct) \
> ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
> @@ -698,7 +719,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
> struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
> struct kfd_process *p);
>
> -int kfd_reserved_mem_mmap(struct kfd_process *process,
> +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> struct vm_area_struct *vma);
>
> /* KFD process API for creating and translating handles */
> @@ -728,7 +749,8 @@ void kfd_pasid_free(unsigned int pasid);
> /* Doorbells */
> int kfd_doorbell_init(struct kfd_dev *kfd);
> void kfd_doorbell_fini(struct kfd_dev *kfd);
> -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
> +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
> + struct vm_area_struct *vma);
> void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
> unsigned int *doorbell_off);
> void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 2791e72..131fe2a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -451,7 +451,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
> if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
> continue;
>
> - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
> + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
> + << PAGE_SHIFT;
> qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
> KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
> MAP_SHARED, offset);
> @@ -989,15 +990,12 @@ int kfd_resume_all_processes(void)
> return ret;
> }
>
> -int kfd_reserved_mem_mmap(struct kfd_process *process,
> +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> struct vm_area_struct *vma)
> {
> - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
> struct kfd_process_device *pdd;
> struct qcm_process_device *qpd;
>
> - if (!dev)
> - return -EINVAL;
> if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
> pr_err("Incorrect CWSR mapping size.\n");
> return -EINVAL;
> --
> 2.7.4
>
More information about the amd-gfx
mailing list