[PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver
Kuehling, Felix
Felix.Kuehling at amd.com
Thu Sep 26 18:54:30 UTC 2019
On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
> This makes possible the vmid pasid mapping query through software.
>
> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +++++++++++++------
> .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +-
> 2 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e7f0a32e0e44..d006adefef55 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
> struct qcm_process_device *qpd,
> struct queue *q)
> {
> - int bit, allocated_vmid;
> + int idx = -1, allocated_vmid, i;
>
> - if (dqm->vmid_bitmap == 0)
> + for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
> + if (!dqm->vmid_pasid[i]) {
> + idx = i;
> + break;
> + }
> + }
> +
> + if (idx < 0) {
> + pr_err("no more vmid to allocate\n");
> return -ENOMEM;
> + }
> +
> + dqm->vmid_pasid[idx] = q->process->pasid;
>
> - bit = ffs(dqm->vmid_bitmap) - 1;
> - dqm->vmid_bitmap &= ~(1 << bit);
> + allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
> + pr_debug("vmid allocated: %d\n", allocated_vmid);
> +
> + set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
>
> - allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
> - pr_debug("vmid allocation %d\n", allocated_vmid);
> qpd->vmid = allocated_vmid;
> q->properties.vmid = allocated_vmid;
>
> - set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
> program_sh_mem_settings(dqm, qpd);
>
> /* qpd->page_table_base is set earlier when register_process()
> @@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
> struct qcm_process_device *qpd,
> struct queue *q)
> {
> - int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
> + int idx;
>
> /* On GFX v7, CP doesn't flush TC at dequeue */
> if (q->device->device_info->asic_family == CHIP_HAWAII)
> @@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
> /* Release the vmid mapping */
> set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>
> - dqm->vmid_bitmap |= (1 << bit);
> + idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
> + dqm->vmid_pasid[idx] = 0;
> +
> qpd->vmid = 0;
> q->properties.vmid = 0;
> }
> @@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
> dqm->allocated_queues[pipe] |= 1 << queue;
> }
>
> - dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
> + dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
> + sizeof(uint16_t), GFP_KERNEL);
If you allocate this dynamically, you need to check the return value.
But see below ...
> dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
> dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
>
> @@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager *dqm)
>
> WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
>
> + kfree(dqm->vmid_pasid);
> kfree(dqm->allocated_queues);
> for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
> kfree(dqm->mqd_mgrs[i]);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index eed8f950b663..67b5e5fadd95 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -188,7 +188,8 @@ struct device_queue_manager {
> unsigned int *allocated_queues;
> uint64_t sdma_bitmap;
> uint64_t xgmi_sdma_bitmap;
> - unsigned int vmid_bitmap;
> + /* the pasid mapping for each kfd vmid */
> + uint16_t *vmid_pasid;
This could be a fixed-size array since the number of user mode VMIDs is
limited to 15 by the HW. The size of the pointer alone is enough to
store 4 PASIDs. Add overhead of kmalloc and you don't really save
anything by allocating this dynamically. It only adds indirection,
complexity (error handling) and the risk of memory leaks.
Regards,
Felix
> uint64_t pipelines_addr;
> struct kfd_mem_obj *pipeline_mem;
> uint64_t fence_gpu_addr;
More information about the amd-gfx
mailing list