[PATCH 6/8] drm/amdkfd: Add function to add/remove gws to kfd process
Kuehling, Felix
Felix.Kuehling at amd.com
Fri May 10 20:17:00 UTC 2019
The subject for this one should start with drm/amdgpu: because it's a
change in amdgpu, not KFD.
See two more comments inline.
Regards,
Felix
On 2019-05-10 12:01 p.m., Zeng, Oak wrote:
> [CAUTION: External Email]
>
> GWS bo is shared between all kfd processes. Add function to add gws
> to kfd process's bo list so gws can be evicted from and restored
> for process.
>
> Change-Id: I75d74cfdadb7075ff8b2b68634e205deb73dc1ea
> Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 96 ++++++++++++++++++++++++
> 2 files changed, 98 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index c00c974..f968bf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -155,6 +155,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
> void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
> int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
> void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
> +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
> +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
> uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
> enum kgd_engine_type type);
> void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index e1cae4a..322c1db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -457,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
> mutex_unlock(&process_info->lock);
> }
>
> +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> + struct amdkfd_process_info *process_info)
> +{
> + struct ttm_validate_buffer *bo_list_entry;
> +
> + bo_list_entry = &mem->validate_list;
> + mutex_lock(&process_info->lock);
> + list_del(&bo_list_entry->head);
> + mutex_unlock(&process_info->lock);
> +}
> +
> /* Initializes user pages. It registers the MMU notifier and validates
> * the userptr BO in the GTT domain.
> *
> @@ -1197,6 +1208,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> return 0;
>
> allocate_init_user_pages_failed:
> + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
I think you meant to replace some code a few lines up:
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
if (ret) {
- mutex_lock(&avm->process_info->lock);
- list_del(&(*mem)->validate_list.head);
- mutex_unlock(&avm->process_info->lock);
goto allocate_init_user_pages_failed;
}
}
But you're not removing that original code in this patch so you'd end up
removing it from the list twice.
> amdgpu_bo_unref(&bo);
> /* Don't unreserve system mem limit twice */
> goto err_reserve_limit;
> @@ -2104,3 +2116,87 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
> kfree(pd_bo_list);
> return ret;
> }
> +
> +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
> +{
> + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
> + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
> + int ret;
> +
> + if (!info || !gws)
> + return -EINVAL;
> +
> + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> + if (!*mem)
> + return -EINVAL;
> +
> + mutex_init(&(*mem)->lock);
> + (*mem)->bo = amdgpu_bo_ref(gws_bo);
> + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
> + (*mem)->process_info = process_info;
> + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
> + amdgpu_sync_create(&(*mem)->sync);
> +
> +
> + /* Validate gws bo the first time it is added to process */
> + mutex_lock(&(*mem)->process_info->lock);
> + ret = amdgpu_bo_reserve(gws_bo, false);
> + if (unlikely(ret)) {
> + pr_err("Reserve gws bo failed %d\n", ret);
> + goto bo_reservation_failure;
> + }
> +
> + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, false);
> + if (ret) {
> + pr_err("GWS BO validate failed %d\n", ret);
> + goto bo_validation_failure;
> + }
> + /* GWS resource is shared b/t amdgpu and amdkfd
> + * Add process eviction fence to bo so they can
> + * evict each other.
> + */
> + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
> + amdgpu_bo_unreserve(gws_bo);
> + mutex_unlock(&(*mem)->process_info->lock);
> +
> + return ret;
> +
> +bo_validation_failure:
> + amdgpu_bo_unreserve(gws_bo);
> +bo_reservation_failure:
> + mutex_unlock(&(*mem)->process_info->lock);
> + amdgpu_sync_free(&(*mem)->sync);
> + remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
> + amdgpu_bo_unref(&gws_bo);
> + mutex_destroy(&(*mem)->lock);
> + kfree(*mem);
Set *mem = NULL after kfree to avoid returning a dangling pointer.
> + return ret;
> +}
> +
> +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
> +{
> + int ret;
> + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
> + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
> + struct amdgpu_bo *gws_bo = kgd_mem->bo;
> +
> + /* Remove BO from process's validate list so restore worker won't touch
> + * it anymore
> + */
> + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
> +
> + ret = amdgpu_bo_reserve(gws_bo, false);
> + if (unlikely(ret)) {
> + pr_err("Reserve gws bo failed %d\n", ret);
> + //TODO add BO back to validate_list?
> + return ret;
> + }
> + amdgpu_amdkfd_remove_eviction_fence(gws_bo,
> + process_info->eviction_fence);
> + amdgpu_bo_unreserve(gws_bo);
> + amdgpu_sync_free(&kgd_mem->sync);
> + amdgpu_bo_unref(&gws_bo);
> + mutex_destroy(&kgd_mem->lock);
> + kfree(mem);
> + return 0;
> +}
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list