[PATCH 3/6] drm/amdkfd: switch over to using drm_exec v2

Felix Kuehling felix.kuehling at amd.com
Tue Jul 11 19:35:29 UTC 2023


On 2023-07-11 09:31, Christian König wrote:
> Avoids quite a bit of logic and kmalloc overhead.
>
> v2: fix multiple problems pointed out by Felix
>
> Signed-off-by: Christian König <christian.koenig at amd.com>

Two nit-picks inline about DRM_EXEC_INTERRUPTIBLE_WAIT. With those 
fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>



> ---
>   drivers/gpu/drm/amd/amdgpu/Kconfig            |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   5 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 299 +++++++-----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  18 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   4 +
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  45 ++-
>   6 files changed, 162 insertions(+), 210 deletions(-)
[snip]
> @@ -2538,50 +2489,41 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>    */
>   static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   {
> -	struct amdgpu_bo_list_entry *pd_bo_list_entries;
> -	struct list_head resv_list, duplicates;
> -	struct ww_acquire_ctx ticket;
> +	struct ttm_operation_ctx ctx = { false, false };
>   	struct amdgpu_sync sync;
> +	struct drm_exec exec;
>   
>   	struct amdgpu_vm *peer_vm;
>   	struct kgd_mem *mem, *tmp_mem;
>   	struct amdgpu_bo *bo;
> -	struct ttm_operation_ctx ctx = { false, false };
> -	int i, ret;
> -
> -	pd_bo_list_entries = kcalloc(process_info->n_vms,
> -				     sizeof(struct amdgpu_bo_list_entry),
> -				     GFP_KERNEL);
> -	if (!pd_bo_list_entries) {
> -		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
> -		ret = -ENOMEM;
> -		goto out_no_mem;
> -	}
> -
> -	INIT_LIST_HEAD(&resv_list);
> -	INIT_LIST_HEAD(&duplicates);
> +	int ret;
>   
> -	/* Get all the page directory BOs that need to be reserved */
> -	i = 0;
> -	list_for_each_entry(peer_vm, &process_info->vm_list_head,
> -			    vm_list_node)
> -		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
> -				    &pd_bo_list_entries[i++]);
> -	/* Add the userptr_inval_list entries to resv_list */
> -	list_for_each_entry(mem, &process_info->userptr_inval_list,
> -			    validate_list.head) {
> -		list_add_tail(&mem->resv_list.head, &resv_list);
> -		mem->resv_list.bo = mem->validate_list.bo;
> -		mem->resv_list.num_shared = mem->validate_list.num_shared;
> -	}
> +	amdgpu_sync_create(&sync);
>   
> +	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

This runs in a worker thread, so I don't think the wait needs to be
interruptible here.
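
Something along these lines would do (just a sketch, assuming
drm_exec_init keeps the two-argument form used in this patch):

	amdgpu_sync_create(&sync);

	/* No DRM_EXEC_INTERRUPTIBLE_WAIT: this path runs from the
	 * restore worker, not from a user ioctl, so a plain
	 * uninterruptible wait is fine.
	 */
	drm_exec_init(&exec, 0);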


>   	/* Reserve all BOs and page tables for validation */
> -	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
> -	WARN(!list_empty(&duplicates), "Duplicates should be empty");
> -	if (ret)
> -		goto out_free;
> +	drm_exec_until_all_locked(&exec) {
> +		/* Reserve all the page directories */
> +		list_for_each_entry(peer_vm, &process_info->vm_list_head,
> +				    vm_list_node) {
> +			ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
> +			drm_exec_retry_on_contention(&exec);
> +			if (unlikely(ret))
> +				goto unreserve_out;
> +		}
>   
> -	amdgpu_sync_create(&sync);
> +		/* Reserve the userptr_inval_list entries to resv_list */
> +		list_for_each_entry(mem, &process_info->userptr_inval_list,
> +				    validate_list) {
> +			struct drm_gem_object *gobj;
> +
> +			gobj = &mem->bo->tbo.base;
> +			ret = drm_exec_prepare_obj(&exec, gobj, 1);
> +			drm_exec_retry_on_contention(&exec);
> +			if (unlikely(ret))
> +				goto unreserve_out;
> +		}
> +	}
>   
>   	ret = process_validate_vms(process_info);
>   	if (ret)

[snip]

> @@ -1467,25 +1467,24 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
>   	uint32_t gpuidx;
>   	int r;
>   
> -	INIT_LIST_HEAD(&ctx->validate_list);
> -	for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> -		pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
> -		if (!pdd) {
> -			pr_debug("failed to find device idx %d\n", gpuidx);
> -			return -EINVAL;
> -		}
> -		vm = drm_priv_to_vm(pdd->drm_priv);
> -
> -		ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
> -		ctx->tv[gpuidx].num_shared = 4;
> -		list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
> -	}
> +	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

This function is only called from svm_range_validate_and_map, which has 
an "intr" parameter. If you pass that through, you could make 
DRM_EXEC_INTERRUPTIBLE_WAIT conditional on that.
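
E.g. something like this (sketch only; assumes ctx->intr is still
filled in by the caller, as it is before this patch):

	/* Only wait interruptibly when the caller asked for it */
	drm_exec_init(&ctx->exec,
		      ctx->intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);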

Regards,
   Felix


> +	drm_exec_until_all_locked(&ctx->exec) {
> +		for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> +			pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
> +			if (!pdd) {
> +				pr_debug("failed to find device idx %d\n", gpuidx);
> +				r = -EINVAL;
> +				goto unreserve_out;
> +			}
> +			vm = drm_priv_to_vm(pdd->drm_priv);
>   
> -	r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
> -				   ctx->intr, NULL);
> -	if (r) {
> -		pr_debug("failed %d to reserve bo\n", r);
> -		return r;
> +			r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
> +			drm_exec_retry_on_contention(&ctx->exec);
> +			if (unlikely(r)) {
> +				pr_debug("failed %d to reserve bo\n", r);
> +				goto unreserve_out;
> +			}
> +		}
>   	}
>   
>   	for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> @@ -1508,13 +1507,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
>   	return 0;
>   
>   unreserve_out:
> -	ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
> +	drm_exec_fini(&ctx->exec);
>   	return r;
>   }
>   
>   static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
>   {
> -	ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
> +	drm_exec_fini(&ctx->exec);
>   }
>   
>   static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)

