[PATCH 3/6] drm/amdkfd: switch over to using drm_exec v2
Felix Kuehling
felix.kuehling at amd.com
Tue Jul 11 19:35:29 UTC 2023
On 2023-07-11 09:31, Christian König wrote:
> Avoids quite a bit of logic and kmalloc overhead.
>
> v2: fix multiple problems pointed out by Felix
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
Two nit-picks inline about DRM_EXEC_INTERRUPTIBLE_WAIT. With those
fixed, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 +-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 299 +++++++-----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 45 ++-
> 6 files changed, 162 insertions(+), 210 deletions(-)
[snip]
> @@ -2538,50 +2489,41 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
> */
> static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> {
> - struct amdgpu_bo_list_entry *pd_bo_list_entries;
> - struct list_head resv_list, duplicates;
> - struct ww_acquire_ctx ticket;
> + struct ttm_operation_ctx ctx = { false, false };
> struct amdgpu_sync sync;
> + struct drm_exec exec;
>
> struct amdgpu_vm *peer_vm;
> struct kgd_mem *mem, *tmp_mem;
> struct amdgpu_bo *bo;
> - struct ttm_operation_ctx ctx = { false, false };
> - int i, ret;
> -
> - pd_bo_list_entries = kcalloc(process_info->n_vms,
> - sizeof(struct amdgpu_bo_list_entry),
> - GFP_KERNEL);
> - if (!pd_bo_list_entries) {
> - pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
> - ret = -ENOMEM;
> - goto out_no_mem;
> - }
> -
> - INIT_LIST_HEAD(&resv_list);
> - INIT_LIST_HEAD(&duplicates);
> + int ret;
>
> - /* Get all the page directory BOs that need to be reserved */
> - i = 0;
> - list_for_each_entry(peer_vm, &process_info->vm_list_head,
> - vm_list_node)
> - amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
> - &pd_bo_list_entries[i++]);
> - /* Add the userptr_inval_list entries to resv_list */
> - list_for_each_entry(mem, &process_info->userptr_inval_list,
> - validate_list.head) {
> - list_add_tail(&mem->resv_list.head, &resv_list);
> - mem->resv_list.bo = mem->validate_list.bo;
> - mem->resv_list.num_shared = mem->validate_list.num_shared;
> - }
> + amdgpu_sync_create(&sync);
>
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
This runs in a worker thread, so I don't think it needs to be
interruptible.
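Something like this (untested sketch) should be enough here; leaving
DRM_EXEC_INTERRUPTIBLE_WAIT out of the flags makes drm_exec use
uninterruptible waits:

	/* Worker thread, no need to react to signals here */
	drm_exec_init(&exec, 0);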
> /* Reserve all BOs and page tables for validation */
> - ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
> - WARN(!list_empty(&duplicates), "Duplicates should be empty");
> - if (ret)
> - goto out_free;
> + drm_exec_until_all_locked(&exec) {
> + /* Reserve all the page directories */
> + list_for_each_entry(peer_vm, &process_info->vm_list_head,
> + vm_list_node) {
> + ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(ret))
> + goto unreserve_out;
> + }
>
> - amdgpu_sync_create(&sync);
> + /* Reserve the userptr_inval_list entries to resv_list */
> + list_for_each_entry(mem, &process_info->userptr_inval_list,
> + validate_list) {
> + struct drm_gem_object *gobj;
> +
> + gobj = &mem->bo->tbo.base;
> + ret = drm_exec_prepare_obj(&exec, gobj, 1);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(ret))
> + goto unreserve_out;
> + }
> + }
>
> ret = process_validate_vms(process_info);
> if (ret)
[snip]
> @@ -1467,25 +1467,24 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
> uint32_t gpuidx;
> int r;
>
> - INIT_LIST_HEAD(&ctx->validate_list);
> - for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> - pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
> - if (!pdd) {
> - pr_debug("failed to find device idx %d\n", gpuidx);
> - return -EINVAL;
> - }
> - vm = drm_priv_to_vm(pdd->drm_priv);
> -
> - ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
> - ctx->tv[gpuidx].num_shared = 4;
> - list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
> - }
> + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
This function is only called from svm_range_validate_and_map, which has
an "intr" parameter. If you pass that through, you could make
DRM_EXEC_INTERRUPTIBLE_WAIT conditional on that.
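E.g. something like this (untested, assuming ctx->intr is still
populated by the caller as it was before this patch):

	/* Only wait interruptibly when the caller asked for it */
	drm_exec_init(&ctx->exec,
		      ctx->intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);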
Regards,
Felix
> + drm_exec_until_all_locked(&ctx->exec) {
> + for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> + pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
> + if (!pdd) {
> + pr_debug("failed to find device idx %d\n", gpuidx);
> + r = -EINVAL;
> + goto unreserve_out;
> + }
> + vm = drm_priv_to_vm(pdd->drm_priv);
>
> - r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
> - ctx->intr, NULL);
> - if (r) {
> - pr_debug("failed %d to reserve bo\n", r);
> - return r;
> + r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
> + drm_exec_retry_on_contention(&ctx->exec);
> + if (unlikely(r)) {
> + pr_debug("failed %d to reserve bo\n", r);
> + goto unreserve_out;
> + }
> + }
> }
>
> for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
> @@ -1508,13 +1507,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
> return 0;
>
> unreserve_out:
> - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
> + drm_exec_fini(&ctx->exec);
> return r;
> }
>
> static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
> {
> - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
> + drm_exec_fini(&ctx->exec);
> }
>
> static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)