[PATCH 06/13] drm/amdgpu: use the new drm_exec object for CS v2
Tatsuyuki Ishi
ishitatsuyuki at gmail.com
Tue Jun 20 04:07:40 UTC 2023
+Boris and +Matthew in case you want to take over this patch set.
Here are some review / testing comments, including the ones I posted
before, collected here to make them easier to track.
On 5/4/23 20:51, Christian König wrote:
> Use the new component here as well and remove the old handling.
>
> v2: drop duplicate handling
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 71 ++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 210 +++++++++-----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 7 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 -
> 7 files changed, 115 insertions(+), 204 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 02b827785e39..eba3e4f01ea6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
>
> list->first_userptr = first_userptr;
> list->num_entries = num_entries;
> + sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
> + amdgpu_bo_list_entry_cmp, NULL);
Previously amdgpu_bo_list_get_list sorted all entries, but this sort()
call only covers the first last_entry elements, leaving the userptr
entries at the tail of the array unsorted. I think this changes behavior?
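For reference, my reading of the placement logic in amdgpu_bo_list_create
(simplified sketch, not the verbatim kernel code):

    for (i = 0; i < num_entries; ++i) {
            /* BO lookup elided */
            if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
                    entry = &array[--first_userptr]; /* userptrs fill the tail */
            else
                    entry = &array[last_entry++];    /* others fill the head */
            /* entry initialization elided */
    }

    /* [0, last_entry) = non-userptr, [first_userptr, num_entries) = userptr */
    sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
         amdgpu_bo_list_entry_cmp, NULL);

So the new sort() stops short of the userptr range, while the removed
amdgpu_bo_list_get_list bucket-sorted every entry by priority (as far as
I can tell).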
> @@ -928,18 +874,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> e->user_invalidated = userpage_invalidated;
> }
>
> - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> - &duplicates);
> - if (unlikely(r != 0)) {
> - if (r != -ERESTARTSYS)
> - DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> - goto out_free_user_pages;
> + drm_exec_while_not_all_locked(&p->exec) {
> + r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec);
> + drm_exec_continue_on_contention(&p->exec);
Duplicate handling is needed for pretty much every call of
amdgpu_vm_lock_pd, as bo->tbo.base.resv == vm->root.bo->tbo.base.resv
for AMDGPU_GEM_CREATE_VM_ALWAYS_VALID.
I think Boris's suggestion of handling this through a common
DRM_EXEC_FLAG_ALLOW_DUPLICATES flag fits well.
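To illustrate: assuming the second lock attempt on the shared reservation
comes back as -EALREADY from the underlying dma_resv_lock() (and ignoring
how the extra fence slots for the shared resv get reserved), the
open-coded alternative would have to be repeated at each affected call
site, roughly:

    r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec);
    drm_exec_continue_on_contention(&p->exec);
    /*
     * For AMDGPU_GEM_CREATE_VM_ALWAYS_VALID BOs the PD and the BO share
     * one reservation object, so whichever of the two lock attempts runs
     * second must not treat the already-locked resv as a hard failure.
     */
    if (unlikely(r && r != -EALREADY))
            goto out_free_user_pages;

which is exactly the boilerplate a common flag would avoid.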
> + if (unlikely(r))
> + goto out_free_user_pages;
> +
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, 2);
Previously there were comments explaining how the fence count was
calculated; those seem to have been dropped. I'd prefer to see them
retained, as finding out who calls dma_resv_add_fence() is not trivial,
and a wrong reservation count can be really hard to debug.
Likewise for amdgpu_vm_lock_pd (which was added in another commit).
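For the drm_exec_prepare_obj() call above, something along these lines
would already help (my wording; whether 2 actually covers every gang size
is exactly the kind of question such a comment should answer):

    /*
     * amdgpu_cs_submit() later adds one scheduler fence per gang member
     * to this reservation object (READ for the followers, WRITE for the
     * gang leader), so the slot count reserved here has to cover all of
     * those dma_resv_add_fence() calls.
     */
    r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, 2);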
> + drm_exec_break_on_contention(&p->exec);
> + if (unlikely(r))
> + goto out_free_user_pages;
> +
> + e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
> + e->range = NULL;
> + }
> + drm_exec_continue_on_contention(&p->exec);
> +
> + if (p->uf_bo) {
> + r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
> + 2);
> + drm_exec_continue_on_contention(&p->exec);
> + if (unlikely(r))
> + goto out_free_user_pages;
> + }
> }
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> + struct mm_struct *usermm;
>
> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
> + usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
> + if (usermm && usermm != current->mm) {
> + r = -EPERM;
> + goto out_free_user_pages;
> + }
> +
> + if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
> + e->user_invalidated && e->user_pages) {
> + amdgpu_bo_placement_from_domain(e->bo,
> + AMDGPU_GEM_DOMAIN_CPU);
> + r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
> + &ctx);
> + if (r)
> + goto out_free_user_pages;
> +
> + amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
> + e->user_pages);
> + }
> +
> + kvfree(e->user_pages);
> + e->user_pages = NULL;
> }
>
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> @@ -1296,9 +1271,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> */
> r = 0;
> amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
> + r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
> + e->range);
> e->range = NULL;
The e->range = NULL; here needs to be removed, or it causes *massive*
memory leaks.
> }
> if (r) {
> @@ -1307,20 +1281,22 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> }
>
> p->fence = dma_fence_get(&leader->base.s_fence->finished);
> - list_for_each_entry(e, &p->validated, tv.head) {
> + drm_exec_for_each_locked_object(&p->exec, index, gobj) {
> +
> + ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
>
> /* Everybody except for the gang leader uses READ */
> for (i = 0; i < p->gang_size; ++i) {
> if (p->jobs[i] == leader)
> continue;
>
> - dma_resv_add_fence(e->tv.bo->base.resv,
> + dma_resv_add_fence(gobj->resv,
> &p->jobs[i]->base.s_fence->finished,
> DMA_RESV_USAGE_READ);
> }
>
> - /* The gang leader is remembered as writer */
> - e->tv.num_shared = 0;
> + /* The gang leader is remembered as writer */
> + dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
> }
Previously PD used READ accesses, now everything is WRITE. This probably
isn't right.
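If the old semantics are the ones we want, a hypothetical way to keep
them here would be to special-case BOs that share the VM root reservation
(sketch only; fpriv is assumed to be available in amdgpu_cs_submit):

    /* Keep READ usage for the PD / per-VM BOs, WRITE for user BOs. */
    if (gobj->resv == fpriv->vm.root.bo->tbo.base.resv)
            dma_resv_add_fence(gobj->resv, p->fence,
                               DMA_RESV_USAGE_READ);
    else
            dma_resv_add_fence(gobj->resv, p->fence,
                               DMA_RESV_USAGE_WRITE);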
Regards,
Tatsuyuki