[PATCH 1/3] drm/amdgpu: re-work VM syncing

Wed Aug 21 17:39:48 UTC 2024

On 21.08.24 14:03, Christian König wrote:
> Rework how VM operations synchronize to submissions. Provide an
> amdgpu_sync container to the backends instead of an reservation
> object and fill in the amdgpu_sync object in the higher layers
> of the code.
>
> No intended functional change, just prepares for upcomming changes.

This looks like a really nice cleanup! I'm not sure how much it's worth
given that I'm not a maintainer, but this one and patch 3 are

Reviewed-by: Friedrich Vock <friedrich.vock at gmx.de>

Patch 2 looks ok to me as well, but I'm not in the loop as to what
problem this fixes. If the problem already occurs in the wild, it might
make sense to consider backporting?

Regards,
Friedrich

>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 84 +++++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      | 11 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 16 +---
>   5 files changed, 65 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index bcb729094521..ba99d428610a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -838,7 +838,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   	params.vm = vm;
>   	params.immediate = immediate;
>
> -	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
> +	r = vm->update_funcs->prepare(&params, NULL);
>   	if (r)
>   		goto error;
>
> @@ -933,7 +933,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
>    * @unlocked: unlocked invalidation during MM callback
>    * @flush_tlb: trigger tlb invalidation after update completed
>    * @allow_override: change MTYPE for local NUMA nodes
> - * @resv: fences we need to sync to
> + * @sync: fences we need to sync to
>    * @start: start of mapped range
>    * @last: last mapped entry
>    * @flags: flags for the entries
> @@ -949,16 +949,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
>    * 0 for success, negative erro code for failure.
>    */
>   int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			   bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
> -			   struct dma_resv *resv, uint64_t start, uint64_t last,
> -			   uint64_t flags, uint64_t offset, uint64_t vram_base,
> +			   bool immediate, bool unlocked, bool flush_tlb,
> +			   bool allow_override, struct amdgpu_sync *sync,
> +			   uint64_t start, uint64_t last, uint64_t flags,
> +			   uint64_t offset, uint64_t vram_base,
>   			   struct ttm_resource *res, dma_addr_t *pages_addr,
>   			   struct dma_fence **fence)
>   {
>   	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
>   	struct amdgpu_vm_update_params params;
>   	struct amdgpu_res_cursor cursor;
> -	enum amdgpu_sync_mode sync_mode;
>   	int r, idx;
>
>   	if (!drm_dev_enter(adev_to_drm(adev), &idx))
> @@ -991,14 +991,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	params.allow_override = allow_override;
>   	INIT_LIST_HEAD(&params.tlb_flush_waitlist);
>
> -	/* Implicitly sync to command submissions in the same VM before
> -	 * unmapping. Sync to moving fences before mapping.
> -	 */
> -	if (!(flags & AMDGPU_PTE_VALID))
> -		sync_mode = AMDGPU_SYNC_EQ_OWNER;
> -	else
> -		sync_mode = AMDGPU_SYNC_EXPLICIT;
> -
>   	amdgpu_vm_eviction_lock(vm);
>   	if (vm->evicting) {
>   		r = -EBUSY;
> @@ -1013,7 +1005,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		dma_fence_put(tmp);
>   	}
>
> -	r = vm->update_funcs->prepare(&params, resv, sync_mode);
> +	r = vm->update_funcs->prepare(&params, sync);
>   	if (r)
>   		goto error_free;
>
> @@ -1155,23 +1147,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   	struct amdgpu_bo *bo = bo_va->base.bo;
>   	struct amdgpu_vm *vm = bo_va->base.vm;
>   	struct amdgpu_bo_va_mapping *mapping;
> +	struct dma_fence **last_update;
>   	dma_addr_t *pages_addr = NULL;
>   	struct ttm_resource *mem;
> -	struct dma_fence **last_update;
> +	struct amdgpu_sync sync;
>   	bool flush_tlb = clear;
> -	bool uncached;
> -	struct dma_resv *resv;
>   	uint64_t vram_base;
>   	uint64_t flags;
> +	bool uncached;
>   	int r;
>
> +	amdgpu_sync_create(&sync);
>   	if (clear || !bo) {
>   		mem = NULL;
> -		resv = vm->root.bo->tbo.base.resv;
> +
> +		/* Implicitly sync to command submissions in the same VM before
> +		 * unmapping.
> +		 */
> +		r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
> +				     AMDGPU_SYNC_EQ_OWNER, vm);
> +		if (r)
> +			goto error_free;
>   	} else {
>   		struct drm_gem_object *obj = &bo->tbo.base;
>
> -		resv = bo->tbo.base.resv;
>   		if (obj->import_attach && bo_va->is_xgmi) {
>   			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
>   			struct drm_gem_object *gobj = dma_buf->priv;
> @@ -1185,6 +1184,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   		if (mem && (mem->mem_type == TTM_PL_TT ||
>   			    mem->mem_type == AMDGPU_PL_PREEMPT))
>   			pages_addr = bo->tbo.ttm->dma_address;
> +
> +		/* Implicitly sync to moving fences before mapping anything */
> +		r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
> +				     AMDGPU_SYNC_EXPLICIT, vm);
> +		if (r)
> +			goto error_free;
>   	}
>
>   	if (bo) {
> @@ -1234,12 +1239,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   		trace_amdgpu_vm_bo_update(mapping);
>
>   		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
> -					   !uncached, resv, mapping->start, mapping->last,
> -					   update_flags, mapping->offset,
> -					   vram_base, mem, pages_addr,
> -					   last_update);
> +					   !uncached, &sync, mapping->start,
> +					   mapping->last, update_flags,
> +					   mapping->offset, vram_base, mem,
> +					   pages_addr, last_update);
>   		if (r)
> -			return r;
> +			goto error_free;
>   	}
>
>   	/* If the BO is not in its preferred location add it back to
> @@ -1267,7 +1272,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   			trace_amdgpu_vm_bo_mapping(mapping);
>   	}
>
> -	return 0;
> +error_free:
> +	amdgpu_sync_free(&sync);
> +	return r;
>   }
>
>   /**
> @@ -1414,25 +1421,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   			  struct amdgpu_vm *vm,
>   			  struct dma_fence **fence)
>   {
> -	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
>   	struct amdgpu_bo_va_mapping *mapping;
> -	uint64_t init_pte_value = 0;
>   	struct dma_fence *f = NULL;
> +	struct amdgpu_sync sync;
>   	int r;
>
> +
> +	/*
> +	 * Implicitly sync to command submissions in the same VM before
> +	 * unmapping.
> +	 */
> +	amdgpu_sync_create(&sync);
> +	r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
> +			     AMDGPU_SYNC_EQ_OWNER, vm);
> +	if (r)
> +		goto error_free;
> +
>   	while (!list_empty(&vm->freed)) {
>   		mapping = list_first_entry(&vm->freed,
>   			struct amdgpu_bo_va_mapping, list);
>   		list_del(&mapping->list);
>
>   		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
> -					   resv, mapping->start, mapping->last,
> -					   init_pte_value, 0, 0, NULL, NULL,
> -					   &f);
> +					   &sync, mapping->start, mapping->last,
> +					   0, 0, 0, NULL, NULL, &f);
>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>   		if (r) {
>   			dma_fence_put(f);
> -			return r;
> +			goto error_free;
>   		}
>   	}
>
> @@ -1443,7 +1459,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   		dma_fence_put(f);
>   	}
>
> -	return 0;
> +error_free:
> +	amdgpu_sync_free(&sync);
> +	return r;
>
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 046949c4b695..1a759012ce93 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -304,8 +304,8 @@ struct amdgpu_vm_update_params {
>
>   struct amdgpu_vm_update_funcs {
>   	int (*map_table)(struct amdgpu_bo_vm *bo);
> -	int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
> -		       enum amdgpu_sync_mode sync_mode);
> +	int (*prepare)(struct amdgpu_vm_update_params *p,
> +		       struct amdgpu_sync *sync);
>   	int (*update)(struct amdgpu_vm_update_params *p,
>   		      struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
>   		      unsigned count, uint32_t incr, uint64_t flags);
> @@ -505,9 +505,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>   			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>   int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			   bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
> -			   struct dma_resv *resv, uint64_t start, uint64_t last,
> -			   uint64_t flags, uint64_t offset, uint64_t vram_base,
> +			   bool immediate, bool unlocked, bool flush_tlb,
> +			   bool allow_override, struct amdgpu_sync *sync,
> +			   uint64_t start, uint64_t last, uint64_t flags,
> +			   uint64_t offset, uint64_t vram_base,
>   			   struct ttm_resource *res, dma_addr_t *pages_addr,
>   			   struct dma_fence **fence);
>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index 3895bd7d176a..9ff59a4e6f15 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -46,13 +46,12 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
>    * Negativ errno, 0 for success.
>    */
>   static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
> -				 struct dma_resv *resv,
> -				 enum amdgpu_sync_mode sync_mode)
> +				 struct amdgpu_sync *sync)
>   {
> -	if (!resv)
> +	if (!sync)
>   		return 0;
>
> -	return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
> +	return amdgpu_sync_wait(sync, true);
>   }
>
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index e39d6e7643bf..a076f43097e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -403,7 +403,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	params.vm = vm;
>   	params.immediate = immediate;
>
> -	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
> +	r = vm->update_funcs->prepare(&params, NULL);
>   	if (r)
>   		goto exit;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index 9b748d7058b5..4772fba33285 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -77,32 +77,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
>    * amdgpu_vm_sdma_prepare - prepare SDMA command submission
>    *
>    * @p: see amdgpu_vm_update_params definition
> - * @resv: reservation object with embedded fence
> - * @sync_mode: synchronization mode
> + * @sync: amdgpu_sync object with fences to wait for
>    *
>    * Returns:
>    * Negativ errno, 0 for success.
>    */
>   static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
> -				  struct dma_resv *resv,
> -				  enum amdgpu_sync_mode sync_mode)
> +				  struct amdgpu_sync *sync)
>   {
> -	struct amdgpu_sync sync;
>   	int r;
>
>   	r = amdgpu_vm_sdma_alloc_job(p, 0);
>   	if (r)
>   		return r;
>
> -	if (!resv)
> +	if (!sync)
>   		return 0;
>
> -	amdgpu_sync_create(&sync);
> -	r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
> -	if (!r)
> -		r = amdgpu_sync_push_to_job(&sync, p->job);
> -	amdgpu_sync_free(&sync);
> -
> +	r = amdgpu_sync_push_to_job(sync, p->job);
>   	if (r) {
>   		p->num_dw_left = 0;
>   		amdgpu_job_free(p->job);