[PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

Tue Oct 31 13:57:05 UTC 2023

Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> The current amdgpu_gem_va_update_vm only tries to perform updates for the
> BO specified in the GEM ioctl; however, when a binding is split, the
> adjacent bindings also need to be updated. Such updates currently ends up
> getting deferred until next submission which causes stalls.

Yeah, that is a necessity. The hardware simply doesn't support what you 
try to do here in all cases.

So this approach won't work in general.

Regards,
Christian.

>
> Introduce a new state "dirty", shared between per-VM BOs and traditional
> BOs, containing all BOs that have pending updates in `invalids`.
> amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
> in the dirty state.
>
> Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki at gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>   3 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index a1b15d0d6c48..01d3a97248b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>    * vital here, so they are not reported back to userspace.
>    */
>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> -				    struct amdgpu_vm *vm,
> -				    struct amdgpu_bo_va *bo_va,
> -				    uint32_t operation)
> +				    struct amdgpu_vm *vm)
>   {
> +	struct amdgpu_bo_va *bo_va;
>   	int r;
>   
>   	if (!amdgpu_vm_ready(vm))
> @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   	if (r)
>   		goto error;
>   
> -	if (operation == AMDGPU_VA_OP_MAP ||
> -	    operation == AMDGPU_VA_OP_REPLACE) {
> +	spin_lock(&vm->status_lock);
> +	while (!list_empty(&vm->dirty)) {
> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> +					 base.vm_status);
> +		spin_unlock(&vm->status_lock);
> +
>   		r = amdgpu_vm_bo_update(adev, bo_va, false);
>   		if (r)
>   			goto error;
> +		spin_lock(&vm->status_lock);
>   	}
> +	spin_unlock(&vm->status_lock);
>   
>   	r = amdgpu_vm_update_pdes(adev, vm, false);
>   
> @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   		break;
>   	}
>   	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> -		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> -					args->operation);
> +		amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>   
>   error:
>   	drm_exec_fini(&exec);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index dd6f72e2a1d6..01d31891cd05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>   	spin_unlock(&vm_bo->vm->status_lock);
>   }
>   
> +/**
> + * amdgpu_vm_bo_dirty - vm_bo is dirty
> + *
> + * @vm_bo: vm_bo which is dirty
> + *
> + * State for normal and per VM BOs that are not moved, but have new entries in
> + * bo_va->invalids.
> + */
> +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	spin_lock(&vm_bo->vm->status_lock);
> +	list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
> +	spin_unlock(&vm_bo->vm->status_lock);
> +}
> +
>   /**
>    * amdgpu_vm_bo_moved - vm_bo is moved
>    *
> @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>   	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>   		amdgpu_vm_bo_get_memory(bo_va, stats);
>   
> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
> +		amdgpu_vm_bo_get_memory(bo_va, stats);
> +
>   	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>   		amdgpu_vm_bo_get_memory(bo_va, stats);
>   
> @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>   			dma_resv_unlock(resv);
>   		spin_lock(&vm->status_lock);
>   	}
> +
> +	while (!list_empty(&vm->dirty)) {
> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> +					 base.vm_status);
> +		spin_unlock(&vm->status_lock);
> +
> +		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		if (r)
> +			return r;
> +		spin_lock(&vm->status_lock);
> +	}
>   	spin_unlock(&vm->status_lock);
>   
>   	return 0;
> @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>   				    struct amdgpu_bo_va_mapping *mapping)
>   {
>   	struct amdgpu_vm *vm = bo_va->base.vm;
> -	struct amdgpu_bo *bo = bo_va->base.bo;
>   
>   	mapping->bo_va = bo_va;
>   	list_add(&mapping->list, &bo_va->invalids);
>   	amdgpu_vm_it_insert(mapping, &vm->va);
> +	if (!bo_va->base.moved)
> +		amdgpu_vm_bo_dirty(&bo_va->base);
>   
>   	if (mapping->flags & AMDGPU_PTE_PRT)
>   		amdgpu_vm_prt_get(adev);
>   
> -	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -	    !bo_va->base.moved) {
> -		amdgpu_vm_bo_moved(&bo_va->base);
> -	}
>   	trace_amdgpu_vm_bo_map(bo_va, mapping);
>   }
>   
> @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   			before->flags = tmp->flags;
>   			before->bo_va = tmp->bo_va;
>   			list_add(&before->list, &tmp->bo_va->invalids);
> +			if (!tmp->bo_va->base.moved)
> +				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>   		}
>   
>   		/* Remember mapping split at the end */
> @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   			after->flags = tmp->flags;
>   			after->bo_va = tmp->bo_va;
>   			list_add(&after->list, &tmp->bo_va->invalids);
> +			if (!tmp->bo_va->base.moved)
> +				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>   		}
>   
>   		list_del(&tmp->list);
> @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   
>   	/* Insert partial mapping before the range */
>   	if (!list_empty(&before->list)) {
> -		struct amdgpu_bo *bo = before->bo_va->base.bo;
> -
>   		amdgpu_vm_it_insert(before, &vm->va);
>   		if (before->flags & AMDGPU_PTE_PRT)
>   			amdgpu_vm_prt_get(adev);
> -
> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -		    !before->bo_va->base.moved)
> -			amdgpu_vm_bo_moved(&before->bo_va->base);
>   	} else {
>   		kfree(before);
>   	}
>   
>   	/* Insert partial mapping after the range */
>   	if (!list_empty(&after->list)) {
> -		struct amdgpu_bo *bo = after->bo_va->base.bo;
> -
>   		amdgpu_vm_it_insert(after, &vm->va);
>   		if (after->flags & AMDGPU_PTE_PRT)
>   			amdgpu_vm_prt_get(adev);
> -
> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -		    !after->bo_va->base.moved)
> -			amdgpu_vm_bo_moved(&after->bo_va->base);
>   	} else {
>   		kfree(after);
>   	}
> @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>   	INIT_LIST_HEAD(&vm->evicted);
>   	INIT_LIST_HEAD(&vm->relocated);
>   	INIT_LIST_HEAD(&vm->moved);
> +	INIT_LIST_HEAD(&vm->dirty);
>   	INIT_LIST_HEAD(&vm->idle);
>   	INIT_LIST_HEAD(&vm->invalidated);
>   	spin_lock_init(&vm->status_lock);
> @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   {
>   	struct amdgpu_bo_va *bo_va, *tmp;
>   	u64 total_idle = 0;
> +	u64 total_dirty = 0;
>   	u64 total_relocated = 0;
>   	u64 total_moved = 0;
>   	u64 total_invalidated = 0;
>   	u64 total_done = 0;
>   	unsigned int total_idle_objs = 0;
> +	unsigned int total_dirty_objs = 0;
>   	unsigned int total_relocated_objs = 0;
>   	unsigned int total_moved_objs = 0;
>   	unsigned int total_invalidated_objs = 0;
> @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   	total_idle_objs = id;
>   	id = 0;
>   
> +	seq_puts(m, "\tDirty BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_dirty_objs = id;
> +	id = 0;
> +
>   	seq_puts(m, "\tRelocated BOs:\n");
>   	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>   		if (!bo_va->base.bo)
> @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   
>   	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>   		   total_idle_objs);
> +	seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
> +		   total_dirty_objs);
>   	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>   		   total_relocated_objs);
>   	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d9ab97eabda9..f91d4fcf80b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -276,6 +276,9 @@ struct amdgpu_vm {
>   	/* per VM BOs moved, but not yet updated in the PT */
>   	struct list_head	moved;
>   
> +	/* normal and per VM BOs that are not moved, but have new PT entries */
> +	struct list_head	dirty;
> +
>   	/* All BOs of this VM not currently in the state machine */
>   	struct list_head	idle;
>