[Intel-xe] [PATCH 1/3] drm/xe: Fixup unwind on VM ops errors

Tue Aug 22 23:33:48 UTC 2023

On Wed, Aug 16, 2023 at 09:31:46PM -0700, Matthew Brost wrote:
> Remap ops have 3 parts: unmap, prev, and next. The commit step can fail
> on any of these. Add a flag for each to these so the unwind is only done
> the steps that have been committed.
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c       | 24 +++++++++++++++++-------
>  drivers/gpu/drm/xe/xe_vm_types.h | 10 +++++++---
>  2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 2e99f865d7ec..bd20840616ca 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2618,18 +2618,25 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  	switch (op->base.op) {
>  	case DRM_GPUVA_OP_MAP:
>  		err |= xe_vm_insert_vma(vm, op->map.vma);
> +		if (!err)
> +			op->flags |= XE_VMA_OP_COMMITTED;
>  		break;
>  	case DRM_GPUVA_OP_REMAP:
>  		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
>  				 true);
> +		op->flags |= XE_VMA_OP_COMMITTED;
>  
>  		if (op->remap.prev) {
>  			err |= xe_vm_insert_vma(vm, op->remap.prev);
> +			if (!err)
> +				op->flags |= XE_VMA_OP_PREV_COMMITTED;
>  			if (!err && op->remap.skip_prev)
>  				op->remap.prev = NULL;
>  		}
>  		if (op->remap.next) {
>  			err |= xe_vm_insert_vma(vm, op->remap.next);
> +			if (!err)
> +				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
>  			if (!err && op->remap.skip_next)
>  				op->remap.next = NULL;
>  		}
> @@ -2642,15 +2649,15 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  		break;
>  	case DRM_GPUVA_OP_UNMAP:
>  		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
> +		op->flags |= XE_VMA_OP_COMMITTED;
>  		break;
>  	case DRM_GPUVA_OP_PREFETCH:
> -		/* Nothing to do */
> +		op->flags |= XE_VMA_OP_COMMITTED;
>  		break;
>  	default:
>  		XE_WARN_ON("NOT POSSIBLE");
>  	}
>  
> -	op->flags |= XE_VMA_OP_COMMITTED;
>  	return err;
>  }
>  
> @@ -2855,7 +2862,8 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
>  }
>  
>  static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
> -			     bool post_commit)
> +			     bool post_commit, bool prev_post_commit,
> +			     bool next_post_commit)
>  {
>  	lockdep_assert_held_write(&vm->lock);
>  
> @@ -2882,11 +2890,11 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
>  		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
>  
>  		if (op->remap.prev) {
> -			prep_vma_destroy(vm, op->remap.prev, post_commit);
> +			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
>  			xe_vma_destroy_unlocked(op->remap.prev);
>  		}
>  		if (op->remap.next) {
> -			prep_vma_destroy(vm, op->remap.next, post_commit);
> +			prep_vma_destroy(vm, op->remap.next, next_post_commit);
>  			xe_vma_destroy_unlocked(op->remap.next);
>  		}
>  		down_read(&vm->userptr.notifier_lock);
> @@ -3025,7 +3033,9 @@ static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
>  
>  unwind:
>  	list_for_each_entry_reverse(op, ops_list, link)
> -		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED);
> +		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED,
> +				 op->flags & XE_VMA_OP_PREV_COMMITTED,
> +				 op->flags & XE_VMA_OP_NEXT_COMMITTED);
>  	list_for_each_entry_safe(op, next, ops_list, link)
>  		xe_vma_op_cleanup(vm, op);
>  
> @@ -3052,7 +3062,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
>  		drm_gpuva_for_each_op(__op, __ops) {
>  			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
>  
> -			xe_vma_op_unwind(vm, op, false);
> +			xe_vma_op_unwind(vm, op, false, false, false);
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 3681a5ff588b..23b6e10a5080 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -377,11 +377,15 @@ struct xe_vma_op_prefetch {
>  /** enum xe_vma_op_flags - flags for VMA operation */
>  enum xe_vma_op_flags {
>  	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> -	XE_VMA_OP_FIRST		= (0x1 << 0),
> +	XE_VMA_OP_FIRST			= (0x1 << 0),
>  	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> -	XE_VMA_OP_LAST		= (0x1 << 1),
> +	XE_VMA_OP_LAST			= (0x1 << 1),
>  	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
> -	XE_VMA_OP_COMMITTED	= (0x1 << 2),
> +	XE_VMA_OP_COMMITTED		= (0x1 << 2),
> +	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
> +	XE_VMA_OP_PREV_COMMITTED	= (0x1 << 3),
> +	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
> +	XE_VMA_OP_NEXT_COMMITTED	= (0x1 << 4),

only small comment is that it should be good to change to BIT() macros here.

But up to you:

Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>

>  };
>  
>  /** struct xe_vma_op - VMA operation */
> -- 
> 2.34.1
>