[Intel-xe] [PATCH v2 18/31] drm/xe: Avoid doing rebinds

Rodrigo Vivi rodrigo.vivi at intel.com
Tue May 9 14:48:38 UTC 2023


On Mon, May 01, 2023 at 05:17:14PM -0700, Matthew Brost wrote:
> If we dont change page sizes we can avoid doing rebinds rather just do a
> partial unbind. The algorithm to determine is page size is greedy as we

There's something off in this phrase...      ^ around here...
maybe s/is/its/ ?

But I was not able to follow the rebind and remap changes below... it
would probably be easier if this patch was part of a smaller series, or
if the remap code this is based on was already merged... or maybe
someone with deeper knowledge in this area, like Thomas, would be the
best one to review this.

> assume all pages in the removed VMA are the largest page used in the
> VMA.
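
If I'm reading the greedy part right: the bind walk flags every PTE
size it actually used for the VMA, xe_vma_max_pte_size() then treats
the whole VMA as if it were mapped with the largest of those, and a
split point only avoids the rebind when it is aligned to that size.
A quick userspace toy I used to convince myself of the alignment check
(illustrative only -- the 4M VMA, the 2M max PTE and the IS_ALIGNED
macro here are just stand-ins for the kernel ones):

#include <stdio.h>
#include <stdint.h>

#define SZ_2M			(2ull << 20)
/* stand-in for the kernel's IS_ALIGNED() */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	/* hypothetical old VMA [0, 4M) whose largest PTE used is 2M */
	uint64_t max_pte = SZ_2M;

	/* unmap the tail [2M, 4M): prev ends at 2M, aligned, so the
	 * prev part can be kept as-is and only partially unbound */
	printf("prev end 2M: skip rebind = %d\n",
	       IS_ALIGNED(2ull << 20, max_pte));

	/* unmap the tail [3M, 4M): prev ends at 3M, a (potential) 2M
	 * PTE straddles the split point, so prev must be rebound */
	printf("prev end 3M: skip rebind = %d\n",
	       IS_ALIGNED(3ull << 20, max_pte));

	return 0;
}

With the greedy assumption a split anywhere inside a possible 2M page
forces the rebind even if that particular range really used 4K PTEs,
which is conservative but safe, if I got it right.
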
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_pt.c       |  4 ++
>  drivers/gpu/drm/xe/xe_vm.c       | 71 +++++++++++++++++++++++++-------
>  drivers/gpu/drm/xe/xe_vm_types.h | 17 ++++----
>  3 files changed, 67 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index b4edb751bfbb..010f44260cda 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -412,6 +412,8 @@ struct xe_pt_stage_bind_walk {
>  	/* Input parameters for the walk */
>  	/** @vm: The vm we're building for. */
>  	struct xe_vm *vm;
> +	/** @vma: The vma we are binding for. */
> +	struct xe_vma *vma;
>  	/** @gt: The gt we're building for. */
>  	struct xe_gt *gt;
>  	/** @cache: Desired cache level for the ptes */
> @@ -688,6 +690,7 @@ xe_pt_stage_bind_entry(struct drm_pt *parent, pgoff_t offset,
>  		if (!null)
>  			xe_res_next(curs, next - addr);
>  		xe_walk->va_curs_start = next;
> +		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
>  		*action = ACTION_CONTINUE;
>  
>  		return ret;
> @@ -776,6 +779,7 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>  			.max_level = XE_PT_HIGHEST_LEVEL,
>  		},
>  		.vm = xe_vma_vm(vma),
> +		.vma = vma,
>  		.gt = gt,
>  		.curs = &curs,
>  		.va_curs_start = xe_vma_start(vma),
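
Side note for other readers (my understanding, please correct me): the
XE_VMA_PTE_4K << level trick above relies on the three new flags being
consecutive bits, i.e.

	level 0 (4K PTE) -> XE_VMA_PTE_4K
	level 1 (2M PTE) -> XE_VMA_PTE_2M
	level 2 (1G PTE) -> XE_VMA_PTE_1G

which matches the definitions added in xe_vm_types.h below.
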
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index a46f44ab2546..e0ed7201aeb0 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2276,6 +2276,16 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
>  	return vma;
>  }
>  
> +static u64 xe_vma_max_pte_size(struct xe_vma *vma)
> +{
> +	if (vma->gpuva.flags & XE_VMA_PTE_1G)
> +		return SZ_1G;
> +	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
> +		return SZ_2M;
> +
> +	return SZ_4K;
> +}
> +
>  /*
>   * Parse operations list and create any resources needed for the operations
>   * prior to fully commiting to the operations. This setp can fail.
> @@ -2352,6 +2362,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  				break;
>  			}
>  			case DRM_GPUVA_OP_REMAP:
> +			{
> +				struct xe_vma *old =
> +					gpuva_to_vma(op->base.remap.unmap->va);
> +
> +				op->remap.start = xe_vma_start(old);
> +				op->remap.range = xe_vma_size(old);
> +
>  				if (op->base.remap.prev) {
>  					struct xe_vma *vma;
>  					bool read_only =
> @@ -2370,6 +2387,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  					}
>  
>  					op->remap.prev = vma;
> +
> +					/*
> +					 * XXX: Not sure why userptr doesn't
> +					 * work but really shouldn't be a use
> +					 * case.
> +					 */
> +					op->remap.skip_prev = !xe_vma_is_userptr(old) &&
> +						IS_ALIGNED(xe_vma_end(vma), xe_vma_max_pte_size(old));
> +					if (op->remap.skip_prev) {
> +						op->remap.range -=
> +							xe_vma_end(vma) -
> +							xe_vma_start(old);
> +						op->remap.start = xe_vma_end(vma);
> +					}
>  				}
>  
>  				if (op->base.remap.next) {
> @@ -2391,20 +2422,16 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
>  					}
>  
>  					op->remap.next = vma;
> +					op->remap.skip_next = !xe_vma_is_userptr(old) &&
> +						IS_ALIGNED(xe_vma_start(vma), xe_vma_max_pte_size(old));
> +					if (op->remap.skip_next)
> +						op->remap.range -=
> +							xe_vma_end(old) -
> +							xe_vma_start(vma);
>  				}
> -
> -				/* XXX: Support no doing remaps */
> -				op->remap.start =
> -					xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va));
> -				op->remap.range =
> -					xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
>  				break;
> +			}
>  			case DRM_GPUVA_OP_UNMAP:
> -				op->unmap.start =
> -					xe_vma_start(gpuva_to_vma(op->base.unmap.va));
> -				op->unmap.range =
> -					xe_vma_size(gpuva_to_vma(op->base.unmap.va));
> -				break;
>  			case DRM_GPUVA_OP_PREFETCH:
>  				/* Nothing to do */
>  				break;
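
To check that I follow the start/range math above, a worked example
with my own numbers (not from the patch): old VMA [0, 4M) whose largest
PTE is 2M, new map at [2M, 3M), so prev = [0, 2M) and next = [3M, 4M).

	start = 0, range = 4M                      (whole old VMA)
	prev:  end = 2M,   IS_ALIGNED(2M, 2M) -> skip_prev
	       range -= 2M - 0;  start = 2M        (unmap now [2M, 4M))
	next:  start = 3M, !IS_ALIGNED(3M, 2M) -> no skip_next
	       range unchanged, next gets rebound

So the page tables would end up with [0, 2M) untouched, [2M, 4M)
unbound, and [3M, 4M) rebound afterwards, if I'm reading the adjustment
right.
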
> @@ -2445,10 +2472,23 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  	case DRM_GPUVA_OP_REMAP:
>  		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
>  				 true);
> -		if (op->remap.prev)
> +
> +		if (op->remap.prev) {
>  			err |= xe_vm_insert_vma(vm, op->remap.prev);
> -		if (op->remap.next)
> +			if (!err && op->remap.skip_prev)
> +				op->remap.prev = NULL;
> +		}
> +		if (op->remap.next) {
>  			err |= xe_vm_insert_vma(vm, op->remap.next);
> +			if (!err && op->remap.skip_next)
> +				op->remap.next = NULL;
> +		}
> +
> +		/* Adjust for partial unbind after removin VMA from VM */
> +		if (!err) {
> +			op->base.remap.unmap->va->va.addr = op->remap.start;
> +			op->base.remap.unmap->va->va.range = op->remap.range;
> +		}
>  		break;
>  	case DRM_GPUVA_OP_UNMAP:
>  		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
> @@ -2518,9 +2558,10 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>  		bool next = !!op->remap.next;
>  
>  		if (!op->remap.unmap_done) {
> -			vm->async_ops.munmap_rebind_inflight = true;
> -			if (prev || next)
> +			if (prev || next) {
> +				vm->async_ops.munmap_rebind_inflight = true;
>  				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> +			}
>  			err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
>  					   op->num_syncs,
>  					   !prev && !next ? op->fence : NULL,
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index b61007b70502..d55ec8156caa 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -30,6 +30,9 @@ struct xe_vm;
>  #define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
>  #define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
>  #define XE_VMA_NULL		(DRM_GPUVA_USERBITS << 5)
> +#define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 6)
> +#define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 7)
> +#define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 8)
>  
>  struct xe_vma {
>  	/** @gpuva: Base GPUVA object */
> @@ -320,14 +323,6 @@ struct xe_vma_op_map {
>  	bool null;
>  };
>  
> -/** struct xe_vma_op_unmap - VMA unmap operation */
> -struct xe_vma_op_unmap {
> -	/** @start: start of the VMA unmap */
> -	u64 start;
> -	/** @range: range of the VMA unmap */
> -	u64 range;
> -};
> -
>  /** struct xe_vma_op_remap - VMA remap operation */
>  struct xe_vma_op_remap {
>  	/** @prev: VMA preceding part of a split mapping */
> @@ -338,6 +333,10 @@ struct xe_vma_op_remap {
>  	u64 start;
>  	/** @range: range of the VMA unmap */
>  	u64 range;
> +	/** @skip_prev: skip prev rebind */
> +	bool skip_prev;
> +	/** @skip_next: skip next rebind */
> +	bool skip_next;
>  	/** @unmap_done: unmap operation in done */
>  	bool unmap_done;
>  };
> @@ -395,8 +394,6 @@ struct xe_vma_op {
>  	union {
>  		/** @map: VMA map operation specific data */
>  		struct xe_vma_op_map map;
> -		/** @unmap: VMA unmap operation specific data */
> -		struct xe_vma_op_unmap unmap;
>  		/** @remap: VMA remap operation specific data */
>  		struct xe_vma_op_remap remap;
>  		/** @prefetch: VMA prefetch operation specific data */
> -- 
> 2.34.1
> 

