[PATCH v2 2/6] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops

Wed May 15 21:56:14 UTC 2024

-----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew Brost
> Sent: Tuesday, May 14, 2024 5:40 PM
> To: intel-xe at lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost at intel.com>; Zeng, Oak <oak.zeng at intel.com>; Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Subject: [PATCH v2 2/6] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops
> 
> Each xe_vma_op resolves to 0-3 pt_ops. Add storage for the pt_ops to
> xe_vma_ops, which is dynamically allocated based on the number and types
> of xe_vma_op in the xe_vma_ops list. Only the allocation is implemented
> in this patch.
> 
> This will help with converting xe_vma_ops (multiple xe_vma_op) into an
> atomic update unit.
> 
> Cc: Oak Zeng <oak.zeng at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>

I have a minor nit lower down, but nothing worth blocking over:
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>

> ---
>  drivers/gpu/drm/xe/xe_pt_types.h | 12 ++++++
>  drivers/gpu/drm/xe/xe_vm.c       | 66 +++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_vm_types.h |  8 ++++
>  3 files changed, 84 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
> index cee70cb0f014..2093150f461e 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -74,4 +74,16 @@ struct xe_vm_pgtable_update {
>  	u32 flags;
>  };
>  
> +/** struct xe_vm_pgtable_update_op - Page table update operation */
> +struct xe_vm_pgtable_update_op {
> +	/** @entries: entries to update for this operation */
> +	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
> +	/** @num_entries: number of entries for this update operation */
> +	u32 num_entries;
> +	/** @bind: is a bind */
> +	bool bind;
> +	/** @rebind: is a rebind */
> +	bool rebind;

I wonder if using a single "mask" variable for both bind and rebind
would be preferable to having separate Booleans for each.
Something like:

#define VM_PGTABLE_UPDATE_OP_BIND	BIT(1)
#define VM_PGTABLE_UPDATE_OP_REBIND	BIT(2)
	/** @bind_mask: Mask of bind types to perform */
	u32 bind_mask;

If you decide to take it in this direction, you'd probably also want helper
functions to set, unset, and check each of the bind and rebind flags in
bind_mask.  That's... six helper functions in total?

I can see why this wasn't chosen initially: the complexity of using a
mask instead of two separate Booleans rapidly gets out of hand.  But
a mask might be considered preferable upstream, if only as a matter
of style?  I'm not certain.

Feel free to disregard this, it's just a suggestion.
-Jonathan Cavitt

> +};
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index c5b1694b292f..17b43b567bf3 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -712,6 +712,42 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
>  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
>  }
>  
> +static int xe_vma_ops_alloc(struct xe_vma_ops *vops)
> +{
> +	int i;
> +
> +	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
> +		if (!vops->pt_update_ops[i].num_ops)
> +			continue;
> +
> +		vops->pt_update_ops[i].ops =
> +			kmalloc_array(vops->pt_update_ops[i].num_ops,
> +				      sizeof(*vops->pt_update_ops[i].ops),
> +				      GFP_KERNEL);
> +		if (!vops->pt_update_ops[i].ops)
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static void xe_vma_ops_fini(struct xe_vma_ops *vops)
> +{
> +	int i;
> +
> +	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> +		kfree(vops->pt_update_ops[i].ops);
> +}
> +
> +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
> +{
> +	int i;
> +
> +	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> +		if (BIT(i) & tile_mask)
> +			++vops->pt_update_ops[i].num_ops;
> +}
> +
>  static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
>  				  u8 tile_mask)
>  {
> @@ -739,6 +775,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
>  
>  	xe_vm_populate_rebind(op, vma, tile_mask);
>  	list_add_tail(&op->link, &vops->list);
> +	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
>  
>  	return 0;
>  }
> @@ -779,6 +816,10 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  			goto free_ops;
>  	}
>  
> +	err = xe_vma_ops_alloc(&vops);
> +	if (err)
> +		goto free_ops;
> +
>  	fence = ops_execute(vm, &vops);
>  	if (IS_ERR(fence)) {
>  		err = PTR_ERR(fence);
> @@ -793,6 +834,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  		list_del(&op->link);
>  		kfree(op);
>  	}
> +	xe_vma_ops_fini(&vops);
>  
>  	return err;
>  }
> @@ -814,12 +856,20 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
>  	if (err)
>  		return ERR_PTR(err);
>  
> +	err = xe_vma_ops_alloc(&vops);
> +	if (err) {
> +		fence = ERR_PTR(err);
> +		goto free_ops;
> +	}
> +
>  	fence = ops_execute(vm, &vops);
>  
> +free_ops:
>  	list_for_each_entry_safe(op, next_op, &vops.list, link) {
>  		list_del(&op->link);
>  		kfree(op);
>  	}
> +	xe_vma_ops_fini(&vops);
>  
>  	return fence;
>  }
> @@ -2276,7 +2326,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  	return err;
>  }
>  
> -
>  static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  				   struct drm_gpuva_ops *ops,
>  				   struct xe_sync_entry *syncs, u32 num_syncs,
> @@ -2328,6 +2377,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  				return PTR_ERR(vma);
>  
>  			op->map.vma = vma;
> +			if (op->map.immediate || !xe_vm_in_fault_mode(vm))
> +				xe_vma_ops_incr_pt_update_ops(vops,
> +							      op->tile_mask);
>  			break;
>  		}
>  		case DRM_GPUVA_OP_REMAP:
> @@ -2372,6 +2424,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
>  					       (ULL)op->remap.start,
>  					       (ULL)op->remap.range);
> +				} else {
> +					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
>  				}
>  			}
>  
> @@ -2408,13 +2462,16 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
>  					       (ULL)op->remap.start,
>  					       (ULL)op->remap.range);
> +				} else {
> +					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
>  				}
>  			}
> +			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
>  			break;
>  		}
>  		case DRM_GPUVA_OP_UNMAP:
>  		case DRM_GPUVA_OP_PREFETCH:
> -			/* Nothing to do */
> +			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
>  			break;
>  		default:
>  			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> @@ -3261,11 +3318,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto unwind_ops;
>  	}
>  
> +	err = xe_vma_ops_alloc(&vops);
> +	if (err)
> +		goto unwind_ops;
> +
>  	err = vm_bind_ioctl_ops_execute(vm, &vops);
>  
>  unwind_ops:
>  	if (err && err != -ENODATA)
>  		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> +	xe_vma_ops_fini(&vops);
>  	for (i = args->num_binds - 1; i >= 0; --i)
>  		if (ops[i])
>  			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index ce1a63a5e3e7..211c88801182 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -21,6 +21,7 @@ struct xe_bo;
>  struct xe_sync_entry;
>  struct xe_user_fence;
>  struct xe_vm;
> +struct xe_vm_pgtable_update_op;
>  
>  #define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
>  #define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
> @@ -368,6 +369,13 @@ struct xe_vma_ops {
>  	struct xe_sync_entry *syncs;
>  	/** @num_syncs: number of syncs */
>  	u32 num_syncs;
> +	/** @pt_update_ops: page table update operations */
> +	struct {
> +		/** @ops: operations */
> +		struct xe_vm_pgtable_update_op *ops;
> +		/** @num_ops: number of operations */
> +		u32 num_ops;
> +	} pt_update_ops[XE_MAX_TILES_PER_DEVICE];
>  };
>  
>  #endif
> -- 
> 2.34.1
> 
>