[PATCH v2 2/6] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops
Cavitt, Jonathan
jonathan.cavitt at intel.com
Wed May 15 21:56:14 UTC 2024
-----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew Brost
> Sent: Tuesday, May 14, 2024 5:40 PM
> To: intel-xe at lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost at intel.com>; Zeng, Oak <oak.zeng at intel.com>; Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Subject: [PATCH v2 2/6] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops
>
> Each xe_vma_op resolves to 0-3 pt_ops. Add storage for the pt_ops to
> xe_vma_ops which is dynamically allocated based the number and types of
> xe_vma_op in the xe_vma_ops list. Allocation only implemented in this
> patch.
>
> This will help with converting xe_vma_ops (multiple xe_vma_op) in a
> atomic update unit.
>
> Cc: Oak Zeng <oak.zeng at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
I have a minor nit lower down, but nothing worth blocking over:
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> ---
> drivers/gpu/drm/xe/xe_pt_types.h | 12 ++++++
> drivers/gpu/drm/xe/xe_vm.c | 66 +++++++++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_vm_types.h | 8 ++++
> 3 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
> index cee70cb0f014..2093150f461e 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -74,4 +74,16 @@ struct xe_vm_pgtable_update {
> u32 flags;
> };
>
> +/** struct xe_vm_pgtable_update_op - Page table update operation */
> +struct xe_vm_pgtable_update_op {
> + /** @entries: entries to update for this operation */
> + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
> + /** @num_entries: number of entries for this update operation */
> + u32 num_entries;
> + /** @bind: is a bind */
> + bool bind;
> + /** @rebind: is a rebind */
> + bool rebind;
I wonder if using a single "mask" variable for both bind and rebind
would be preferable instead of having separate Booleans for bind
and rebind? Something like:
#define VM_PGTABLE_UPDATE_OP_BIND BIT(1)
#define VM_PGTABLE_UPDATE_OP_REBIND BIT(2)
/** @bind_mask: Mask of bind types to perform */
u32 bind_mask;
If you decide to take it this direction, you'd probably also want helper
functions to set, unset, and check the bind and rebind flags for the
bind_mask. That's... six helper functions in total?
I can see why this wasn't chosen initially: the complexity of using a
mask instead of two separate Booleans rapidly gets out of hand. But
it might be considered preferable from an upstream perspective to
use a mask instead, if only from a style perspective? I'm not certain.
Feel free to disregard this, it's just a suggestion.
-Jonathan Cavitt
> +};
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index c5b1694b292f..17b43b567bf3 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -712,6 +712,42 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> }
>
> +static int xe_vma_ops_alloc(struct xe_vma_ops *vops)
> +{
> + int i;
> +
> + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
> + if (!vops->pt_update_ops[i].num_ops)
> + continue;
> +
> + vops->pt_update_ops[i].ops =
> + kmalloc_array(vops->pt_update_ops[i].num_ops,
> + sizeof(*vops->pt_update_ops[i].ops),
> + GFP_KERNEL);
> + if (!vops->pt_update_ops[i].ops)
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> +static void xe_vma_ops_fini(struct xe_vma_ops *vops)
> +{
> + int i;
> +
> + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> + kfree(vops->pt_update_ops[i].ops);
> +}
> +
> +static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
> +{
> + int i;
> +
> + for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> + if (BIT(i) & tile_mask)
> + ++vops->pt_update_ops[i].num_ops;
> +}
> +
> static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
> u8 tile_mask)
> {
> @@ -739,6 +775,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
>
> xe_vm_populate_rebind(op, vma, tile_mask);
> list_add_tail(&op->link, &vops->list);
> + xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
>
> return 0;
> }
> @@ -779,6 +816,10 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> goto free_ops;
> }
>
> + err = xe_vma_ops_alloc(&vops);
> + if (err)
> + goto free_ops;
> +
> fence = ops_execute(vm, &vops);
> if (IS_ERR(fence)) {
> err = PTR_ERR(fence);
> @@ -793,6 +834,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> list_del(&op->link);
> kfree(op);
> }
> + xe_vma_ops_fini(&vops);
>
> return err;
> }
> @@ -814,12 +856,20 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
> if (err)
> return ERR_PTR(err);
>
> + err = xe_vma_ops_alloc(&vops);
> + if (err) {
> + fence = ERR_PTR(err);
> + goto free_ops;
> + }
> +
> fence = ops_execute(vm, &vops);
>
> +free_ops:
> list_for_each_entry_safe(op, next_op, &vops.list, link) {
> list_del(&op->link);
> kfree(op);
> }
> + xe_vma_ops_fini(&vops);
>
> return fence;
> }
> @@ -2276,7 +2326,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
> return err;
> }
>
> -
> static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> struct drm_gpuva_ops *ops,
> struct xe_sync_entry *syncs, u32 num_syncs,
> @@ -2328,6 +2377,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> return PTR_ERR(vma);
>
> op->map.vma = vma;
> + if (op->map.immediate || !xe_vm_in_fault_mode(vm))
> + xe_vma_ops_incr_pt_update_ops(vops,
> + op->tile_mask);
> break;
> }
> case DRM_GPUVA_OP_REMAP:
> @@ -2372,6 +2424,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
> (ULL)op->remap.start,
> (ULL)op->remap.range);
> + } else {
> + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
> }
> }
>
> @@ -2408,13 +2462,16 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
> (ULL)op->remap.start,
> (ULL)op->remap.range);
> + } else {
> + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
> }
> }
> + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
> break;
> }
> case DRM_GPUVA_OP_UNMAP:
> case DRM_GPUVA_OP_PREFETCH:
> - /* Nothing to do */
> + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
> break;
> default:
> drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> @@ -3261,11 +3318,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto unwind_ops;
> }
>
> + err = xe_vma_ops_alloc(&vops);
> + if (err)
> + goto unwind_ops;
> +
> err = vm_bind_ioctl_ops_execute(vm, &vops);
>
> unwind_ops:
> if (err && err != -ENODATA)
> vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> + xe_vma_ops_fini(&vops);
> for (i = args->num_binds - 1; i >= 0; --i)
> if (ops[i])
> drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index ce1a63a5e3e7..211c88801182 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -21,6 +21,7 @@ struct xe_bo;
> struct xe_sync_entry;
> struct xe_user_fence;
> struct xe_vm;
> +struct xe_vm_pgtable_update_op;
>
> #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
> #define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1)
> @@ -368,6 +369,13 @@ struct xe_vma_ops {
> struct xe_sync_entry *syncs;
> /** @num_syncs: number of syncs */
> u32 num_syncs;
> + /** @pt_update_ops: page table update operations */
> + struct {
> + /** @ops: operations */
> + struct xe_vm_pgtable_update_op *ops;
> + /** @num_ops: number of operations */
> + u32 num_ops;
> + } pt_update_ops[XE_MAX_TILES_PER_DEVICE];
> };
>
> #endif
> --
> 2.34.1
>
>
More information about the Intel-xe
mailing list