[PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds
Zeng, Oak
oak.zeng at intel.com
Fri Apr 19 14:22:29 UTC 2024
> -----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe at lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost at intel.com>
> Subject: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault
> rebinds
>
> All page tables updates are moving to a xe_vma_ops interface to
> implement 1 job per VM bind IOCTL.
Can you explain why using the xe_vma_ops interface is necessary even when binding a single vma? I understand that it makes sense to use this interface to bind multiple vmas. See also my comment below.
> Add xe_vma_rebind function which is
> implemented using xe_vma_ops interface. Use xe_vma_rebind in page
> faults
> for rebinds.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
> drivers/gpu/drm/xe/xe_vm.c | 57 +++++++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_vm.h | 2 +
> drivers/gpu/drm/xe/xe_vm_types.h | 2 +
> 4 files changed, 58 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index fa9e9853c53b..040dd142c49c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -19,7 +19,6 @@
> #include "xe_guc.h"
> #include "xe_guc_ct.h"
> #include "xe_migrate.h"
> -#include "xe_pt.h"
> #include "xe_trace.h"
> #include "xe_vm.h"
>
> @@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt, struct
> pagefault *pf)
> drm_exec_retry_on_contention(&exec);
> if (ret)
> goto unlock_dma_resv;
> - }
>
> - /* Bind VMA only to the GT that has faulted */
> - trace_xe_vma_pf_bind(vma);
> - fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile),
> NULL, 0,
> - vma->tile_present & BIT(tile->id));
> - if (IS_ERR(fence)) {
> - ret = PTR_ERR(fence);
> - goto unlock_dma_resv;
> + /* Bind VMA only to the GT that has faulted */
> + trace_xe_vma_pf_bind(vma);
> + fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> + if (IS_ERR(fence)) {
> + ret = PTR_ERR(fence);
> + goto unlock_dma_resv;
> + }
> }
>
> /*
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 8f5b24c8f6cd..54a69fbfbb00 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct
> xe_vma_op *op, struct xe_vma *vma,
> u8 tile_mask)
> {
> INIT_LIST_HEAD(&op->link);
> + op->tile_mask = tile_mask;
> op->base.op = DRM_GPUVA_OP_MAP;
> op->base.map.va.addr = vma->gpuva.va.addr;
> op->base.map.va.range = vma->gpuva.va.range;
> @@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> rebind_worker)
> return err;
> }
>
> +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> u8 tile_mask)
I am trying to figure out why this function is necessary. We are only binding one vma here. Why do we need to create an xe_vma_ops list? We are only adding one vma to this list...
Oak
> +{
> + struct dma_fence *fence = NULL;
> + struct xe_vma_ops vops;
> + struct xe_vma_op *op, *next_op;
> + int err;
> +
> + lockdep_assert_held(&vm->lock);
> + xe_vm_assert_held(vm);
> + xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> +
> + xe_vma_ops_init(&vops);
> +
> + err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
> + if (err)
> + return ERR_PTR(err);
> +
> + fence = ops_execute(vm, &vops);
> +
> + list_for_each_entry_safe(op, next_op, &vops.list, link) {
> + list_del(&op->link);
> + kfree(op);
> + }
> +
> + return fence;
> +}
> +
> static void xe_vma_free(struct xe_vma *vma)
> {
> if (xe_vma_is_userptr(vma))
> @@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
> static struct dma_fence *
> xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> struct xe_sync_entry *syncs, u32 num_syncs,
> - bool first_op, bool last_op)
> + u8 tile_mask, bool first_op, bool last_op)
> {
> struct xe_tile *tile;
> struct dma_fence *fence;
> @@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
> struct dma_fence_array *cf = NULL;
> struct xe_vm *vm = xe_vma_vm(vma);
> int cur_fence = 0, i;
> - int number_tiles = hweight8(vma->tile_mask);
> + int number_tiles = hweight8(tile_mask);
> int err;
> u8 id;
>
> @@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
> }
>
> for_each_tile(tile, vm->xe, id) {
> - if (!(vma->tile_mask & BIT(id)))
> + if (!(tile_mask & BIT(id)))
> goto next;
>
> fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> @@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32
> num_syncs)
> static struct dma_fence *
> xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> xe_exec_queue *q,
> struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> - bool immediate, bool first_op, bool last_op)
> + u8 tile_mask, bool immediate, bool first_op, bool last_op)
> {
> struct dma_fence *fence;
> struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> @@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> *vma, struct xe_exec_queue *q,
> vma->ufence = ufence ?: vma->ufence;
>
> if (immediate) {
> - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> first_op,
> - last_op);
> + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> tile_mask,
> + first_op, last_op);
> if (IS_ERR(fence))
> return fence;
> } else {
> @@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma
> *vma,
>
> if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> {
> return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> num_syncs,
> - true, first_op, last_op);
> + vma->tile_mask, true, first_op, last_op);
> } else {
> struct dma_fence *fence =
> xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
> @@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
> struct xe_device *xe = vm->xe;
> struct xe_vma_op *last_op = NULL;
> struct drm_gpuva_op *__op;
> + struct xe_tile *tile;
> + u8 id, tile_mask = 0;
> int err = 0;
>
> lockdep_assert_held_write(&vm->lock);
>
> + for_each_tile(tile, vm->xe, id)
> + tile_mask |= 0x1 << id;
> +
> drm_gpuva_for_each_op(__op, ops) {
> struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> struct xe_vma *vma;
> @@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
> }
>
> op->q = q;
> + op->tile_mask = tile_mask;
>
> switch (op->base.op) {
> case DRM_GPUVA_OP_MAP:
> @@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
> fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> op->syncs, op->num_syncs,
> op->map.immediate
> || !xe_vm_in_fault_mode(vm),
> + op->tile_mask,
> op->flags & XE_VMA_OP_FIRST,
> op->flags & XE_VMA_OP_LAST);
> break;
> @@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
> dma_fence_put(fence);
> fence = xe_vm_bind(vm, op->remap.prev, op->q,
> xe_vma_bo(op->remap.prev), op-
> >syncs,
> - op->num_syncs, true, false,
> + op->num_syncs,
> + op->remap.prev->tile_mask, true,
> + false,
> op->flags & XE_VMA_OP_LAST
> && !next);
> op->remap.prev->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
> if (IS_ERR(fence))
> @@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
> fence = xe_vm_bind(vm, op->remap.next, op->q,
> xe_vma_bo(op->remap.next),
> op->syncs, op->num_syncs,
> - true, false,
> - op->flags & XE_VMA_OP_LAST);
> + op->remap.next->tile_mask, true,
> + false, op->flags &
> XE_VMA_OP_LAST);
> op->remap.next->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
> if (IS_ERR(fence))
> break;
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 306cd0934a19..204a4ff63f88 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm
> *vm);
> int xe_vm_userptr_check_repin(struct xe_vm *vm);
>
> int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> + u8 tile_mask);
>
> int xe_vm_invalidate_vma(struct xe_vma *vma);
>
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> b/drivers/gpu/drm/xe/xe_vm_types.h
> index 149ab892967e..e9cd6da6263a 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -343,6 +343,8 @@ struct xe_vma_op {
> struct list_head link;
> /** @flags: operation flags */
> enum xe_vma_op_flags flags;
> + /** @tile_mask: Tile mask for operation */
> + u8 tile_mask;
>
> union {
> /** @map: VMA map operation specific data */
> --
> 2.34.1
More information about the Intel-xe
mailing list