[PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
Zeng, Oak
oak.zeng at intel.com
Fri Apr 19 16:00:37 UTC 2024
Reviewed-by: Oak Zeng <oak.zeng at intel.com>
> -----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe at lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost at intel.com>
> Subject: [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and
> convert PT layer to use this
>
> xe_gt_tlb_invalidation_range accepts a start and end address rather than
> a VMA. This will enable multiple VMAs to be invalidated in a single
> invalidation. Update the PT layer to use this new function.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++------
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 ++
> drivers/gpu/drm/xe/xe_pt.c | 25 ++++++---
> 3 files changed, 65 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 93df2d7969b3..65409f494f59 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> }
>
> /**
> - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
> + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
> + * address range
> + *
> * @gt: graphics tile
> * @fence: invalidation fence which will be signal on TLB invalidation
> * completion, can be NULL
> - * @vma: VMA to invalidate
> + * @start: start address
> + * @end: end address
> + * @asid: address space id
> *
> * Issue a range based TLB invalidation if supported, if not fallback to a full
> * TLB invalidation. Completion of TLB is asynchronous and caller can either
> use
> @@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on
> success,
> * negative error code on error.
> */
> -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> - struct xe_gt_tlb_invalidation_fence *fence,
> - struct xe_vma *vma)
> +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence,
> + u64 start, u64 end, u32 asid)
> {
> struct xe_device *xe = gt_to_xe(gt);
> #define MAX_TLB_INVALIDATION_LEN 7
> u32 action[MAX_TLB_INVALIDATION_LEN];
> int len = 0;
>
> - xe_gt_assert(gt, vma);
> -
> /* Execlists not supported */
> if (gt_to_xe(gt)->info.force_execlist) {
> if (fence)
> @@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> if (!xe->info.has_range_tlb_invalidation) {
> action[len++] =
> MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
> } else {
> - u64 start = xe_vma_start(vma);
> - u64 length = xe_vma_size(vma);
> + u64 orig_start = start;
> + u64 length = end - start;
> u64 align, end;
>
> if (length < SZ_4K)
> @@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> * address mask covering the required range.
> */
> align = roundup_pow_of_two(length);
> - start = ALIGN_DOWN(xe_vma_start(vma), align);
> - end = ALIGN(xe_vma_end(vma), align);
> + start = ALIGN_DOWN(start, align);
> + end = ALIGN(end, align);
> length = align;
> while (start + length < end) {
> length <<= 1;
> - start = ALIGN_DOWN(xe_vma_start(vma), length);
> + start = ALIGN_DOWN(orig_start, length);
> }
>
> /*
> @@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> */
> if (length >= SZ_2M) {
> length = max_t(u64, SZ_16M, length);
> - start = ALIGN_DOWN(xe_vma_start(vma), length);
> + start = ALIGN_DOWN(orig_start, length);
> }
>
> xe_gt_assert(gt, length >= SZ_4K);
> xe_gt_assert(gt, is_power_of_2(length));
> - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> ilog2(SZ_2M) + 1)));
> + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> + ilog2(SZ_2M) + 1)));
> xe_gt_assert(gt, IS_ALIGNED(start, length));
>
> action[len++] =
> MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> - action[len++] = xe_vma_vm(vma)->usm.asid;
> + action[len++] = asid;
> action[len++] = lower_32_bits(start);
> action[len++] = upper_32_bits(start);
> action[len++] = ilog2(length) - ilog2(SZ_4K);
> @@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> return send_tlb_invalidation(>->uc.guc, fence, action, len);
> }
>
> +/**
> + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a
> VMA
> + * @gt: graphics tile
> + * @fence: invalidation fence which will be signal on TLB invalidation
> + * completion, can be NULL
> + * @vma: VMA to invalidate
> + *
> + * Issue a range based TLB invalidation if supported, if not fallback to a full
> + * TLB invalidation. Completion of TLB is asynchronous and caller can either
> use
> + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
> + * completion.
> + *
> + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on
> success,
> + * negative error code on error.
> + */
> +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence,
> + struct xe_vma *vma)
> +{
> + xe_gt_assert(gt, vma);
> +
> + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
> + xe_vma_end(vma),
> + xe_vma_vm(vma)->usm.asid);
> +}
> +
> /**
> * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
> * @gt: graphics tile
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index fbb743d80d2c..bf3bebd9f985 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
> int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> struct xe_vma *vma);
> +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence,
> + u64 start, u64 end, u32 asid);
> int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
> int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg,
> u32 len);
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 5b7930f46cf3..8d3765d3351e 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops
> userptr_bind_ops = {
> struct invalidation_fence {
> struct xe_gt_tlb_invalidation_fence base;
> struct xe_gt *gt;
> - struct xe_vma *vma;
> struct dma_fence *fence;
> struct dma_fence_cb cb;
> struct work_struct work;
> + u64 start;
> + u64 end;
> + u32 asid;
> };
>
> static const char *
> @@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct
> work_struct *w)
> container_of(w, struct invalidation_fence, work);
>
> trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
> - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence-
> >vma);
> + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence-
> >start,
> + ifence->end, ifence->asid);
> }
>
> static int invalidation_fence_init(struct xe_gt *gt,
> struct invalidation_fence *ifence,
> struct dma_fence *fence,
> - struct xe_vma *vma)
> + u64 start, u64 end, u32 asid)
> {
> int ret;
>
> @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
> dma_fence_get(&ifence->base.base); /* Ref for caller */
> ifence->fence = fence;
> ifence->gt = gt;
> - ifence->vma = vma;
> + ifence->start = start;
> + ifence->end = end;
> + ifence->asid = asid;
>
> INIT_WORK(&ifence->work, invalidation_fence_work_func);
> ret = dma_fence_add_callback(fence, &ifence->cb,
> invalidation_fence_cb);
> @@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_exec_queue
>
> /* TLB invalidation must be done before signaling rebind */
> if (ifence) {
> - int err = invalidation_fence_init(tile->primary_gt,
> ifence, fence,
> - vma);
> + int err = invalidation_fence_init(tile->primary_gt,
> + ifence, fence,
> + xe_vma_start(vma),
> + xe_vma_end(vma),
> + xe_vma_vm(vma)-
> >usm.asid);
> if (err) {
> dma_fence_put(fence);
> kfree(ifence);
> @@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_exec_queu
> dma_fence_wait(fence, false);
>
> /* TLB invalidation must be done before signaling unbind */
> - err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> vma);
> + err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> + xe_vma_start(vma),
> + xe_vma_end(vma),
> + xe_vma_vm(vma)->usm.asid);
> if (err) {
> dma_fence_put(fence);
> kfree(ifence);
> --
> 2.34.1
More information about the Intel-xe
mailing list