[PATCH v4 14/30] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
Matthew Brost
matthew.brost at intel.com
Tue Mar 26 18:57:08 UTC 2024
On Mon, Mar 25, 2024 at 03:35:16PM -0600, Zeng, Oak wrote:
> Hi Matt,
>
> Patch looks good to me. See one question inline
>
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew
> > Brost
> > Sent: Friday, March 8, 2024 12:08 AM
> > To: intel-xe at lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost at intel.com>
> > Subject: [PATCH v4 14/30] drm/xe: Add xe_gt_tlb_invalidation_range and
> > convert PT layer to use this
> >
> > xe_gt_tlb_invalidation_range accepts a start and end address rather than
> > a VMA. This will enable multiple VMAs to be invalidated in a single
> > invalidation. Update the PT layer to use this new function.
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++------
> > drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 ++
> > drivers/gpu/drm/xe/xe_pt.c | 25 ++++++---
> > 3 files changed, 65 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > index a3c4ffba679d..ac2bf86de39a 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > @@ -264,11 +264,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> > }
> >
> > /**
> > - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
> > + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
> > + * address range
> > + *
> > * @gt: graphics tile
> > * @fence: invalidation fence which will be signaled on TLB invalidation
> > * completion, can be NULL
> > - * @vma: VMA to invalidate
> > + * @start: start address
> > + * @end: end address
> > + * @asid: address space id
> > *
> > * Issue a range based TLB invalidation if supported, if not fallback to a full
> > * TLB invalidation. Completion of TLB is asynchronous and caller can either use
> > @@ -278,17 +282,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> > * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
> > * negative error code on error.
> > */
> > -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > - struct xe_gt_tlb_invalidation_fence *fence,
> > - struct xe_vma *vma)
> > +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> > + struct xe_gt_tlb_invalidation_fence *fence,
> > + u64 start, u64 end, u32 asid)
> > {
> > struct xe_device *xe = gt_to_xe(gt);
> > #define MAX_TLB_INVALIDATION_LEN 7
> > u32 action[MAX_TLB_INVALIDATION_LEN];
> > int len = 0;
> >
> > - xe_gt_assert(gt, vma);
> > -
> > /* Execlists not supported */
> > if (gt_to_xe(gt)->info.force_execlist) {
> > if (fence)
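
To spell out the completion contract from the kernel-doc above: a caller has
two options. An illustrative fragment (not from this patch), assuming gt,
start, end and asid are in scope:

        int seqno, err;

        /* (a) No fence: wait synchronously on the returned seqno. */
        seqno = xe_gt_tlb_invalidation_range(gt, NULL, start, end, asid);
        if (seqno > 0)
                err = xe_gt_tlb_invalidation_wait(gt, seqno);

        /*
         * (b) With a fence: completion signals the fence asynchronously,
         * which is what the PT layer further down relies on.
         */
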
> > @@ -302,8 +304,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > if (!xe->info.has_range_tlb_invalidation) {
> > action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
> > } else {
> > - u64 start = xe_vma_start(vma);
> > - u64 length = xe_vma_size(vma);
> > + u64 orig_start = start;
> > + u64 length = end - start;
> > u64 align, end;
> >
> > if (length < SZ_4K)
> > @@ -316,12 +318,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > * address mask covering the required range.
> > */
> > align = roundup_pow_of_two(length);
> > - start = ALIGN_DOWN(xe_vma_start(vma), align);
> > - end = ALIGN(xe_vma_end(vma), align);
> > + start = ALIGN_DOWN(start, align);
> > + end = ALIGN(end, align);
> > length = align;
> > while (start + length < end) {
> > length <<= 1;
> > - start = ALIGN_DOWN(xe_vma_start(vma), length);
> > + start = ALIGN_DOWN(orig_start, length);
> > }
> >
> > /*
> > @@ -330,16 +332,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > */
> > if (length >= SZ_2M) {
> > length = max_t(u64, SZ_16M, length);
> > - start = ALIGN_DOWN(xe_vma_start(vma), length);
> > + start = ALIGN_DOWN(orig_start, length);
> > }
> >
> > xe_gt_assert(gt, length >= SZ_4K);
> > xe_gt_assert(gt, is_power_of_2(length));
> > - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
> > + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> > + ilog2(SZ_2M) + 1)));
> > xe_gt_assert(gt, IS_ALIGNED(start, length));
> >
> > action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> > - action[len++] = xe_vma_vm(vma)->usm.asid;
> > + action[len++] = asid;
> > action[len++] = lower_32_bits(start);
> > action[len++] = upper_32_bits(start);
> > action[len++] = ilog2(length) - ilog2(SZ_4K);
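
The widening math above is the core of the patch, so here is a standalone
sketch of what it computes (illustrative only; the names and userspace-style
helpers are mine, not the driver's, and the SZ_2M -> SZ_16M promotion is
omitted):

#include <stdint.h>

#define SZ_4K 0x1000ULL

/* Smallest power of two >= x, for nonzero x. */
static uint64_t pow2_roundup(uint64_t x)
{
        uint64_t r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

/*
 * Compute a power-of-two sized, size-aligned window covering [start, end),
 * mirroring the widening loop above.
 */
static void widen_range(uint64_t start, uint64_t end,
                        uint64_t *out_start, uint64_t *out_length)
{
        uint64_t orig_start = start;
        uint64_t length = end - start;

        if (length < SZ_4K)
                length = SZ_4K;

        length = pow2_roundup(length);            /* roundup_pow_of_two() */
        start &= ~(length - 1);                   /* ALIGN_DOWN(start, align) */
        end = (end + length - 1) & ~(length - 1); /* ALIGN(end, align) */
        while (start + length < end) {
                length <<= 1;
                start = orig_start & ~(length - 1);
        }

        *out_start = start;
        *out_length = length;
}

For example, widen_range(0x5000, 0x9000, ...) settles on a 64K window at
address 0: no smaller size-aligned power-of-two window covers that
misaligned 16K span.
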
> > @@ -350,6 +353,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
> > }
> >
> > +/**
> > + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
> > + * @gt: graphics tile
> > + * @fence: invalidation fence which will be signaled on TLB invalidation
> > + * completion, can be NULL
> > + * @vma: VMA to invalidate
> > + *
> > + * Issue a range based TLB invalidation if supported, if not fallback to a full
> > + * TLB invalidation. Completion of TLB is asynchronous and caller can either use
> > + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
> > + * completion.
> > + *
> > + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
> > + * negative error code on error.
> > + */
> > +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > + struct xe_gt_tlb_invalidation_fence *fence,
> > + struct xe_vma *vma)
> > +{
> > + xe_gt_assert(gt, vma);
> > +
> > + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
> > + xe_vma_end(vma),
> > + xe_vma_vm(vma)->usm.asid);
> > +}
> > +
> > /**
> > * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
> > * @gt: graphics tile
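
The point of the new signature, per the commit message, is that later
patches can batch several VMAs into one invalidation. A hypothetical caller
(not part of this series) might look like this, assuming both VMAs belong to
the same VM and therefore share an ASID:

/* Invalidate the span covering two VMAs with a single GuC request. */
static int invalidate_vma_pair(struct xe_gt *gt,
                               struct xe_gt_tlb_invalidation_fence *fence,
                               struct xe_vma *a, struct xe_vma *b)
{
        u64 start = min(xe_vma_start(a), xe_vma_start(b));
        u64 end = max(xe_vma_end(a), xe_vma_end(b));

        return xe_gt_tlb_invalidation_range(gt, fence, start, end,
                                            xe_vma_vm(a)->usm.asid);
}

Any gap between the two VMAs gets invalidated as well, which is harmless; it
trades precision for a single request.
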
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > index fbb743d80d2c..bf3bebd9f985 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
> > int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> > struct xe_gt_tlb_invalidation_fence *fence,
> > struct xe_vma *vma);
> > +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> > + struct xe_gt_tlb_invalidation_fence *fence,
> > + u64 start, u64 end, u32 asid);
> > int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
> > int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
> >
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index 7f54bc3e389d..110d6917089b 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -1074,10 +1074,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
> > struct invalidation_fence {
> > struct xe_gt_tlb_invalidation_fence base;
> > struct xe_gt *gt;
> > - struct xe_vma *vma;
> > struct dma_fence *fence;
> > struct dma_fence_cb cb;
> > struct work_struct work;
> > + u64 start;
> > + u64 end;
> > + u32 asid;
>
>
> I didn't see start/end/asid used anywhere else, except in the fence_init function below... do we really need those members?
>
Yes, see below.
> Oak
>
> > };
> >
> > static const char *
> > @@ -1120,13 +1122,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
> > container_of(w, struct invalidation_fence, work);
> >
> > trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
> > - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
> > + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
> > + ifence->end, ifence->asid);
They are used right here ^^^ The invalidation is kicked off from this work
function only after the dependent bind/unbind fence signals, so the range and
ASID have to be captured in the ifence at init time rather than read back
from the VMA, which may be gone by then.
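
To make that concrete, this is the general capture-then-defer shape
(illustrative sketch, simplified from the code above; issue_invalidation()
is a stand-in, not a real driver function):

struct deferred_inval {
        struct work_struct work;
        u64 start;
        u64 end;
        u32 asid;
};

static void deferred_inval_work(struct work_struct *w)
{
        struct deferred_inval *di =
                container_of(w, struct deferred_inval, work);

        /*
         * Runs after the bind/unbind fence has signaled; the VMA that
         * produced these values may already be gone, so only the captured
         * copies are safe to use here.
         */
        issue_invalidation(di->start, di->end, di->asid);
}

Matt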
> > }
> >
> > static int invalidation_fence_init(struct xe_gt *gt,
> > struct invalidation_fence *ifence,
> > struct dma_fence *fence,
> > - struct xe_vma *vma)
> > + u64 start, u64 end, u32 asid)
> > {
> > int ret;
> >
> > @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
> > dma_fence_get(&ifence->base.base); /* Ref for caller */
> > ifence->fence = fence;
> > ifence->gt = gt;
> > - ifence->vma = vma;
> > + ifence->start = start;
> > + ifence->end = end;
> > + ifence->asid = asid;
> >
> > INIT_WORK(&ifence->work, invalidation_fence_work_func);
> > ret = dma_fence_add_callback(fence, &ifence->cb,
> > invalidation_fence_cb);
> > @@ -1286,8 +1291,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
> >
> > /* TLB invalidation must be done before signaling rebind */
> > if (ifence) {
> > - int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> > - vma);
> > + int err = invalidation_fence_init(tile->primary_gt,
> > + ifence, fence,
> > + xe_vma_start(vma),
> > + xe_vma_end(vma),
> > + xe_vma_vm(vma)->usm.asid);
> > if (err) {
> > dma_fence_put(fence);
> > kfree(ifence);
> > @@ -1625,7 +1633,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
> > dma_fence_wait(fence, false);
> >
> > /* TLB invalidation must be done before signaling unbind */
> > - err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
> > + err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> > + xe_vma_start(vma),
> > + xe_vma_end(vma),
> > + xe_vma_vm(vma)->usm.asid);
> > if (err) {
> > dma_fence_put(fence);
> > kfree(ifence);
> > --
> > 2.34.1
>