[PATCH v2 02/11] drm/xe: Drop xe_gt_tlb_invalidation_wait
Nirmoy Das
nirmoy.das at linux.intel.com
Tue Jul 9 15:57:29 UTC 2024
On 7/8/2024 6:03 AM, Matthew Brost wrote:
> Having two methods to wait on GT TLB invalidations is not ideal. Remove
> xe_gt_tlb_invalidation_wait and only use GT TLB invalidation fences.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 101 ++++++--------------
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 7 +-
> drivers/gpu/drm/xe/xe_vm.c | 28 +++---
> 3 files changed, 49 insertions(+), 87 deletions(-)
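For readers skimming the diff: after this patch every caller follows the
same fence-based pattern. A minimal sketch of that pattern (distilled
from the hunks below, not quoted verbatim from the patch):

	struct xe_gt_tlb_invalidation_fence fence;
	int ret;

	xe_gt_tlb_invalidation_fence_init(gt, &fence);
	ret = xe_gt_tlb_invalidation_vma(gt, &fence, vma);	/* issue */
	if (ret < 0)
		return ret;

	xe_gt_tlb_invalidation_fence_wait(&fence);		/* wait */

The seqno-based wait path, and the wake-ups on guc->ct.wq that fed it,
all go away.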
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 92a18a0e4acd..687214d07ac7 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -111,7 +111,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
> void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
> {
> struct xe_gt_tlb_invalidation_fence *fence, *next;
> - struct xe_guc *guc = &gt->uc.guc;
> int pending_seqno;
>
> /*
> @@ -134,7 +133,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
> else
> pending_seqno = gt->tlb_invalidation.seqno - 1;
> WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
> - wake_up_all(&guc->ct.wq);
>
> list_for_each_entry_safe(fence, next,
> &gt->tlb_invalidation.pending_fences, link)
> @@ -165,6 +163,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> int seqno;
> int ret;
>
> + xe_gt_assert(gt, fence);
> +
> /*
> * XXX: The seqno algorithm relies on TLB invalidation being processed
> * in order which they currently are, if that changes the algorithm will
> @@ -173,10 +173,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>
> mutex_lock(&guc->ct.lock);
> seqno = gt->tlb_invalidation.seqno;
> - if (fence) {
> - fence->seqno = seqno;
> - trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
> - }
> + fence->seqno = seqno;
> + trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
> action[1] = seqno;
> ret = xe_guc_ct_send_locked(&guc->ct, action, len,
> G2H_LEN_DW_TLB_INVALIDATE, 1);
> @@ -209,7 +207,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> TLB_INVALIDATION_SEQNO_MAX;
> if (!gt->tlb_invalidation.seqno)
> gt->tlb_invalidation.seqno = 1;
> - ret = seqno;
> }
> mutex_unlock(&guc->ct.lock);
>
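This hunk is the core contract change: send_tlb_invalidation() used to
smuggle the seqno out through its return value (the dropped
ret = seqno) so fence-less callers could wait on it later; with the
fence now mandatory (see the new xe_gt_assert()), the return collapses
to 0/-errno and the seqno becomes an implementation detail carried on
the fence. Roughly, from a caller's point of view (sketch, not from
the patch):

	/* before: seqno handed back for xe_gt_tlb_invalidation_wait() */
	seqno = send_tlb_invalidation(guc, NULL, action, len);

	/* after: the fence tracks the seqno internally */
	err = send_tlb_invalidation(guc, fence, action, len);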
> @@ -223,14 +220,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> /**
> * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
> * @gt: graphics tile
> + * @fence: invalidation fence which will be signaled on TLB invalidation
> + * completion
> *
> * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
> - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
> + * caller can use the invalidation fence to wait for completion.
> *
> - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
> - * negative error code on error.
> + * Return: 0 on success, negative error code on error
> */
> -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
> +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence)
> {
> u32 action[] = {
> XE_GUC_ACTION_TLB_INVALIDATION,
> @@ -238,7 +237,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
> MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
> };
>
> - return send_tlb_invalidation(&gt->uc.guc, NULL, action,
> + return send_tlb_invalidation(&gt->uc.guc, fence, action,
> ARRAY_SIZE(action));
> }
>
> @@ -257,13 +256,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
>
> if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
> gt->uc.guc.submission_state.enabled) {
> - int seqno;
> + struct xe_gt_tlb_invalidation_fence fence;
> + int ret;
>
> - seqno = xe_gt_tlb_invalidation_guc(gt);
> - if (seqno <= 0)
> - return seqno;
> + xe_gt_tlb_invalidation_fence_init(gt, &fence);
> + ret = xe_gt_tlb_invalidation_guc(gt, &fence);
> + if (ret < 0)
> + return ret;
>
> - xe_gt_tlb_invalidation_wait(gt, seqno);
> + xe_gt_tlb_invalidation_fence_wait(&fence);
> } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
> if (IS_SRIOV_VF(xe))
> return 0;
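Worth noting in the hunk above: the fence lives on the caller's stack,
so the unconditional xe_gt_tlb_invalidation_fence_wait() before
returning is load-bearing. The fence gets linked onto
gt->tlb_invalidation.pending_fences for the done handler to signal
(see the handler further down), so returning early would leave the
handler chasing freed stack memory, roughly (hypothetical hazard, not
in the patch):

	{
		struct xe_gt_tlb_invalidation_fence fence;	/* stack storage */

		xe_gt_tlb_invalidation_fence_init(gt, &fence);
		xe_gt_tlb_invalidation_guc(gt, &fence);
		return 0;	/* BAD: fence still on the pending list */
	}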
> @@ -290,18 +291,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> *
> * @gt: graphics tile
> * @fence: invalidation fence which will be signaled on TLB invalidation
> - * completion, can be NULL
> + * completion
> * @start: start address
> * @end: end address
> * @asid: address space id
> *
> * Issue a range based TLB invalidation if supported, if not fallback to a full
> - * TLB invalidation. Completion of TLB is asynchronous and caller can either use
> - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
> - * completion.
> + * TLB invalidation. Completion of TLB is asynchronous and caller can use
> + * the invalidation fence to wait for completion.
> *
> - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
> - * negative error code on error.
> + * Return: Negative error code on error, 0 on success
> */
> int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> @@ -312,11 +311,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> u32 action[MAX_TLB_INVALIDATION_LEN];
> int len = 0;
>
> + xe_gt_assert(gt, fence);
> +
> /* Execlists not supported */
> if (gt_to_xe(gt)->info.force_execlist) {
> - if (fence)
> - __invalidation_fence_signal(xe, fence);
> -
> + __invalidation_fence_signal(xe, fence);
> return 0;
> }
>
> @@ -382,12 +381,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> * @vma: VMA to invalidate
> *
> * Issue a range based TLB invalidation if supported, if not fallback to a full
> - * TLB invalidation. Completion of TLB is asynchronous and caller can either use
> - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
> - * completion.
> + * TLB invalidation. Completion of TLB is asynchronous and caller can use
> + * the invalidation fence to wait for completion.
> *
> - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
> - * negative error code on error.
> + * Return: Negative error code on error, 0 on success
> */
> int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> @@ -400,43 +397,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> xe_vma_vm(vma)->usm.asid);
> }
>
> -/**
> - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
> - * @gt: graphics tile
> - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
> - *
> - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
> - *
> - * Return: 0 on success, -ETIME on TLB invalidation timeout
> - */
> -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
> -{
> - struct xe_guc *guc = &gt->uc.guc;
> - int ret;
> -
> - /* Execlists not supported */
> - if (gt_to_xe(gt)->info.force_execlist)
> - return 0;
> -
> - /*
> - * XXX: See above, this algorithm only works if seqno are always in
> - * order
> - */
> - ret = wait_event_timeout(guc->ct.wq,
> - tlb_invalidation_seqno_past(gt, seqno),
> - tlb_timeout_jiffies(gt));
> - if (!ret) {
> - struct drm_printer p = xe_gt_err_printer(gt);
> -
> - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
> - seqno, gt->tlb_invalidation.seqno_recv);
> - xe_guc_ct_print(&guc->ct, &p, true);
> - return -ETIME;
> - }
> -
> - return 0;
> -}
> -
> /**
> * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
> * @guc: guc
> @@ -480,12 +440,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> return 0;
> }
>
> - /*
> - * wake_up_all() and wait_event_timeout() already have the correct
> - * barriers.
> - */
> WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
> - wake_up_all(&guc->ct.wq);
>
> list_for_each_entry_safe(fence, next,
> &gt->tlb_invalidation.pending_fences, link) {
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index 948f4a2f5214..cbf49b3d0265 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -23,10 +23,15 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> u64 start, u64 end, u32 asid);
> -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
> int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
>
> void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence);
>
> +static inline void
> +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
> +{
> + dma_fence_wait(&fence->base, false);
> +}
> +
> #endif /* _XE_GT_TLB_INVALIDATION_ */
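A note on the new inline above: dma_fence_wait() with intr == false
maps to dma_fence_wait_timeout() with MAX_SCHEDULE_TIMEOUT, so the
wait is uninterruptible and the only possible return value is 0, which
is why dropping it is fine. Any future interruptible user would have
to handle -ERESTARTSYS itself, along these lines (hypothetical
sketch):

	long err = dma_fence_wait(&fence->base, true);	/* intr == true */

	if (err == -ERESTARTSYS)
		return err;	/* signal pending; caller must restart */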
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index cf3aea5d8cdc..478932fb7718 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3176,8 +3176,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
> {
> struct xe_device *xe = xe_vma_vm(vma)->xe;
> struct xe_tile *tile;
> + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
> u32 tile_needs_invalidate = 0;
> - int seqno[XE_MAX_TILES_PER_DEVICE];
> u8 id;
> int ret;
>
> @@ -3204,29 +3204,31 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
>
> for_each_tile(tile, xe, id) {
> if (xe_pt_zap_ptes(tile, vma)) {
> - tile_needs_invalidate |= BIT(id);
> xe_device_wmb(xe);
> + xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
> + &fence[id]);
> +
> /*
> * FIXME: We potentially need to invalidate multiple
> * GTs within the tile
> */
> - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
> - if (seqno[id] < 0)
> - return seqno[id];
> - }
> - }
> -
> - for_each_tile(tile, xe, id) {
> - if (tile_needs_invalidate & BIT(id)) {
> - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
> + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
> + &fence[id], vma);
> if (ret < 0)
> - return ret;
> + goto wait;
> +
> + tile_needs_invalidate |= BIT(id);
> }
> }
>
> +wait:
> + for_each_tile(tile, xe, id)
> + if (tile_needs_invalidate & BIT(id))
> + xe_gt_tlb_invalidation_fence_wait(&fence[id]);
> +
> vma->tile_invalidated = vma->tile_mask;
>
> - return 0;
> + return ret;
> }
>
> struct xe_vm_snapshot {
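The restructured loop keeps a nice property on the error path: if the
send fails for one tile, the goto still drains every fence already
issued (tile_needs_invalidate only gains a bit after a successful
send) before the original error is returned. Condensed shape of the
flow, with issue() standing in for the real call (sketch only):

	for_each_tile(tile, xe, id) {
		ret = issue(tile, &fence[id]);	/* hypothetical helper */
		if (ret < 0)
			goto wait;		/* stop issuing... */
		tile_needs_invalidate |= BIT(id);
	}
wait:
	for_each_tile(tile, xe, id)
		if (tile_needs_invalidate & BIT(id))
			xe_gt_tlb_invalidation_fence_wait(&fence[id]);	/* ...but drain what was sent */

	return ret;

One nit, though: ret is only ever assigned inside the
xe_pt_zap_ptes() branch, so if no tile needs invalidation the function
returns ret uninitialized; initializing it to 0 at declaration would
close that hole.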