[PATCH 2/3] drm/xe/pf: Invalidate LMTT during LMEM unprovisioning
Matthew Brost
matthew.brost at intel.com
Thu Jul 3 17:31:13 UTC 2025
On Thu, Jul 03, 2025 at 12:30:40AM +0200, Michal Wajdeczko wrote:
> Invalidate LMTT immediately after removing VF's LMTT page tables
> and clearing root PTE in the LMTT PD to avoid any invalid access
> by the hardware (and VF) due to stale data.
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Piotr Piórkowski <piotr.piorkowski at intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.h | 4 ++
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 35 +++++++++++++
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 2 +
> drivers/gpu/drm/xe/xe_lmtt.c | 56 +++++++++++++++++++++
> drivers/gpu/drm/xe/xe_lmtt.h | 1 +
> 5 files changed, 98 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
> index e4da797a984b..a7acd899aa76 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -130,6 +130,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
> for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \
> for_each_if((tile__) = &(xe__)->tiles[(id__)])
>
> +#define for_each_gt_on_tile(gt__, tile__, id__) \
> + for ((id__) = 0; (id__) < XE_MAX_GT_PER_TILE; (id__)++) \
> + for_each_if((gt__) = xe_tile_get_gt((tile__), (id__)))
> +
> /*
> * FIXME: This only works for now since multi-tile and standalone media
> * happen to be mutually exclusive. Future platforms may change this...
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 6088df8e159c..4fdd5b300265 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -330,6 +330,41 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
> return 0;
> }
>
> +static int send_tlb_invalidation_all(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence)
> +{
> + u32 action[] = {
> + XE_GUC_ACTION_TLB_INVALIDATION_ALL,
> + 0, /* seqno, replaced in send_tlb_invalidation */
> + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
> + };
> +
> + return send_tlb_invalidation(&gt->uc.guc, fence, action, ARRAY_SIZE(action));
> +}
> +
> +/**
> + * xe_gt_tlb_invalidation_all_async - Invalidate all TLBs across PF and all VFs.
> + * @gt: the &xe_gt structure
> + * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion
> + *
> + * Send a request to invalidate all TLBs across PF and all VFs.
> + *
> + * Return: 0 on success, negative error code on error
> + */
> +int xe_gt_tlb_invalidation_all_async(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence)
I'd drop the _async part of the naming as I think it is implied with the
fence argument that this is async, like xe_gt_tlb_invalidation_range.
All the changes around GT TLB invalidations look correct (for now, this
whole layer needs rework).
Matt
> +{
> + int err;
> +
> + xe_gt_assert(gt, gt == fence->gt);
> +
> + err = send_tlb_invalidation_all(gt, fence);
> + if (err)
> + xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err));
> +
> + return err;
> +}
> +
> /*
> * Ensure that roundup_pow_of_two(length) doesn't overflow.
> * Note that roundup_pow_of_two() operates on unsigned long,
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index 31072dbcad8e..40648f952aee 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -20,6 +20,8 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
> void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
> int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
> void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm);
> +int xe_gt_tlb_invalidation_all_async(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence);
> int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> u64 start, u64 end, u32 asid);
> diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
> index b56437a816e4..aa51c06e2416 100644
> --- a/drivers/gpu/drm/xe/xe_lmtt.c
> +++ b/drivers/gpu/drm/xe/xe_lmtt.c
> @@ -11,6 +11,7 @@
>
> #include "xe_assert.h"
> #include "xe_bo.h"
> +#include "xe_gt_tlb_invalidation.h"
> #include "xe_lmtt.h"
> #include "xe_map.h"
> #include "xe_mmio.h"
> @@ -216,6 +217,60 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
> lmtt_setup_dir_ptr(lmtt);
> }
>
> +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt)
> +{
> + struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE];
> + struct xe_gt_tlb_invalidation_fence *fence = fences;
> + struct xe_tile *tile = lmtt_to_tile(lmtt);
> + struct xe_gt *gt;
> + int num_fences;
> + int result = 0;
> + int err;
> + u8 id;
> +
> + for_each_gt_on_tile(gt, tile, id) {
> + xe_gt_tlb_invalidation_fence_init(gt, fence, true);
> + err = xe_gt_tlb_invalidation_all_async(gt, fence);
> + result = result ?: err;
> + fence++;
> + }
> +
> + num_fences = fence - fences;
> + lmtt_debug(lmtt, "num_fences=%d err=%d\n", num_fences, result);
> +
> + /*
> + * It is fine to wait for all fences, even for those which covers the
> + * invalidation request that failed, as such fence should be already
> + * marked as signaled.
> + */
> + fence = fences;
> + for_each_gt_on_tile(gt, tile, id)
> + xe_gt_tlb_invalidation_fence_wait(fence++);
> +
> + return result;
> +}
> +
> +/**
> + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware.
> + * @lmtt: the &xe_lmtt to invalidate
> + *
> + * Send requests to all GuCs on this tile to invalidate all TLBs.
> + *
> + * This function should be called only when running as a PF driver.
> + */
> +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt)
> +{
> + struct xe_device *xe = lmtt_to_xe(lmtt);
> + int err;
> +
> + lmtt_assert(lmtt, IS_SRIOV_PF(xe));
> +
> + err = lmtt_invalidate_hw(lmtt);
> + if (err)
> + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)",
> + lmtt_to_tile(lmtt)->id, ERR_PTR(err));
> +}
> +
> static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
> u64 pte, unsigned int idx)
> {
> @@ -265,6 +320,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
> return;
>
> lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
> + lmtt_invalidate_hw(lmtt);
>
> lmtt_assert(lmtt, pd->level > 0);
> lmtt_assert(lmtt, pt->level == pd->level - 1);
> diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
> index cb10ef994db6..75a234fbf367 100644
> --- a/drivers/gpu/drm/xe/xe_lmtt.h
> +++ b/drivers/gpu/drm/xe/xe_lmtt.h
> @@ -15,6 +15,7 @@ struct xe_lmtt_ops;
> #ifdef CONFIG_PCI_IOV
> int xe_lmtt_init(struct xe_lmtt *lmtt);
> void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
> +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt);
> int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
> int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
> void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
> --
> 2.47.1
>
More information about the Intel-xe
mailing list