[PATCH v4 3/4] drm/xe: Hold a PM ref when GT TLB invalidations are inflight

Matthew Auld matthew.auld at intel.com
Fri Jul 19 07:50:02 UTC 2024


On 18/07/2024 19:55, Matthew Brost wrote:
> Avoid GT TLB invalidation timeouts by holding a PM ref when
> invalidations are inflight.
> 
> v2:
>   - Drop PM ref before signaling fence (CI)
> 
> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Nirmoy Das <nirmoy.das at intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> Reviewed-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c   | 62 ++++++++++++-------
>   drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h   |  1 +
>   .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h |  4 ++
>   drivers/gpu/drm/xe/xe_vm.c                    |  4 +-
>   4 files changed, 47 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 668c1a3f06ac..481d83d07367 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -13,6 +13,7 @@
>   #include "xe_guc.h"
>   #include "xe_guc_ct.h"
>   #include "xe_mmio.h"
> +#include "xe_pm.h"
>   #include "xe_sriov.h"
>   #include "xe_trace.h"
>   #include "regs/xe_guc_regs.h"
> @@ -35,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
>   	return hw_tlb_timeout + 2 * delay;
>   }
>   
> +static void
> +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +{
> +	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
> +
> +	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
> +	xe_gt_tlb_invalidation_fence_fini(fence);
> +	dma_fence_signal(&fence->base);
> +	if (!stack)
> +		dma_fence_put(&fence->base);
> +}
> +
> +static void
> +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +{
> +	list_del(&fence->link);
> +	__invalidation_fence_signal(xe, fence);
> +}
>   
>   static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>   {
> @@ -56,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>   		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
>   			  fence->seqno, gt->tlb_invalidation.seqno_recv);
>   
> -		list_del(&fence->link);
>   		fence->base.error = -ETIME;
> -		dma_fence_signal(&fence->base);
> -		dma_fence_put(&fence->base);
> +		invalidation_fence_signal(xe, fence);

Should this be moved to the previous patch, which introduces the on-stack
fence?


More information about the Intel-xe mailing list