[PATCH v2] drm/xe: Add stats for vma page faults

Matthew Auld matthew.auld at intel.com
Mon Feb 10 16:28:09 UTC 2025


On 06/02/2025 13:45, Francois Dugast wrote:
> Add new entries in stats for vma page faults. If CONFIG_DEBUG_FS is
> enabled, the count and number of bytes can be viewed per GT in the
> stat debugfs file. This helps when testing, to confirm page faults
> have been triggered as expected. It also helps when looking at the
> performance impact of page faults. Data is simply collected when
> entering the page fault handler, so there is no indication of whether
> the fault completed successfully, with or without retries, etc.
> 
> Example output:
> 
>      cat /sys/kernel/debug/dri/0/gt0/stats
>      tlb_inval_count: 129
>      vma_pagefault_count: 12
>      vma_pagefault_bytes: 98304
> 
> v2: Rebase
> 
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_gt_pagefault.c   | 10 +++++++---
>   drivers/gpu/drm/xe/xe_gt_stats.c       |  2 ++
>   drivers/gpu/drm/xe/xe_gt_stats_types.h |  2 ++
>   3 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index cb92fb5cbc75..46701ca11ce0 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -14,6 +14,7 @@
>   #include "abi/guc_actions_abi.h"
>   #include "xe_bo.h"
>   #include "xe_gt.h"
> +#include "xe_gt_stats.h"
>   #include "xe_gt_tlb_invalidation.h"
>   #include "xe_guc.h"
>   #include "xe_guc_ct.h"
> @@ -124,16 +125,20 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
>   	return 0;
>   }
>   
> -static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
> +static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf,
>   				struct xe_vma *vma)
>   {
>   	struct xe_vm *vm = xe_vma_vm(vma);
> +	struct xe_tile *tile = gt_to_tile(gt);
>   	struct drm_exec exec;
>   	struct dma_fence *fence;
>   	ktime_t end = 0;
>   	int err;
>   	bool atomic;
>   
> +	xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
> +	xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, xe_vma_size(vma));

Since this can be quite a large number, and a normal workload could
easily touch many GB spread across a number of VMAs, we might hit the
atomic int limit (only ~2G bytes) quite easily.

Should we switch the unit over to MB/KB, and also start using 64-bit
atomics, if we want to count stuff like this? What do you think?
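
For illustration only (not part of this patch), and assuming the per-GT
stats are backed by plain 32-bit atomic_t counters today, a 64-bit
variant of the increment helper could look roughly like this; the
stats64 array, helper name and KIB stat id below are hypothetical:

	/*
	 * Hypothetical sketch: assumes a separate atomic64_t counter
	 * array (called stats64 here) alongside the existing stats.
	 */
	static inline void xe_gt_stats64_incr(struct xe_gt *gt,
					      const enum xe_gt_stats_id id,
					      s64 incr)
	{
		if (id >= __XE_GT_STATS_NUM_IDS)
			return;

		atomic64_add(incr, &gt->stats64.counters[id]);
	}

with the call site then able to keep counting in bytes:

	xe_gt_stats64_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES,
			   xe_vma_size(vma));

Alternatively, if we stick with 32-bit counters, counting in KiB would
push the wrap point out to roughly 2 TiB:

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KIB,
			 xe_vma_size(vma) / SZ_1K);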

> +
>   	trace_xe_vma_pagefault(vma);
>   	atomic = access_is_atomic(pf->access_type);
>   
> @@ -202,7 +207,6 @@ static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
>   static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>   {
>   	struct xe_device *xe = gt_to_xe(gt);
> -	struct xe_tile *tile = gt_to_tile(gt);
>   	struct xe_vm *vm;
>   	struct xe_vma *vma = NULL;
>   	int err;
> @@ -231,7 +235,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>   		goto unlock_vm;
>   	}
>   
> -	err = handle_vma_pagefault(tile, pf, vma);
> +	err = handle_vma_pagefault(gt, pf, vma);
>   
>   unlock_vm:
>   	if (!err)
> diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
> index 7a6c1d808e41..2e9879ea4674 100644
> --- a/drivers/gpu/drm/xe/xe_gt_stats.c
> +++ b/drivers/gpu/drm/xe/xe_gt_stats.c
> @@ -28,6 +28,8 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
>   
>   static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
>   	"tlb_inval_count",
> +	"vma_pagefault_count",
> +	"vma_pagefault_bytes",
>   };
>   
>   /**
> diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
> index 2fc055e39f27..b072bd80c4b9 100644
> --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
> @@ -8,6 +8,8 @@
>   
>   enum xe_gt_stats_id {
>   	XE_GT_STATS_ID_TLB_INVAL,
> +	XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
> +	XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES,
>   	/* must be the last entry */
>   	__XE_GT_STATS_NUM_IDS,
>   };


