[Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access counter worker functions
Zeng, Oak
oak.zeng at intel.com
Fri Nov 3 02:46:47 UTC 2023
Hi,
Can someone explain why we introduced a circular buffer for page fault handling in xekmd?
i915 page faults are also handled with a work queue/worker: whenever we get a GuC-to-host request, we queue_work() to process the incoming request. There is no circular buffer, and we didn't have a problem there.
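Roughly the pattern I mean, as a minimal sketch (hypothetical names like g2h_msg and g2h_handler, written from memory rather than copied from i915):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct g2h_msg {
	struct list_head link;
	u32 payload[4];			/* one fixed-size G2H message */
};

struct g2h_state {
	spinlock_t lock;		/* protects msg_list */
	struct list_head msg_list;
	struct work_struct worker;
};

/*
 * G2H handler: copy the message onto a list and kick the worker.
 * Assumes process/softirq context; use the _irqsave variants if this
 * can be called from hardirq context.
 */
static int g2h_handler(struct g2h_state *st, const u32 *msg)
{
	struct g2h_msg *m = kmalloc(sizeof(*m), GFP_ATOMIC);

	if (!m)
		return -ENOMEM;
	memcpy(m->payload, msg, sizeof(m->payload));

	spin_lock(&st->lock);
	list_add_tail(&m->link, &st->msg_list);
	spin_unlock(&st->lock);

	/* No-op if already queued; harmless, see the worker below. */
	queue_work(system_unbound_wq, &st->worker);
	return 0;
}

/* Worker: drain the whole list, not just one entry. */
static void g2h_work_func(struct work_struct *w)
{
	struct g2h_state *st = container_of(w, struct g2h_state, worker);
	struct g2h_msg *m;

	for (;;) {
		spin_lock(&st->lock);
		m = list_first_entry_or_null(&st->msg_list,
					     struct g2h_msg, link);
		if (m)
			list_del(&m->link);
		spin_unlock(&st->lock);
		if (!m)
			break;
		/* process_g2h(m->payload); */
		kfree(m);
	}
}

/* Setup (elsewhere): spin_lock_init(&st->lock);
 * INIT_LIST_HEAD(&st->msg_list);
 * INIT_WORK(&st->worker, g2h_work_func);
 */

With that shape, queue_work() returning false is harmless: either the worker is merely pending and will drain the new message when it runs, or it is already running and its drain loop will pick the message up.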
Thanks,
Oak
> -----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Brian Welty
> Sent: Thursday, November 2, 2023 9:43 PM
> To: intel-xe at lists.freedesktop.org
> Subject: [Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access counter worker functions
>
> When processing G2H messages for pagefaults or access counters, we queue a
> work item and call queue_work(). This silently does nothing if the work
> item is already queued to run.
> The expectation is that the worker function will do more than process a
> single item and return: it needs to either process all pending items or
> requeue itself while items are pending. But requeuing adds latency, and a
> potential context switch can occur.
>
> We don't want to add unnecessary latency, so the worker should process
> as many faults as it can within a reasonable duration.
> We also do not want to hog the CPU core, so we execute in a loop
> and requeue ourselves if still running after more than 20 ms.
> This seems a reasonable framework and is easy to tune further if needed.
>
> This resolves issues seen with several igt@xe_exec_fault_mode subtests
> where the GPU will hang when the KMD ignores a pending pagefault.
>
> v2: requeue the worker instead of having an internal processing loop.
> v3: implement hybrid model of v1 and v2:
>     now run for up to 20 msec before requeuing if still running
>
> Signed-off-by: Brian Welty <brian.welty at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 80 ++++++++++++++++------------
> 1 file changed, 46 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index ab6daebbd77c..c99af751d1fb 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
>
> #define PF_MSG_LEN_DW 4
>
> -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> +static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> {
> const struct xe_guc_pagefault_desc *desc;
> - int ret = 0;
> + bool ret = false;
>
> spin_lock_irq(&pf_queue->lock);
> if (pf_queue->head != pf_queue->tail) {
> @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
>
> pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW;
> - } else {
> - ret = -1;
> + ret = true;
> }
> spin_unlock_irq(&pf_queue->lock);
>
> @@ -348,6 +347,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
> return full ? -ENOSPC : 0;
> }
>
> +#define USM_QUEUE_MAX_RUNTIME_MS 20
> +
> static void pf_queue_work_func(struct work_struct *w)
> {
> struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
> @@ -355,31 +356,37 @@ static void pf_queue_work_func(struct work_struct *w)
> struct xe_device *xe = gt_to_xe(gt);
> struct xe_guc_pagefault_reply reply = {};
> struct pagefault pf = {};
> + unsigned long threshold;
> int ret;
>
> - ret = get_pagefault(pf_queue, &pf);
> - if (ret)
> - return;
> + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
>
> - ret = handle_pagefault(gt, &pf);
> - if (unlikely(ret)) {
> - print_pagefault(xe, &pf);
> - pf.fault_unsuccessful = 1;
> - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
> - }
> + while (get_pagefault(pf_queue, &pf)) {
> + ret = handle_pagefault(gt, &pf);
> + if (unlikely(ret)) {
> + print_pagefault(xe, &pf);
> + pf.fault_unsuccessful = 1;
> + drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
> + }
> +
> + reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> + FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> + FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> + FIELD_PREP(PFR_ASID, pf.asid);
>
> - reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> - FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> - FIELD_PREP(PFR_ASID, pf.asid);
> + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> + FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> + FIELD_PREP(PFR_PDATA, pf.pdata);
>
> - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> - FIELD_PREP(PFR_PDATA, pf.pdata);
> + send_pagefault_reply(&gt->uc.guc, &reply);
>
> - send_pagefault_reply(&gt->uc.guc, &reply);
> + if (time_after(jiffies, threshold)) {
> + queue_work(gt->usm.pf_wq, w);
> + break;
> + }
> + }
> }
>
> static void acc_queue_work_func(struct work_struct *w);
> @@ -544,10 +551,10 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>
> #define ACC_MSG_LEN_DW 4
>
> -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
> {
> const struct xe_guc_acc_desc *desc;
> - int ret = 0;
> + bool ret = false;
>
> spin_lock(&acc_queue->lock);
> if (acc_queue->head != acc_queue->tail) {
> @@ -567,8 +574,7 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
>
> acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % ACC_QUEUE_NUM_DW;
> - } else {
> - ret = -1;
> + ret = true;
> }
> spin_unlock(&acc_queue->lock);
>
> @@ -581,16 +587,22 @@ static void acc_queue_work_func(struct work_struct *w)
> struct xe_gt *gt = acc_queue->gt;
> struct xe_device *xe = gt_to_xe(gt);
> struct acc acc = {};
> + unsigned long threshold;
> int ret;
>
> - ret = get_acc(acc_queue, &acc);
> - if (ret)
> - return;
> + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
>
> - ret = handle_acc(gt, &acc);
> - if (unlikely(ret)) {
> - print_acc(xe, &acc);
> - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> + while (get_acc(acc_queue, &acc)) {
> + ret = handle_acc(gt, &acc);
> + if (unlikely(ret)) {
> + print_acc(xe, &acc);
> + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> + }
> +
> + if (time_after(jiffies, threshold)) {
> + queue_work(gt->usm.acc_wq, w);
> + break;
> + }
> }
> }
>
> --
> 2.38.0