[Intel-xe] [PATCH 1/1] drm/xe: Fix pagefault and access counter worker functions
Summers, Stuart
stuart.summers at intel.com
Thu Nov 2 14:26:34 UTC 2023
On Wed, 2023-11-01 at 15:52 -0700, Brian Welty wrote:
> When processing G2H messages for pagefault or access counters, we
> queue a work item and call queue_work(). This fails if the worker
> thread is already queued to run.
> The expectation is that the worker function will do more than
> process a single item and return. It needs to either process all
> pending items or requeue itself (when items are pending) before
> returning.
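For context: queue_work() returns false and does nothing when the work
item is already pending, which is how a single-shot worker can strand
entries in the ring. A minimal sketch of the requeue pattern described
above; every name, type, and size here is hypothetical, not the xe code:

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#define DEMO_RING_LEN 64

struct demo_queue {
	spinlock_t lock;
	u32 data[DEMO_RING_LEN];
	u32 head, tail;			/* equal when the ring is empty */
	struct work_struct worker;
	struct workqueue_struct *wq;
};

static void demo_worker(struct work_struct *w)
{
	struct demo_queue *q = container_of(w, struct demo_queue, worker);
	bool pending;
	u32 msg;

	spin_lock_irq(&q->lock);
	if (q->head == q->tail) {
		spin_unlock_irq(&q->lock);
		return;			/* ring empty, nothing to do */
	}
	msg = q->data[q->head];
	q->head = (q->head + 1) % DEMO_RING_LEN;
	pending = q->head != q->tail;	/* sample under the lock */
	spin_unlock_irq(&q->lock);

	pr_info("demo: handled %u\n", msg);	/* handle outside the lock */

	if (pending)
		queue_work(q->wq, w);	/* run again for the remainder */
}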
>
> Here, we choose to requeue the worker if faults are pending. This
> will introduce some unnecessary latency, but if this is the
> preference, we can start here and tune this later.
>
> This resolves issues seen with several igt@xe_exec_fault_mode
> subtests where the GPU will hang when KMD ignores a pending
> pagefault.
>
> v2: requeue the worker instead of having an internal processing
> loop. Likely, we want a hybrid model which processes more than a
> single fault before requeuing. (Matt B)
IMO we should keep this simple. Either go one way or the other, unless
there is a really good performance reason to do otherwise, which I
don't see here.

I don't understand the reasoning behind switching to the requeue
approach. IMO it is harder to follow and, as Brian mentioned, likely
less performant.

The loop approach could have a downside: keeping the worker alive to
drain many incoming faults at once ties up a CPU, perhaps impacting
NUMA placement for some application. But that seems like a corner case
that isn't worth pursuing at this point in the implementation without
any way to measure the performance impact.

Matt, what is your reason to switch here?
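For reference, the hybrid model from the v2 note might look something
like this, reusing the hypothetical demo_queue sketch above; the batch
size is made up and would need real data to tune:

#define DEMO_BATCH 8	/* made-up cap, needs tuning */

static void demo_worker_hybrid(struct work_struct *w)
{
	struct demo_queue *q = container_of(w, struct demo_queue, worker);
	bool pending = false;
	u32 msg;
	int i;

	for (i = 0; i < DEMO_BATCH; i++) {
		spin_lock_irq(&q->lock);
		if (q->head == q->tail) {
			spin_unlock_irq(&q->lock);
			return;		/* ring drained, done */
		}
		msg = q->data[q->head];
		q->head = (q->head + 1) % DEMO_RING_LEN;
		pending = q->head != q->tail;
		spin_unlock_irq(&q->lock);

		pr_info("demo: handled %u\n", msg);
	}

	if (pending)
		queue_work(q->wq, w);	/* hit the cap; yield the CPU */
}

That bounds both the requeue latency (amortized over DEMO_BATCH items)
and how long one invocation can hold a CPU, but it is more code to
reason about than either simple option.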
Thanks,
Stuart
>
> Signed-off-by: Brian Welty <brian.welty at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 33 ++++++++++++++++++----------
> 1 file changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index ab6daebbd77c..9b082817c877 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -276,10 +276,11 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
>
>  #define PF_MSG_LEN_DW 4
>
> -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> +static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf,
> +			  bool *pending)
>  {
>  	const struct xe_guc_pagefault_desc *desc;
> -	int ret = 0;
> +	bool ret = false;
>
>  	spin_lock_irq(&pf_queue->lock);
>  	if (pf_queue->head != pf_queue->tail) {
> @@ -303,8 +304,8 @@ static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
>
>  		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
>  			PF_QUEUE_NUM_DW;
> -	} else {
> -		ret = -1;
> +		*pending = pf_queue->head != pf_queue->tail;
> +		ret = true;
>  	}
>  	spin_unlock_irq(&pf_queue->lock);
>
> @@ -355,10 +356,10 @@ static void pf_queue_work_func(struct work_struct *w)
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct xe_guc_pagefault_reply reply = {};
>  	struct pagefault pf = {};
> +	bool requeue;
>  	int ret;
>
> -	ret = get_pagefault(pf_queue, &pf);
> -	if (ret)
> +	if (!get_pagefault(pf_queue, &pf, &requeue))
>  		return;
>
>  	ret = handle_pagefault(gt, &pf);
> @@ -380,6 +381,10 @@ static void pf_queue_work_func(struct work_struct *w)
>  		FIELD_PREP(PFR_PDATA, pf.pdata);
>
>  	send_pagefault_reply(&gt->uc.guc, &reply);
> +
> +	/* TODO: evaluate to service 'x' number of faults before requeue */
> +	if (requeue)
> +		queue_work(gt->usm.pf_wq, w);
>  }
>
>  static void acc_queue_work_func(struct work_struct *w);
> @@ -544,10 +549,10 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>
>  #define ACC_MSG_LEN_DW 4
>
> -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc, bool *pending)
>  {
>  	const struct xe_guc_acc_desc *desc;
> -	int ret = 0;
> +	bool ret = false;
>
>  	spin_lock(&acc_queue->lock);
>  	if (acc_queue->head != acc_queue->tail) {
> @@ -567,8 +572,8 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
>
>  		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
>  				  ACC_QUEUE_NUM_DW;
> -	} else {
> -		ret = -1;
> +		*pending = acc_queue->head != acc_queue->tail;
> +		ret = true;
>  	}
>  	spin_unlock(&acc_queue->lock);
>
> @@ -581,10 +586,10 @@ static void acc_queue_work_func(struct work_struct *w)
>  	struct xe_gt *gt = acc_queue->gt;
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct acc acc = {};
> +	bool requeue;
>  	int ret;
>
> -	ret = get_acc(acc_queue, &acc);
> -	if (ret)
> +	if (!get_acc(acc_queue, &acc, &requeue))
>  		return;
>
>  	ret = handle_acc(gt, &acc);
> @@ -592,6 +597,10 @@ static void acc_queue_work_func(struct work_struct *w)
>  		print_acc(xe, &acc);
>  		drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
>  	}
> +
> +	/* TODO: evaluate to service 'x' number of acc before requeue */
> +	if (requeue)
> +		queue_work(gt->usm.acc_wq, w);
>  }
>
>  static bool acc_queue_full(struct acc_queue *acc_queue)
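One property of the patch worth calling out: sampling *pending under
the queue lock is safe against a concurrent producer, because the
workqueue core clears the pending bit before the work function starts
running. A producer that enqueues after the sample therefore sees its
own queue_work() succeed and re-arms the worker itself. Sketch of the
producer side, again with the hypothetical names from above:

static void demo_produce(struct demo_queue *q, u32 msg)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	/* a real implementation would check for ring overflow here */
	q->data[q->tail] = msg;
	q->tail = (q->tail + 1) % DEMO_RING_LEN;
	spin_unlock_irqrestore(&q->lock, flags);

	/*
	 * No-op if the worker is already queued but not yet running;
	 * requeues it if it is mid-execution, so this entry is never
	 * stranded.
	 */
	queue_work(q->wq, &q->worker);
}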