[Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access counter worker functions

Fri Nov 3 14:50:54 UTC 2023

On Fri, 2023-11-03 at 02:46 +0000, Zeng, Oak wrote:
> Hi,
> 
> Can someone explain why we introduced an circular buffer for page
> fault handling in xekmd?
> 
> I915 page fault is also handled in a work queue/worker way. Whenever
> we have a guc2host request, we queue_work to process the incoming
> request. No circular buffer. We didn't have a problem there.

i915 also uses a circular buffer:
static noinline void ct_incoming_request_worker_func(struct work_struct
*w)
{
        struct intel_guc_ct *ct =
                container_of(w, struct intel_guc_ct, requests.worker);
        bool done;

        do {
                done = ct_process_incoming_requests(ct, &ct-
>requests.incoming);
        } while (!done);
}

The issue with Xe here is the queue_work function is used to queue each
incoming fault. If we get too many concurrent faults though, there's a
chance we could queue something while a prior fault is in the process
of being launched. In that case the queue_work() function will return
false and we miss the fault handling.

In the i915 case (see above), we do handle this in a worker, but we
continue to handle requests from the GuC until nothing is left to
handle - circular buffer. In that case, the worker queue processing
those incoming CT requests shouldn't ever hit that corner case of
queue_work() returning false.

Thanks,
Stuart

> 
> Thanks,
> Oak
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf
> > Of Brian Welty
> > Sent: Thursday, November 2, 2023 9:43 PM
> > To: intel-xe at lists.freedesktop.org
> > Subject: [Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access
> > counter worker
> > functions
> > 
> > When processing G2H messages for pagefault or access counters, we
> > queue a
> > work item and call queue_work(). This fails if the worker thread is
> > already
> > queued to run.
> > The expectation is that the worker function will do more than
> > process a
> > single item and return. It needs to either process all pending
> > items or
> > requeue itself if items are pending. But requeuing will add latency
> > and
> > potential context switch can occur.
> > 
> > We don't want to add unnecessary latency and so the worker should
> > process
> > as many faults as it can within a reasonable duration of time.
> > We also do not want to hog the cpu core, so here we execute in a
> > loop
> > and requeue if still running after more than 20 ms.
> > This seems reasonable framework and easy to tune this further if
> > needed.
> > 
> > This resolves issues seen with several igt at xe_exec_fault_mode
> > subtests
> > where the GPU will hang when KMD ignores a pending pagefault.
> > 
> > v2: requeue the worker instead of having an internal processing
> > loop.
> > v3: implement hybrid model of v1 and v2
> >     now, run for 20 msec before we will requeue if still running
> > 
> > Signed-off-by: Brian Welty <brian.welty at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_pagefault.c | 80 ++++++++++++++++--------
> > ----
> >  1 file changed, 46 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > index ab6daebbd77c..c99af751d1fb 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device
> > *xe, struct
> > pagefault *pf)
> > 
> >  #define PF_MSG_LEN_DW  4
> > 
> > -static int get_pagefault(struct pf_queue *pf_queue, struct
> > pagefault *pf)
> > +static bool get_pagefault(struct pf_queue *pf_queue, struct
> > pagefault *pf)
> >  {
> >         const struct xe_guc_pagefault_desc *desc;
> > -       int ret = 0;
> > +       bool ret = false;
> > 
> >         spin_lock_irq(&pf_queue->lock);
> >         if (pf_queue->head != pf_queue->tail) {
> > @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue
> > *pf_queue, struct
> > pagefault *pf)
> > 
> >                 pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
> >                         PF_QUEUE_NUM_DW;
> > -       } else {
> > -               ret = -1;
> > +               ret = true;
> >         }
> >         spin_unlock_irq(&pf_queue->lock);
> > 
> > @@ -348,6 +347,8 @@ int xe_guc_pagefault_handler(struct xe_guc
> > *guc, u32 *msg,
> > u32 len)
> >         return full ? -ENOSPC : 0;
> >  }
> > 
> > +#define USM_QUEUE_MAX_RUNTIME_MS       20
> > +
> >  static void pf_queue_work_func(struct work_struct *w)
> >  {
> >         struct pf_queue *pf_queue = container_of(w, struct
> > pf_queue, worker);
> > @@ -355,31 +356,37 @@ static void pf_queue_work_func(struct
> > work_struct *w)
> >         struct xe_device *xe = gt_to_xe(gt);
> >         struct xe_guc_pagefault_reply reply = {};
> >         struct pagefault pf = {};
> > +       unsigned long threshold;
> >         int ret;
> > 
> > -       ret = get_pagefault(pf_queue, &pf);
> > -       if (ret)
> > -               return;
> > +       threshold = jiffies +
> > msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
> > 
> > -       ret = handle_pagefault(gt, &pf);
> > -       if (unlikely(ret)) {
> > -               print_pagefault(xe, &pf);
> > -               pf.fault_unsuccessful = 1;
> > -               drm_dbg(&xe->drm, "Fault response: Unsuccessful
> > %d\n", ret);
> > -       }
> > +       while (get_pagefault(pf_queue, &pf)) {
> > +               ret = handle_pagefault(gt, &pf);
> > +               if (unlikely(ret)) {
> > +                       print_pagefault(xe, &pf);
> > +                       pf.fault_unsuccessful = 1;
> > +                       drm_dbg(&xe->drm, "Fault response:
> > Unsuccessful %d\n",
> > ret);
> > +               }
> > +
> > +               reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > +                       FIELD_PREP(PFR_SUCCESS,
> > pf.fault_unsuccessful) |
> > +                       FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > +                       FIELD_PREP(PFR_DESC_TYPE,
> > FAULT_RESPONSE_DESC) |
> > +                       FIELD_PREP(PFR_ASID, pf.asid);
> > 
> > -       reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > -               FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> > -               FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > -               FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> > -               FIELD_PREP(PFR_ASID, pf.asid);
> > +               reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > +                       FIELD_PREP(PFR_ENG_INSTANCE,
> > pf.engine_instance) |
> > +                       FIELD_PREP(PFR_ENG_CLASS, pf.engine_class)
> > |
> > +                       FIELD_PREP(PFR_PDATA, pf.pdata);
> > 
> > -       reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > -               FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> > -               FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> > -               FIELD_PREP(PFR_PDATA, pf.pdata);
> > +               send_pagefault_reply(&gt->uc.guc, &reply);
> > 
> > -       send_pagefault_reply(&gt->uc.guc, &reply);
> > +               if (time_after(jiffies, threshold)) {
> > +                       queue_work(gt->usm.pf_wq, w);
> > +                       break;
> > +               }
> > +       }
> >  }
> > 
> >  static void acc_queue_work_func(struct work_struct *w);
> > @@ -544,10 +551,10 @@ static int handle_acc(struct xe_gt *gt,
> > struct acc *acc)
> > 
> >  #define ACC_MSG_LEN_DW        4
> > 
> > -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> > +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
> >  {
> >         const struct xe_guc_acc_desc *desc;
> > -       int ret = 0;
> > +       bool ret = false;
> > 
> >         spin_lock(&acc_queue->lock);
> >         if (acc_queue->head != acc_queue->tail) {
> > @@ -567,8 +574,7 @@ static int get_acc(struct acc_queue *acc_queue,
> > struct acc
> > *acc)
> > 
> >                 acc_queue->head = (acc_queue->head +
> > ACC_MSG_LEN_DW) %
> >                                   ACC_QUEUE_NUM_DW;
> > -       } else {
> > -               ret = -1;
> > +               ret = true;
> >         }
> >         spin_unlock(&acc_queue->lock);
> > 
> > @@ -581,16 +587,22 @@ static void acc_queue_work_func(struct
> > work_struct *w)
> >         struct xe_gt *gt = acc_queue->gt;
> >         struct xe_device *xe = gt_to_xe(gt);
> >         struct acc acc = {};
> > +       unsigned long threshold;
> >         int ret;
> > 
> > -       ret = get_acc(acc_queue, &acc);
> > -       if (ret)
> > -               return;
> > +       threshold = jiffies +
> > msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
> > 
> > -       ret = handle_acc(gt, &acc);
> > -       if (unlikely(ret)) {
> > -               print_acc(xe, &acc);
> > -               drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> > +       while (get_acc(acc_queue, &acc)) {
> > +               ret = handle_acc(gt, &acc);
> > +               if (unlikely(ret)) {
> > +                       print_acc(xe, &acc);
> > +                       drm_warn(&xe->drm, "ACC: Unsuccessful
> > %d\n", ret);
> > +               }
> > +
> > +               if (time_after(jiffies, threshold)) {
> > +                       queue_work(gt->usm.acc_wq, w);
> > +                       break;
> > +               }
> >         }
> >  }
> > 
> > --
> > 2.38.0
>