[Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access counter worker functions
Summers, Stuart
stuart.summers at intel.com
Fri Nov 3 14:50:54 UTC 2023
On Fri, 2023-11-03 at 02:46 +0000, Zeng, Oak wrote:
> Hi,
>
> Can someone explain why we introduced a circular buffer for page
> fault handling in xekmd?
>
> i915 page faults are also handled in a work queue/worker fashion.
> Whenever we have a guc2host request, we queue_work() to process the
> incoming request. No circular buffer, and we didn't have a problem
> there.
i915 also uses a circular buffer:
static noinline void ct_incoming_request_worker_func(struct work_struct *w)
{
	struct intel_guc_ct *ct =
		container_of(w, struct intel_guc_ct, requests.worker);
	bool done;

	do {
		done = ct_process_incoming_requests(ct, &ct->requests.incoming);
	} while (!done);
}
The issue with Xe here is that queue_work() is used to queue each
incoming fault, while the worker handles only a single fault per
invocation. If enough faults arrive concurrently, we can call
queue_work() while the work item is still pending; queue_work() then
returns false and that fault is never handled.
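To make the failure mode concrete, here is a minimal sketch
(hypothetical names, not the actual Xe code) of a worker that consumes
one ring entry per invocation, paired with one queue_work() call per
G2H message; the second of two back-to-back messages is left stranded
in the ring:

#include <linux/workqueue.h>

struct my_msg { u32 dw[4]; };

/* Hypothetical stand-ins for pf_queue and its helpers. */
struct my_queue {
	struct workqueue_struct *wq;
	struct work_struct worker;
	/* head/tail ring storage elided */
};

static bool dequeue_one(struct my_queue *q, struct my_msg *msg);
static void enqueue(struct my_queue *q, const u32 *data);
static void handle_one(struct my_msg *msg);

static void one_shot_worker(struct work_struct *w)
{
	struct my_queue *q = container_of(w, struct my_queue, worker);
	struct my_msg msg;

	/* Consumes exactly one ring entry per invocation. */
	if (dequeue_one(q, &msg))
		handle_one(&msg);
}

/* G2H path: one queue_work() per incoming message. */
static void on_g2h_message(struct my_queue *q, const u32 *data)
{
	enqueue(q, data);
	/*
	 * queue_work() returns false if the item is already pending.
	 * Two messages but only one worker invocation: the second
	 * ring entry has nothing left scheduled to consume it.
	 */
	queue_work(q->wq, &q->worker);
}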
In the i915 case (see above), the handling is also done in a worker,
but that worker keeps pulling requests from the GuC until nothing is
left to handle, i.e. the circular buffer is fully drained. With that
model, the worker processing those incoming CT requests never hits the
corner case of queue_work() returning false and dropping work.
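The fix below follows that same drain-until-empty shape, bounded by a
time budget. A minimal sketch of the pattern (reusing the hypothetical
names from the sketch above, with MY_MAX_RUNTIME_MS standing in for the
patch's USM_QUEUE_MAX_RUNTIME_MS):

#include <linux/jiffies.h>
#include <linux/workqueue.h>

#define MY_MAX_RUNTIME_MS	20

static void draining_worker(struct work_struct *w)
{
	struct my_queue *q = container_of(w, struct my_queue, worker);
	unsigned long threshold = jiffies + msecs_to_jiffies(MY_MAX_RUNTIME_MS);
	struct my_msg msg;

	/* Drain everything that is queued... */
	while (dequeue_one(q, &msg)) {
		handle_one(&msg);

		/* ...but don't hog the CPU: requeue ourselves and yield. */
		if (time_after(jiffies, threshold)) {
			queue_work(q->wq, w);
			break;
		}
	}
}

Note that queue_work() on an item that is running but no longer pending
does requeue it, so a message arriving mid-drain is either picked up by
the current loop or triggers a fresh invocation; nothing is stranded.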
Thanks,
Stuart
>
> Thanks,
> Oak
>
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Brian Welty
> > Sent: Thursday, November 2, 2023 9:43 PM
> > To: intel-xe at lists.freedesktop.org
> > Subject: [Intel-xe] [PATCH v3] drm/xe: Fix pagefault and access counter worker functions
> >
> > When processing G2H messages for pagefault or access counters, we
> > queue a work item and call queue_work(). This fails if the worker
> > thread is already queued to run.
> > The expectation is that the worker function will do more than process
> > a single item and return. It needs to either process all pending items
> > or requeue itself if items are pending. But requeuing will add latency,
> > and a potential context switch can occur.
> >
> > We don't want to add unnecessary latency, so the worker should process
> > as many faults as it can within a reasonable duration of time. We also
> > do not want to hog the CPU core, so we execute in a loop and requeue if
> > still running after more than 20 ms. This seems a reasonable framework
> > and is easy to tune further if needed.
> >
> > This resolves issues seen with several igt@xe_exec_fault_mode subtests
> > where the GPU will hang when the KMD ignores a pending pagefault.
> >
> > v2: requeue the worker instead of having an internal processing loop.
> > v3: implement hybrid model of v1 and v2;
> >     now, run for 20 msec before we will requeue if still running
> >
> > Signed-off-by: Brian Welty <brian.welty at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_pagefault.c | 80 ++++++++++++++++------------
> >  1 file changed, 46 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > index ab6daebbd77c..c99af751d1fb 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
> >
> >  #define PF_MSG_LEN_DW	4
> >
> > -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> > +static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> >  {
> >  	const struct xe_guc_pagefault_desc *desc;
> > -	int ret = 0;
> > +	bool ret = false;
> >
> >  	spin_lock_irq(&pf_queue->lock);
> >  	if (pf_queue->head != pf_queue->tail) {
> > @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
> >
> >  		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
> >  			PF_QUEUE_NUM_DW;
> > -	} else {
> > -		ret = -1;
> > +		ret = true;
> >  	}
> >  	spin_unlock_irq(&pf_queue->lock);
> >
> > @@ -348,6 +347,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
> >  	return full ? -ENOSPC : 0;
> >  }
> >
> > +#define USM_QUEUE_MAX_RUNTIME_MS	20
> > +
> >  static void pf_queue_work_func(struct work_struct *w)
> >  {
> >  	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
> > @@ -355,31 +356,37 @@ static void pf_queue_work_func(struct work_struct *w)
> >  	struct xe_device *xe = gt_to_xe(gt);
> >  	struct xe_guc_pagefault_reply reply = {};
> >  	struct pagefault pf = {};
> > +	unsigned long threshold;
> >  	int ret;
> >
> > -	ret = get_pagefault(pf_queue, &pf);
> > -	if (ret)
> > -		return;
> > +	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
> >
> > -	ret = handle_pagefault(gt, &pf);
> > -	if (unlikely(ret)) {
> > -		print_pagefault(xe, &pf);
> > -		pf.fault_unsuccessful = 1;
> > -		drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
> > -	}
> > +	while (get_pagefault(pf_queue, &pf)) {
> > +		ret = handle_pagefault(gt, &pf);
> > +		if (unlikely(ret)) {
> > +			print_pagefault(xe, &pf);
> > +			pf.fault_unsuccessful = 1;
> > +			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
> > +		}
> > +
> > +		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > +			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> > +			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > +			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> > +			FIELD_PREP(PFR_ASID, pf.asid);
> >
> > -	reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > -		FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> > -		FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > -		FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> > -		FIELD_PREP(PFR_ASID, pf.asid);
> > +		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > +			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> > +			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> > +			FIELD_PREP(PFR_PDATA, pf.pdata);
> >
> > -	reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > -		FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> > -		FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> > -		FIELD_PREP(PFR_PDATA, pf.pdata);
> > +		send_pagefault_reply(&gt->uc.guc, &reply);
> >
> > -	send_pagefault_reply(&gt->uc.guc, &reply);
> > +		if (time_after(jiffies, threshold)) {
> > +			queue_work(gt->usm.pf_wq, w);
> > +			break;
> > +		}
> > +	}
> >  }
> >
> >  static void acc_queue_work_func(struct work_struct *w);
> > @@ -544,10 +551,10 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
> >
> >  #define ACC_MSG_LEN_DW	4
> >
> > -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> > +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
> >  {
> >  	const struct xe_guc_acc_desc *desc;
> > -	int ret = 0;
> > +	bool ret = false;
> >
> >  	spin_lock(&acc_queue->lock);
> >  	if (acc_queue->head != acc_queue->tail) {
> > @@ -567,8 +574,7 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> >
> >  		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
> >  				  ACC_QUEUE_NUM_DW;
> > -	} else {
> > -		ret = -1;
> > +		ret = true;
> >  	}
> >  	spin_unlock(&acc_queue->lock);
> >
> > @@ -581,16 +587,22 @@ static void acc_queue_work_func(struct work_struct *w)
> >  	struct xe_gt *gt = acc_queue->gt;
> >  	struct xe_device *xe = gt_to_xe(gt);
> >  	struct acc acc = {};
> > +	unsigned long threshold;
> >  	int ret;
> >
> > -	ret = get_acc(acc_queue, &acc);
> > -	if (ret)
> > -		return;
> > +	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
> >
> > -	ret = handle_acc(gt, &acc);
> > -	if (unlikely(ret)) {
> > -		print_acc(xe, &acc);
> > -		drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> > +	while (get_acc(acc_queue, &acc)) {
> > +		ret = handle_acc(gt, &acc);
> > +		if (unlikely(ret)) {
> > +			print_acc(xe, &acc);
> > +			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> > +		}
> > +
> > +		if (time_after(jiffies, threshold)) {
> > +			queue_work(gt->usm.acc_wq, w);
> > +			break;
> > +		}
> >  	}
> >  }
> >
> > --
> > 2.38.0
>