[Intel-xe] [2/2] drm/xe: Fix pagefault and access counter worker functions
Summers, Stuart
stuart.summers at intel.com
Wed Nov 1 14:32:53 UTC 2023
On Tue, 2023-10-31 at 21:53 -0700, Welty, Brian wrote:
>
>
> On 10/31/2023 3:53 PM, Summers, Stuart wrote:
> > On Tue, 2023-10-31 at 15:15 -0700, Brian Welty wrote:
> > > When processing G2H messages for pagefault or access counters, we
> > > queue a
> > > work item and call queue_work(). This fails if the worker thread
> > > is
> > > already
> > > queued to run.
> > > The expectation is that the worker function will process more
> > > than a
> > > single
> > > item, and is updated here to continue executing until all queued
> > > work
> > > items
> > > are handled.
> > >
> > > This resolves issues seen with several igt at xe_exec_fault_mode
> > > subtests.
> > >
> > > Signed-off-by: Brian Welty <brian.welty at intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_gt_pagefault.c | 66 +++++++++++++--------
> > > -----
> > > --
> > > 1 file changed, 30 insertions(+), 36 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > index ab6daebbd77c..63fba83c0839 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > @@ -276,10 +276,10 @@ static void print_pagefault(struct
> > > xe_device
> > > *xe, struct pagefault *pf)
> > >
> > > #define PF_MSG_LEN_DW 4
> > >
> > > -static int get_pagefault(struct pf_queue *pf_queue, struct
> > > pagefault
> > > *pf)
> > > +static bool get_pagefault(struct pf_queue *pf_queue, struct
> > > pagefault *pf)
> > > {
> > > const struct xe_guc_pagefault_desc *desc;
> > > - int ret = 0;
> > > + bool ret = false;
> > >
> > > spin_lock_irq(&pf_queue->lock);
> > > if (pf_queue->head != pf_queue->tail) {
> > > @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue
> > > *pf_queue, struct pagefault *pf)
> > >
> > > pf_queue->head = (pf_queue->head +
> > > PF_MSG_LEN_DW) %
> > > PF_QUEUE_NUM_DW;
> > > - } else {
Yeah ok I'm just blind. I missed that you dropped the else condition
here...
Ok no problem from me, looks great. Thanks for the patch!
Reviewed-by: Stuart Summers <stuart.summers at intel.com>
> > > - ret = -1;
> > > + ret = true;
> > > }
> > > spin_unlock_irq(&pf_queue->lock);
> > >
> > > @@ -357,29 +356,27 @@ static void pf_queue_work_func(struct
> > > work_struct *w)
> > > struct pagefault pf = {};
> > > int ret;
> > >
> > > - ret = get_pagefault(pf_queue, &pf);
> > > - if (ret)
> > > - return;
> > > -
> > > - ret = handle_pagefault(gt, &pf);
> > > - if (unlikely(ret)) {
> > > - print_pagefault(xe, &pf);
> > > - pf.fault_unsuccessful = 1;
> > > - drm_dbg(&xe->drm, "Fault response: Unsuccessful
> > > %d\n", ret);
> > > - }
> > > + while (get_pagefault(pf_queue, &pf)) {
> >
> > Sorry maybe I'm missing... don't we want this to be
> > while(!get_pagefault())
> >
>
> That would be true if get_pagefault() was still identical to the
> current
> code in drm-xe-next.... but it's less readable that way.
> So I changed it. get_pagefault() now returns true if it dequeued
> a work item. And false if it did not.
>
> Make sense now?
>
>
> > ?
> >
> > -Stuart
> >
> > > + ret = handle_pagefault(gt, &pf);
> > > + if (unlikely(ret)) {
> > > + print_pagefault(xe, &pf);
> > > + pf.fault_unsuccessful = 1;
> > > + drm_dbg(&xe->drm, "Fault response:
> > > Unsuccessful %d\n", ret);
> > > + }
> > >
> > > - reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > > - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> > > - FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > > - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> > > - FIELD_PREP(PFR_ASID, pf.asid);
> > > + reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> > > + FIELD_PREP(PFR_SUCCESS,
> > > pf.fault_unsuccessful) |
> > > + FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> > > + FIELD_PREP(PFR_DESC_TYPE,
> > > FAULT_RESPONSE_DESC) |
> > > + FIELD_PREP(PFR_ASID, pf.asid);
> > >
> > > - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > > - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance)
> > > |
> > > - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> > > - FIELD_PREP(PFR_PDATA, pf.pdata);
> > > + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> > > + FIELD_PREP(PFR_ENG_INSTANCE,
> > > pf.engine_instance) |
> > > + FIELD_PREP(PFR_ENG_CLASS,
> > > pf.engine_class) |
> > > + FIELD_PREP(PFR_PDATA, pf.pdata);
> > >
> > > - send_pagefault_reply(>->uc.guc, &reply);
> > > + send_pagefault_reply(>->uc.guc, &reply);
> > > + }
> > > }
> > >
> > > static void acc_queue_work_func(struct work_struct *w);
> > > @@ -544,10 +541,10 @@ static int handle_acc(struct xe_gt *gt,
> > > struct
> > > acc *acc)
> > >
> > > #define ACC_MSG_LEN_DW 4
> > >
> > > -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> > > +static bool get_acc(struct acc_queue *acc_queue, struct acc
> > > *acc)
> > > {
> > > const struct xe_guc_acc_desc *desc;
> > > - int ret = 0;
> > > + bool ret = false;
> > >
> > > spin_lock(&acc_queue->lock);
> > > if (acc_queue->head != acc_queue->tail) {
> > > @@ -567,8 +564,7 @@ static int get_acc(struct acc_queue
> > > *acc_queue,
> > > struct acc *acc)
> > >
> > > acc_queue->head = (acc_queue->head +
> > > ACC_MSG_LEN_DW)
> > > %
> > > ACC_QUEUE_NUM_DW;
> > > - } else {
> > > - ret = -1;
> > > + ret = true;
> > > }
> > > spin_unlock(&acc_queue->lock);
> > >
> > > @@ -583,14 +579,12 @@ static void acc_queue_work_func(struct
> > > work_struct *w)
> > > struct acc acc = {};
> > > int ret;
> > >
> > > - ret = get_acc(acc_queue, &acc);
> > > - if (ret)
> > > - return;
> > > -
> > > - ret = handle_acc(gt, &acc);
> > > - if (unlikely(ret)) {
> > > - print_acc(xe, &acc);
> > > - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n",
> > > ret);
> > > + while (get_acc(acc_queue, &acc)) {
> > > + ret = handle_acc(gt, &acc);
> > > + if (unlikely(ret)) {
> > > + print_acc(xe, &acc);
> > > + drm_warn(&xe->drm, "ACC: Unsuccessful
> > > %d\n",
> > > ret);
> > > + }
> > > }
> > > }
> > >
> >
More information about the Intel-xe
mailing list