[Intel-xe] [2/2] drm/xe: Fix pagefault and access counter worker functions

Summers, Stuart stuart.summers at intel.com
Tue Oct 31 22:53:16 UTC 2023


On Tue, 2023-10-31 at 15:15 -0700, Brian Welty wrote:
> When processing G2H messages for pagefault or access counters, we
> queue a
> work item and call queue_work(). This fails if the worker thread is
> already
> queued to run.
> The expectation is that the worker function will process more than a
> single
> item, and is updated here to continue executing until all queued work
> items
> are handled.
> 
> This resolves issues seen with several igt at xe_exec_fault_mode
> subtests.
> 
> Signed-off-by: Brian Welty <brian.welty at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt_pagefault.c | 66 +++++++++++++-------------
> --
>  1 file changed, 30 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index ab6daebbd77c..63fba83c0839 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device
> *xe, struct pagefault *pf)
>  
>  #define PF_MSG_LEN_DW  4
>  
> -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault
> *pf)
> +static bool get_pagefault(struct pf_queue *pf_queue, struct
> pagefault *pf)
>  {
>         const struct xe_guc_pagefault_desc *desc;
> -       int ret = 0;
> +       bool ret = false;
>  
>         spin_lock_irq(&pf_queue->lock);
>         if (pf_queue->head != pf_queue->tail) {
> @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue
> *pf_queue, struct pagefault *pf)
>  
>                 pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
>                         PF_QUEUE_NUM_DW;
> -       } else {
> -               ret = -1;
> +               ret = true;
>         }
>         spin_unlock_irq(&pf_queue->lock);
>  
> @@ -357,29 +356,27 @@ static void pf_queue_work_func(struct
> work_struct *w)
>         struct pagefault pf = {};
>         int ret;
>  
> -       ret = get_pagefault(pf_queue, &pf);
> -       if (ret)
> -               return;
> -
> -       ret = handle_pagefault(gt, &pf);
> -       if (unlikely(ret)) {
> -               print_pagefault(xe, &pf);
> -               pf.fault_unsuccessful = 1;
> -               drm_dbg(&xe->drm, "Fault response: Unsuccessful
> %d\n", ret);
> -       }
> +       while (get_pagefault(pf_queue, &pf)) {

Sorry maybe I'm missing... don't we want this to be
while(!get_pagefault())

?

-Stuart

> +               ret = handle_pagefault(gt, &pf);
> +               if (unlikely(ret)) {
> +                       print_pagefault(xe, &pf);
> +                       pf.fault_unsuccessful = 1;
> +                       drm_dbg(&xe->drm, "Fault response:
> Unsuccessful %d\n", ret);
> +               }
>  
> -       reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> -               FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
> -               FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> -               FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
> -               FIELD_PREP(PFR_ASID, pf.asid);
> +               reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> +                       FIELD_PREP(PFR_SUCCESS,
> pf.fault_unsuccessful) |
> +                       FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
> +                       FIELD_PREP(PFR_DESC_TYPE,
> FAULT_RESPONSE_DESC) |
> +                       FIELD_PREP(PFR_ASID, pf.asid);
>  
> -       reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> -               FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
> -               FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> -               FIELD_PREP(PFR_PDATA, pf.pdata);
> +               reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
> +                       FIELD_PREP(PFR_ENG_INSTANCE,
> pf.engine_instance) |
> +                       FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
> +                       FIELD_PREP(PFR_PDATA, pf.pdata);
>  
> -       send_pagefault_reply(&gt->uc.guc, &reply);
> +               send_pagefault_reply(&gt->uc.guc, &reply);
> +       }
>  }
>  
>  static void acc_queue_work_func(struct work_struct *w);
> @@ -544,10 +541,10 @@ static int handle_acc(struct xe_gt *gt, struct
> acc *acc)
>  
>  #define ACC_MSG_LEN_DW        4
>  
> -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
> +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
>  {
>         const struct xe_guc_acc_desc *desc;
> -       int ret = 0;
> +       bool ret = false;
>  
>         spin_lock(&acc_queue->lock);
>         if (acc_queue->head != acc_queue->tail) {
> @@ -567,8 +564,7 @@ static int get_acc(struct acc_queue *acc_queue,
> struct acc *acc)
>  
>                 acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW)
> %
>                                   ACC_QUEUE_NUM_DW;
> -       } else {
> -               ret = -1;
> +               ret = true;
>         }
>         spin_unlock(&acc_queue->lock);
>  
> @@ -583,14 +579,12 @@ static void acc_queue_work_func(struct
> work_struct *w)
>         struct acc acc = {};
>         int ret;
>  
> -       ret = get_acc(acc_queue, &acc);
> -       if (ret)
> -               return;
> -
> -       ret = handle_acc(gt, &acc);
> -       if (unlikely(ret)) {
> -               print_acc(xe, &acc);
> -               drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> +       while (get_acc(acc_queue, &acc)) {
> +               ret = handle_acc(gt, &acc);
> +               if (unlikely(ret)) {
> +                       print_acc(xe, &acc);
> +                       drm_warn(&xe->drm, "ACC: Unsuccessful %d\n",
> ret);
> +               }
>         }
>  }
>  



More information about the Intel-xe mailing list