[Intel-xe] [2/2] drm/xe: Fix pagefault and access counter worker functions

Wed Nov 1 04:53:31 UTC 2023

On 10/31/2023 3:53 PM, Summers, Stuart wrote:
> On Tue, 2023-10-31 at 15:15 -0700, Brian Welty wrote:
>> When processing G2H messages for pagefault or access counters, we
>> queue a
>> work item and call queue_work(). This fails if the worker thread is
>> already
>> queued to run.
>> The expectation is that the worker function will process more than a
>> single
>> item, and is updated here to continue executing until all queued work
>> items
>> are handled.
>>
>> This resolves issues seen with several igt at xe_exec_fault_mode
>> subtests.
>>
>> Signed-off-by: Brian Welty <brian.welty at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gt_pagefault.c | 66 +++++++++++++-------------
>> --
>>   1 file changed, 30 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> index ab6daebbd77c..63fba83c0839 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device
>> *xe, struct pagefault *pf)
>>   
>>   #define PF_MSG_LEN_DW  4
>>   
>> -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault
>> *pf)
>> +static bool get_pagefault(struct pf_queue *pf_queue, struct
>> pagefault *pf)
>>   {
>>          const struct xe_guc_pagefault_desc *desc;
>> -       int ret = 0;
>> +       bool ret = false;
>>   
>>          spin_lock_irq(&pf_queue->lock);
>>          if (pf_queue->head != pf_queue->tail) {
>> @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue
>> *pf_queue, struct pagefault *pf)
>>   
>>                  pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
>>                          PF_QUEUE_NUM_DW;
>> -       } else {
>> -               ret = -1;
>> +               ret = true;
>>          }
>>          spin_unlock_irq(&pf_queue->lock);
>>   
>> @@ -357,29 +356,27 @@ static void pf_queue_work_func(struct
>> work_struct *w)
>>          struct pagefault pf = {};
>>          int ret;
>>   
>> -       ret = get_pagefault(pf_queue, &pf);
>> -       if (ret)
>> -               return;
>> -
>> -       ret = handle_pagefault(gt, &pf);
>> -       if (unlikely(ret)) {
>> -               print_pagefault(xe, &pf);
>> -               pf.fault_unsuccessful = 1;
>> -               drm_dbg(&xe->drm, "Fault response: Unsuccessful
>> %d\n", ret);
>> -       }
>> +       while (get_pagefault(pf_queue, &pf)) {
> 
> Sorry maybe I'm missing... don't we want this to be
> while(!get_pagefault())
> 

That would be true if get_pagefault() was still identical to the current
code in drm-xe-next....  but it's less readable that way.
So I changed it. get_pagefault() now returns true if it dequeued
a work item.  And false if it did not.

Make sense now?

> ?
> 
> -Stuart
> 
>> +               ret = handle_pagefault(gt, &pf);
>> +               if (unlikely(ret)) {
>> +                       print_pagefault(xe, &pf);
>> +                       pf.fault_unsuccessful = 1;
>> +                       drm_dbg(&xe->drm, "Fault response:
>> Unsuccessful %d\n", ret);
>> +               }
>>   
>> -       reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
>> -               FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
>> -               FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
>> -               FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
>> -               FIELD_PREP(PFR_ASID, pf.asid);
>> +               reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
>> +                       FIELD_PREP(PFR_SUCCESS,
>> pf.fault_unsuccessful) |
>> +                       FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
>> +                       FIELD_PREP(PFR_DESC_TYPE,
>> FAULT_RESPONSE_DESC) |
>> +                       FIELD_PREP(PFR_ASID, pf.asid);
>>   
>> -       reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
>> -               FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
>> -               FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
>> -               FIELD_PREP(PFR_PDATA, pf.pdata);
>> +               reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
>> +                       FIELD_PREP(PFR_ENG_INSTANCE,
>> pf.engine_instance) |
>> +                       FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
>> +                       FIELD_PREP(PFR_PDATA, pf.pdata);
>>   
>> -       send_pagefault_reply(&gt->uc.guc, &reply);
>> +               send_pagefault_reply(&gt->uc.guc, &reply);
>> +       }
>>   }
>>   
>>   static void acc_queue_work_func(struct work_struct *w);
>> @@ -544,10 +541,10 @@ static int handle_acc(struct xe_gt *gt, struct
>> acc *acc)
>>   
>>   #define ACC_MSG_LEN_DW        4
>>   
>> -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
>> +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
>>   {
>>          const struct xe_guc_acc_desc *desc;
>> -       int ret = 0;
>> +       bool ret = false;
>>   
>>          spin_lock(&acc_queue->lock);
>>          if (acc_queue->head != acc_queue->tail) {
>> @@ -567,8 +564,7 @@ static int get_acc(struct acc_queue *acc_queue,
>> struct acc *acc)
>>   
>>                  acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW)
>> %
>>                                    ACC_QUEUE_NUM_DW;
>> -       } else {
>> -               ret = -1;
>> +               ret = true;
>>          }
>>          spin_unlock(&acc_queue->lock);
>>   
>> @@ -583,14 +579,12 @@ static void acc_queue_work_func(struct
>> work_struct *w)
>>          struct acc acc = {};
>>          int ret;
>>   
>> -       ret = get_acc(acc_queue, &acc);
>> -       if (ret)
>> -               return;
>> -
>> -       ret = handle_acc(gt, &acc);
>> -       if (unlikely(ret)) {
>> -               print_acc(xe, &acc);
>> -               drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
>> +       while (get_acc(acc_queue, &acc)) {
>> +               ret = handle_acc(gt, &acc);
>> +               if (unlikely(ret)) {
>> +                       print_acc(xe, &acc);
>> +                       drm_warn(&xe->drm, "ACC: Unsuccessful %d\n",
>> ret);
>> +               }
>>          }
>>   }
>>   
>