[Intel-xe] [2/2] drm/xe: Fix pagefault and access counter worker functions
Welty, Brian
brian.welty at intel.com
Wed Nov 1 04:53:31 UTC 2023
On 10/31/2023 3:53 PM, Summers, Stuart wrote:
> On Tue, 2023-10-31 at 15:15 -0700, Brian Welty wrote:
>> When processing G2H messages for pagefault or access counters, we
>> queue a
>> work item and call queue_work(). This fails if the worker thread is
>> already
>> queued to run.
>> The expectation is that the worker function will process more than a
>> single
>> item, and is updated here to continue executing until all queued work
>> items
>> are handled.
>>
>> This resolves issues seen with several igt at xe_exec_fault_mode
>> subtests.
>>
>> Signed-off-by: Brian Welty <brian.welty at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt_pagefault.c | 66 +++++++++++++-------------
>> --
>> 1 file changed, 30 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> index ab6daebbd77c..63fba83c0839 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
>> @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device
>> *xe, struct pagefault *pf)
>>
>> #define PF_MSG_LEN_DW 4
>>
>> -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault
>> *pf)
>> +static bool get_pagefault(struct pf_queue *pf_queue, struct
>> pagefault *pf)
>> {
>> const struct xe_guc_pagefault_desc *desc;
>> - int ret = 0;
>> + bool ret = false;
>>
>> spin_lock_irq(&pf_queue->lock);
>> if (pf_queue->head != pf_queue->tail) {
>> @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue
>> *pf_queue, struct pagefault *pf)
>>
>> pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
>> PF_QUEUE_NUM_DW;
>> - } else {
>> - ret = -1;
>> + ret = true;
>> }
>> spin_unlock_irq(&pf_queue->lock);
>>
>> @@ -357,29 +356,27 @@ static void pf_queue_work_func(struct
>> work_struct *w)
>> struct pagefault pf = {};
>> int ret;
>>
>> - ret = get_pagefault(pf_queue, &pf);
>> - if (ret)
>> - return;
>> -
>> - ret = handle_pagefault(gt, &pf);
>> - if (unlikely(ret)) {
>> - print_pagefault(xe, &pf);
>> - pf.fault_unsuccessful = 1;
>> - drm_dbg(&xe->drm, "Fault response: Unsuccessful
>> %d\n", ret);
>> - }
>> + while (get_pagefault(pf_queue, &pf)) {
>
> Sorry maybe I'm missing... don't we want this to be
> while(!get_pagefault())
>
That would be true if get_pagefault() was still identical to the current
code in drm-xe-next.... but it's less readable that way.
So I changed it. get_pagefault() now returns true if it dequeued
a work item. And false if it did not.
Make sense now?
> ?
>
> -Stuart
>
>> + ret = handle_pagefault(gt, &pf);
>> + if (unlikely(ret)) {
>> + print_pagefault(xe, &pf);
>> + pf.fault_unsuccessful = 1;
>> + drm_dbg(&xe->drm, "Fault response:
>> Unsuccessful %d\n", ret);
>> + }
>>
>> - reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
>> - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
>> - FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
>> - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
>> - FIELD_PREP(PFR_ASID, pf.asid);
>> + reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
>> + FIELD_PREP(PFR_SUCCESS,
>> pf.fault_unsuccessful) |
>> + FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
>> + FIELD_PREP(PFR_DESC_TYPE,
>> FAULT_RESPONSE_DESC) |
>> + FIELD_PREP(PFR_ASID, pf.asid);
>>
>> - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
>> - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
>> - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
>> - FIELD_PREP(PFR_PDATA, pf.pdata);
>> + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
>> + FIELD_PREP(PFR_ENG_INSTANCE,
>> pf.engine_instance) |
>> + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
>> + FIELD_PREP(PFR_PDATA, pf.pdata);
>>
>> - send_pagefault_reply(>->uc.guc, &reply);
>> + send_pagefault_reply(>->uc.guc, &reply);
>> + }
>> }
>>
>> static void acc_queue_work_func(struct work_struct *w);
>> @@ -544,10 +541,10 @@ static int handle_acc(struct xe_gt *gt, struct
>> acc *acc)
>>
>> #define ACC_MSG_LEN_DW 4
>>
>> -static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
>> +static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
>> {
>> const struct xe_guc_acc_desc *desc;
>> - int ret = 0;
>> + bool ret = false;
>>
>> spin_lock(&acc_queue->lock);
>> if (acc_queue->head != acc_queue->tail) {
>> @@ -567,8 +564,7 @@ static int get_acc(struct acc_queue *acc_queue,
>> struct acc *acc)
>>
>> acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW)
>> %
>> ACC_QUEUE_NUM_DW;
>> - } else {
>> - ret = -1;
>> + ret = true;
>> }
>> spin_unlock(&acc_queue->lock);
>>
>> @@ -583,14 +579,12 @@ static void acc_queue_work_func(struct
>> work_struct *w)
>> struct acc acc = {};
>> int ret;
>>
>> - ret = get_acc(acc_queue, &acc);
>> - if (ret)
>> - return;
>> -
>> - ret = handle_acc(gt, &acc);
>> - if (unlikely(ret)) {
>> - print_acc(xe, &acc);
>> - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
>> + while (get_acc(acc_queue, &acc)) {
>> + ret = handle_acc(gt, &acc);
>> + if (unlikely(ret)) {
>> + print_acc(xe, &acc);
>> + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n",
>> ret);
>> + }
>> }
>> }
>>
>
More information about the Intel-xe
mailing list