[PATCH] drm/xe: Convert page fault messages to be GT-oriented
Matt Roper
matthew.d.roper at intel.com
Fri May 30 17:33:53 UTC 2025
On Fri, May 30, 2025 at 06:48:35PM +0200, Michal Wajdeczko wrote:
> We are processing here G2H messages, so we should use GT oriented
> messages to retain information about the origin GT. While at it,
> print error codes in a user-friendly way.
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_pagefault.c | 74 ++++++++++++++--------------
> 1 file changed, 36 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index 7a8f87709e39..e0e68eda82bd 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -14,6 +14,7 @@
> #include "abi/guc_actions_abi.h"
> #include "xe_bo.h"
> #include "xe_gt.h"
> +#include "xe_gt_printk.h"
> #include "xe_gt_stats.h"
> #include "xe_gt_tlb_invalidation.h"
> #include "xe_guc.h"
> @@ -244,22 +245,22 @@ static int send_pagefault_reply(struct xe_guc *guc,
> return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
> }
>
> -static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
> +static void print_pagefault(struct xe_gt *gt, struct pagefault *pf)
> {
> - drm_dbg(&xe->drm, "\n\tASID: %d\n"
> - "\tVFID: %d\n"
> - "\tPDATA: 0x%04x\n"
> - "\tFaulted Address: 0x%08x%08x\n"
> - "\tFaultType: %d\n"
> - "\tAccessType: %d\n"
> - "\tFaultLevel: %d\n"
> - "\tEngineClass: %d %s\n"
> - "\tEngineInstance: %d\n",
> - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
> - lower_32_bits(pf->page_addr),
> - pf->fault_type, pf->access_type, pf->fault_level,
> - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class),
> - pf->engine_instance);
> + xe_gt_dbg(gt, "\n\tASID: %d\n"
> + "\tVFID: %d\n"
> + "\tPDATA: 0x%04x\n"
> + "\tFaulted Address: 0x%08x%08x\n"
> + "\tFaultType: %d\n"
> + "\tAccessType: %d\n"
> + "\tFaultLevel: %d\n"
> + "\tEngineClass: %d %s\n"
> + "\tEngineInstance: %d\n",
> + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
> + lower_32_bits(pf->page_addr),
> + pf->fault_type, pf->access_type, pf->fault_level,
> + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class),
> + pf->engine_instance);
> }
>
> #define PF_MSG_LEN_DW 4
> @@ -311,7 +312,6 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
> int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
> {
> struct xe_gt *gt = guc_to_gt(guc);
> - struct xe_device *xe = gt_to_xe(gt);
> struct pf_queue *pf_queue;
> unsigned long flags;
> u32 asid;
> @@ -336,7 +336,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
> pf_queue->num_dw;
> queue_work(gt->usm.pf_wq, &pf_queue->worker);
> } else {
> - drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
> + xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n");
> }
> spin_unlock_irqrestore(&pf_queue->lock, flags);
>
> @@ -349,7 +349,6 @@ static void pf_queue_work_func(struct work_struct *w)
> {
> struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
> struct xe_gt *gt = pf_queue->gt;
> - struct xe_device *xe = gt_to_xe(gt);
> struct xe_guc_pagefault_reply reply = {};
> struct pagefault pf = {};
> unsigned long threshold;
> @@ -360,9 +359,9 @@ static void pf_queue_work_func(struct work_struct *w)
> while (get_pagefault(pf_queue, &pf)) {
> ret = handle_pagefault(gt, &pf);
> if (unlikely(ret)) {
> - print_pagefault(xe, &pf);
> + print_pagefault(gt, &pf);
> pf.fault_unsuccessful = 1;
> - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
> + xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret));
> }
>
> reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
> @@ -515,21 +514,21 @@ static int sub_granularity_in_byte(int val)
> return (granularity_in_byte(val) / 32);
> }
>
> -static void print_acc(struct xe_device *xe, struct acc *acc)
> +static void print_acc(struct xe_gt *gt, struct acc *acc)
> {
> - drm_warn(&xe->drm, "Access counter request:\n"
> - "\tType: %s\n"
> - "\tASID: %d\n"
> - "\tVFID: %d\n"
> - "\tEngine: %d:%d\n"
> - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
> - "\tSub_Granularity Vector: 0x%08x\n"
> - "\tVA Range base: 0x%016llx\n",
> - acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
> - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
> - granularity_in_byte(acc->granularity) / SZ_1K,
> - sub_granularity_in_byte(acc->granularity) / SZ_1K,
> - acc->sub_granularity, acc->va_range_base);
> + xe_gt_warn(gt, "Access counter request:\n"
> + "\tType: %s\n"
> + "\tASID: %d\n"
> + "\tVFID: %d\n"
> + "\tEngine: %d:%d\n"
> + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
> + "\tSub_Granularity Vector: 0x%08x\n"
> + "\tVA Range base: 0x%016llx\n",
> + acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
> + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
> + granularity_in_byte(acc->granularity) / SZ_1K,
> + sub_granularity_in_byte(acc->granularity) / SZ_1K,
> + acc->sub_granularity, acc->va_range_base);
> }
>
> static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
> @@ -627,7 +626,6 @@ static void acc_queue_work_func(struct work_struct *w)
> {
> struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
> struct xe_gt *gt = acc_queue->gt;
> - struct xe_device *xe = gt_to_xe(gt);
> struct acc acc = {};
> unsigned long threshold;
> int ret;
> @@ -637,8 +635,8 @@ static void acc_queue_work_func(struct work_struct *w)
> while (get_acc(acc_queue, &acc)) {
> ret = handle_acc(gt, &acc);
> if (unlikely(ret)) {
> - print_acc(xe, &acc);
> - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
> + print_acc(gt, &acc);
> + xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret));
> }
>
> if (time_after(jiffies, threshold) &&
> @@ -683,7 +681,7 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
> acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
> queue_work(gt->usm.acc_wq, &acc_queue->worker);
> } else {
> - drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
> + xe_gt_warn(gt, "ACC Queue full, dropping ACC\n");
> }
> spin_unlock(&acc_queue->lock);
>
> --
> 2.47.1
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list