[PATCH v5 5/6] drm/xe/trace: Print device_id in xe_trace events
Gustavo Sousa
gustavo.sousa at intel.com
Fri Jun 7 14:35:00 UTC 2024
Quoting Radhakrishna Sripada (2024-06-06 19:38:18-03:00)
>In multi-GPU environments it is important to know which device the
>GT events belong to. Include the device_id in the tracing information
>to indicate the device each event is associated with.
>
>v2: Use variable-sized variant to display dev name (Gustavo)
>v3: Pass single argument to __assign_str to fix kunit error
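
As an aside, to illustrate what this buys us: dev_name() on a PCI-bound
device resolves to the BDF, so a resulting trace line would look roughly
like the following (all values made up):

	xe_gt_tlb_invalidation_fence_send: dev=0000:03:00.0, fence=00000000deadbeef, seqno=42

which makes events from different GPUs easy to tell apart.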
>
>Suggested-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
>Cc: Gustavo Sousa <gustavo.sousa at intel.com>
>Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
>---
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 27 +++++----
> drivers/gpu/drm/xe/xe_pt.c | 8 ++-
> drivers/gpu/drm/xe/xe_trace.h | 65 +++++++++++++--------
> 3 files changed, 60 insertions(+), 40 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>index 105797776a6c..23d397a246a8 100644
>--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>@@ -22,6 +22,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> {
> struct xe_gt *gt = container_of(work, struct xe_gt,
> tlb_invalidation.fence_tdr.work);
>+ struct xe_device *xe = gt_to_xe(gt);
> struct xe_gt_tlb_invalidation_fence *fence, *next;
>
> spin_lock_irq(&gt->tlb_invalidation.pending_lock);
>@@ -33,7 +34,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
> break;
>
>- trace_xe_gt_tlb_invalidation_fence_timeout(fence);
>+ trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence);
> xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
> fence->seqno, gt->tlb_invalidation.seqno_recv);
>
>@@ -71,18 +72,18 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
> }
>
> static void
>-__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
>+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> {
>- trace_xe_gt_tlb_invalidation_fence_signal(fence);
>+ trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
> dma_fence_signal(&fence->base);
> dma_fence_put(&fence->base);
> }
>
> static void
>-invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
>+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> {
> list_del(&fence->link);
>- __invalidation_fence_signal(fence);
>+ __invalidation_fence_signal(xe, fence);
> }
>
> /**
>@@ -121,7 +122,7 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
>
> list_for_each_entry_safe(fence, next,
> &gt->tlb_invalidation.pending_fences, link)
>- invalidation_fence_signal(fence);
>+ invalidation_fence_signal(gt_to_xe(gt), fence);
> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
> mutex_unlock(&gt->uc.guc.ct.lock);
> }
>@@ -144,6 +145,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> u32 *action, int len)
> {
> struct xe_gt *gt = guc_to_gt(guc);
>+ struct xe_device *xe = gt_to_xe(gt);
> int seqno;
> int ret;
>
>@@ -157,7 +159,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> seqno = gt->tlb_invalidation.seqno;
> if (fence) {
> fence->seqno = seqno;
>- trace_xe_gt_tlb_invalidation_fence_send(fence);
>+ trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
> }
> action[1] = seqno;
> ret = xe_guc_ct_send_locked(&guc->ct, action, len,
>@@ -171,7 +173,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> * we can just go ahead and signal the fence here.
> */
> if (tlb_invalidation_seqno_past(gt, seqno)) {
>- __invalidation_fence_signal(fence);
>+ __invalidation_fence_signal(xe, fence);
> } else {
> fence->invalidation_time = ktime_get();
> list_add_tail(&fence->link,
>@@ -184,7 +186,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> }
> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
> } else if (ret < 0 && fence) {
>- __invalidation_fence_signal(fence);
>+ __invalidation_fence_signal(xe, fence);
> }
> if (!ret) {
> gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
>@@ -294,7 +296,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> /* Execlists not supported */
> if (gt_to_xe(gt)->info.force_execlist) {
> if (fence)
>- __invalidation_fence_signal(fence);
>+ __invalidation_fence_signal(xe, fence);
>
> return 0;
> }
>@@ -432,6 +434,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
> int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> {
> struct xe_gt *gt = guc_to_gt(guc);
>+ struct xe_device *xe = gt_to_xe(gt);
> struct xe_gt_tlb_invalidation_fence *fence, *next;
> unsigned long flags;
>
>@@ -468,12 +471,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
>
> list_for_each_entry_safe(fence, next,
> &gt->tlb_invalidation.pending_fences, link) {
>- trace_xe_gt_tlb_invalidation_fence_recv(fence);
>+ trace_xe_gt_tlb_invalidation_fence_recv(xe, fence);
>
> if (!tlb_invalidation_seqno_past(gt, fence->seqno))
> break;
>
>- invalidation_fence_signal(fence);
>+ invalidation_fence_signal(xe, fence);
> }
>
> if (!list_empty(&gt->tlb_invalidation.pending_fences))
>diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>index cd60c009b679..ade9e7a3a0ad 100644
>--- a/drivers/gpu/drm/xe/xe_pt.c
>+++ b/drivers/gpu/drm/xe/xe_pt.c
>@@ -1137,8 +1137,9 @@ static void invalidation_fence_cb(struct dma_fence *fence,
> {
> struct invalidation_fence *ifence =
> container_of(cb, struct invalidation_fence, cb);
>+ struct xe_device *xe = gt_to_xe(ifence->gt);
>
>- trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
>+ trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
> if (!ifence->fence->error) {
> queue_work(system_wq, &ifence->work);
> } else {
>@@ -1153,8 +1154,9 @@ static void invalidation_fence_work_func(struct work_struct *w)
> {
> struct invalidation_fence *ifence =
> container_of(w, struct invalidation_fence, work);
>+ struct xe_device *xe = gt_to_xe(ifence->gt);
>
>- trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
>+ trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
> xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
> ifence->end, ifence->asid);
> }
>@@ -1166,7 +1168,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
> {
> int ret;
>
>- trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
>+ trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
>
> spin_lock_irq(&gt->tlb_invalidation.lock);
> dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
>diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
>index 81128c0f31e6..f0b3477c0e1a 100644
>--- a/drivers/gpu/drm/xe/xe_trace.h
>+++ b/drivers/gpu/drm/xe/xe_trace.h
>@@ -9,6 +9,7 @@
> #if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
> #define _XE_TRACE_H_
>
>+#include <linux/string_helpers.h>
I believe string_helpers.h is not necessary here; nothing from it
seems to be used in this file.
> #include <linux/tracepoint.h>
> #include <linux/types.h>
>
>@@ -20,58 +21,64 @@
> #include "xe_sched_job.h"
> #include "xe_vm.h"
>
>+#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
>+#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
>+#define __dev_name_eq(q) __dev_name_gt((q)->gt)
>+
> DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence),
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence),
>
> TP_STRUCT__entry(
>+ __string(dev, __dev_name_xe(xe))
> __field(struct xe_gt_tlb_invalidation_fence *, fence)
> __field(int, seqno)
> ),
>
> TP_fast_assign(
>+ __assign_str(dev);
> __entry->fence = fence;
> __entry->seqno = fence->seqno;
> ),
>
>- TP_printk("fence=%p, seqno=%d",
>- __entry->fence, __entry->seqno)
>+ TP_printk("dev=%s, fence=%p, seqno=%d",
>+ __get_str(dev), __entry->fence, __entry->seqno)
> );
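
As an aside on the v3 note: if I recall correctly, since the tracing
rework that landed for v6.10, __assign_str() derives the source string
from the matching __string() declaration, so it takes just the field
name; hence the single-argument form used here. A generic sketch of the
pattern, with made-up event and field names:

	/* Illustrative only; event and field names are made up.
	 * __string() declares a variable-length string field and records
	 * its source expression; __assign_str() copies from that same
	 * source at assign time, so no second argument is needed. */
	TRACE_EVENT(example_dev_event,
		    TP_PROTO(struct device *dev, int val),
		    TP_ARGS(dev, val),

		    TP_STRUCT__entry(
			    __string(name, dev_name(dev))
			    __field(int, val)
		    ),

		    TP_fast_assign(
			    __assign_str(name);
			    __entry->val = val;
		    ),

		    TP_printk("dev=%s, val=%d", __get_str(name), __entry->val)
	);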
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence,
> xe_gt_tlb_invalidation_fence_work_func,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
>- TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>- TP_ARGS(fence)
>+ TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
>+ TP_ARGS(xe, fence)
> );
>
> DECLARE_EVENT_CLASS(xe_exec_queue,
>@@ -79,6 +86,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> TP_ARGS(q),
>
> TP_STRUCT__entry(
>+ __string(dev, __dev_name_eq(q))
> __field(enum xe_engine_class, class)
> __field(u32, logical_mask)
> __field(u8, gt_id)
>@@ -89,6 +97,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> ),
>
> TP_fast_assign(
>+ __assign_str(dev);
> __entry->class = q->class;
> __entry->logical_mask = q->logical_mask;
> __entry->gt_id = q->gt->info.id;
>@@ -98,8 +107,8 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> __entry->flags = q->flags;
> ),
>
>- TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
>- __entry->class, __entry->logical_mask,
>+ TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
>+ __get_str(dev), __entry->class, __entry->logical_mask,
> __entry->gt_id, __entry->width, __entry->guc_id,
> __entry->guc_state, __entry->flags)
> );
>@@ -199,6 +208,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
> TP_ARGS(job),
>
> TP_STRUCT__entry(
>+ __string(dev, __dev_name_eq(job->q))
> __field(u32, seqno)
> __field(u32, lrc_seqno)
> __field(u16, guc_id)
>@@ -210,6 +220,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
> ),
>
> TP_fast_assign(
>+ __assign_str(dev);
> __entry->seqno = xe_sched_job_seqno(job);
> __entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
> __entry->guc_id = job->q->guc->id;
>@@ -221,8 +232,8 @@ DECLARE_EVENT_CLASS(xe_sched_job,
> __entry->batch_addr = (u64)job->ptrs[0].batch_addr;
> ),
>
>- TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
>- __entry->fence, __entry->seqno,
>+ TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
>+ __get_str(dev), __entry->fence, __entry->seqno,
> __entry->lrc_seqno, __entry->guc_id,
> __entry->batch_addr, __entry->guc_state,
> __entry->flags, __entry->error)
>@@ -268,17 +279,19 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
> TP_ARGS(msg),
>
> TP_STRUCT__entry(
>+ __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data)))
Well, the definition of struct xe_sched_msg says that private_data is
an opaque pointer that is "backend defined". It seems that, today, its
value will always be a pointer to a struct xe_exec_queue, so the cast
above is expected to work. I am just not sure that is a guarantee for
the future.
I would prefer that the exec queue be passed explicitly to the tracing
function (see the sketch below), but I guess we can go ahead with this,
since the cast is already being done in this event for guc_id.
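
For illustration, a rough sketch of what I mean; this is hypothetical,
not something this series needs to do, and the names are simply the
ones already used in this patch:

	/* Hypothetical variant: take the queue explicitly instead of
	 * casting msg->private_data inside the event. */
	DECLARE_EVENT_CLASS(xe_sched_msg,
		    TP_PROTO(struct xe_sched_msg *msg, struct xe_exec_queue *q),
		    TP_ARGS(msg, q),

		    TP_STRUCT__entry(
			    __string(dev, __dev_name_eq(q))
			    __field(u32, opcode)
			    __field(u16, guc_id)
		    ),

		    TP_fast_assign(
			    __assign_str(dev);
			    __entry->opcode = msg->opcode;
			    __entry->guc_id = q->guc->id;
		    ),

		    TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev),
			      __entry->guc_id, __entry->opcode)
	);

That way no assumption about private_data leaks into the tracepoint.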
So, with the inclusion of string_helpers.h removed,
Reviewed-by: Gustavo Sousa <gustavo.sousa at intel.com>
> __field(u32, opcode)
> __field(u16, guc_id)
> ),
>
> TP_fast_assign(
>+ __assign_str(dev);
> __entry->opcode = msg->opcode;
> __entry->guc_id =
> ((struct xe_exec_queue *)msg->private_data)->guc->id;
> ),
>
>- TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
>+ TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id,
> __entry->opcode)
> );
>
>@@ -297,19 +310,21 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
> TP_ARGS(fence),
>
> TP_STRUCT__entry(
>+ __string(dev, __dev_name_gt(fence->ctx->gt))
> __field(u64, ctx)
> __field(u32, seqno)
> __field(struct xe_hw_fence *, fence)
> ),
>
> TP_fast_assign(
>+ __assign_str(dev);
> __entry->ctx = fence->dma.context;
> __entry->seqno = fence->dma.seqno;
> __entry->fence = fence;
> ),
>
>- TP_printk("ctx=0x%016llx, fence=%p, seqno=%u",
>- __entry->ctx, __entry->fence, __entry->seqno)
>+ TP_printk("dev=%s, ctx=0x%016llx, fence=%p, seqno=%u",
>+ __get_str(dev), __entry->ctx, __entry->fence, __entry->seqno)
> );
>
> DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
>--
>2.34.1
>