[Intel-xe] [PATCH v2 2/2] drm/xe: Add reg read/write trace

Sripada, Radhakrishna radhakrishna.sripada at intel.com
Tue Jun 20 22:38:50 UTC 2023



> -----Original Message-----
> From: De Marchi, Lucas <lucas.demarchi at intel.com>
> Sent: Tuesday, June 20, 2023 2:28 PM
> To: Sripada, Radhakrishna <radhakrishna.sripada at intel.com>
> Cc: intel-xe at lists.freedesktop.org; Roper, Matthew D
> <matthew.d.roper at intel.com>
> Subject: Re: [Intel-xe] [PATCH v2 2/2] drm/xe: Add reg read/write trace
> 
> On Tue, Jun 20, 2023 at 12:32:29PM -0700, Radhakrishna Sripada wrote:
> >This will help debug register read/writes and provide
> >a way to trace all the mmio transactions.
> >
> >In order to avoid spam from xe_mmio_wait32,
> >xe_mmio_read32_notrace is introduced and used.
> 
> feedback given before was to avoid the exported _notrace stuff. If there
> was a justification for it somewhere I probably lost it. Could you
> clarify? If I understand the above paragraph correctly, it's due to
> the fact the xe_mmio_wait32() reads the register multiple times...
> why would we want to avoid the "spam"? If we are reading it multiple
> times, the trace should show it instead of hiding.  If we demonstrate
> there is an actual impact, then xe_mmio_wait32() should be adjusted, not
> the rest of the shim layer in intel_uncore.h
Unless I missed something, the earlier feedback I received was on the use of inline in the function definitions.
xe_mmio_wait32() is, I believe, loosely based on __intel_wait_for_register() (at least judging by the compat headers),
where the no-trace version of the register read is used during the actual polling and only the final value is traced
before returning from the function. A similar approach is adopted here.

If that does not add much value, then I believe we can use the traceable version here.
But if developers want a stack trace for each trace output, the spam becomes even
messier.
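
For reference, a minimal sketch of the pattern I am describing (a hypothetical
wait_for_reg() helper with a fixed 1us poll, not the actual
__intel_wait_for_register() body):

/*
 * Poll with the non-traced read so the loop does not flood the trace
 * buffer; emit a single trace event with the final value.
 */
static int wait_for_reg(struct xe_gt *gt, struct xe_reg reg,
			u32 mask, u32 val, u32 timeout_us)
{
	u32 read = 0;
	int ret = -ETIMEDOUT;

	while (timeout_us--) {
		read = xe_mmio_read32_notrace(gt, reg);
		if ((read & mask) == val) {
			ret = 0;
			break;
		}
		udelay(1);
	}

	/* One event per wait instead of one per poll iteration. */
	trace_xe_reg_rw(false, &reg, read, sizeof(read));

	return ret;
}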

> 
> >
> >v2: Do not use inline for the function declarations.(MattR)
> >
> >Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> >Cc: Matt Roper <matthew.d.roper at intel.com>
> >Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
> >---
> > .../drm/xe/compat-i915-headers/intel_uncore.h |  8 +-
> > drivers/gpu/drm/xe/xe_mmio.c                  | 80 ++++++++++++++++++-
> > drivers/gpu/drm/xe/xe_mmio.h                  |  5 ++
> > drivers/gpu/drm/xe/xe_trace.h                 | 26 ++++++
> > 4 files changed, 111 insertions(+), 8 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
> >index fae6213d26f1..036f3b963a84 100644
> >--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
> >+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
> >@@ -113,7 +113,7 @@ static inline u32 intel_uncore_read_fw(struct fake_uncore *uncore,
> > {
> > 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
> >
> >-	return xe_mmio_read32(__fake_uncore_to_gt(uncore), reg);
> >+	return xe_mmio_read32_notrace(__fake_uncore_to_gt(uncore), reg);
> 
> how would you even know that intel_uncore_read_fw() is non-traceable?
> ditto for all the other functions in this header.
In i915, intel_uncore_read_fw() and the other _fw functions translate to raw_uncore_read()
and its variants, which do not include the GEN6 header/footer where the traces live,
and hence are the non-trace variants. Unless I am missing something.
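
To spell out the layering I mean (paraphrased from memory rather than quoted
from the i915 source, so treat it as an approximation):

/* raw accessor: no forcewake dance, no tracing */
static inline u32 __raw_uncore_read32(struct intel_uncore *uncore,
				      i915_reg_t reg)
{
	return readl(uncore->regs + i915_mmio_reg_offset(reg));
}

/* gen6+ accessor: the "header/footer" around the raw read, plus the trace */
static u32 gen6_read32(struct intel_uncore *uncore, i915_reg_t reg)
{
	u32 val;

	/* GEN6_READ_HEADER: save irqs, grab forcewake, ... */
	val = __raw_uncore_read32(uncore, reg);
	trace_i915_reg_rw(false, reg, val, sizeof(val), true);
	/* GEN6_READ_FOOTER: drop forcewake, restore irqs */

	return val;
}

The _fw entry points map to the raw accessor, so they never hit the trace call.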

-- Radhakrishna (RK) Sripada
> 
> 
> Lucas De Marchi
> 
> > }
> >
> > static inline void intel_uncore_write_fw(struct fake_uncore *uncore,
> >@@ -121,7 +121,7 @@ static inline void intel_uncore_write_fw(struct fake_uncore *uncore,
> > {
> > 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
> >
> >-	xe_mmio_write32(__fake_uncore_to_gt(uncore), reg, val);
> >+	xe_mmio_write32_notrace(__fake_uncore_to_gt(uncore), reg, val);
> > }
> >
> > static inline u32 intel_uncore_read_notrace(struct fake_uncore *uncore,
> >@@ -129,7 +129,7 @@ static inline u32 intel_uncore_read_notrace(struct fake_uncore *uncore,
> > {
> > 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
> >
> >-	return xe_mmio_read32(__fake_uncore_to_gt(uncore), reg);
> >+	return xe_mmio_read32_notrace(__fake_uncore_to_gt(uncore), reg);
> > }
> >
> > static inline void intel_uncore_write_notrace(struct fake_uncore *uncore,
> >@@ -137,7 +137,7 @@ static inline void intel_uncore_write_notrace(struct fake_uncore *uncore,
> > {
> > 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
> >
> >-	xe_mmio_write32(__fake_uncore_to_gt(uncore), reg, val);
> >+	xe_mmio_write32_notrace(__fake_uncore_to_gt(uncore), reg, val);
> > }
> >
> > #endif /* __INTEL_UNCORE_H__ */
> >diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> >index 78c7f839f783..ef2192a2d060 100644
> >--- a/drivers/gpu/drm/xe/xe_mmio.c
> >+++ b/drivers/gpu/drm/xe/xe_mmio.c
> >@@ -19,6 +19,7 @@
> > #include "xe_gt_mcr.h"
> > #include "xe_macros.h"
> > #include "xe_module.h"
> >+#include "xe_trace.h"
> >
> > #define XEHP_MTCFG_ADDR		XE_REG(0x101800)
> > #define TILE_COUNT		REG_GENMASK(15, 8)
> >@@ -429,7 +430,7 @@ static const struct xe_reg mmio_read_whitelist[] = {
> > 	RING_TIMESTAMP(RENDER_RING_BASE),
> > };
> >
> >-u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> >+u8 xe_mmio_read8_notrace(struct xe_gt *gt, struct xe_reg reg)
> > {
> > 	struct xe_tile *tile = gt_to_tile(gt);
> >
> >@@ -439,6 +440,31 @@ u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> > 	return readb(tile->mmio.regs + reg.addr);
> > }
> >
> >+u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> >+{
> >+	struct xe_tile *tile = gt_to_tile(gt);
> >+	u8 val;
> >+
> >+	if (reg.addr < gt->mmio.adj_limit)
> >+		reg.addr += gt->mmio.adj_offset;
> >+
> >+	val = readb(tile->mmio.regs + reg.addr);
> >+
> >+	trace_xe_reg_rw(false, &reg, val, sizeof(val));
> >+
> >+	return val;
> >+}
> >+
> >+void xe_mmio_write32_notrace(struct xe_gt *gt, struct xe_reg reg, u32 val)
> >+{
> >+	struct xe_tile *tile = gt_to_tile(gt);
> >+
> >+	if (reg.addr < gt->mmio.adj_limit)
> >+		reg.addr += gt->mmio.adj_offset;
> >+
> >+	writel(val, tile->mmio.regs + reg.addr);
> >+}
> >+
> > void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
> > {
> > 	struct xe_tile *tile = gt_to_tile(gt);
> >@@ -446,10 +472,12 @@ void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
> > 	if (reg.addr < gt->mmio.adj_limit)
> > 		reg.addr += gt->mmio.adj_offset;
> >
> >+	trace_xe_reg_rw(true, &reg, val, sizeof(val));
> >+
> > 	writel(val, tile->mmio.regs + reg.addr);
> > }
> >
> >-u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
> >+u32 xe_mmio_read32_notrace(struct xe_gt *gt, struct xe_reg reg)
> > {
> > 	struct xe_tile *tile = gt_to_tile(gt);
> >
> >@@ -459,6 +487,21 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
> > 	return readl(tile->mmio.regs + reg.addr);
> > }
> >
> >+u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
> >+{
> >+	struct xe_tile *tile = gt_to_tile(gt);
> >+	u32 val;
> >+
> >+	if (reg.addr < gt->mmio.adj_limit)
> >+		reg.addr += gt->mmio.adj_offset;
> >+
> >+	val = readl(tile->mmio.regs + reg.addr);
> >+
> >+	trace_xe_reg_rw(false, &reg, val, sizeof(val));
> >+
> >+	return val;
> >+}
> >+
> > u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
> > {
> > 	u32 old, reg_val;
> >@@ -470,6 +513,16 @@ u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
> > 	return old;
> > }
> >
> >+void xe_mmio_write64_notrace(struct xe_gt *gt, struct xe_reg reg, u64 val)
> >+{
> >+	struct xe_tile *tile = gt_to_tile(gt);
> >+
> >+	if (reg.addr < gt->mmio.adj_limit)
> >+		reg.addr += gt->mmio.adj_offset;
> >+
> >+	writeq(val, tile->mmio.regs + reg.addr);
> >+}
> >+
> > void xe_mmio_write64(struct xe_gt *gt, struct xe_reg reg, u64 val)
> > {
> > 	struct xe_tile *tile = gt_to_tile(gt);
> >@@ -477,10 +530,12 @@ void xe_mmio_write64(struct xe_gt *gt, struct xe_reg reg, u64 val)
> > 	if (reg.addr < gt->mmio.adj_limit)
> > 		reg.addr += gt->mmio.adj_offset;
> >
> >+	trace_xe_reg_rw(true, &reg, val, sizeof(val));
> >+
> > 	writeq(val, tile->mmio.regs + reg.addr);
> > }
> >
> >-u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
> >+u64 xe_mmio_read64_notrace(struct xe_gt *gt, struct xe_reg reg)
> > {
> > 	struct xe_tile *tile = gt_to_tile(gt);
> >
> >@@ -490,6 +545,21 @@ u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
> > 	return readq(tile->mmio.regs + reg.addr);
> > }
> >
> >+u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
> >+{
> >+	struct xe_tile *tile = gt_to_tile(gt);
> >+	u64 val;
> >+
> >+	if (reg.addr < gt->mmio.adj_limit)
> >+		reg.addr += gt->mmio.adj_offset;
> >+
> >+	val = readq(tile->mmio.regs + reg.addr);
> >+
> >+	trace_xe_reg_rw(false, &reg, val, sizeof(val));
> >+
> >+	return val;
> >+}
> >+
> > int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val,
> > 			       u32 mask, u32 eval)
> > {
> >@@ -511,7 +581,7 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask,
> > 	u32 read;
> >
> > 	for (;;) {
> >-		read = xe_mmio_read32(gt, reg);
> >+		read = xe_mmio_read32_notrace(gt, reg);
> > 		if ((read & mask) == val) {
> > 			ret = 0;
> > 			break;
> >@@ -531,6 +601,8 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask,
> > 		wait <<= 1;
> > 	}
> >
> >+	trace_xe_reg_rw(false, &reg, read, sizeof(read));
> >+
> > 	if (out_val)
> > 		*out_val = read;
> >
> >diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
> >index c06fcac2cc5b..02364ba9dda2 100644
> >--- a/drivers/gpu/drm/xe/xe_mmio.h
> >+++ b/drivers/gpu/drm/xe/xe_mmio.h
> >@@ -20,11 +20,16 @@ struct xe_device;
> > #define GEN12_LMEM_BAR		2
> >
> > int xe_mmio_init(struct xe_device *xe);
> >+u8 xe_mmio_read8_notrace(struct xe_gt *gt, struct xe_reg reg);
> > u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
> >+void xe_mmio_write32_notrace(struct xe_gt *gt, struct xe_reg reg, u32 val);
> > void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
> >+u32 xe_mmio_read32_notrace(struct xe_gt *gt, struct xe_reg reg);
> > u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
> > u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
> >+void xe_mmio_write64_notrace(struct xe_gt *gt, struct xe_reg reg, u64 val);
> > void xe_mmio_write64(struct xe_gt *gt, struct xe_reg reg, u64 val);
> >+u64 xe_mmio_read64_notrace(struct xe_gt *gt, struct xe_reg reg);
> > u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg);
> > int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val,
> > 			       u32 mask, u32 eval);
> >diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> >index 2f8eb7ebe9a7..a5d514c190a2 100644
> >--- a/drivers/gpu/drm/xe/xe_trace.h
> >+++ b/drivers/gpu/drm/xe/xe_trace.h
> >@@ -12,6 +12,7 @@
> > #include <linux/tracepoint.h>
> > #include <linux/types.h>
> >
> >+#include "regs/xe_reg_defs.h"
> > #include "xe_bo_types.h"
> > #include "xe_engine_types.h"
> > #include "xe_gt_tlb_invalidation_types.h"
> >@@ -507,6 +508,31 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
> > 	     TP_ARGS(vm)
> > );
> >
> >+TRACE_EVENT(xe_reg_rw,
> >+	    TP_PROTO(bool write, struct xe_reg *reg, u64 val, int len),
> >+	    TP_ARGS(write, reg, val, len),
> >+
> >+	    TP_STRUCT__entry(
> >+		    __field(u64, val)
> >+		    __field(struct xe_reg *, reg)
> >+		    __field(u16, write)
> >+		    __field(u16, len)
> >+		    ),
> >+
> >+	    TP_fast_assign(
> >+		   __entry->val = (u64)val;
> >+		   __entry->reg = reg;
> >+		   __entry->write = write;
> >+		   __entry->len = len;
> >+		),
> >+
> >+	    TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
> >+		      __entry->write ? "write" : "read",
> >+		      __entry->reg->addr, __entry->len,
> >+		      (u32)(__entry->val & 0xffffffff),
> >+		      (u32)(__entry->val >> 32))
> >+);
> >+
> > TRACE_EVENT(xe_guc_ct_h2g_flow_control,
> > 	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
> > 	    TP_ARGS(_head, _tail, size, space, len),
> >--
> >2.34.1
> >
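
One more note on usage, since it is not in the commit message: this is a
standard tracepoint, so (assuming TRACE_SYSTEM stays "xe" and tracefs is
mounted in the usual place) it can be enabled at runtime with

  echo 1 > /sys/kernel/tracing/events/xe/xe_reg_rw/enable

and each mmio access then shows up in /sys/kernel/tracing/trace as a line
built from the TP_printk above, e.g. something like

  write reg=0x101800, len=4, val=(0xdeadbeef, 0x0)

where the second val field is the upper 32 bits and stays 0 for accesses
narrower than 64 bits.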

