[Intel-xe] [PATCH] drm/xe: Move VM entries print to a trace

Lucas De Marchi lucas.demarchi at intel.com
Fri Oct 20 17:48:42 UTC 2023


On Fri, Oct 20, 2023 at 05:21:55PM +0000, Stuart Summers wrote:
>While this information can be useful for debug, it is a little
>verbose to dump during a CI run, resulting in test failures due
>to disk limit issues in the CI machines.
>
>For now, move these debug prints to a trace for manual debug.
>
>Signed-off-by: Stuart Summers <stuart.summers at intel.com>

I think this is the right direction... maybe get an ack from
Thomas Hellström / Matthew Brost. From a quick look at the git history
(you will need to look at the xe branch, not drm-xe-next),
it seems they are the original authors of those debug prints.

Some comments below.

>---
> drivers/gpu/drm/xe/xe_pt.c    | 35 +++++-------------------------
> drivers/gpu/drm/xe/xe_pt.h    |  8 +++++++
> drivers/gpu/drm/xe/xe_trace.h | 41 +++++++++++++++++++++++++++++++++++
> 3 files changed, 54 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>index 31afab617b4e..eb905878c4cb 100644
>--- a/drivers/gpu/drm/xe/xe_pt.c
>+++ b/drivers/gpu/drm/xe/xe_pt.c
>@@ -908,6 +908,9 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
>
> 			pt_dir->dir.entries[j_] = &newpte->base;
> 		}
>+
>+		trace_xe_pt_commit_bind(&entries[i]);
>+
> 		kfree(entries[i].pt_entries);
> 	}
> }
>@@ -929,34 +932,6 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
> 	return err;
> }
>
>-static void xe_vm_dbg_print_entries(struct xe_device *xe,
>-				    const struct xe_vm_pgtable_update *entries,
>-				    unsigned int num_entries)
>-#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
>-{
>-	unsigned int i;
>-
>-	vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
>-	for (i = 0; i < num_entries; i++) {
>-		const struct xe_vm_pgtable_update *entry = &entries[i];
>-		struct xe_pt *xe_pt = entry->pt;
>-		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
>-		u64 end;
>-		u64 start;
>-
>-		xe_assert(xe, !entry->pt->is_compact);
>-		start = entry->ofs * page_size;
>-		end = start + page_size * entry->qwords;
>-		vm_dbg(&xe->drm,
>-		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
>-		       i, xe_pt->level, entry->ofs, entry->qwords,
>-		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
>-	}
>-}
>-#else
>-{}
>-#endif
>-
> #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
>
> static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
>@@ -1276,7 +1251,6 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
> 		goto err;
> 	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
>
>-	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> 	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
> 				   num_entries);
>
>@@ -1563,6 +1537,8 @@ xe_pt_commit_unbind(struct xe_vma *vma,
> 				pt_dir->dir.entries[i] = NULL;
> 			}
> 		}
>+
>+		trace_xe_pt_commit_unbind(entry);
> 	}
> }
>
>@@ -1627,7 +1603,6 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
> 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
> 	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
>
>-	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);


> 	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
> 				   num_entries);
>

so now we are tracing the entries individually on xe_pt_commit_bind()
and xe_pt_commit_unbind(). However there are a few direct users of the
previous functions this was printed on:

drivers/gpu/drm/xe/xe_gt_pagefault.c:   fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
drivers/gpu/drm/xe/xe_vm.c:             fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
drivers/gpu/drm/xe/xe_vm.c:             fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],

Are we goint to miss those events now?

If we have dedicated traces for all the interesting events, maybe we can
go ahead and a) remove vm_dbg(); b) remove CONFIG_DRM_XE_DEBUG_VM... the
trace calls will be just NOP instructions unless they are enabled.


>diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
>index d5460e58dbbf..c7e5f7111227 100644
>--- a/drivers/gpu/drm/xe/xe_pt.h
>+++ b/drivers/gpu/drm/xe/xe_pt.h
>@@ -18,6 +18,14 @@ struct xe_tile;
> struct xe_vm;
> struct xe_vma;
>
>+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
>+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
>+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
>+#else
>+#define xe_pt_set_addr(__xe_pt, __addr)
>+#define xe_pt_addr(__xe_pt) 0ull
>+#endif

did you forget to remove those from the .c? But as I said, maybe we can go ahead
and simply remove these, replacing them inline in their callers

Lucas De Marchi

>+
> #define xe_pt_write(xe, map, idx, data) \
> 	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
>
>diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
>index e32f1cad51d9..24d898061d04 100644
>--- a/drivers/gpu/drm/xe/xe_trace.h
>+++ b/drivers/gpu/drm/xe/xe_trace.h
>@@ -17,8 +17,49 @@
> #include "xe_gt_tlb_invalidation_types.h"
> #include "xe_gt_types.h"
> #include "xe_guc_exec_queue_types.h"
>+#include "xe_pt_types.h"
> #include "xe_sched_job.h"
> #include "xe_vm.h"
>+#include "xe_pt.h"
>+
>+DECLARE_EVENT_CLASS(xe_vm_pgtable_update,
>+		    TP_PROTO(struct xe_vm_pgtable_update *update),
>+		    TP_ARGS(update),
>+
>+		    TP_STRUCT__entry(
>+			     __field(u64, page_size)
>+			     __field(u64, end)
>+			     __field(u64, start)
>+			     __field(u64, pt_addr)
>+			     __field(unsigned int, level)
>+			     __field(u32, ofs)
>+			     __field(u32, qwords)
>+			     ),
>+
>+		    TP_fast_assign(
>+			   __entry->level = xe_pt_shift(update->pt->level);
>+			   __entry->page_size = 1ull << __entry->level;
>+			   __entry->pt_addr = xe_pt_addr(update->pt);
>+			   __entry->ofs = update->ofs;
>+			   __entry->qwords = update->qwords;
>+			   __entry->start = __entry->pt_addr + update->ofs * __entry->page_size;
>+			   __entry->end = __entry->start + __entry->page_size * update->qwords;
>+			   ),
>+
>+		    TP_printk("Update level %u at (%u + %u) [%llx...%llx]",
>+			      __entry->level, __entry->ofs, __entry->qwords,
>+			      __entry->start, __entry->end)
>+);
>+
>+DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_bind,
>+	     TP_PROTO(struct xe_vm_pgtable_update *entry),
>+	     TP_ARGS(entry)
>+);
>+
>+DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_unbind,
>+	     TP_PROTO(struct xe_vm_pgtable_update *entry),
>+	     TP_ARGS(entry)
>+);
>
> DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
> 		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>-- 
>2.34.1
>


More information about the Intel-xe mailing list