[Intel-xe] [PATCH] drm/xe: Move VM entries print to a trace
Lucas De Marchi
lucas.demarchi at intel.com
Fri Oct 20 17:48:42 UTC 2023
On Fri, Oct 20, 2023 at 05:21:55PM +0000, Stuart Summers wrote:
>While this information can be useful for debug, it is a little
>verbose to dump during a CI run, resulting in test failures due
>to disk limit issues in the CI machines.
>
>For now, move these debug prints to a trace for manual debug.
>
>Signed-off-by: Stuart Summers <stuart.summers at intel.com>
I think this is the right direction... maybe get an ack from
Thomas Hellström / Matthew Brost. From a quick look at the git history
(you will need to look at the xe branch, not drm-xe-next),
it seems they are the original authors of those debug prints.
Some comments below.
>---
> drivers/gpu/drm/xe/xe_pt.c | 35 +++++-------------------------
> drivers/gpu/drm/xe/xe_pt.h | 8 +++++++
> drivers/gpu/drm/xe/xe_trace.h | 41 +++++++++++++++++++++++++++++++++++
> 3 files changed, 54 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>index 31afab617b4e..eb905878c4cb 100644
>--- a/drivers/gpu/drm/xe/xe_pt.c
>+++ b/drivers/gpu/drm/xe/xe_pt.c
>@@ -908,6 +908,9 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
>
> pt_dir->dir.entries[j_] = &newpte->base;
> }
>+
>+ trace_xe_pt_commit_bind(&entries[i]);
>+
> kfree(entries[i].pt_entries);
> }
> }
>@@ -929,34 +932,6 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
> return err;
> }
>
>-static void xe_vm_dbg_print_entries(struct xe_device *xe,
>- const struct xe_vm_pgtable_update *entries,
>- unsigned int num_entries)
>-#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
>-{
>- unsigned int i;
>-
>- vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
>- for (i = 0; i < num_entries; i++) {
>- const struct xe_vm_pgtable_update *entry = &entries[i];
>- struct xe_pt *xe_pt = entry->pt;
>- u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
>- u64 end;
>- u64 start;
>-
>- xe_assert(xe, !entry->pt->is_compact);
>- start = entry->ofs * page_size;
>- end = start + page_size * entry->qwords;
>- vm_dbg(&xe->drm,
>- "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
>- i, xe_pt->level, entry->ofs, entry->qwords,
>- xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
>- }
>-}
>-#else
>-{}
>-#endif
>-
> #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
>
> static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
>@@ -1276,7 +1251,6 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
> goto err;
> xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
>
>- xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
> num_entries);
>
>@@ -1563,6 +1537,8 @@ xe_pt_commit_unbind(struct xe_vma *vma,
> pt_dir->dir.entries[i] = NULL;
> }
> }
>+
>+ trace_xe_pt_commit_unbind(entry);
> }
> }
>
>@@ -1627,7 +1603,6 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
> num_entries = xe_pt_stage_unbind(tile, vma, entries);
> xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
>
>- xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
> num_entries);
>
so now we are tracing the entries individually on xe_pt_commit_bind()
and xe_pt_commit_unbind(). However there are a few direct users of the
previous functions this was printed on:
drivers/gpu/drm/xe/xe_gt_pagefault.c: fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
drivers/gpu/drm/xe/xe_vm.c: fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
drivers/gpu/drm/xe/xe_vm.c: fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
Are we goint to miss those events now?
If we have dedicated traces for all the interesting events, maybe we can
go ahead and a) remove vm_dbg(); b) remove CONFIG_DRM_XE_DEBUG_VM... the
trace calls will be just NOP instructions unless they are enabled.
>diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
>index d5460e58dbbf..c7e5f7111227 100644
>--- a/drivers/gpu/drm/xe/xe_pt.h
>+++ b/drivers/gpu/drm/xe/xe_pt.h
>@@ -18,6 +18,14 @@ struct xe_tile;
> struct xe_vm;
> struct xe_vma;
>
>+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
>+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
>+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
>+#else
>+#define xe_pt_set_addr(__xe_pt, __addr)
>+#define xe_pt_addr(__xe_pt) 0ull
>+#endif
did you forget to remove those from the .c? But as I said, maybe we can go ahead
and simply remove these, replacing them inline in their callers
Lucas De Marchi
>+
> #define xe_pt_write(xe, map, idx, data) \
> xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
>
>diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
>index e32f1cad51d9..24d898061d04 100644
>--- a/drivers/gpu/drm/xe/xe_trace.h
>+++ b/drivers/gpu/drm/xe/xe_trace.h
>@@ -17,8 +17,49 @@
> #include "xe_gt_tlb_invalidation_types.h"
> #include "xe_gt_types.h"
> #include "xe_guc_exec_queue_types.h"
>+#include "xe_pt_types.h"
> #include "xe_sched_job.h"
> #include "xe_vm.h"
>+#include "xe_pt.h"
>+
>+DECLARE_EVENT_CLASS(xe_vm_pgtable_update,
>+ TP_PROTO(struct xe_vm_pgtable_update *update),
>+ TP_ARGS(update),
>+
>+ TP_STRUCT__entry(
>+ __field(u64, page_size)
>+ __field(u64, end)
>+ __field(u64, start)
>+ __field(u64, pt_addr)
>+ __field(unsigned int, level)
>+ __field(u32, ofs)
>+ __field(u32, qwords)
>+ ),
>+
>+ TP_fast_assign(
>+ __entry->level = xe_pt_shift(update->pt->level);
>+ __entry->page_size = 1ull << __entry->level;
>+ __entry->pt_addr = xe_pt_addr(update->pt);
>+ __entry->ofs = update->ofs;
>+ __entry->qwords = update->qwords;
>+ __entry->start = __entry->pt_addr + update->ofs * __entry->page_size;
>+ __entry->end = __entry->start + __entry->page_size * update->qwords;
>+ ),
>+
>+ TP_printk("Update level %u at (%u + %u) [%llx...%llx]",
>+ __entry->level, __entry->ofs, __entry->qwords,
>+ __entry->start, __entry->end)
>+);
>+
>+DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_bind,
>+ TP_PROTO(struct xe_vm_pgtable_update *entry),
>+ TP_ARGS(entry)
>+);
>+
>+DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_unbind,
>+ TP_PROTO(struct xe_vm_pgtable_update *entry),
>+ TP_ARGS(entry)
>+);
>
> DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
> TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
>--
>2.34.1
>
More information about the Intel-xe
mailing list