[Intel-xe] [PATCH] drm/xe: Move VM entries print to a trace

Summers, Stuart stuart.summers at intel.com
Fri Oct 20 19:14:39 UTC 2023


On Fri, 2023-10-20 at 12:48 -0500, Lucas De Marchi wrote:
> On Fri, Oct 20, 2023 at 05:21:55PM +0000, Stuart Summers wrote:
> > While this information can be useful for debug, it is a little
> > verbose to dump during a CI run, resulting in test failures due
> > to disk limit issues in the CI machines.
> > 
> > For now, move these debug prints to a trace for manual debug.
> > 
> > Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> 
> I think this is the right direction... maybe get an ack from
> Thomas Hellström / Matthew Brost. From a quick look at the git
> history
> (you will need to look at the xe branch, not drm-xe-next),
> it seems they are the original authors of those debug prints.

Makes sense, CC Thomas/Matt. Also adding Brian here since he had some
feedback separately.

> 
> Some comments below.
> 
> > ---
> > drivers/gpu/drm/xe/xe_pt.c    | 35 +++++-------------------------
> > drivers/gpu/drm/xe/xe_pt.h    |  8 +++++++
> > drivers/gpu/drm/xe/xe_trace.h | 41
> > +++++++++++++++++++++++++++++++++++
> > 3 files changed, 54 insertions(+), 30 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c
> > b/drivers/gpu/drm/xe/xe_pt.c
> > index 31afab617b4e..eb905878c4cb 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -908,6 +908,9 @@ static void xe_pt_commit_bind(struct xe_vma
> > *vma,
> > 
> >                         pt_dir->dir.entries[j_] = &newpte->base;
> >                 }
> > +
> > +               trace_xe_pt_commit_bind(&entries[i]);
> > +
> >                 kfree(entries[i].pt_entries);
> >         }
> > }
> > @@ -929,34 +932,6 @@ xe_pt_prepare_bind(struct xe_tile *tile,
> > struct xe_vma *vma,
> >         return err;
> > }
> > 
> > -static void xe_vm_dbg_print_entries(struct xe_device *xe,
> > -                                   const struct
> > xe_vm_pgtable_update *entries,
> > -                                   unsigned int num_entries)
> > -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
> > -{
> > -       unsigned int i;
> > -
> > -       vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
> > -       for (i = 0; i < num_entries; i++) {
> > -               const struct xe_vm_pgtable_update *entry =
> > &entries[i];
> > -               struct xe_pt *xe_pt = entry->pt;
> > -               u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
> > -               u64 end;
> > -               u64 start;
> > -
> > -               xe_assert(xe, !entry->pt->is_compact);
> > -               start = entry->ofs * page_size;
> > -               end = start + page_size * entry->qwords;
> > -               vm_dbg(&xe->drm,
> > -                      "\t%u: Update level %u at (%u + %u)
> > [%llx...%llx) f:%x\n",
> > -                      i, xe_pt->level, entry->ofs, entry->qwords,
> > -                      xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt)
> > + end, 0);
> > -       }
> > -}
> > -#else
> > -{}
> > -#endif
> > -
> > #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
> > 
> > static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
> > @@ -1276,7 +1251,6 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> > xe_vma *vma, struct xe_exec_queue
> >                 goto err;
> >         xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
> > 
> > -       xe_vm_dbg_print_entries(tile_to_xe(tile), entries,
> > num_entries);
> >         xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
> >                                    num_entries);
> > 
> > @@ -1563,6 +1537,8 @@ xe_pt_commit_unbind(struct xe_vma *vma,
> >                                 pt_dir->dir.entries[i] = NULL;
> >                         }
> >                 }
> > +
> > +               trace_xe_pt_commit_unbind(entry);
> >         }
> > }
> > 
> > @@ -1627,7 +1603,6 @@ __xe_pt_unbind_vma(struct xe_tile *tile,
> > struct xe_vma *vma, struct xe_exec_queu
> >         num_entries = xe_pt_stage_unbind(tile, vma, entries);
> >         xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
> > 
> > -       xe_vm_dbg_print_entries(tile_to_xe(tile), entries,
> > num_entries);
> 
> 
> >         xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
> >                                    num_entries);
> > 
> 
> so now we are tracing the entries individually on xe_pt_commit_bind()
> and xe_pt_commit_unbind(). However there are a few direct users of
> the
> previous functions this was printed on:
> 
> drivers/gpu/drm/xe/xe_gt_pagefault.c:   fence =
> __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
> drivers/gpu/drm/xe/xe_vm.c:             fence =
> __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> drivers/gpu/drm/xe/xe_vm.c:             fence =
> __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
> 
> Are we goint to miss those events now?
> 
> If we have dedicated traces for all the interesting events, maybe we
> can
> go ahead and a) remove vm_dbg(); b) remove CONFIG_DRM_XE_DEBUG_VM...

Well I wasn't planning on taking a big hammer in this patch, it's more
piecemeal. Do you consider that a blocker for this change?

> the
> trace calls will be just NOP instructions unless they are enabled.

Well that should be part of the tracing infrastructure anyway though.

> 
> 
> > diff --git a/drivers/gpu/drm/xe/xe_pt.h
> > b/drivers/gpu/drm/xe/xe_pt.h
> > index d5460e58dbbf..c7e5f7111227 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.h
> > +++ b/drivers/gpu/drm/xe/xe_pt.h
> > @@ -18,6 +18,14 @@ struct xe_tile;
> > struct xe_vm;
> > struct xe_vma;
> > 
> > +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
> > +#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr =
> > (__addr))
> > +#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
> > +#else
> > +#define xe_pt_set_addr(__xe_pt, __addr)
> > +#define xe_pt_addr(__xe_pt) 0ull
> > +#endif
> 
> did you forget to remove those from the .c? But as I said, maybe we
> can go ahead
> and simply remove these, replacing them inline in their callers

Heh, I swear I had done that locally... I kind of like the idea of
keeping this as-is for this patch at least (moved to the header of
course). We can revisit as part of a DEBUG_VM removal if that makes
sense. But there still might be a use case where we want to print the
rest of the debug messages without DEBUG_VM enabled today. And anyway
CI will run with DEBUG_VM=y for now anyway.

Thanks,
Stuart

> 
> Lucas De Marchi
> 
> > +
> > #define xe_pt_write(xe, map, idx, data) \
> >         xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_trace.h
> > b/drivers/gpu/drm/xe/xe_trace.h
> > index e32f1cad51d9..24d898061d04 100644
> > --- a/drivers/gpu/drm/xe/xe_trace.h
> > +++ b/drivers/gpu/drm/xe/xe_trace.h
> > @@ -17,8 +17,49 @@
> > #include "xe_gt_tlb_invalidation_types.h"
> > #include "xe_gt_types.h"
> > #include "xe_guc_exec_queue_types.h"
> > +#include "xe_pt_types.h"
> > #include "xe_sched_job.h"
> > #include "xe_vm.h"
> > +#include "xe_pt.h"
> > +
> > +DECLARE_EVENT_CLASS(xe_vm_pgtable_update,
> > +                   TP_PROTO(struct xe_vm_pgtable_update *update),
> > +                   TP_ARGS(update),
> > +
> > +                   TP_STRUCT__entry(
> > +                            __field(u64, page_size)
> > +                            __field(u64, end)
> > +                            __field(u64, start)
> > +                            __field(u64, pt_addr)
> > +                            __field(unsigned int, level)
> > +                            __field(u32, ofs)
> > +                            __field(u32, qwords)
> > +                            ),
> > +
> > +                   TP_fast_assign(
> > +                          __entry->level = xe_pt_shift(update->pt-
> > >level);
> > +                          __entry->page_size = 1ull << __entry-
> > >level;
> > +                          __entry->pt_addr = xe_pt_addr(update-
> > >pt);
> > +                          __entry->ofs = update->ofs;
> > +                          __entry->qwords = update->qwords;
> > +                          __entry->start = __entry->pt_addr +
> > update->ofs * __entry->page_size;
> > +                          __entry->end = __entry->start + __entry-
> > >page_size * update->qwords;
> > +                          ),
> > +
> > +                   TP_printk("Update level %u at (%u + %u)
> > [%llx...%llx]",
> > +                             __entry->level, __entry->ofs,
> > __entry->qwords,
> > +                             __entry->start, __entry->end)
> > +);
> > +
> > +DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_bind,
> > +            TP_PROTO(struct xe_vm_pgtable_update *entry),
> > +            TP_ARGS(entry)
> > +);
> > +
> > +DEFINE_EVENT(xe_vm_pgtable_update, xe_pt_commit_unbind,
> > +            TP_PROTO(struct xe_vm_pgtable_update *entry),
> > +            TP_ARGS(entry)
> > +);
> > 
> > DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
> >                     TP_PROTO(struct xe_gt_tlb_invalidation_fence
> > *fence),
> > -- 
> > 2.34.1
> > 



More information about the Intel-xe mailing list