[PATCH v2] drm/xe/bo: add GPU memory trace points
Juston Li
justonli at chromium.org
Wed May 28 20:01:21 UTC 2025
On Wed, 2025-05-28 at 18:34 +0000, Juston Li wrote:
> On Thu, 2025-05-22 at 16:14 +0100, Tvrtko Ursulin wrote:
> >
> > On 22/05/2025 15:50, Lucas De Marchi wrote:
> > > + dri-devel
> > >
> > > On Wed, May 21, 2025 at 10:42:35PM +0000, Juston Li wrote:
> > > > Add tracepoints behind CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS for
> > > > tracking
> > > > global and per-process GPU memory usage.
> > > >
> > > > These are required by VSR on Android 12+ for reporting GPU
> > > > driver
> > > > memory
> > > > allocations.
> > > >
> > > > v2:
> > > > - Use u64 as preferred by checkpatch (Tvrtko)
> > > > - Fix errors in comments/Kconfig description (Tvrtko)
> > > > - drop redundant "CONFIG_" in Kconfig
> > > >
> > > > Signed-off-by: Juston Li <justonli at chromium.org>
> > > > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> > > > ---
> > > > drivers/gpu/drm/xe/Kconfig.debug | 12 +++++++
> > > > drivers/gpu/drm/xe/xe_bo.c | 47
> > > > ++++++++++++++++++++++++++++
> > > > drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++++++
> > > > 3 files changed, 75 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/Kconfig.debug
> > > > b/drivers/gpu/drm/xe/
> > > > Kconfig.debug
> > > > index 01735c6ece8ba..2371eeda0afd5 100644
> > > > --- a/drivers/gpu/drm/xe/Kconfig.debug
> > > > +++ b/drivers/gpu/drm/xe/Kconfig.debug
> > > > @@ -111,3 +111,15 @@ config DRM_XE_USERPTR_INVAL_INJECT
> > > >
> > > > Recommended for driver developers only.
> > > > If in doubt, say "N".
> > > > +
> > > > +config DRM_XE_GPU_MEM_TRACEPOINTS
> > >
> > > is there any particular reason to make this user-configurable per
> > > driver?
> > > Why aren't we making CONFIG_TRACE_GPU_MEM configurable (if
> > > needed,
> > > but
> > > could just depend on CONFIG_TRACEPOINTS) and then drivers just
> > > use
> > > it.
> >
> > Could be done like that too. Msm does unconditional select
> > TRACE_GPU_MEM
> > which I thought wouldn't be acceptable so I suggested making it
> > configurable.
>
> Ok yeah, I don't see a reason to make it per-driver either. I'll make
> CONFIG_TRACE_GPU_MEM configurable since we don't have
> CONFIG_TRACEPOINTS or CONFIG_TRACING enabled on GKI.
>
> Juston
Scratch that, CONFIG_TRACEPOINTS is enabled on GKI so we could do the
depend.
But now that I think about it, CONFIG_TRACEPOINTS seems enabled so
commonly I wonder if we might as well just unconditional select like
Msm as Tvrtko mentioned and then not have to deal with the #ifdefs?
If that's not acceptable, I'm leaning on just making
CONFIG_TRACE_GPU_MEM configurable.
Juston
> > > > + bool "Enable Xe GPU memory usage tracepoints"
> > > > + default n
> > > > + select TRACE_GPU_MEM
> > > > + help
> > > > + Choose this option to enable tracepoints for tracking
> > > > + global and per-process GPU memory usage.
> > > > + Intended for performance profiling and required for
> > > > + Android.
> > > > +
> > > > + If in doubt, say "N".
> > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c
> > > > b/drivers/gpu/drm/xe/xe_bo.c
> > > > index d99d91fe8aa98..49ee20d54ede6 100644
> > > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > > @@ -19,6 +19,8 @@
> > > >
> > > > #include <kunit/static_stub.h>
> > > >
> > > > +#include <trace/events/gpu_mem.h>
> > > > +
> > > > #include "xe_device.h"
> > > > #include "xe_dma_buf.h"
> > > > #include "xe_drm_client.h"
> > > > @@ -420,6 +422,35 @@ static void
> > > > xe_ttm_tt_account_subtract(struct
> > > > ttm_tt *tt)
> > > > xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -
> > > > (long)tt-
> > > > > num_pages, 0);
> > > > }
> > > >
> > > > +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
> > > > +static void update_global_total_pages(struct ttm_device
> > > > *ttm_dev,
> > > > long num_pages)
> > > > +{
> > > > + struct xe_device *xe = ttm_to_xe_device(ttm_dev);
> > > > + u64 global_total_pages =
> > > > + atomic64_add_return(num_pages, &xe-
> > > > >global_total_pages);
> > > > +
> > > > + trace_gpu_mem_total(0, 0, global_total_pages <<
> > > > PAGE_SHIFT);
> > > > +}
> > > > +
> > > > +static void update_process_mem(struct drm_file *file, ssize_t
> > > > size)
> > > > +{
> > > > + struct xe_file *xef = to_xe_file(file);
> > > > + u64 process_mem = atomic64_add_return(size, &xef-
> > > > > process_mem);
> > > > +
> > > > + rcu_read_lock(); /* Locks file->pid! */
> > > > + trace_gpu_mem_total(0, pid_nr(rcu_dereference(file->pid)),
> > > > process_mem);
> > > > + rcu_read_unlock();
> > > > +}
> > > > +#else
> > > > +static inline void update_global_total_pages(struct ttm_device
> > > > *ttm_dev, long num_pages)
> > > > +{
> > > > +}
> > > > +
> > > > +static inline void update_process_mem(struct drm_file *file,
> > > > ssize_t
> > > > size)
> > > > +{
> > > > +}
> > > > +#endif
> > > > +
> > > > static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object
> > > > *ttm_bo,
> > > > u32 page_flags)
> > > > {
> > > > @@ -528,6 +559,7 @@ static int xe_ttm_tt_populate(struct
> > > > ttm_device
> > > > *ttm_dev, struct ttm_tt *tt,
> > > >
> > > > xe_tt->purgeable = false;
> > > > xe_ttm_tt_account_add(tt);
> > > > + update_global_total_pages(ttm_dev, tt->num_pages);
> > > >
> > > > return 0;
> > > > }
> > > > @@ -542,6 +574,7 @@ static void xe_ttm_tt_unpopulate(struct
> > > > ttm_device
> > > > *ttm_dev, struct ttm_tt *tt)
> > > >
> > > > ttm_pool_free(&ttm_dev->pool, tt);
> > > > xe_ttm_tt_account_subtract(tt);
> > > > + update_global_total_pages(ttm_dev, -(long)tt->num_pages);
> > > > }
> > > >
> > > > static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev,
> > > > struct
> > > > ttm_tt *tt)
> > > > @@ -1648,6 +1681,15 @@ static void xe_gem_object_free(struct
> > > > drm_gem_object *obj)
> > > > ttm_bo_put(container_of(obj, struct ttm_buffer_object,
> > > > base));
> > > > }
> > > >
> > > > +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
> > > > +static int xe_gem_object_open(struct drm_gem_object *obj,
> > > > + struct drm_file *file_priv)
> > > > +{
> > > > + update_process_mem(file_priv, obj->size);
> > > > + return 0;
> > > > +}
> > > > +#endif
> > > > +
> > > > static void xe_gem_object_close(struct drm_gem_object *obj,
> > > > struct drm_file *file_priv)
> > > > {
> > > > @@ -1660,6 +1702,8 @@ static void xe_gem_object_close(struct
> > > > drm_gem_object *obj,
> > > > ttm_bo_set_bulk_move(&bo->ttm, NULL);
> > > > xe_bo_unlock(bo);
> > > > }
> > > > +
> > > > + update_process_mem(file_priv, -obj->size);
> > > > }
> > > >
> > > > static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
> > > > @@ -1757,6 +1801,9 @@ static const struct vm_operations_struct
> > > > xe_gem_vm_ops = {
> > > >
> > > > static const struct drm_gem_object_funcs xe_gem_object_funcs =
> > > > {
> > > > .free = xe_gem_object_free,
> > > > +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
> > >
> > > in future we may have other reasons for this, then we will need
> > > to
> > > refactor these ifdefs. So, maybe just assign this without ifdef,
> > > which
> > > allows to remove the ifdef around xe_gem_object_open. The impl of
> > > the
> > > update_* functions could also be in the form
> > >
> > > static void update_...()
> > > {
> > > #if IS_ENABLED(CONFIG_...)
> > > #endif
> > > }
> >
> > It was also my suggestion to do it like this to avoid having adding
> > an
> > ->open() callback which ends up an empty function call on all
> > builds
> > expect Android kernels. I agree ifdef-ery is heavy like this, but
> > in
> > this case it feels justified.
> >
> > Regards,
> >
> > Tvrtko
> >
> > > > + .open = xe_gem_object_open,
> > > > +#endif
> > > > .close = xe_gem_object_close,
> > > > .mmap = drm_gem_ttm_mmap,
> > > > .export = xe_gem_prime_export,
> > > > diff --git a/drivers/gpu/drm/xe/xe_device_types.h
> > > > b/drivers/gpu/drm/
> > > > xe/xe_device_types.h
> > > > index f81be293b260e..dd9d411e66dac 100644
> > > > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > > > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > > > @@ -639,6 +639,14 @@ struct xe_device {
> > > > unsigned int fsb_freq, mem_freq, is_ddr3;
> > > > };
> > > > #endif
> > > > +
> > > > +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
> > > > + /**
> > > > + * @global_total_pages: global GPU page usage tracked for
> > > > gpu_mem
> > > > + * tracepoints
> > > > + */
> > > > + atomic64_t global_total_pages;
> > > > +#endif
> > > > };
> > > >
> > > > /**
> > > > @@ -700,6 +708,14 @@ struct xe_file {
> > > >
> > > > /** @refcount: ref count of this xe file */
> > > > struct kref refcount;
> > > > +
> > > > +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
> > > > + /**
> > > > + * @process_mem: per-process GPU memory usage tracked for
> > > > gpu_mem
> > > > + * tracepoints
> > > > + */
> > > > + atomic64_t process_mem;
> > > > +#endif
> > > > };
> > > >
> > > > #endif
> > > > --
> > > > 2.49.0.1143.g0be31eac6b-goog
> > > >
More information about the dri-devel
mailing list