[PATCH v2] drm/xe/bo: add GPU memory trace points
Tvrtko Ursulin
tursulin at ursulin.net
Thu May 22 15:14:08 UTC 2025
On 22/05/2025 15:50, Lucas De Marchi wrote:
> + dri-devel
>
> On Wed, May 21, 2025 at 10:42:35PM +0000, Juston Li wrote:
>> Add tracepoints behind CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS for tracking
>> global and per-process GPU memory usage.
>>
>> These are required by VSR on Android 12+ for reporting GPU driver memory
>> allocations.
>>
>> v2:
>> - Use u64 as preferred by checkpatch (Tvrtko)
>> - Fix errors in comments/Kconfig description (Tvrtko)
>> - drop redundant "CONFIG_" in Kconfig
>>
>> Signed-off-by: Juston Li <justonli at chromium.org>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> ---
>> drivers/gpu/drm/xe/Kconfig.debug | 12 +++++++
>> drivers/gpu/drm/xe/xe_bo.c | 47 ++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++++++
>> 3 files changed, 75 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/
>> Kconfig.debug
>> index 01735c6ece8ba..2371eeda0afd5 100644
>> --- a/drivers/gpu/drm/xe/Kconfig.debug
>> +++ b/drivers/gpu/drm/xe/Kconfig.debug
>> @@ -111,3 +111,15 @@ config DRM_XE_USERPTR_INVAL_INJECT
>>
>> Recommended for driver developers only.
>> If in doubt, say "N".
>> +
>> +config DRM_XE_GPU_MEM_TRACEPOINTS
>
> is there any particular reason to make this user-configurable per driver?
> Why aren't we making CONFIG_TRACE_GPU_MEM configurable (if needed, but
> could just depend on CONFIG_TRACEPOINTS) and then drivers just use it.
Could be done like that too. Msm does unconditional select TRACE_GPU_MEM
which I thought wouldn't be acceptable so I suggested making it
configurable.
>> + bool "Enable Xe GPU memory usage tracepoints"
>> + default n
>> + select TRACE_GPU_MEM
>> + help
>> + Choose this option to enable tracepoints for tracking
>> + global and per-process GPU memory usage.
>> + Intended for performance profiling and required for
>> + Android.
>> +
>> + If in doubt, say "N".
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index d99d91fe8aa98..49ee20d54ede6 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -19,6 +19,8 @@
>>
>> #include <kunit/static_stub.h>
>>
>> +#include <trace/events/gpu_mem.h>
>> +
>> #include "xe_device.h"
>> #include "xe_dma_buf.h"
>> #include "xe_drm_client.h"
>> @@ -420,6 +422,35 @@ static void xe_ttm_tt_account_subtract(struct
>> ttm_tt *tt)
>> xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt-
>> >num_pages, 0);
>> }
>>
>> +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
>> +static void update_global_total_pages(struct ttm_device *ttm_dev,
>> long num_pages)
>> +{
>> + struct xe_device *xe = ttm_to_xe_device(ttm_dev);
>> + u64 global_total_pages =
>> + atomic64_add_return(num_pages, &xe->global_total_pages);
>> +
>> + trace_gpu_mem_total(0, 0, global_total_pages << PAGE_SHIFT);
>> +}
>> +
>> +static void update_process_mem(struct drm_file *file, ssize_t size)
>> +{
>> + struct xe_file *xef = to_xe_file(file);
>> + u64 process_mem = atomic64_add_return(size, &xef->process_mem);
>> +
>> + rcu_read_lock(); /* Locks file->pid! */
>> + trace_gpu_mem_total(0, pid_nr(rcu_dereference(file->pid)),
>> process_mem);
>> + rcu_read_unlock();
>> +}
>> +#else
>> +static inline void update_global_total_pages(struct ttm_device
>> *ttm_dev, long num_pages)
>> +{
>> +}
>> +
>> +static inline void update_process_mem(struct drm_file *file, ssize_t
>> size)
>> +{
>> +}
>> +#endif
>> +
>> static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
>> u32 page_flags)
>> {
>> @@ -528,6 +559,7 @@ static int xe_ttm_tt_populate(struct ttm_device
>> *ttm_dev, struct ttm_tt *tt,
>>
>> xe_tt->purgeable = false;
>> xe_ttm_tt_account_add(tt);
>> + update_global_total_pages(ttm_dev, tt->num_pages);
>>
>> return 0;
>> }
>> @@ -542,6 +574,7 @@ static void xe_ttm_tt_unpopulate(struct ttm_device
>> *ttm_dev, struct ttm_tt *tt)
>>
>> ttm_pool_free(&ttm_dev->pool, tt);
>> xe_ttm_tt_account_subtract(tt);
>> + update_global_total_pages(ttm_dev, -(long)tt->num_pages);
>> }
>>
>> static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct
>> ttm_tt *tt)
>> @@ -1648,6 +1681,15 @@ static void xe_gem_object_free(struct
>> drm_gem_object *obj)
>> ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
>> }
>>
>> +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
>> +static int xe_gem_object_open(struct drm_gem_object *obj,
>> + struct drm_file *file_priv)
>> +{
>> + update_process_mem(file_priv, obj->size);
>> + return 0;
>> +}
>> +#endif
>> +
>> static void xe_gem_object_close(struct drm_gem_object *obj,
>> struct drm_file *file_priv)
>> {
>> @@ -1660,6 +1702,8 @@ static void xe_gem_object_close(struct
>> drm_gem_object *obj,
>> ttm_bo_set_bulk_move(&bo->ttm, NULL);
>> xe_bo_unlock(bo);
>> }
>> +
>> + update_process_mem(file_priv, -obj->size);
>> }
>>
>> static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>> @@ -1757,6 +1801,9 @@ static const struct vm_operations_struct
>> xe_gem_vm_ops = {
>>
>> static const struct drm_gem_object_funcs xe_gem_object_funcs = {
>> .free = xe_gem_object_free,
>> +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
>
> in future we may have other reasons for this, then we will need to
> refactor these ifdefs. So, maybe just assign this without ifdef, which
> allows to remove the ifdef around xe_gem_object_open. The impl of the
> update_* functions could also be in the form
>
> static void update_...()
> {
> #if IS_ENABLED(CONFIG_...)
> #endif
> }
It was also my suggestion to do it like this to avoid having adding an
->open() callback which ends up an empty function call on all builds
expect Android kernels. I agree ifdef-ery is heavy like this, but in
this case it feels justified.
Regards,
Tvrtko
>> + .open = xe_gem_object_open,
>> +#endif
>> .close = xe_gem_object_close,
>> .mmap = drm_gem_ttm_mmap,
>> .export = xe_gem_prime_export,
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/
>> xe/xe_device_types.h
>> index f81be293b260e..dd9d411e66dac 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -639,6 +639,14 @@ struct xe_device {
>> unsigned int fsb_freq, mem_freq, is_ddr3;
>> };
>> #endif
>> +
>> +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
>> + /**
>> + * @global_total_pages: global GPU page usage tracked for gpu_mem
>> + * tracepoints
>> + */
>> + atomic64_t global_total_pages;
>> +#endif
>> };
>>
>> /**
>> @@ -700,6 +708,14 @@ struct xe_file {
>>
>> /** @refcount: ref count of this xe file */
>> struct kref refcount;
>> +
>> +#if IS_ENABLED(CONFIG_DRM_XE_GPU_MEM_TRACEPOINTS)
>> + /**
>> + * @process_mem: per-process GPU memory usage tracked for gpu_mem
>> + * tracepoints
>> + */
>> + atomic64_t process_mem;
>> +#endif
>> };
>>
>> #endif
>> --
>> 2.49.0.1143.g0be31eac6b-goog
>>
More information about the dri-devel
mailing list