[PATCH v4 2/2] drm/xe/bo: add GPU memory trace points

Lucas De Marchi lucas.demarchi at intel.com
Thu Jun 12 05:40:26 UTC 2025


On Wed, Jun 11, 2025 at 03:51:24PM -0700, Juston Li wrote:
>Add TRACE_GPU_MEM tracepoints for tracking global and per-process GPU
>memory usage.
>
>These are required by VSR on Android 12+ for reporting GPU driver memory
>allocations.
>
>v3:
> - Use now configurable CONFIG_TRACE_GPU_MEM instead of adding a
>   per-driver Kconfig (Lucas)
>
>v2:
> - Use u64 as preferred by checkpatch (Tvrtko)
> - Fix errors in comments/Kconfig description (Tvrtko)
> - drop redundant "CONFIG" in Kconfig
>
>Signed-off-by: Juston Li <justonli at chromium.org>
>Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>---
> drivers/gpu/drm/xe/xe_bo.c           | 47 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++++++
> 2 files changed, 63 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>index 4e39188a021ab..89a3d23e3b800 100644
>--- a/drivers/gpu/drm/xe/xe_bo.c
>+++ b/drivers/gpu/drm/xe/xe_bo.c
>@@ -19,6 +19,8 @@
>
> #include <kunit/static_stub.h>
>
>+#include <trace/events/gpu_mem.h>
>+
> #include "xe_device.h"
> #include "xe_dma_buf.h"
> #include "xe_drm_client.h"
>@@ -418,6 +420,35 @@ static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
> 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
> }
>
>+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
>+static void update_global_total_pages(struct ttm_device *ttm_dev, long num_pages)
>+{
>+	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
>+	u64 global_total_pages =
>+		atomic64_add_return(num_pages, &xe->global_total_pages);
>+
>+	trace_gpu_mem_total(0, 0, global_total_pages << PAGE_SHIFT);
>+}
>+
>+static void update_process_mem(struct drm_file *file, ssize_t size)
>+{
>+	struct xe_file *xef = to_xe_file(file);
>+	u64 process_mem = atomic64_add_return(size, &xef->process_mem);
>+
>+	rcu_read_lock(); /* Locks file->pid! */
>+	trace_gpu_mem_total(0, pid_nr(rcu_dereference(file->pid)), process_mem);

Isn't the first arg supposed to be the gpu id? Doesn't this become
ambiguous when I have e.g. LNL + BMG in the same system and tracing is enabled?

>+	rcu_read_unlock();
>+}
>+#else
>+static inline void update_global_total_pages(struct ttm_device *ttm_dev, long num_pages)
>+{
>+}
>+
>+static inline void update_process_mem(struct drm_file *file, ssize_t size)
>+{
>+}
>+#endif
>+
> static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
> 				       u32 page_flags)
> {
>@@ -525,6 +556,7 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
>
> 	xe_tt->purgeable = false;
> 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
>+	update_global_total_pages(ttm_dev, tt->num_pages);
>
> 	return 0;
> }
>@@ -541,6 +573,7 @@ static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
>
> 	ttm_pool_free(&ttm_dev->pool, tt);
> 	xe_ttm_tt_account_subtract(xe, tt);
>+	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
> }
>
> static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
>@@ -1653,6 +1686,15 @@ static void xe_gem_object_free(struct drm_gem_object *obj)
> 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
> }
>
>+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
>+static int xe_gem_object_open(struct drm_gem_object *obj,
>+			      struct drm_file *file_priv)
>+{
>+	update_process_mem(file_priv, obj->size);
>+	return 0;
>+}
>+#endif
>+
> static void xe_gem_object_close(struct drm_gem_object *obj,
> 				struct drm_file *file_priv)
> {
>@@ -1665,6 +1707,8 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
> 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
> 		xe_bo_unlock(bo);
> 	}
>+
>+	update_process_mem(file_priv, -obj->size);
> }
>
> static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>@@ -1762,6 +1806,9 @@ static const struct vm_operations_struct xe_gem_vm_ops = {
>
> static const struct drm_gem_object_funcs xe_gem_object_funcs = {
> 	.free = xe_gem_object_free,
>+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
>+	.open = xe_gem_object_open,
>+#endif
> 	.close = xe_gem_object_close,
> 	.mmap = drm_gem_ttm_mmap,
> 	.export = xe_gem_prime_export,
>diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>index e5d02a47a5287..b797e766ccbc2 100644
>--- a/drivers/gpu/drm/xe/xe_device_types.h
>+++ b/drivers/gpu/drm/xe/xe_device_types.h
>@@ -641,6 +641,14 @@ struct xe_device {
> 		unsigned int fsb_freq, mem_freq, is_ddr3;
> 	};
> #endif
>+
>+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
>+	/**
>+	 * @global_total_pages: global GPU page usage tracked for gpu_mem
>+	 * tracepoints
>+	 */
>+	atomic64_t global_total_pages;
>+#endif
> };
>
> /**
>@@ -702,6 +710,14 @@ struct xe_file {
>
> 	/** @refcount: ref count of this xe file */
> 	struct kref refcount;
>+
>+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
>+	/**
>+	 * @process_mem: per-process GPU memory usage tracked for gpu_mem
>+	 * tracepoints
>+	 */
>+	atomic64_t process_mem;

So... this is not really per-process; it's actually tracked per open of the
dev node. Does that map correctly to the intended use, and to how msm handles it?


Lucas De Marchi

>+#endif
> };
>
> #endif
>-- 
>2.50.0.rc1.591.g9c95f17f64-goog
>


More information about the dri-devel mailing list