[PATCH] ANDROID: Add GPU work period support for Xe driver
Matthew Brost
matthew.brost at intel.com
Fri Aug 8 17:14:52 UTC 2025
On Fri, Aug 08, 2025 at 09:09:19AM +0000, Aakash Deep Sarkar wrote:
> This patch implements the Android requirement GPU work
> period event for Intel Xe driver.
>
Since this is an Android requirement, can this feature be disabled on
non-Android builds? Having a worker performing sampling has a non-zero
cost in terms of CPU cycles, plus runtime PM waking the device.
I’m not suggesting compiling out the implementation—just never starting
the sample timer on non-Android builds.
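Something like this is all I mean, as a rough sketch: gate the timer on a
Kconfig option, say CONFIG_DRM_XE_GPU_WORK_PERIOD (name hypothetical),
that Android-oriented configs would enable:

	/* In xe_device_create() */
	timer_setup(&xe->work_period.timer, work_period_timer_fn, 0);

	/* Only arm the sampling timer when the feature is enabled */
	if (IS_ENABLED(CONFIG_DRM_XE_GPU_WORK_PERIOD))
		mod_timer(&xe->work_period.timer,
			  jiffies + msecs_to_jiffies(1000));

The rest can stay compiled in; the timer (and with it the workers and the
runtime PM wakes) just never fires on other builds.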
> |GpuWorkPeriodEvent| defines a non-overlapping, non-zero period
> of time from |start_time_ns| (inclusive) until |end_time_ns|
> (exclusive) for a given |uid|, and includes details of how much
> work the GPU was performing for |uid| during the period. When
> GPU work for a given |uid| runs on the GPU, the driver must track
> one or more periods that cover the time where the work was running,
> and emit events soon after.
>
> Full requirement is defined in the following file:
> https://cs.android.com/android/platform/superproject/main/+\
> /main:frameworks/native/services/gpuservice/gpuwork/bpfprogs/gpuWork.c;l=35
>
> The requirement is implemented using a timer to give periodic
> interrupts and a worker thread per user id instance to accumulate
> its run time on gpu and emit the event. Each user id instance is
> tracked using a xe_user structure and the runtime is updated at
> each timer interrupt. The timer period is hardcoded to 500 msecs.
>
> The runtime on the gpu is collected for each xe file individually
> inside the function xe_exec_queue_update_run_ticks() and accumulated
> into the corresponding xe_user active_duration_ns field. The HW
> Context timestamp field in the GTT is used to derive the runtime
> in clock ticks and then converted into nanosecs before updating the
> active duration.
>
> Signed-off-by: Aakash Deep Sarkar <aakash.deep.sarkar at intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.c | 98 +++++++++++++++-
> drivers/gpu/drm/xe/xe_device_types.h | 27 +++++
> drivers/gpu/drm/xe/xe_drm_client.c | 108 ++++++++++++++++++
> drivers/gpu/drm/xe/xe_drm_client.h | 75 +++++++++++-
> drivers/gpu/drm/xe/xe_exec_queue.c | 5 +
> drivers/gpu/drm/xe/xe_gt_clock.c | 5 +
> drivers/gpu/drm/xe/xe_gt_clock.h | 1 +
> .../drm/xe/xe_power_gpu_work_period_trace.h | 61 ++++++++++
> 8 files changed, 378 insertions(+), 2 deletions(-)
> create mode 100644 drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 57edbc63da6f..116247007a4d 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -8,6 +8,7 @@
> #include <linux/aperture.h>
> #include <linux/delay.h>
> #include <linux/fault-inject.h>
> +#include <linux/jiffies.h>
> #include <linux/units.h>
>
> #include <drm/drm_atomic_helper.h>
> @@ -76,9 +77,13 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> {
> struct xe_device *xe = to_xe_device(dev);
> struct xe_drm_client *client;
> + struct xe_user *user;
> struct xe_file *xef;
> int ret = -ENOMEM;
> + unsigned long flags;
> + int uid = -EINVAL;
> struct task_struct *task = NULL;
> + const struct cred *cred = NULL;
>
> xef = kzalloc(sizeof(*xef), GFP_KERNEL);
> if (!xef)
> @@ -103,13 +108,66 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> file->driver_priv = xef;
> kref_init(&xef->refcount);
>
> + INIT_LIST_HEAD(&xef->user_link);
> +
> task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
> if (task) {
> + cred = get_task_cred(task);
> + if (cred) {
> + uid = (int) cred->euid.val;
> + put_cred(cred);
> + }
> xef->process_name = kstrdup(task->comm, GFP_KERNEL);
> xef->pid = task->pid;
> put_task_struct(task);
> }
>
> + if (uid < 0)
> + goto out;
> +
> + /*
> + * Check if the calling process/uid has already been registered
> + * with the xe device during a previous open call. If so then
> + * take a reference to this xe file and add it to the list of xe
> + * files belonging to this user
> + */
> + spin_lock_irqsave(&xe->work_period.lock, flags);
I don't think you need irqsave here; plain spin_lock_irq should do, since
this code always runs in process context.
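i.e. something like:

	spin_lock_irq(&xe->work_period.lock);
	list_for_each_entry(user, &xe->work_period.user_list, entry) {
		...
	}
	spin_unlock_irq(&xe->work_period.lock);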
> + list_for_each_entry(user, &xe->work_period.user_list, entry) {
> + if (user->uid == uid) {
> + xef->user = xe_user_get(user);
> + spin_unlock_irqrestore(&xe->work_period.lock, flags);
> + goto filelist_add;
> + }
> + }
> + spin_unlock_irqrestore(&xe->work_period.lock, flags);
> +
> +
> + /*
> + * We couldn't find a xe user for this process. Allocate a new
> + * struct xe_user and register it with this xe device
> + */
> + user = xe_user_alloc();
> + if (!user)
> + goto out;
> +
> + user->uid = (unsigned int) uid;
> + user->last_timestamp_ns = ktime_get_raw_ns();
> + user->xe = xe;
> +
> + /* Add this xe_user to xe_device->work_period.user_list */
> + spin_lock_irqsave(&xe->work_period.lock, flags);
> + list_add(&user->entry, &xe->work_period.user_list);
> + spin_unlock_irqrestore(&xe->work_period.lock, flags);
> +
> + drm_dev_get(&xe->drm);
> + xef->user = user;
> +
> +filelist_add:
> + /* Add this xe_file to xe_user->filelist */
> + mutex_lock(&user->filelist_lock);
> + list_add(&xef->user_link, &user->filelist);
> + mutex_unlock(&user->filelist_lock);
> +out:
> return 0;
> }
>
> @@ -124,6 +182,12 @@ static void xe_file_destroy(struct kref *ref)
>
> xe_drm_client_put(xef->client);
> kfree(xef->process_name);
> +
> + mutex_lock(&xef->user->filelist_lock);
> + list_del(&xef->user_link);
> + mutex_unlock(&xef->user->filelist_lock);
> + xe_user_put(xef->user);
> +
> kfree(xef);
> }
>
> @@ -347,6 +411,23 @@ static int xe_mmap(struct file *filp, struct vm_area_struct *vma)
> return drm_gem_mmap(filp, vma);
> }
>
> +static void work_period_timer_fn(struct timer_list *timer)
> +{
> + struct xe_device *xe = container_of(timer, typeof(*xe), work_period.timer);
> + struct xe_user *user;
> + unsigned long timeout = 0;
> +
> + spin_lock(&xe->work_period.lock);
> + list_for_each_entry(user, &xe->work_period.user_list, entry) {
> + xe_user_get(user);
I’m pretty sure it’s possible to leak a ref here: if user->work is
already pending, queue_work() won’t queue it again, so the extra
reference taken above is never dropped.
Beyond that, do you really need a timer here? Could every xe_user just
have a delayed worker that continually queues itself?
Also, the work_period.user_list is a little odd. If you get rid of the
timer and use a delayed worker, I think you could use an xarray, indexed
by uid, to store xe_user and have it remove itself from the xarray when
its refcount goes to zero.
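Roughly what I have in mind, untested sketch, names (work_period.users
xarray, etc.) are placeholders:

	/*
	 * Minimal fix for the leak with the current scheme: queue_work()
	 * returns false when the work is already pending, so drop the
	 * extra ref in that case.
	 */
	if (!queue_work(xe->work_period.wq, &user->work))
		xe_user_put(user);

	/*
	 * Or drop the timer entirely: make xe_user::work a delayed_work
	 * that resamples and requeues itself every 500ms.
	 */
	static void work_period_worker(struct work_struct *w)
	{
		struct xe_user *user = container_of(w, struct xe_user,
						    work.work);

		/* ... accumulate runtime and emit the tracepoint ... */

		queue_delayed_work(system_unbound_wq, &user->work,
				   msecs_to_jiffies(500));
	}

	/* Registration side: xarray keyed by uid instead of user_list */
	user = xa_load(&xe->work_period.users, uid);
	if (user) {
		xef->user = xe_user_get(user);
	} else {
		user = xe_user_alloc();
		/* ... init ... */
		xa_store(&xe->work_period.users, uid, user, GFP_KERNEL);
		queue_delayed_work(system_unbound_wq, &user->work,
				   msecs_to_jiffies(500));
	}

	/* And in __xe_user_free(): cancel the work + xa_erase() the entry */

The lookup/insert and the final put vs. xa_erase() need proper locking
(xa_lock or kref_get_unless_zero) to be race free, but you get the idea.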
> + queue_work(xe->work_period.wq, &user->work);
> + }
> + spin_unlock(&xe->work_period.lock);
> + timeout = jiffies + msecs_to_jiffies(500);
> +
> + mod_timer(timer, timeout);
> +}
> +
> static const struct file_operations xe_driver_fops = {
> .owner = THIS_MODULE,
> .open = drm_open,
> @@ -409,6 +490,11 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
> if (xe->destroy_wq)
> destroy_workqueue(xe->destroy_wq);
>
> + if (xe->work_period.wq)
> + destroy_workqueue(xe->work_period.wq);
> +
> + timer_delete_sync(&xe->work_period.timer);
> +
> ttm_device_fini(&xe->ttm);
> }
>
> @@ -477,11 +563,21 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>
> xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
> WQ_MEM_RECLAIM);
> +
> + spin_lock_init(&xe->work_period.lock);
> + INIT_LIST_HEAD(&xe->work_period.user_list);
> + timer_setup(&xe->work_period.timer, work_period_timer_fn, 0);
> + xe->work_period.timer.expires = jiffies + msecs_to_jiffies(1000);
> + add_timer(&xe->work_period.timer);
> +
> + xe->work_period.wq = alloc_workqueue("xe-work-period-wq", 0, 0);
We are a little overzealous in allocating work queues in Xe. I would
like to reduce the number of WQs in Xe — some are certainly needed, but
others are not. I’d say this usage falls into the “not needed” category;
I believe one of the system_wqs here would work just fine.
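e.g. just:

	queue_work(system_unbound_wq, &user->work);

(or system_wq; nothing here needs ordering or WQ_MEM_RECLAIM, so a
dedicated "xe-work-period-wq" doesn't buy anything).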
> +
> xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
> xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
> xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
> if (!xe->ordered_wq || !xe->unordered_wq ||
> - !xe->preempt_fence_wq || !xe->destroy_wq) {
> + !xe->preempt_fence_wq || !xe->destroy_wq ||
> + !xe->work_period.wq) {
> /*
> * Cleanup done in xe_device_destroy via
> * drmm_add_action_or_reset register above
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 01e8fa0d2f9f..32b63b5f1c2e 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -418,6 +418,20 @@ struct xe_device {
> } late;
> } pinned;
>
> + /** @work_period: gpu work period event */
> + struct {
> + /** @lock: lock protecting this structure */
> + spinlock_t lock;
> + /** @timer: timer to give periodic interrupts to emit the
> + * gpu work period event
> + */
> + struct timer_list timer;
> + /** @user_list: list of xe users using this xe device */
> + struct list_head user_list;
> + /** @wq: workqueue for gpu work period event emitting work */
> + struct workqueue_struct *wq;
> + } work_period;
> +
> /** @ufence_wq: user fence wait queue */
> wait_queue_head_t ufence_wq;
>
> @@ -656,6 +670,9 @@ struct xe_file {
> /** @run_ticks: hw engine class run time in ticks for this drm client */
> u64 run_ticks[XE_ENGINE_CLASS_MAX];
>
> + /** @active_duration_ns: total run time in ns for this xe file */
> + u64 active_duration_ns;
> +
> /** @client: drm client */
> struct xe_drm_client *client;
>
> @@ -671,6 +688,16 @@ struct xe_file {
> */
> pid_t pid;
>
> + /**
> + * @user_link: entry into xe_user.filelist list
> + */
> + struct list_head user_link;
> +
> + /**
> + * @user: pointer to the xe user this xe file belongs to
> + */
> + struct xe_user *user;
> +
> /** @refcount: ref count of this xe file */
> struct kref refcount;
> };
> diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
> index f931ff9b1ec0..7a21d8e83310 100644
> --- a/drivers/gpu/drm/xe/xe_drm_client.c
> +++ b/drivers/gpu/drm/xe/xe_drm_client.c
> @@ -5,10 +5,12 @@
> #include "xe_drm_client.h"
>
> #include <drm/drm_print.h>
> +#include <drm/drm_drv.h>
> #include <uapi/drm/xe_drm.h>
> #include <linux/kernel.h>
> #include <linux/slab.h>
> #include <linux/types.h>
> +#include <linux/spinlock.h>
>
> #include "xe_assert.h"
> #include "xe_bo.h"
> @@ -21,6 +23,9 @@
> #include "xe_pm.h"
> #include "xe_trace.h"
>
> +#define CREATE_TRACE_POINTS
> +#include "xe_power_gpu_work_period_trace.h"
> +
> /**
> * DOC: DRM Client usage stats
> *
> @@ -404,3 +409,106 @@ void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
> show_run_ticks(p, file);
> }
> #endif
> +
> +/**
> + * worker thread to emit gpu work period event for this xe user
> + * @work: work instance for this xe user
> + *
> + * Return: void
> + */
> +static inline void work_period_worker(struct work_struct *work)
> +{
> + struct xe_user *user = container_of(work, struct xe_user, work);
> + struct xe_device *xe = user->xe;
> + struct xe_file *xef;
> + struct xe_exec_queue *q;
> + u64 last_active_duration, last_timestamp;
> + u32 gpuid = 0, uid = user->uid;
> + u64 start_time, end_time, active_duration;
> + unsigned long i;
> +
> + last_active_duration = user->active_duration_ns;
> + last_timestamp = user->last_timestamp_ns;
> +
> + xe_pm_runtime_get(xe);
> +
> + mutex_lock(&user->filelist_lock);
> + list_for_each_entry(xef, &user->filelist, user_link) {
> +
> + wait_var_event(&xef->exec_queue.pending_removal,
> + !atomic_read(&xef->exec_queue.pending_removal));
> +
> + /* Accumulate all the exec queues from this user */
> + mutex_lock(&xef->exec_queue.lock);
> + xa_for_each(&xef->exec_queue.xa, i, q) {
> + xe_exec_queue_get(q);
> + mutex_unlock(&xef->exec_queue.lock);
> +
> + xe_exec_queue_update_run_ticks(q);
> +
> + mutex_lock(&xef->exec_queue.lock);
> + xe_exec_queue_put(q);
> + }
> + mutex_unlock(&xef->exec_queue.lock);
> + user->active_duration_ns += xef->active_duration_ns;
> + }
> + mutex_unlock(&user->filelist_lock);
> +
> + xe_pm_runtime_put(xe);
> +
> + start_time = last_timestamp + 1;
> + end_time = ktime_get_raw_ns();
> + active_duration = user->active_duration_ns - last_active_duration;
> + trace_gpu_work_period(gpuid, uid, start_time, end_time, active_duration);
> + user->last_timestamp_ns = end_time;
> +
> + xe_user_put(user);
> +}
> +
> +/**
> + * xe_user_alloc() - Allocate xe user
> + * @void: No arg
> + *
> + * Allocate xe user struct to track activity on the gpu
> + * by the application. Call this API whenever a new app
> + * has opened xe device.
> + *
> + * Return: pointer to user struct or NULL if can't allocate
> + */
> +struct xe_user *xe_user_alloc(void)
If possible, I'd stick the xe_user implementation into its own file and
define the types in a dedicated header.
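Something along the lines of the usual Xe foo.c / foo.h / foo_types.h
split, exact names up to you:

	drivers/gpu/drm/xe/xe_user_types.h  /* struct xe_user */
	drivers/gpu/drm/xe/xe_user.h        /* xe_user_alloc(), get()/put() */
	drivers/gpu/drm/xe/xe_user.c        /* __xe_user_free(), worker */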
Matt
> +{
> + struct xe_user *user;
> +
> + user = kzalloc(sizeof(*user), GFP_KERNEL);
> + if (!user)
> + return NULL;
> +
> + kref_init(&user->refcount);
> + mutex_init(&user->filelist_lock);
> + INIT_LIST_HEAD(&user->filelist);
> + INIT_LIST_HEAD(&user->entry);
> + INIT_WORK(&user->work, work_period_worker);
> + return user;
> +}
> +
> +/**
> + * __xe_user_free() - Free user struct
> + * @kref: The reference
> + *
> + * Return: void
> + */
> +void __xe_user_free(struct kref *kref)
> +{
> + struct xe_user *user =
> + container_of(kref, struct xe_user, refcount);
> + struct xe_device *xe = user->xe;
> + unsigned long flags;
> +
> + /* Remove the xe_user from xe_device.user_list */
> + spin_lock_irqsave(&xe->work_period.lock, flags);
> + list_del(&user->entry);
> + spin_unlock_irqrestore(&xe->work_period.lock, flags);
> +
> + drm_dev_put(&user->xe->drm);
> + kfree(user);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
> index a9649aa36011..cf5f27e79251 100644
> --- a/drivers/gpu/drm/xe/xe_drm_client.h
> +++ b/drivers/gpu/drm/xe/xe_drm_client.h
> @@ -12,6 +12,7 @@
> #include <linux/rcupdate.h>
> #include <linux/sched.h>
> #include <linux/spinlock.h>
> +#include <linux/workqueue.h>
>
> struct drm_file;
> struct drm_printer;
> @@ -34,7 +35,63 @@ struct xe_drm_client {
> #endif
> };
>
> - static inline struct xe_drm_client *
> +/**
> + * This is a per process/user id structure for a xe device
> + * client. It is allocated when a new process/app opens the
> + * xe device and destroyed when the last xe file belonging
> + * to this user id is destroyed.
> + */
> +struct xe_user {
> + /**
> + * @refcount: reference count
> + */
> + struct kref refcount;
> +
> + /**
> + * @xe: pointer to the xe_device
> + */
> + struct xe_device *xe;
> +
> + /**
> + * @filelist_lock: lock protecting the filelist
> + */
> + struct mutex filelist_lock;
> +
> + /**
> + * @filelist: list of xe files belonging to this xe user
> + */
> + struct list_head filelist;
> +
> + /**
> + * @entry: entry into the xe.work_period.user_list list
> + */
> + struct list_head entry;
> +
> + /**
> + * @work: work to emit the gpu work period event for this
> + * xe user
> + */
> + struct work_struct work;
> +
> + /**
> + * @uid: user id for this xe_user
> + */
> + u32 uid;
> +
> + /**
> + * @active_duration_ns: sum total of xe_file.active_duration_ns
> + * for all xe files belonging to this xe user
> + */
> + u64 active_duration_ns;
> +
> + /**
> + * @last_timestamp_ns: timestamp in ns when we last emitted event
> + * for this xe user
> + */
> + u64 last_timestamp_ns;
> +};
> +
> +static inline struct xe_drm_client *
> xe_drm_client_get(struct xe_drm_client *client)
> {
> kref_get(&client->kref);
> @@ -67,4 +124,20 @@ static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
> {
> }
> #endif
> +
> +struct xe_user *xe_user_alloc(void);
> +
> +static inline struct xe_user *
> +xe_user_get(struct xe_user *user)
> +{
> + kref_get(&user->refcount);
> + return user;
> +}
> +
> +void __xe_user_free(struct kref *kref);
> +
> +static inline void xe_user_put(struct xe_user *user)
> +{
> + kref_put(&user->refcount, __xe_user_free);
> +}
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 6c176183ed58..59d69863f626 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -15,6 +15,7 @@
> #include "xe_dep_scheduler.h"
> #include "xe_device.h"
> #include "xe_gt.h"
> +#include "xe_gt_clock.h"
> #include "xe_hw_engine_class_sysfs.h"
> #include "xe_hw_engine_group.h"
> #include "xe_hw_fence.h"
> @@ -865,6 +866,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
> struct xe_device *xe = gt_to_xe(q->gt);
> struct xe_lrc *lrc;
> u64 old_ts, new_ts;
> + struct xe_gt *gt = q->gt;
> int idx;
>
> /*
> @@ -889,6 +891,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
> new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
> q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
>
> + q->xef->active_duration_ns +=
> + xe_gt_clock_interval_to_ns(gt, (new_ts - old_ts));
> +
> drm_dev_exit(idx);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
> index 4f011d1573c6..6ba807837198 100644
> --- a/drivers/gpu/drm/xe/xe_gt_clock.c
> +++ b/drivers/gpu/drm/xe/xe_gt_clock.c
> @@ -110,3 +110,8 @@ u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count)
> {
> return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock);
> }
> +
> +u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count)
> +{
> + return div_u64_roundup(count * NSEC_PER_SEC, gt->info.reference_clock);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
> index 3adeb7baaca4..bd87971bce97 100644
> --- a/drivers/gpu/drm/xe/xe_gt_clock.h
> +++ b/drivers/gpu/drm/xe/xe_gt_clock.h
> @@ -12,5 +12,6 @@ struct xe_gt;
>
> int xe_gt_clock_init(struct xe_gt *gt);
> u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count);
> +u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count);
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h b/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h
> new file mode 100644
> index 000000000000..2de05f1b64f3
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h
> @@ -0,0 +1,61 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _TRACE_POWER_GPU_WORK_PERIOD_INTEL
> +#define _TRACE_POWER_GPU_WORK_PERIOD_INTEL
> +#endif
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM power
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE xe_power_gpu_work_period_trace
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +
> +#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_POWER_GPU_WORK_PERIOD_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(gpu_work_period,
> +
> + TP_PROTO(
> + u32 gpu_id,
> + u32 uid,
> + u64 start_time_ns,
> + u64 end_time_ns,
> + u64 total_active_duration_ns
> + ),
> +
> + TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns),
> +
> + TP_STRUCT__entry(
> + __field(u32, gpu_id)
> + __field(u32, uid)
> + __field(u64, start_time_ns)
> + __field(u64, end_time_ns)
> + __field(u64, total_active_duration_ns)
> + ),
> +
> + TP_fast_assign(
> + __entry->gpu_id = gpu_id;
> + __entry->uid = uid;
> + __entry->start_time_ns = start_time_ns;
> + __entry->end_time_ns = end_time_ns;
> + __entry->total_active_duration_ns = total_active_duration_ns;
> + ),
> +
> + TP_printk("gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu",
> + __entry->gpu_id,
> + __entry->uid,
> + __entry->start_time_ns,
> + __entry->end_time_ns,
> + __entry->total_active_duration_ns)
> +);
> +
> +#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> --
> 2.49.0
>