[PATCH] drm/amdgpu: Add atomic CPU-GPU clock counter correlation
Alex Deucher
alexdeucher at gmail.com
Fri Jul 11 22:35:09 UTC 2025
On Fri, Jul 11, 2025 at 5:13 AM Jesse Zhang <jesse.zhang at amd.com> wrote:
>
> This patch introduces a new IOCTL to provide tightly correlated
> CPU and GPU timestamps for accurate performance measurements
> and synchronization between host and device timelines.
>
> Key improvements:
> 1. Adds AMDGPU_INFO_CLOCK_COUNTERS query type (0x06)
> 2. Implements atomic sampling of clocks with:
> - preempt_disable()
> - local IRQ disabling
> - GPU timestamp sampled first (higher latency)
> - CPU timestamps sampled immediately after
> 3. Provides three correlated clocks:
> - GPU clock counter (ns)
> - CPU raw monotonic time (ns)
> - System boottime (ns)
> 4. Includes system clock frequency (1GHz) for reference
>
> The implementation addresses the need for precise CPU-GPU
> timestamp correlation in ROCm applications, particularly for:
> - Performance analysis tools
> - Compute pipeline synchronization
> - Graphics/compute interoperability
> - Low-latency VR/AR applications
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 44 +++++++++++++++++++++++++
> include/uapi/drm/amdgpu_drm.h | 19 +++++++++++
> 2 files changed, 63 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 931c52c918c4..8412c88aada9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -595,6 +595,43 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> return 0;
> }
>
> +/**
> + * get_cpu_gpu_counters - Atomically sample CPU and GPU clocks
> + * @adev: amdgpu device pointer
> + * @args: structure to store clock counters
> + *
> + * Samples CPU and GPU clocks as close to simultaneously as possible
> + * by disabling preemption and interrupts during the sampling.
> + * Returns 0 on success.
> + */
> +static int get_cpu_gpu_counters(struct amdgpu_device *adev,
> + struct drm_amdgpu_info_clock_counters *args)
Please prefix this function with amdgpu_ (i.e., amdgpu_get_cpu_gpu_counters,
updating the call in amdgpu_info_ioctl() to match) for consistency. Other than
that, it looks good to me. Felix, Harish, this mirrors what we do in KFD.
Was there any other timing data you wanted to include?
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> +{
> + unsigned long flags;
> +
> + /* Disable preemption and interrupts on local CPU */
> + preempt_disable();
> + local_irq_save(flags);
> +
> + if (adev->gfx.funcs->get_gpu_clock_counter)
> + args->gpu_clock_counter = adev->gfx.funcs->get_gpu_clock_counter(adev);
> + else
> + args->gpu_clock_counter = 0;
> +
> + /* No access to rdtsc. Using raw monotonic time */
> + args->cpu_clock_counter = ktime_get_raw_ns();
> + args->system_clock_counter = ktime_get_boottime_ns();
> +
> + /* Since the counter is in nanoseconds, we use a 1GHz frequency */
> + args->system_clock_freq = 1000000000;
> +
> + /* Restore previous state */
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + return 0;
> +}
> +
> /*
> * Userspace get information ioctl
> */
> @@ -734,6 +771,13 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> case AMDGPU_INFO_TIMESTAMP:
> ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
> return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
> + case AMDGPU_INFO_CLOCK_COUNTERS: {
> + struct drm_amdgpu_info_clock_counters counters;
> +
> + memset(&counters, 0, sizeof(counters));
> + get_cpu_gpu_counters(adev, &counters);
> + return copy_to_user(out, &counters, min(size, sizeof(counters))) ? -EFAULT : 0;
> + }
> case AMDGPU_INFO_FW_VERSION: {
> struct drm_amdgpu_info_firmware fw_info;
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index bdedbaccf776..e8adbbd076ca 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1056,6 +1056,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> #define AMDGPU_INFO_HW_IP_COUNT 0x03
> /* timestamp for GL_ARB_timer_query */
> #define AMDGPU_INFO_TIMESTAMP 0x05
> +/* get synchronized CPU and GPU clock counters */
> +#define AMDGPU_INFO_CLOCK_COUNTERS 0x06
> /* Query the firmware version */
> #define AMDGPU_INFO_FW_VERSION 0x0e
> /* Subquery id: Query VCE firmware version */
> @@ -1598,6 +1600,23 @@ struct drm_amdgpu_info_uq_metadata {
> };
> };
>
> +/**
> + * struct drm_amdgpu_info_clock_counters - Clock counter information
> + *
> + * Used to correlate timestamps between CPU and GPU with minimal skew.
> + * All counters are in nanoseconds for consistent comparison.
> + */
> +struct drm_amdgpu_info_clock_counters {
> + /* GPU clock counter in nanoseconds */
> + __u64 gpu_clock_counter;
> + /* CPU clock counter (raw monotonic) in nanoseconds */
> + __u64 cpu_clock_counter;
> + /* System boottime clock counter in nanoseconds */
> + __u64 system_clock_counter;
> + /* System clock frequency in Hz (always 1GHz) */
> + __u64 system_clock_freq;
> +};
> +
> /*
> * Supported GPU families
> */
> --
> 2.34.1
>
More information about the amd-gfx mailing list