[PATCH v9 7/8] drm/xe/uapi: Add a device query to get EU stall sampling information

Dixit, Ashutosh ashutosh.dixit at intel.com
Wed Feb 12 21:13:27 UTC 2025


On Mon, 10 Feb 2025 05:46:48 -0800, Harish Chegondi wrote:
>
> User space can get the EU stall data record size, EU stall capabilities,
> EU stall sampling rates, and per XeCore buffer size with query IOCTL
> DRM_IOCTL_XE_DEVICE_QUERY with .query set to DRM_XE_DEVICE_QUERY_EU_STALL.
> A struct drm_xe_query_eu_stall will be returned to the user space along
> with an array of supported sampling rates sorted in the fastest sampling
> rate first order. sampling_rates in struct drm_xe_query_eu_stall will
> point to the array of sampling rates.
>
> Any capabilities in EU stall sampling as of this patch are considered
> as base capabilities. New capability bits will be added for any new
> functionality added later.
>
> v9: Move reserved fields above num_sampling_rates in
>     struct drm_xe_query_eu_stall.
> v7: Change sampling_rates from a pointer to flexible array.
> v6: Include EU stall sampling rates information and
>     per XeCore buffer size in the query information.
>
> Signed-off-by: Harish Chegondi <harish.chegondi at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_eu_stall.c | 43 ++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/xe/xe_eu_stall.h |  4 +++
>  drivers/gpu/drm/xe/xe_query.c    | 38 ++++++++++++++++++++++++++++
>  include/uapi/drm/xe_drm.h        | 40 +++++++++++++++++++++++++++--
>  4 files changed, 121 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
> index 52f8f40ba9a6..4dce58f60405 100644
> --- a/drivers/gpu/drm/xe/xe_eu_stall.c
> +++ b/drivers/gpu/drm/xe/xe_eu_stall.c
> @@ -150,9 +150,48 @@ struct xe_eu_stall_data_xe2 {
>	__u64 unused[6];
>  } __packed;
>
> -static size_t xe_eu_stall_data_record_size(struct xe_device *xe)
> +const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
> +
> +/**
> + * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
> + *
> + * @num_rates_ptr: Pointer to a u32 to return the number of sampling rates.
> + * @rates_ptr: double u64 pointer to point to an array of sampling rates.
> + *
> + * Stores the number of sampling rates and pointer to the array of
> + * sampling rates in the input pointers.
> + *
> + * Returns: Size of the EU stall sampling rates array.
> + */
> +size_t xe_eu_stall_get_sampling_rates(u32 *num_rates_ptr, const u64 **rates_ptr)

Just name these function arguments num_rates and rates. No need for _ptr,
it's obvious they are pointers.

> +{
> +	*num_rates_ptr = ARRAY_SIZE(eu_stall_sampling_rates);
> +	*rates_ptr = eu_stall_sampling_rates;
> +
> +	return sizeof(eu_stall_sampling_rates);
> +}
> +
> +/**
> + * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
> + *
> + * Returns: The per XeCore buffer size used to allocate the per GT
> + *	    EU stall data buffer.
> + */
> +size_t xe_eu_stall_get_per_xecore_buf_size(void)
> +{
> +	return per_xecore_buf_size;
> +}
> +
> +/**
> + * xe_eu_stall_data_record_size - get EU stall data record size.
> + *
> + * @xe: Pointer to a Xe device.
> + *
> + * Returns: EU stall data record size.
> + */
> +size_t xe_eu_stall_data_record_size(struct xe_device *xe)
>  {
> -	unsigned long record_size = 0;
> +	size_t record_size = 0;

This line should not be there in this patch. It should be fixed in the
previous patch where it was introduced.

>
>	if (xe->info.platform == XE_PVC)
>		record_size = sizeof(struct xe_eu_stall_data_pvc);
> diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h
> index e42250c1d294..c612fbaf2901 100644
> --- a/drivers/gpu/drm/xe/xe_eu_stall.h
> +++ b/drivers/gpu/drm/xe/xe_eu_stall.h
> @@ -8,6 +8,10 @@
>
>  #include "xe_gt_types.h"
>
> +size_t xe_eu_stall_get_per_xecore_buf_size(void);
> +size_t xe_eu_stall_data_record_size(struct xe_device *xe);
> +size_t xe_eu_stall_get_sampling_rates(u32 *num_rates_ptr,
> +				      const u64 **rates_ptr);

num_rates/rates

>  int xe_eu_stall_init(struct xe_gt *gt);
>  void xe_eu_stall_fini(struct xe_gt *gt);
>
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index 042f87a688e7..2402bb6b221c 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -16,6 +16,7 @@
>  #include "regs/xe_gt_regs.h"
>  #include "xe_bo.h"
>  #include "xe_device.h"
> +#include "xe_eu_stall.h"
>  #include "xe_exec_queue.h"
>  #include "xe_force_wake.h"
>  #include "xe_ggtt.h"
> @@ -726,6 +727,42 @@ static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *qu
>	return 0;
>  }
>
> +static int query_eu_stall(struct xe_device *xe,
> +			  struct drm_xe_device_query *query)
> +{
> +	void __user *query_ptr = u64_to_user_ptr(query->data);
> +	struct drm_xe_query_eu_stall *info;
> +	size_t size, array_size;
> +	const u64 *rates_ptr;

rates, no ptr.

> +	u32 num_rates;
> +	int ret;
> +
> +	array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates_ptr);
> +	size = sizeof(struct drm_xe_query_eu_stall) + array_size;
> +
> +	if (query->size == 0) {
> +		query->size = size;
> +		return 0;
> +	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
> +		return -EINVAL;
> +	}
> +
> +	info = kzalloc(size, GFP_KERNEL);
> +	if (!info)
> +		return -ENOMEM;
> +
> +	info->num_sampling_rates = num_rates;
> +	info->capabilities = DRM_XE_EU_STALL_CAPS_BASE;
> +	info->record_size = xe_eu_stall_data_record_size(xe);
> +	info->per_xecore_buf_size = xe_eu_stall_get_per_xecore_buf_size();
> +	memcpy(info->sampling_rates, rates_ptr, array_size);
> +
> +	ret = copy_to_user(query_ptr, info, size);
> +	kfree(info);
> +
> +	return ret ? -EFAULT : 0;
> +}
> +
>  static int (* const xe_query_funcs[])(struct xe_device *xe,
>				      struct drm_xe_device_query *query) = {
>	query_engines,
> @@ -738,6 +775,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
>	query_uc_fw_version,
>	query_oa_units,
>	query_pxp_status,
> +	query_eu_stall,
>  };
>
>  int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 1d79621f267b..c2c0983a9f2a 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -735,6 +735,7 @@ struct drm_xe_device_query {
>  #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION	7
>  #define DRM_XE_DEVICE_QUERY_OA_UNITS		8
>  #define DRM_XE_DEVICE_QUERY_PXP_STATUS		9
> +#define DRM_XE_DEVICE_QUERY_EU_STALL		10
>	/** @query: The type of data to query */
>	__u32 query;
>
> @@ -1873,8 +1874,8 @@ enum drm_xe_eu_stall_property_id {
>	DRM_XE_EU_STALL_PROP_GT_ID = 1,
>
>	/**
> -	 * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate
> -	 * in GPU cycles.
> +	 * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in
> +	 * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall
>	 */
>	DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
>
> @@ -1886,6 +1887,41 @@ enum drm_xe_eu_stall_property_id {
>	DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS,
>  };
>
> +/**
> + * struct drm_xe_query_eu_stall - Information about EU stall sampling.
> + *
> + * If a query is made with a struct @drm_xe_device_query where .query
> + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses
> + * struct @drm_xe_query_eu_stall in .data.
> + */
> +struct drm_xe_query_eu_stall {
> +	/** @extensions: Pointer to the first extension struct, if any */
> +	__u64 extensions;
> +
> +	/** @capabilities: EU stall capabilities bit-mask */
> +	__u64 capabilities;
> +#define DRM_XE_EU_STALL_CAPS_BASE		(1 << 0)
> +
> +	/** @record_size: size of each EU stall data record */
> +	__u64 record_size;
> +
> +	/** @per_xecore_buf_size: Per XeCore buffer size */

/** @per_xecore_buf_size: internal per xecore buffer size */

> +	__u64 per_xecore_buf_size;

Sorry to bring this up again. I previously missed that the term 'dss' is
already present in xe_drm.h. So not sure we can't just call this
per_dss_buf_size? Or do we want to introduce a new 'xecore' term here.

Could we quickly check with Matt Roper about this. xecore is ok too, but
now we are introducing two different names for the same thing.

> +
> +	/** @reserved: Reserved */
> +	__u64 reserved[5];
> +
> +	/** @num_sampling_rates: Number of sampling rates supported */

/** @num_sampling_rates: number of entries in @sampling_rates array */

> +	__u64 num_sampling_rates;
> +
> +	/**
> +	 * @sampling_rates: Flexible array of sampling rates
> +	 * sorted in the fastest to slowest order.
> +	 * Sampling rates are specified in GPU clock cycles.
> +	 */
> +	__u64 sampling_rates[];
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
> --
> 2.48.1
>

Also, incidentally, this eu stall query will need to be added to IGT too,
some time.

I will take another look in the next revision of this patch too, but since
the comments here are nits, after addressing/fixing those, this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit at intel.com>


More information about the Intel-xe mailing list