[PATCH i-g-t] drm-uapi/xe: Sync with PXP and EU stall changes
Harish Chegondi
harish.chegondi at intel.com
Wed Feb 26 23:42:24 UTC 2025
On Wed, Feb 26, 2025 at 12:40:17PM -0800, Ashutosh Dixit wrote:
Hi Ashutosh,
There is a patch series that adds tests for PXP:
https://patchwork.freedesktop.org/series/142450/
Its first patch also updates the uAPI in xe_drm.h.
All but one of the patches in that series already have Reviewed-by tags.
Should we wait for that series to merge?
Thanks
Harish.
> Align with kernel commit cd5bbb2532f2 ("drm/xe/uapi: Add a device query to
> get EU stall sampling information").
>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
> ---
> include/drm-uapi/xe_drm.h | 197 +++++++++++++++++++++++++++++++++++++-
> 1 file changed, 195 insertions(+), 2 deletions(-)
>
> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index 08e263b3b2..154f947ef0 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -629,6 +629,39 @@ struct drm_xe_query_uc_fw_version {
> __u64 reserved;
> };
>
> +/**
> + * struct drm_xe_query_pxp_status - query if PXP is ready
> + *
> + * If PXP is enabled and no fatal error has occurred, the status will be set to
> + * one of the following values:
> + * 0: PXP init still in progress
> + * 1: PXP init complete
> + *
> + * If PXP is not enabled or something has gone wrong, the query will fail
> + * with one of the following error codes:
> + * -ENODEV: PXP not supported or disabled;
> + * -EIO: fatal error occurred during init, so PXP will never be enabled;
> + * -EINVAL: incorrect value provided as part of the query;
> + * -EFAULT: error copying the memory between kernel and userspace.
> + *
> + * The status can only be 0 in the first few seconds after driver load. If
> + * everything works as expected, the status will transition to init complete in
> + * less than 1 second, while in the case of errors the driver might take longer to
> + * start returning an error code, but it should still take less than 10 seconds.
> + *
> + * The supported session type bitmask is based on the values in
> + * enum drm_xe_pxp_session_type. TYPE_NONE is always supported and therefore
> + * is not reported in the bitmask.
> + */
> +struct drm_xe_query_pxp_status {
> + /** @status: current PXP status */
> + __u32 status;
> +
> + /** @supported_session_types: bitmask of supported PXP session types */
> + __u32 supported_session_types;
> +};
> +
> /**
> * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
> * structure to query device information
> @@ -648,6 +681,7 @@ struct drm_xe_query_uc_fw_version {
> * attributes.
> * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY
> * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES
> + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS
> *
> * If size is set to 0, the driver fills it with the required size for
> * the requested type of data to query. If size is equal to the required
> @@ -700,6 +734,8 @@ struct drm_xe_device_query {
> #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6
> #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7
> #define DRM_XE_DEVICE_QUERY_OA_UNITS 8
> +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9
> +#define DRM_XE_DEVICE_QUERY_EU_STALL 10
> /** @query: The type of data to query */
> __u32 query;
>
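A side note while looking at this: the IGT side of the PXP status query
should come out roughly like the sketch below (untested; fd is an open
xe DRM fd, and it follows the two-call size negotiation described in
the kernel-doc above):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include "xe_drm.h"

    struct drm_xe_query_pxp_status *pxp;
    struct drm_xe_device_query query = {
            .query = DRM_XE_DEVICE_QUERY_PXP_STATUS,
            .size = 0,      /* first call: kernel fills in required size */
    };

    if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
            return -errno;  /* -ENODEV/-EIO/... per the doc above */

    pxp = malloc(query.size);
    query.data = (uintptr_t)pxp;    /* second call: fetch the payload */
    if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
            return -errno;

    /* status == 1 means PXP init completed */
    int hwdrm_ok = pxp->status == 1 &&
                   (pxp->supported_session_types &
                    (1 << DRM_XE_PXP_TYPE_HWDRM));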
> @@ -743,8 +779,23 @@ struct drm_xe_device_query {
> * - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This
> * is uncached. Scanout surfaces should likely use this. All objects
> * that can be placed in VRAM must use this.
> + *
> + * This ioctl supports setting the following properties via the
> + * %DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY extension, which uses the
> + * generic @drm_xe_ext_set_property struct:
> + *
> + * - %DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE - set the type of PXP session
> + * this object will be used with. Valid values are listed in enum
> + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so
> + * there is no need to explicitly set that. Objects used with a session of type
> + * %DRM_XE_PXP_TYPE_HWDRM will be marked as invalid if a PXP invalidation
> + * event occurs after their creation. Attempting to flip an invalid object
> + * will cause a black frame to be displayed instead. Submissions with invalid
> + * objects mapped in the VM will be rejected.
> */
> struct drm_xe_gem_create {
> +#define DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY 0
> +#define DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE 0
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
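Creating a BO tied to a HWDRM session would then be (sketch, same
includes as the earlier snippet; the size and placement values here are
placeholders that a real test would take from the mem-regions query):

    struct drm_xe_ext_set_property ext = {
            .base.name = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
            .property = DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE,
            .value = DRM_XE_PXP_TYPE_HWDRM,
    };
    struct drm_xe_gem_create create = {
            .extensions = (uintptr_t)&ext,
            .size = 4096,                   /* placeholder */
            .placement = region_mask,       /* from the mem-regions query */
            .cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
    };
    int err = ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
    /* on success, create.handle is a PXP-protected BO */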
> @@ -811,6 +862,32 @@ struct drm_xe_gem_create {
>
> /**
> * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
> + *
> + * The @flags can be:
> + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For userspace to query a special
> + * offset for use in the mmap ioctl. Writing to the returned mmap address
> + * generates a PCI memory barrier with low overhead (avoiding an ioctl call
> + * as well as a write to VRAM, both of which would add overhead), acting
> + * like an MI_MEM_FENCE instruction.
> + *
> + * Note: The mmap size can be at most 4K, due to HW limitations. As a result
> + * this interface is only supported on CPU architectures that support 4K page
> + * size. The mmap_offset ioctl will detect this and gracefully return an
> + * error, in which case userspace is expected to fall back to a different
> + * method of triggering a barrier.
> + *
> + * Roughly the usage would be as follows:
> + *
> + * .. code-block:: C
> + *
> + * struct drm_xe_gem_mmap_offset mmo = {
> + * .handle = 0, // must be set to 0
> + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
> + * };
> + *
> + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
> + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset);
> + * map[i] = 0xdeadbeaf; // issue barrier
> */
> struct drm_xe_gem_mmap_offset {
> /** @extensions: Pointer to the first extension struct, if any */
> @@ -819,7 +896,8 @@ struct drm_xe_gem_mmap_offset {
> /** @handle: Handle for the object being mapped. */
> __u32 handle;
>
> - /** @flags: Must be zero */
> +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0)
> + /** @flags: Flags */
> __u32 flags;
>
> /** @offset: The fake offset to use for subsequent mmap call */
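The graceful failure is easy to probe from IGT too (sketch; page_size
and the fallback path are the test's own, and sys/mman.h is needed on
top of the earlier includes):

    struct drm_xe_gem_mmap_offset mmo = {
            .handle = 0,    /* must be 0 for the barrier offset */
            .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
    };
    uint32_t *barrier = MAP_FAILED;

    if (!ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
            barrier = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
                           fd, mmo.offset);
    if (barrier == MAP_FAILED) {
            /* e.g. non-4K-page CPU: fall back to another barrier method */
    } else {
            barrier[0] = 0;         /* issues the PCI memory barrier */
    }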
> @@ -906,6 +984,9 @@ struct drm_xe_vm_destroy {
> * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
> * handle MBZ, and the BO offset MBZ. This flag is intended to
> * implement VK sparse bindings.
> + * - %DRM_XE_VM_BIND_FLAG_CHECK_PXP - If the object is encrypted via PXP,
> + * reject the binding if the encryption key is no longer valid. This
> + * flag has no effect on BOs that are not marked as using PXP.
> */
> struct drm_xe_vm_bind_op {
> /** @extensions: Pointer to the first extension struct, if any */
> @@ -996,6 +1077,7 @@ struct drm_xe_vm_bind_op {
> #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
> #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
> #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
> +#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4)
> /** @flags: Bind flags */
> __u32 flags;
>
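And DRM_XE_VM_BIND_FLAG_CHECK_PXP would be exercised along these lines
(sketch; vm, pxp_bo, gpu_addr, bo_size and pat_index are assumed to be
set up by the test):

    struct drm_xe_vm_bind bind = {
            .vm_id = vm,
            .num_binds = 1,
            .bind = {
                    .obj = pxp_bo,  /* BO created with PXP_TYPE_HWDRM */
                    .range = bo_size,
                    .addr = gpu_addr,
                    .op = DRM_XE_VM_BIND_OP_MAP,
                    .flags = DRM_XE_VM_BIND_FLAG_CHECK_PXP,
                    .pat_index = pat_index,
            },
    };
    int err = ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
    /* the bind is rejected if the PXP key was invalidated after the
     * BO was created */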
> @@ -1087,6 +1169,24 @@ struct drm_xe_vm_bind {
> /**
> * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
> *
> + * This ioctl supports setting the following properties via the
> + * %DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension, which uses the
> + * generic @drm_xe_ext_set_property struct:
> + *
> + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY - set the queue priority.
> + * CAP_SYS_NICE is required to set a value above normal.
> + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE - set the queue timeslice
> + * duration in microseconds.
> + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE - set the type of PXP session
> + * this queue will be used with. Valid values are listed in enum
> + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so
> + * there is no need to explicitly set that. When a queue of type
> + * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session
> + * (%DRM_XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if it isn't already
> + * running.
> + * Given that going into a power-saving state kills PXP HWDRM sessions,
> + * runtime PM will be blocked while queues of this type are alive.
> + * All PXP queues will be killed if a PXP invalidation event occurs.
> + *
> * The example below shows how to use @drm_xe_exec_queue_create to create
> * a simple exec_queue (no parallel submission) of class
> * &DRM_XE_ENGINE_CLASS_RENDER.
> @@ -1110,7 +1210,7 @@ struct drm_xe_exec_queue_create {
> #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
> #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
> #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
> -
> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
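Creating a PXP exec_queue reuses the same set_property mechanism as the
existing priority/timeslice properties (sketch; vm is assumed to exist):

    struct drm_xe_ext_set_property ext = {
            .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
            .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE,
            .value = DRM_XE_PXP_TYPE_HWDRM,
    };
    struct drm_xe_engine_class_instance instance = {
            .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
    };
    struct drm_xe_exec_queue_create create = {
            .extensions = (uintptr_t)&ext,
            .width = 1,
            .num_placements = 1,
            .vm_id = vm,
            .instances = (uintptr_t)&instance,
    };
    int err = ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
    /* per the doc above, this also starts the default HWDRM session
     * if it isn't running yet */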
> @@ -1397,6 +1497,8 @@ struct drm_xe_wait_user_fence {
> enum drm_xe_observation_type {
> /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */
> DRM_XE_OBSERVATION_TYPE_OA,
> + /** @DRM_XE_OBSERVATION_TYPE_EU_STALL: EU stall sampling observation stream type */
> + DRM_XE_OBSERVATION_TYPE_EU_STALL,
> };
>
> /**
> @@ -1729,6 +1831,97 @@ struct drm_xe_oa_stream_info {
> __u64 reserved[3];
> };
>
> +/**
> + * enum drm_xe_pxp_session_type - Supported PXP session types.
> + *
> + * We currently only support HWDRM sessions, which are used for protected
> + * content that ends up being displayed, but the HW supports multiple types, so
> + * we might extend support in the future.
> + */
> +enum drm_xe_pxp_session_type {
> + /** @DRM_XE_PXP_TYPE_NONE: PXP not used */
> + DRM_XE_PXP_TYPE_NONE = 0,
> + /**
> + * @DRM_XE_PXP_TYPE_HWDRM: HWDRM sessions are used for content that ends
> + * up on the display.
> + */
> + DRM_XE_PXP_TYPE_HWDRM = 1,
> +};
> +
> +/* ID of the protected content session managed by Xe when PXP is active */
> +#define DRM_XE_PXP_HWDRM_DEFAULT_SESSION 0xf
> +
> +/**
> + * enum drm_xe_eu_stall_property_id - EU stall sampling input property ids.
> + *
> + * These properties are passed to the driver at open as a chain of
> + * @drm_xe_ext_set_property structures with @property set to these
> + * properties' enums and @value set to the corresponding values of these
> + * properties. @drm_xe_user_extension base.name should be set to
> + * @DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY.
> + *
> + * With the file descriptor obtained from open, user space must enable
> + * the EU stall stream fd with @DRM_XE_OBSERVATION_IOCTL_ENABLE before
> + * calling read(). An EIO errno from read() indicates that the HW dropped
> + * data due to a full buffer.
> + */
> +enum drm_xe_eu_stall_property_id {
> +#define DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY 0
> + /**
> + * @DRM_XE_EU_STALL_PROP_GT_ID: @gt_id of the GT on which
> + * EU stall data will be captured.
> + */
> + DRM_XE_EU_STALL_PROP_GT_ID = 1,
> +
> + /**
> + * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in
> + * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall
> + */
> + DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
> +
> + /**
> + * @DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS: Minimum number of
> + * EU stall data reports to be present in the kernel buffer
> + * before unblocking a blocked poll or read.
> + */
> + DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS,
> +};
> +
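If I read this right, the open/enable/read flow for EU stall sampling
comes out as below (sketch; rate is taken from the query further down,
unistd.h is needed for read(), and I'm assuming the stream fd is
returned by DRM_IOCTL_XE_OBSERVATION as for OA):

    struct drm_xe_ext_set_property props[] = {
            { .base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
              .base.next_extension = (uintptr_t)&props[1],
              .property = DRM_XE_EU_STALL_PROP_GT_ID, .value = 0 },
            { .base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
              .base.next_extension = (uintptr_t)&props[2],
              .property = DRM_XE_EU_STALL_PROP_SAMPLE_RATE, .value = rate },
            { .base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
              .property = DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, .value = 1 },
    };
    struct drm_xe_observation_param param = {
            .observation_type = DRM_XE_OBSERVATION_TYPE_EU_STALL,
            .observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
            .param = (uintptr_t)props,
    };

    int stream_fd = ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &param);
    ioctl(stream_fd, DRM_XE_OBSERVATION_IOCTL_ENABLE, 0);
    ssize_t n = read(stream_fd, buf, buf_size);
    /* n < 0 with errno == EIO means the HW dropped data (full buffer) */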
> +/**
> + * struct drm_xe_query_eu_stall - Information about EU stall sampling.
> + *
> + * If a query is made with a struct @drm_xe_device_query where .query
> + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses
> + * struct @drm_xe_query_eu_stall in .data.
> + */
> +struct drm_xe_query_eu_stall {
> + /** @extensions: Pointer to the first extension struct, if any */
> + __u64 extensions;
> +
> + /** @capabilities: EU stall capabilities bit-mask */
> + __u64 capabilities;
> +#define DRM_XE_EU_STALL_CAPS_BASE (1 << 0)
> +
> + /** @record_size: size of each EU stall data record */
> + __u64 record_size;
> +
> + /** @per_xecore_buf_size: internal per XeCore buffer size */
> + __u64 per_xecore_buf_size;
> +
> + /** @reserved: Reserved */
> + __u64 reserved[5];
> +
> + /** @num_sampling_rates: Number of sampling rates in @sampling_rates array */
> + __u64 num_sampling_rates;
> +
> + /**
> + * @sampling_rates: Flexible array of sampling rates
> + * sorted in the fastest to slowest order.
> + * Sampling rates are specified in GPU clock cycles.
> + */
> + __u64 sampling_rates[];
> +};
> +
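And picking a sampling rate via the new query (sketch; same two-call
pattern as before, and the .size returned by the first call already
covers the flexible sampling_rates array):

    struct drm_xe_query_eu_stall *info;
    struct drm_xe_device_query query = {
            .query = DRM_XE_DEVICE_QUERY_EU_STALL,
    };

    ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
    info = malloc(query.size);
    query.data = (uintptr_t)info;
    ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);

    /* sampling_rates[] is sorted fastest to slowest */
    __u64 slowest = info->sampling_rates[info->num_sampling_rates - 1];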
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.48.1
>