[PATCH v2 17/32] drm/xe/uapi: Add madvise interface

Tue May 20 08:13:12 UTC 2025

On 02-05-2025 19:30, Thomas Hellström wrote:
> On Mon, 2025-04-07 at 15:47 +0530, Himal Prasad Ghimiray wrote:
>> This commit introduces a new madvise interface to support
>> driver-specific ioctl operations. The madvise interface allows for
>> more
>> efficient memory management by providing hints to the driver about
>> the
>> expected memory usage and pte update policy for gpuvma.
>>
>> Signed-off-by: Himal Prasad Ghimiray
>> <himal.prasad.ghimiray at intel.com>
>> ---
>>   include/uapi/drm/xe_drm.h | 97
>> +++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 97 insertions(+)
>>
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 9c08738c3b91..aaf515df3a83 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -81,6 +81,7 @@ extern "C" {
>>    *  - &DRM_IOCTL_XE_EXEC
>>    *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
>>    *  - &DRM_IOCTL_XE_OBSERVATION
>> + *  - &DRM_IOCTL_XE_MADVISE
>>    */
>>   
>>   /*
>> @@ -102,6 +103,7 @@ extern "C" {
>>   #define DRM_XE_EXEC			0x09
>>   #define DRM_XE_WAIT_USER_FENCE		0x0a
>>   #define DRM_XE_OBSERVATION		0x0b
>> +#define DRM_XE_MADVISE			0x0c
>>   
>>   /* Must be kept compact -- no holes */
>>   
>> @@ -117,6 +119,7 @@ extern "C" {
>>   #define
>> DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
>>   #define
>> DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
>>   #define
>> DRM_IOCTL_XE_OBSERVATION		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
>> +#define
>> DRM_IOCTL_XE_MADVISE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise)
>>   
>>   /**
>>    * DOC: Xe IOCTL Extensions
>> @@ -1965,6 +1968,100 @@ struct drm_xe_query_eu_stall {
>>   	__u64 sampling_rates[];
>>   };
>>   
>> +struct drm_xe_madvise_ops {
> 
> Suggest using extensions also for the ops, like for vm_bind, since we
> might come up with complicated ops in the future that don't fit the
> union + resvd below.

Sure

> 
>> +	/** @start: start of the virtual address range */
>> +	__u64 start;
>> +
>> +	/** @size: size of the virtual address range */
>> +	__u64 range;
>> +
>> +#define DRM_XE_VMA_ATTR_PREFERRED_LOC	0
> 
> Is UMD currently really using and exercising PREFERRED_LOC? If not, I
> suggest removing this op and inventing a reasonable default behaviour
> until multi-device is in place.
> 
>> +#define DRM_XE_VMA_ATTR_ATOMIC		1
>> +#define DRM_XE_VMA_ATTR_PAT		2
>> +#define DRM_XE_VMA_ATTR_PURGEABLE_STATE	3
>> +	/** @type: type of attribute */
>> +	__u32 type;
>> +
>> +	/** @pad: MBZ */
>> +	__u32 pad;
>> +
>> +	union {
>> +		struct {
>> +#define DRM_XE_VMA_ATOMIC_UNDEFINED	0
>> +#define DRM_XE_VMA_ATOMIC_DEVICE	1
>> +#define DRM_XE_VMA_ATOMIC_GLOBAL	2
>> +#define DRM_XE_VMA_ATOMIC_CPU		3
>> +		/** @val: value of atomic operation*/
>> +			__u32 val;
>> +
>> +		/** @reserved: Reserved */
>> +			__u32 reserved;
>> +		} atomic;
>> +
>> +		struct {
>> +#define DRM_XE_VMA_PURGEABLE_STATE_WILLNEED	0
>> +#define DRM_XE_VMA_PURGEABLE_STATE_DONTNEED	1
>> +#define DRM_XE_VMA_PURGEABLE_STATE_PURGED	2
> 
> I think the purged state, at least on i915 was only known to the KMD
> (so shouldn't really be visible in this header). Also we should
> probably define the semantics here if
> 
> a) There are multiple gpu vms with conflicting purgeable state.

If even one VM still marks the buffer as WILLNEED, we play it safe
and keep it around.

> b) What happens if we call dontneed and the bo is deeply pipelined?

Return -EBUSY?

> c) What if a willneed madvise fails due to the bo being purged? And
> that op is embedded in an array of unrelated ops? Should it really fail
> the whole IOCTL?

Either drop the array-of-ops handling in the ioctl, or return a status
for each op? I am not sure which is better here.

> 
>> +		/** @val: value for DRM_XE_VMA_ATTR_PURGEABLE_STATE
>> */
>> +			__u32 val;
>> +
>> +		/** @reserved: Reserved */
>> +			__u32 reserved;
>> +		} purge_state_val;
>> +
>> +		struct {
>> +			/** @pat_index */
>> +			__u32 val;
>> +
>> +			/** @reserved: Reserved */
>> +			__u32 reserved;
>> +		} pat_index;
>> +
>> +		/** @preferred_mem_loc: preferred memory location */
>> +		struct {
>> +			__u32 devmem_fd;
>> +
>> +#define MIGRATE_ALL_PAGES 0
>> +#define MIGRATE_ONLY_SYSTEM_PAGES 1
>> +			__u32 migration_policy;
>> +		} preferred_mem_loc;
>> +	};
>> +
>> +	/** @reserved: Reserved */
>> +	__u64 reserved[2];
>> +};
>> +
>> +/**
>> + * struct drm_xe_madvise - Input of &DRM_IOCTL_XE_MADVISE
>> + *
>> + * Set memory attributes to a virtual address range
>> + */
>> +struct drm_xe_madvise {
>> +	/** @extensions: Pointer to the first extension struct, if
>> any */
>> +	__u64 extensions;
>> +
>> +	/** @vm_id: vm_id of the virtual range */
>> +	__u32 vm_id;
>> +
>> +	/** @num_ops: number of madvises in ioctl */
>> +	__u32 num_ops;
> 
> Should we really support an array of ops here given the experience we
> had with rollbacks on VM_bind? WRT this, please also see the comments
> on the purgeable state above.
> 
> 
> 
> 
>> +
>> +	union {
>> +		/** @ops: used if num_ops == 1 */
>> +		struct drm_xe_madvise_ops ops;
>> +
>> +		/**
>> +		 * @vector_of_ops: userptr to array of struct
>> +		 * drm_xe_vm_madvise_op if num_ops > 1
>> +		 */
>> +		__u64 vector_of_ops;
>> +	};
>> +
>> +	/** @reserved: Reserved */
>> +	__u64 reserved[2];
>> +
>> +};
>> +
>>   #if defined(__cplusplus)
>>   }
>>   #endif
> 
> /Thomas
>