[igt-dev] [PATCH i-g-t v3 01/11] lib/vm_bind: import uapi definitions

Niranjana Vishwanathapura niranjana.vishwanathapura at intel.com
Wed Oct 12 17:39:55 UTC 2022


On Wed, Oct 12, 2022 at 09:04:29AM +0100, Matthew Auld wrote:
>On 10/10/2022 07:59, Niranjana Vishwanathapura wrote:
>>Import required VM_BIND kernel uapi definitions.
>>
>>DRM_I915_GEM_VM_BIND/UNBIND ioctls allow the UMD to bind/unbind GEM
>>buffer objects (BOs) or sections of a BO at specified GPU virtual
>>addresses on a specified address space (VM). Multiple mappings can map
>>to the same physical pages of an object (aliasing). These mappings (also
>>referred to as persistent mappings) will be persistent across multiple
>>GPU submissions (execbuf calls) issued by the UMD, without user having
>>to provide a list of all required mappings during each submission (as
>>required by older execbuf mode).
>>
>>The new execbuf3 ioctl (I915_GEM_EXECBUFFER3) will only work in vm_bind
>>mode. The vm_bind mode only works with this new execbuf3 ioctl.
>>
>>v2: Move vm_bind uapi definitions to i915_drm_local.h
>>     Define HAS_64K_PAGES()
>>     Pull in uapi updates
>>
>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>>---
>>  lib/i915/i915_drm_local.h | 308 ++++++++++++++++++++++++++++++++++++++
>>  lib/intel_chipset.h       |   2 +
>>  2 files changed, 310 insertions(+)
>>
>>diff --git a/lib/i915/i915_drm_local.h b/lib/i915/i915_drm_local.h
>>index 9a2273c4e4..128d5f416f 100644
>>--- a/lib/i915/i915_drm_local.h
>>+++ b/lib/i915/i915_drm_local.h
>>@@ -23,6 +23,314 @@ extern "C" {
>>  #define DRM_I915_QUERY_GEOMETRY_SUBSLICES      6
>>+/*
>>+ * Signal to the kernel that the object will need to be accessed via
>>+ * the CPU.
>>+ *
>>+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
>>+ * strictly required on platforms where only some of the device memory
>>+ * is directly visible or mappable through the CPU, like on DG2+.
>>+ *
>>+ * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
>>+ * ensure we can always spill the allocation to system memory, if we
>>+ * can't place the object in the mappable part of
>>+ * I915_MEMORY_CLASS_DEVICE.
>>+ *
>>+ * Without this hint, the kernel will assume that non-mappable
>>+ * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
>>+ * kernel can still migrate the object to the mappable part, as a last
>>+ * resort, if userspace ever CPU faults this object, but this might be
>>+ * expensive, and so ideally should be avoided.
>>+ */
>>+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
>>+
>>+#define DRM_I915_GEM_VM_BIND		0x3d
>>+#define DRM_I915_GEM_VM_UNBIND		0x3e
>>+#define DRM_I915_GEM_EXECBUFFER3	0x3f
>>+
>>+#define DRM_IOCTL_I915_GEM_VM_BIND	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
>>+#define DRM_IOCTL_I915_GEM_VM_UNBIND	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_unbind)
>>+#define DRM_IOCTL_I915_GEM_EXECBUFFER3  DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER3, struct drm_i915_gem_execbuffer3)
>>+
>>+/*
>>+ * VM_BIND feature version supported.
>>+ *
>>+ * The following versions of VM_BIND have been defined:
>>+ *
>>+ * 0: No VM_BIND support.
>>+ *
>>+ * 1: In VM_UNBIND calls, the UMD must specify the exact mappings created
>>+ *    previously with VM_BIND, the ioctl will not support unbinding multiple
>>+ *    mappings or splitting them. Similarly, VM_BIND calls will not replace
>>+ *    any existing mappings.
>>+ *
>>+ * See struct drm_i915_gem_vm_bind and struct drm_i915_gem_vm_unbind.
>>+ */
>>+#define I915_PARAM_VM_BIND_VERSION	57
>>+
>>+#define DRM_I915_GEM_EXECBUFFER3_EXT_TIMELINE_FENCES 0
>>+
>>+/**
>>+ * struct drm_i915_gem_timeline_fence - An input or output timeline fence.
>>+ *
>>+ * The operation will wait for input fence to signal.
>>+ *
>>+ * The returned output fence will be signaled after the completion of the
>>+ * operation.
>>+ */
>>+struct drm_i915_gem_timeline_fence {
>>+	/** @handle: User's handle for a drm_syncobj to wait on or signal. */
>>+	__u32 handle;
>>+
>>+	/**
>>+	 * @flags: Supported flags are:
>>+	 *
>>+	 * I915_TIMELINE_FENCE_WAIT:
>>+	 * Wait for the input fence before the operation.
>>+	 *
>>+	 * I915_TIMELINE_FENCE_SIGNAL:
>>+	 * Return operation completion fence as output.
>>+	 */
>>+	__u32 flags;
>>+#define I915_TIMELINE_FENCE_WAIT            (1 << 0)
>>+#define I915_TIMELINE_FENCE_SIGNAL          (1 << 1)
>>+#define __I915_TIMELINE_FENCE_UNKNOWN_FLAGS (-(I915_TIMELINE_FENCE_SIGNAL << 1))
>>+
>>+	/**
>>+	 * @value: A point in the timeline.
>>+	 * Value must be 0 for a binary drm_syncobj. A Value of 0 for a
>>+	 * timeline drm_syncobj is invalid as it turns a drm_syncobj into a
>>+	 * binary one.
>>+	 */
>>+	__u64 value;
>>+};
>>+
>>+/**
>>+ * struct drm_i915_gem_execbuffer3 - Structure for DRM_I915_GEM_EXECBUFFER3
>>+ * ioctl.
>>+ *
>>+ * DRM_I915_GEM_EXECBUFFER3 ioctl only works in VM_BIND mode and VM_BIND mode
>>+ * only works with this ioctl for submission.
>>+ * See I915_VM_CREATE_FLAGS_USE_VM_BIND.
>>+ */
>>+struct drm_i915_gem_execbuffer3 {
>>+	/**
>>+	 * @ctx_id: Context id
>>+	 *
>>+	 * Only contexts with user engine map are allowed.
>>+	 */
>>+	__u32 ctx_id;
>>+
>>+	/**
>>+	 * @engine_idx: Engine index
>>+	 *
>>+	 * An index in the user engine map of the context specified by @ctx_id.
>>+	 */
>>+	__u32 engine_idx;
>>+
>>+	/**
>>+	 * @batch_address: Batch gpu virtual address/es.
>>+	 *
>>+	 * For normal submission, it is the gpu virtual address of the batch
>>+	 * buffer. For parallel submission, it is a pointer to an array of
>>+	 * batch buffer gpu virtual addresses with array size equal to the
>>+	 * number of (parallel) engines involved in that submission (See
>>+	 * struct i915_context_engines_parallel_submit).
>>+	 */
>>+	__u64 batch_address;
>>+
>>+	/** @flags: Currently reserved, MBZ */
>>+	__u64 flags;
>>+#define __I915_EXEC3_UNKNOWN_FLAGS (~0)
>>+
>>+	/** @fence_count: Number of fences in @timeline_fences array. */
>>+	__u64 fence_count;
>>+
>>+	/**
>>+	 * @timeline_fences: Pointer to an array of timeline fences.
>>+	 *
>>+	 * Timeline fences are of format struct drm_i915_gem_timeline_fence.
>>+	 */
>>+	__u64 timeline_fences;
>>+
>>+	/** @rsvd: Reserved, MBZ */
>>+	__u64 rsvd;
>>+
>>+	/**
>>+	 * @extensions: Zero-terminated chain of extensions.
>>+	 *
>>+	 * For future extensions. See struct i915_user_extension.
>>+	 */
>>+	__u64 extensions;
>>+};
>>+
>>+/*
>>+ * If I915_VM_CREATE_FLAGS_USE_VM_BIND flag is set, VM created will work in
>>+ * VM_BIND mode
>>+ */
>>+#define I915_VM_CREATE_FLAGS_USE_VM_BIND	(1u << 0)
>>+
>>+/*
>>+ * For I915_GEM_CREATE_EXT_VM_PRIVATE usage see
>>+ * struct drm_i915_gem_create_ext_vm_private.
>>+ */
>>+#define I915_GEM_CREATE_EXT_VM_PRIVATE 2
>>+
>>+/**
>>+ * struct drm_i915_gem_create_ext_vm_private - Extension to make the object
>>+ * private to the specified VM.
>>+ *
>>+ * See struct drm_i915_gem_create_ext.
>>+ *
>>+ * By default, BOs can be mapped on multiple VMs and can also be dma-buf
>>+ * exported. Hence these BOs are referred to as Shared BOs.
>>+ * During each execbuf3 submission, the request fence must be added to the
>>+ * dma-resv fence list of all shared BOs mapped on the VM.
>>+ *
>>+ * Unlike Shared BOs, these VM private BOs can only be mapped on the VM they
>>+ * are private to and can't be dma-buf exported. All private BOs of a VM share
>>+ * the dma-resv object. Hence during each execbuf3 submission, they need only
>>+ * one dma-resv fence list updated. Thus, the fast path (where required
>>+ * mappings are already bound) submission latency is O(1) w.r.t the number of
>>+ * VM private BOs.
>>+ */
>>+struct drm_i915_gem_create_ext_vm_private {
>>+	/** @base: Extension link. See struct i915_user_extension. */
>>+	struct i915_user_extension base;
>>+
>>+	/** @vm_id: Id of the VM to which Object is private */
>>+	__u32 vm_id;
>>+
>>+	/** @rsvd: Reserved, MBZ */
>>+	__u32 rsvd;
>>+};
>>+
>>+/**
>>+ * struct drm_i915_gem_vm_bind - VA to object mapping to bind.
>>+ *
>>+ * This structure is passed to VM_BIND ioctl and specifies the mapping of GPU
>>+ * virtual address (VA) range to the section of an object that should be bound
>>+ * in the device page table of the specified address space (VM).
>>+ * The VA range specified must be unique (ie., not currently bound) and can
>>+ * be mapped to whole object or a section of the object (partial binding).
>>+ * Multiple VA mappings can be created to the same section of the object
>>+ * (aliasing).
>>+ *
>>+ * The @start, @offset and @length must be 4K page aligned. However, DG2 and
>>+ * XEHPSDV have a 64K page size for device local memory and use a compact
>>+ * page table. On those platforms, for binding device local-memory objects,
>>+ * the @start, @offset and @length must be 64K aligned.
>>+ *
>>+ * Error code -EINVAL will be returned if @start, @offset and @length are not
>>+ * properly aligned. In version 1 (See I915_PARAM_VM_BIND_VERSION), error code
>>+ * -ENOSPC will be returned if the VA range specified can't be reserved.
>>+ *
>>+ * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
>>+ * are not ordered. Furthermore, parts of the VM_BIND operation can be done
>>+ * asynchronously, if valid @fence is specified.
>>+ */
>>+struct drm_i915_gem_vm_bind {
>>+	/** @vm_id: VM (address space) id to bind */
>>+	__u32 vm_id;
>>+
>>+	/** @handle: Object handle */
>>+	__u32 handle;
>>+
>>+	/** @start: Virtual Address start to bind */
>>+	__u64 start;
>>+
>>+	/** @offset: Offset in object to bind */
>>+	__u64 offset;
>>+
>>+	/** @length: Length of mapping to bind */
>>+	__u64 length;
>>+
>>+	/**
>>+	 * @flags: Currently reserved, MBZ.
>>+	 *
>>+	 * Note that @fence carries its own flags.
>>+	 */
>>+	__u64 flags;
>>+
>>+	/**
>>+	 * @fence: Timeline fence for bind completion signaling.
>>+	 *
>>+	 * Timeline fence is of format struct drm_i915_gem_timeline_fence.
>>+	 *
>>+	 * It is an out fence, hence using I915_TIMELINE_FENCE_WAIT flag
>>+	 * is invalid, and an error will be returned.
>>+	 *
>>+	 * If I915_TIMELINE_FENCE_SIGNAL flag is not set, then out fence
>>+	 * is not requested and binding is completed synchronously.
>>+	 */
>>+	struct drm_i915_gem_timeline_fence fence;
>>+
>>+	/**
>>+	 * @extensions: Zero-terminated chain of extensions.
>>+	 *
>>+	 * For future extensions. See struct i915_user_extension.
>>+	 */
>>+	__u64 extensions;
>>+};
>>+
>>+/**
>>+ * struct drm_i915_gem_vm_unbind - VA to object mapping to unbind.
>>+ *
>>+ * This structure is passed to VM_UNBIND ioctl and specifies the GPU virtual
>>+ * address (VA) range that should be unbound from the device page table of the
>>+ * specified address space (VM). VM_UNBIND will force unbind the specified
>>+ * range from device page table without waiting for any GPU job to complete.
>>+ * It is the UMD's responsibility to ensure the mapping is no longer in use
>>+ * before calling VM_UNBIND.
>>+ *
>>+ * If the specified mapping is not found, the ioctl will simply return without
>>+ * any error.
>>+ *
>>+ * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
>>+ * are not ordered. Furthermore, parts of the VM_UNBIND operation can be done
>>+ * asynchronously, if valid @fence is specified.
>>+ */
>>+struct drm_i915_gem_vm_unbind {
>>+	/** @vm_id: VM (address space) id to unbind from */
>>+	__u32 vm_id;
>>+
>>+	/** @rsvd: Reserved, MBZ */
>>+	__u32 rsvd;
>>+
>>+	/** @start: Virtual Address start to unbind */
>>+	__u64 start;
>>+
>>+	/** @length: Length of mapping to unbind */
>>+	__u64 length;
>>+
>>+	/**
>>+	 * @flags: Currently reserved, MBZ.
>>+	 *
>>+	 * Note that @fence carries its own flags.
>>+	 */
>>+	__u64 flags;
>>+
>>+	/**
>>+	 * @fence: Timeline fence for unbind completion signaling.
>>+	 *
>>+	 * Timeline fence is of format struct drm_i915_gem_timeline_fence.
>>+	 *
>>+	 * It is an out fence, hence using I915_TIMELINE_FENCE_WAIT flag
>>+	 * is invalid, and an error will be returned.
>>+	 *
>>+	 * If I915_TIMELINE_FENCE_SIGNAL flag is not set, then out fence
>>+	 * is not requested and unbinding is completed synchronously.
>>+	 */
>>+	struct drm_i915_gem_timeline_fence fence;
>>+
>>+	/**
>>+	 * @extensions: Zero-terminated chain of extensions.
>>+	 *
>>+	 * For future extensions. See struct i915_user_extension.
>>+	 */
>>+	__u64 extensions;
>>+};
>>+
>>  #if defined(__cplusplus)
>>  }
>>  #endif
>>diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h
>>index d7a6ff190f..7cf8259157 100644
>>--- a/lib/intel_chipset.h
>>+++ b/lib/intel_chipset.h
>>@@ -225,4 +225,6 @@ void intel_check_pch(void);
>>  #define HAS_FLATCCS(devid)	(intel_get_device_info(devid)->has_flatccs)
>>+#define HAS_64K_PAGES(devid)	(IS_DG2(devid))
>
>Can we replace this with region.gtt_alignment (once that gets merged)? 
>So long as we have the region where this is used. Note that there
>is also the auto alignment detection in IGT, which maybe could be 
>useful.
>

Sure, will replace once PS64 support gets merged.
I am not sure which auto alignment detection you are talking about,
but in any case, using region.gtt_alignment will be better I guess.

>I guess not a big deal for now,
>Reviewed-by: Matthew Auld <matthew.auld at intel.com>

Thanks,
Niranjana

>
>>+
>>  #endif /* _INTEL_CHIPSET_H */


More information about the igt-dev mailing list