[Intel-gfx] [RFC v3 3/3] drm/doc/rfc: VM_BIND uapi definition

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jun 10 10:16:28 UTC 2022


On 09/06/2022 19:53, Niranjana Vishwanathapura wrote:
> On Thu, Jun 09, 2022 at 09:36:48AM +0100, Matthew Auld wrote:
>> On 08/06/2022 22:32, Niranjana Vishwanathapura wrote:
>>> On Wed, Jun 08, 2022 at 10:12:05AM +0100, Matthew Auld wrote:
>>>> On 08/06/2022 08:17, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 07/06/2022 20:37, Niranjana Vishwanathapura wrote:
>>>>>> On Tue, Jun 07, 2022 at 11:27:14AM +0100, Tvrtko Ursulin wrote:
>>>>>>>
>>>>>>> On 17/05/2022 19:32, Niranjana Vishwanathapura wrote:
>>>>>>>> VM_BIND and related uapi definitions
>>>>>>>>
>>>>>>>> v2: Ensure proper kernel-doc formatting with cross references.
>>>>>>>>     Also add new uapi and documentation as per review comments
>>>>>>>>     from Daniel.
>>>>>>>>
>>>>>>>> Signed-off-by: Niranjana Vishwanathapura 
>>>>>>>> <niranjana.vishwanathapura at intel.com>
>>>>>>>> ---
>>>>>>>>  Documentation/gpu/rfc/i915_vm_bind.h | 399 
>>>>>>>> +++++++++++++++++++++++++++
>>>>>>>>  1 file changed, 399 insertions(+)
>>>>>>>>  create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>>
>>>>>>>> diff --git a/Documentation/gpu/rfc/i915_vm_bind.h 
>>>>>>>> b/Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..589c0a009107
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>> @@ -0,0 +1,399 @@
>>>>>>>> +/* SPDX-License-Identifier: MIT */
>>>>>>>> +/*
>>>>>>>> + * Copyright © 2022 Intel Corporation
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * DOC: I915_PARAM_HAS_VM_BIND
>>>>>>>> + *
>>>>>>>> + * VM_BIND feature availability.
>>>>>>>> + * See the param field of typedef drm_i915_getparam_t.
>>>>>>>> + */
>>>>>>>> +#define I915_PARAM_HAS_VM_BIND        57
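For illustration, userspace would detect the feature with the existing
getparam ioctl. A minimal sketch, assuming this uapi lands as proposed
(needs <sys/ioctl.h> and the updated <drm/i915_drm.h>, error handling
elided):

    int i915_has_vm_bind(int fd)
    {
        int value = 0;
        drm_i915_getparam_t gp = {
            .param = I915_PARAM_HAS_VM_BIND,
            .value = &value,
        };

        /* ioctl returns 0 on success; non-zero value reports support */
        if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
            return 0;
        return value;
    }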
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * DOC: I915_VM_CREATE_FLAGS_USE_VM_BIND
>>>>>>>> + *
>>>>>>>> + * Flag to opt-in for VM_BIND mode of binding during VM creation.
>>>>>>>> + * See struct drm_i915_gem_vm_control flags.
>>>>>>>> + *
>>>>>>>> + * A VM in VM_BIND mode will not support the older execbuf
>>>>>>>> + * mode of binding.
>>>>>>>> + * In VM_BIND mode, the execbuf ioctl will not accept any
>>>>>>>> + * execlist (i.e., &drm_i915_gem_execbuffer2.buffer_count
>>>>>>>> + * must be 0).
>>>>>>>> + * Also, &drm_i915_gem_execbuffer2.batch_start_offset and
>>>>>>>> + * &drm_i915_gem_execbuffer2.batch_len must be 0.
>>>>>>>> + * The DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES extension
>>>>>>>> + * must be provided to pass in the batch buffer addresses.
>>>>>>>> + *
>>>>>>>> + * Additionally, I915_EXEC_NO_RELOC, I915_EXEC_HANDLE_LUT and
>>>>>>>> + * I915_EXEC_BATCH_FIRST of &drm_i915_gem_execbuffer2.flags
>>>>>>>> + * must be 0 (not used) in VM_BIND mode, and the
>>>>>>>> + * I915_EXEC_USE_EXTENSIONS flag must always be set (see
>>>>>>>> + * struct drm_i915_gem_execbuffer_ext_batch_addresses).
>>>>>>>> + * The buffers_ptr field is likewise unused and must be 0.
>>>>>>>> + */
>>>>>>>> +#define I915_VM_CREATE_FLAGS_USE_VM_BIND    (1 << 0)
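Opting in would then look something like this (a sketch against the
existing DRM_IOCTL_I915_GEM_VM_CREATE, assuming the new flag is
accepted there as documented above):

    struct drm_i915_gem_vm_control ctl = {
        .flags = I915_VM_CREATE_FLAGS_USE_VM_BIND,
    };
    __u32 vm_id = 0;

    /* On success the kernel fills in ctl.vm_id for the new VM */
    if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl) == 0)
        vm_id = ctl.vm_id;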
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * DOC: I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING
>>>>>>>> + *
>>>>>>>> + * Flag to declare context as long running.
>>>>>>>> + * See struct drm_i915_gem_context_create_ext flags.
>>>>>>>> + *
>>>>>>>> + * Usage of dma-fence expects that it completes in a
>>>>>>>> + * reasonable amount of time. Compute, on the other hand, can
>>>>>>>> + * be long running. Hence it is not appropriate for compute
>>>>>>>> + * contexts to export request completion dma-fences to user.
>>>>>>>> + * The dma-fence usage will be limited to in-kernel
>>>>>>>> + * consumption only. Compute contexts need to use user/memory
>>>>>>>> + * fences instead.
>>>>>>>> + *
>>>>>>>> + * So, long running contexts do not support output fences.
>>>>>>>> + * Hence, I915_EXEC_FENCE_OUT (see
>>>>>>>> + * &drm_i915_gem_execbuffer2.flags) and I915_EXEC_FENCE_SIGNAL
>>>>>>>> + * (see &drm_i915_gem_exec_fence.flags) must not be used.
>>>>>>>> + *
>>>>>>>> + * The DRM_I915_GEM_WAIT ioctl is also not supported for
>>>>>>>> + * objects mapped to long running contexts.
>>>>>>>> + */
>>>>>>>> +#define I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING   (1u << 2)
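For completeness, creating such a context would presumably be (sketch;
assumes the flag is honoured by DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT):

    struct drm_i915_gem_context_create_ext arg = {
        .flags = I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING,
    };

    /* arg.ctx_id names the long running context on success */
    ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg);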
>>>>>>>> +
>>>>>>>> +/* VM_BIND related ioctls */
>>>>>>>> +#define DRM_I915_GEM_VM_BIND        0x3d
>>>>>>>> +#define DRM_I915_GEM_VM_UNBIND        0x3e
>>>>>>>> +#define DRM_I915_GEM_WAIT_USER_FENCE    0x3f
>>>>>>>> +
>>>>>>>> +#define DRM_IOCTL_I915_GEM_VM_BIND \
>>>>>>>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_BIND, \
>>>>>>>> +             struct drm_i915_gem_vm_bind)
>>>>>>>> +#define DRM_IOCTL_I915_GEM_VM_UNBIND \
>>>>>>>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_UNBIND, \
>>>>>>>> +             struct drm_i915_gem_vm_unbind)
>>>>>>>> +#define DRM_IOCTL_I915_GEM_WAIT_USER_FENCE \
>>>>>>>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT_USER_FENCE, \
>>>>>>>> +             struct drm_i915_gem_wait_user_fence)
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_vm_bind - VA to object mapping to bind.
>>>>>>>> + *
>>>>>>>> + * This structure is passed to VM_BIND ioctl and specifies the 
>>>>>>>> mapping of GPU
>>>>>>>> + * virtual address (VA) range to the section of an object that 
>>>>>>>> should be bound
>>>>>>>> + * in the device page table of the specified address space (VM).
>>>>>>>> + * The VA range specified must be unique (i.e., not currently
>>>>>>>> + * bound) and can be mapped to the whole object or to a
>>>>>>>> + * section of the object (partial binding).
>>>>>>>> + * Multiple VA mappings can be created to the same section of 
>>>>>>>> the object
>>>>>>>> + * (aliasing).
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_vm_bind {
>>>>>>>> +    /** @vm_id: VM (address space) id to bind */
>>>>>>>> +    __u32 vm_id;
>>>>>>>> +
>>>>>>>> +    /** @handle: Object handle */
>>>>>>>> +    __u32 handle;
>>>>>>>> +
>>>>>>>> +    /** @start: Virtual Address start to bind */
>>>>>>>> +    __u64 start;
>>>>>>>> +
>>>>>>>> +    /** @offset: Offset in object to bind */
>>>>>>>> +    __u64 offset;
>>>>>>>> +
>>>>>>>> +    /** @length: Length of mapping to bind */
>>>>>>>> +    __u64 length;
>>>>>>>
>>>>>>> Does it support, or should it, equivalent of 
>>>>>>> EXEC_OBJECT_PAD_TO_SIZE? Or if not userspace is expected to map 
>>>>>>> the remainder of the space to a dummy object? In which case would 
>>>>>>> there be any alignment/padding issues preventing the two bind to 
>>>>>>> be placed next to each other?
>>>>>>>
>>>>>>> I ask because someone from the compute side asked me about a 
>>>>>>> problem with their strategy of dealing with overfetch and I 
>>>>>>> suggested pad to size.
>>>>>>>
>>>>>>
>>>>>> Thanks Tvrtko,
>>>>>> I think we shouldn't be needing it. As with VM_BIND, VA assignment
>>>>>> is completely pushed to userspace, so no padding should be necessary
>>>>>> once the 'start' and 'size' alignment conditions are met.
>>>>>>
>>>>>> I will add some documentation on the alignment requirements here.
>>>>>> Generally, 'start' and 'size' should be 4K aligned. But I think
>>>>>> when we have 64K lmem page sizes (dg2 and xehpsdv), they need to
>>>>>> be 64K aligned.
>>>>>
>>>>> + Matt
>>>>>
>>>>> Is aligning to 64k enough for all overfetch issues?
>>>>>
>>>>> Apparently compute has a situation where a buffer is received by
>>>>> one component and another has to apply more alignment to it, to
>>>>> deal with overfetch. Since they cannot grow the actual BO, would
>>>>> they want to VM_BIND a scratch area on top? Or perhaps none of
>>>>> this is a problem on discrete, and the original BO should be
>>>>> correctly allocated to start with.
>>>>>
>>>>> Side question - what about the align to 2MiB mentioned in 
>>>>> i915_vma_insert to avoid mixing 4k and 64k PTEs? That does not 
>>>>> apply to discrete?
>>>>
>>>> Not sure about the overfetch thing, but yeah dg2 & xehpsdv both 
>>>> require a minimum of 64K pages underneath for local memory, and the 
>>>> BO size will also be rounded up accordingly. And yeah the 
>>>> complication arises due to not being able to mix 4K + 64K GTT pages 
>>>> within the same page-table (existed since even gen8). Note that 4K 
>>>> here is what we typically get for system memory.
>>>>
>>>> Originally we had a memory coloring scheme to track the "color" of 
>>>> each page-table, which basically ensures that userspace can't do 
>>>> something nasty like mixing page sizes. The advantage of that scheme 
>>>> is that we would only require 64K GTT alignment and no extra 
>>>> padding, but is perhaps a little complex.
>>>>
>>>> The merged solution is just to align and pad out the vma to 2M
>>>> (i.e. vma->node.size and not vma->size), which is dead simple 
>>>> implementation wise, but does potentially waste some GTT space and 
>>>> some of the local memory used for the actual page-table. For the 
>>>> alignment the kernel just validates that the GTT address is aligned 
>>>> to 2M in vma_insert(), and then for the padding it just inflates it 
>>>> to 2M, if userspace hasn't already.
>>>>
>>>> See the kernel-doc for @size: 
>>>> https://dri.freedesktop.org/docs/drm/gpu/driver-uapi.html?#c.drm_i915_gem_create_ext 
>>>>
>>>
>>> Ok, those requirements (2M VA alignment) will apply to VM_BIND also.
>>> This is unfortunate, but it is not something new enforced by VM_BIND.
>>> The other option is to go with 64K alignment, and in the VM_BIND case
>>> the user must ensure there is no mixing of 64K (lmem) and 4K (smem)
>>> mappings in the same 2M range. But this is not VM_BIND specific
>>> (it will apply to soft-pinning in execbuf2 also).
>>>
>>> I don't think we need any VA padding here, as with VM_BIND the VA is
>>> managed fully by the user. If we enforce the VA to be 2M aligned, it
>>> will leave holes (if BOs are smaller than 2M), but nobody is going
>>> to allocate anything from there.
>>
>> Note that we only apply the 2M alignment + padding for local memory 
>> pages, for system memory we don't have/need such restrictions. The VA 
>> padding then importantly prevents userspace from incorrectly (or
>> maliciously) inserting a 4K system memory object into a page-table
>> operating in 64K GTT mode.
>>
> 
> Thanks Matt.
> I also synced offline with Matt a bit on this.
> We don't need an explicit 'pad_to_size'. The i915 driver implicitly
> pads the size to a 2M boundary for LMEM BOs, which will apply to
> VM_BIND also.
> The remaining question is whether we enforce 2M VA alignment for
> lmem BOs (just like the legacy execbuf path) on dg2 & xehpsdv, or go
> with just 64K alignment but ensure there is no mixing of 4K and 64K

"Driver is implicitly padding the size to 2MB boundary" - this is the 
backing store?

> mappings in the same 2M range. I think we can go with the 2M alignment
> requirement for VM_BIND also. So, no new requirements here for VM_BIND.

Are there any considerations here of letting userspace know? Presumably 
the userspace allocator has to know, or it would try to ask for 
impossible addresses.
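
E.g. if 2M ends up being the rule for lmem on dg2/xehpsdv, I'd expect 
the allocator to have to round both base and size itself, along these 
lines (hypothetical helper, 2M taken from the discussion above):

    #define I915_LMEM_VA_ALIGN (2ull << 20)  /* assumed, per above */

    static inline uint64_t va_align_up(uint64_t va)
    {
        /* round up to the next 2M boundary (power-of-two alignment) */
        return (va + I915_LMEM_VA_ALIGN - 1) & ~(I915_LMEM_VA_ALIGN - 1);
    }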

Regards,

Tvrtko

> 
> I will update the documentation.
> 
> Niranjana
> 
>>>
>>> Niranjana
>>>
>>>>>
>>>>> Regards,
>>>>>
>>>>> Tvrtko
>>>>>
>>>>>>
>>>>>> Niranjana
>>>>>>
>>>>>>> Regards,
>>>>>>>
>>>>>>> Tvrtko
>>>>>>>
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>> +     *
>>>>>>>> +     * I915_GEM_VM_BIND_READONLY:
>>>>>>>> +     * Mapping is read-only.
>>>>>>>> +     *
>>>>>>>> +     * I915_GEM_VM_BIND_CAPTURE:
>>>>>>>> +     * Capture this mapping in the dump upon GPU error.
>>>>>>>> +     */
>>>>>>>> +    __u64 flags;
>>>>>>>> +#define I915_GEM_VM_BIND_READONLY    (1 << 0)
>>>>>>>> +#define I915_GEM_VM_BIND_CAPTURE     (1 << 1)
>>>>>>>> +
>>>>>>>> +    /** @extensions: 0-terminated chain of extensions for this 
>>>>>>>> mapping. */
>>>>>>>> +    __u64 extensions;
>>>>>>>> +};
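A typical whole-object bind would then be (illustrative sketch against
the proposed uapi; vm_id, bo_handle, va and bo_size are placeholders):

    struct drm_i915_gem_vm_bind bind = {
        .vm_id = vm_id,
        .handle = bo_handle,
        .start = va,        /* suitably aligned, per the above */
        .offset = 0,
        .length = bo_size,  /* whole object */
        .flags = I915_GEM_VM_BIND_CAPTURE,
    };

    ioctl(fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind);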
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_vm_unbind - VA to object mapping to unbind.
>>>>>>>> + *
>>>>>>>> + * This structure is passed to VM_UNBIND ioctl and specifies 
>>>>>>>> the GPU virtual
>>>>>>>> + * address (VA) range that should be unbound from the device 
>>>>>>>> page table of the
>>>>>>>> + * specified address space (VM). The specified VA range must 
>>>>>>>> match one of the
>>>>>>>> + * mappings created with the VM_BIND ioctl. TLB is flushed upon 
>>>>>>>> unbind
>>>>>>>> + * completion.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_vm_unbind {
>>>>>>>> +    /** @vm_id: VM (address space) id to unbind */
>>>>>>>> +    __u32 vm_id;
>>>>>>>> +
>>>>>>>> +    /** @rsvd: Reserved for future use; must be zero. */
>>>>>>>> +    __u32 rsvd;
>>>>>>>> +
>>>>>>>> +    /** @start: Virtual Address start to unbind */
>>>>>>>> +    __u64 start;
>>>>>>>> +
>>>>>>>> +    /** @length: Length of mapping to unbind */
>>>>>>>> +    __u64 length;
>>>>>>>> +
>>>>>>>> +    /** @flags: reserved for future usage, currently MBZ */
>>>>>>>> +    __u64 flags;
>>>>>>>> +
>>>>>>>> +    /** @extensions: 0-terminated chain of extensions for this 
>>>>>>>> mapping. */
>>>>>>>> +    __u64 extensions;
>>>>>>>> +};
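Tearing the mapping down would mirror the bind (sketch; per the doc
above, the range must match a mapping created with VM_BIND):

    struct drm_i915_gem_vm_unbind unbind = {
        .vm_id = vm_id,
        .start = va,        /* same VA range as the earlier bind */
        .length = bo_size,
    };

    ioctl(fd, DRM_IOCTL_I915_GEM_VM_UNBIND, &unbind);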
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_vm_bind_fence - An input or output fence for 
>>>>>>>> the vm_bind
>>>>>>>> + * or the vm_unbind work.
>>>>>>>> + *
>>>>>>>> + * The vm_bind or vm_unbind async worker will wait for the
>>>>>>>> + * input fence to signal before starting the binding or
>>>>>>>> + * unbinding.
>>>>>>>> + *
>>>>>>>> + * The vm_bind or vm_unbind async worker will signal the 
>>>>>>>> returned output fence
>>>>>>>> + * after the completion of binding or unbinding.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_vm_bind_fence {
>>>>>>>> +    /** @handle: User's handle for a drm_syncobj to wait on or 
>>>>>>>> signal. */
>>>>>>>> +    __u32 handle;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>> +     *
>>>>>>>> +     * I915_VM_BIND_FENCE_WAIT:
>>>>>>>> +     * Wait for the input fence before binding/unbinding
>>>>>>>> +     *
>>>>>>>> +     * I915_VM_BIND_FENCE_SIGNAL:
>>>>>>>> +     * Return bind/unbind completion fence as output
>>>>>>>> +     */
>>>>>>>> +    __u32 flags;
>>>>>>>> +#define I915_VM_BIND_FENCE_WAIT            (1<<0)
>>>>>>>> +#define I915_VM_BIND_FENCE_SIGNAL          (1<<1)
>>>>>>>> +#define __I915_VM_BIND_FENCE_UNKNOWN_FLAGS 
>>>>>>>> (-(I915_VM_BIND_FENCE_SIGNAL << 1))
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_vm_bind_ext_timeline_fences - Timeline 
>>>>>>>> fences for vm_bind
>>>>>>>> + * and vm_unbind.
>>>>>>>> + *
>>>>>>>> + * This structure describes an array of timeline drm_syncobj 
>>>>>>>> and associated
>>>>>>>> + * points for timeline variants of drm_syncobj. These timeline 
>>>>>>>> 'drm_syncobj's
>>>>>>>> + * can be input or output fences (See struct 
>>>>>>>> drm_i915_vm_bind_fence).
>>>>>>>> + */
>>>>>>>> +struct drm_i915_vm_bind_ext_timeline_fences {
>>>>>>>> +#define I915_VM_BIND_EXT_TIMELINE_FENCES    0
>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>> +    struct i915_user_extension base;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @fence_count: Number of elements in the @handles_ptr & 
>>>>>>>> @value_ptr
>>>>>>>> +     * arrays.
>>>>>>>> +     */
>>>>>>>> +    __u64 fence_count;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @handles_ptr: Pointer to an array of struct 
>>>>>>>> drm_i915_vm_bind_fence
>>>>>>>> +     * of length @fence_count.
>>>>>>>> +     */
>>>>>>>> +    __u64 handles_ptr;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @values_ptr: Pointer to an array of u64 values of length
>>>>>>>> +     * @fence_count.
>>>>>>>> +     * Values must be 0 for a binary drm_syncobj. A value of
>>>>>>>> +     * 0 for a timeline drm_syncobj is invalid as it turns a
>>>>>>>> +     * drm_syncobj into a binary one.
>>>>>>>> +     */
>>>>>>>> +    __u64 values_ptr;
>>>>>>>> +};
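Chaining this extension into the bind from the earlier sketch might
look like (assumes a pre-created timeline syncobj; uintptr_t casts for
pointer-to-u64 conversion):

    struct drm_i915_vm_bind_fence fence = {
        .handle = syncobj_handle,
        .flags = I915_VM_BIND_FENCE_SIGNAL,
    };
    __u64 point = 1;  /* timeline point to signal on completion */

    struct drm_i915_vm_bind_ext_timeline_fences ext = {
        .base.name = I915_VM_BIND_EXT_TIMELINE_FENCES,
        .fence_count = 1,
        .handles_ptr = (__u64)(uintptr_t)&fence,
        .values_ptr = (__u64)(uintptr_t)&point,
    };

    bind.extensions = (__u64)(uintptr_t)&ext;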
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_vm_bind_user_fence - An input or output user 
>>>>>>>> fence for the
>>>>>>>> + * vm_bind or the vm_unbind work.
>>>>>>>> + *
>>>>>>>> + * The vm_bind or vm_unbind async worker will wait for the
>>>>>>>> + * input fence (the value at @addr to become equal to @val)
>>>>>>>> + * before starting the binding or unbinding.
>>>>>>>> + *
>>>>>>>> + * The vm_bind or vm_unbind async worker will signal the output 
>>>>>>>> fence after
>>>>>>>> + * the completion of binding or unbinding by writing @val to
>>>>>>>> + * the memory location at @addr.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_vm_bind_user_fence {
>>>>>>>> +    /** @addr: User/Memory fence qword aligned process virtual 
>>>>>>>> address */
>>>>>>>> +    __u64 addr;
>>>>>>>> +
>>>>>>>> +    /** @val: User/Memory fence value to be written after bind 
>>>>>>>> completion */
>>>>>>>> +    __u64 val;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>> +     *
>>>>>>>> +     * I915_VM_BIND_USER_FENCE_WAIT:
>>>>>>>> +     * Wait for the input fence before binding/unbinding
>>>>>>>> +     *
>>>>>>>> +     * I915_VM_BIND_USER_FENCE_SIGNAL:
>>>>>>>> +     * Return bind/unbind completion fence as output
>>>>>>>> +     */
>>>>>>>> +    __u32 flags;
>>>>>>>> +#define I915_VM_BIND_USER_FENCE_WAIT            (1<<0)
>>>>>>>> +#define I915_VM_BIND_USER_FENCE_SIGNAL          (1<<1)
>>>>>>>> +#define __I915_VM_BIND_USER_FENCE_UNKNOWN_FLAGS \
>>>>>>>> +    (-(I915_VM_BIND_USER_FENCE_SIGNAL << 1))
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_vm_bind_ext_user_fence - User/memory fences 
>>>>>>>> for vm_bind
>>>>>>>> + * and vm_unbind.
>>>>>>>> + *
>>>>>>>> + * These user fences can be input or output fences
>>>>>>>> + * (See struct drm_i915_vm_bind_user_fence).
>>>>>>>> + */
>>>>>>>> +struct drm_i915_vm_bind_ext_user_fence {
>>>>>>>> +#define I915_VM_BIND_EXT_USER_FENCES    1
>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>> +    struct i915_user_extension base;
>>>>>>>> +
>>>>>>>> +    /** @fence_count: Number of elements in the @user_fence_ptr 
>>>>>>>> array. */
>>>>>>>> +    __u64 fence_count;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @user_fence_ptr: Pointer to an array of
>>>>>>>> +     * struct drm_i915_vm_bind_user_fence of length @fence_count.
>>>>>>>> +     */
>>>>>>>> +    __u64 user_fence_ptr;
>>>>>>>> +};
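The memory fence equivalent, hooked into the vm_bind struct from the
earlier sketch, would presumably be (fence_mem is a qword aligned
location in the process address space, placeholder here):

    struct drm_i915_vm_bind_user_fence ufence = {
        .addr = (__u64)(uintptr_t)&fence_mem,
        .val = 1,
        .flags = I915_VM_BIND_USER_FENCE_SIGNAL,
    };

    struct drm_i915_vm_bind_ext_user_fence ext = {
        .base.name = I915_VM_BIND_EXT_USER_FENCES,
        .fence_count = 1,
        .user_fence_ptr = (__u64)(uintptr_t)&ufence,
    };

    bind.extensions = (__u64)(uintptr_t)&ext;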
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_execbuffer_ext_batch_addresses - Array 
>>>>>>>> of batch buffer
>>>>>>>> + * gpu virtual addresses.
>>>>>>>> + *
>>>>>>>> + * In the execbuf ioctl (see struct drm_i915_gem_execbuffer2),
>>>>>>>> + * this extension must always be appended in VM_BIND mode, and
>>>>>>>> + * it is an error to append it in the older non-VM_BIND mode.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_execbuffer_ext_batch_addresses {
>>>>>>>> +#define DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES    1
>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>> +    struct i915_user_extension base;
>>>>>>>> +
>>>>>>>> +    /** @count: Number of addresses in the addr array. */
>>>>>>>> +    __u32 count;
>>>>>>>> +
>>>>>>>> +    /** @addr: An array of batch gpu virtual addresses. */
>>>>>>>> +    __u64 addr[0];
>>>>>>>> +};
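Submission in VM_BIND mode would then reduce to something like this
sketch (the extension chain goes via cliprects_ptr when
I915_EXEC_USE_EXTENSIONS is set, as with existing execbuf2 extensions;
batch_va and ctx_id are placeholders):

    struct drm_i915_gem_execbuffer_ext_batch_addresses *ext;

    /* trailing addr[] storage for one batch address */
    ext = calloc(1, sizeof(*ext) + sizeof(__u64));
    ext->base.name = DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES;
    ext->count = 1;
    ext->addr[0] = batch_va;

    struct drm_i915_gem_execbuffer2 execbuf = {
        .flags = I915_EXEC_USE_EXTENSIONS,
        .rsvd1 = ctx_id,                         /* context id */
        .cliprects_ptr = (__u64)(uintptr_t)ext,  /* extension chain */
    };

    ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);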
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_execbuffer_ext_user_fence - First level 
>>>>>>>> batch completion
>>>>>>>> + * signaling extension.
>>>>>>>> + *
>>>>>>>> + * This extension allows the user to attach a user fence (an
>>>>>>>> + * @addr, @value pair) to an execbuf, to be signaled by the
>>>>>>>> + * command streamer after the completion of the first level
>>>>>>>> + * batch, by writing @value at the specified @addr and
>>>>>>>> + * triggering an interrupt.
>>>>>>>> + * The user can either poll for this user fence to signal, or
>>>>>>>> + * wait on it with the i915_gem_wait_user_fence ioctl.
>>>>>>>> + * This is especially useful for long running contexts, where
>>>>>>>> + * waiting on a dma-fence from user space (e.g. via the
>>>>>>>> + * i915_gem_wait ioctl) is not supported.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_execbuffer_ext_user_fence {
>>>>>>>> +#define DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE        2
>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>> +    struct i915_user_extension base;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @addr: User/Memory fence qword aligned GPU virtual address.
>>>>>>>> +     *
>>>>>>>> +     * Address has to be a valid GPU virtual address at the 
>>>>>>>> time of
>>>>>>>> +     * first level batch completion.
>>>>>>>> +     */
>>>>>>>> +    __u64 addr;
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @value: User/Memory fence Value to be written to above 
>>>>>>>> address
>>>>>>>> +     * after first level batch completes.
>>>>>>>> +     */
>>>>>>>> +    __u64 value;
>>>>>>>> +
>>>>>>>> +    /** @rsvd: Reserved for future extensions, MBZ */
>>>>>>>> +    __u64 rsvd;
>>>>>>>> +};
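A rough usage sketch: chain this after the batch addresses extension
from the previous sketch, then poll (fence_va / fence_cpu are assumed
to be the GPU and CPU views of the same bound page):

    struct drm_i915_gem_execbuffer_ext_user_fence signal = {
        .base.name = DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE,
        .addr = fence_va,   /* qword aligned GPU VA, bound via VM_BIND */
        .value = 1,
    };

    ext->base.next_extension = (__u64)(uintptr_t)&signal;
    ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);

    /* busy-wait variant; see the wait ioctl below for a blocking wait */
    while (*(volatile uint64_t *)fence_cpu != 1)
        ;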
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_create_ext_vm_private - Extension to 
>>>>>>>> make the object
>>>>>>>> + * private to the specified VM.
>>>>>>>> + *
>>>>>>>> + * See struct drm_i915_gem_create_ext.
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_create_ext_vm_private {
>>>>>>>> +#define I915_GEM_CREATE_EXT_VM_PRIVATE        2
>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>> +    struct i915_user_extension base;
>>>>>>>> +
>>>>>>>> +    /** @vm_id: Id of the VM to which the object is private */
>>>>>>>> +    __u32 vm_id;
>>>>>>>> +};
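Allocating such a VM private object would go through the existing
DRM_IOCTL_I915_GEM_CREATE_EXT (sketch; bo_size is a placeholder):

    struct drm_i915_gem_create_ext_vm_private priv = {
        .base.name = I915_GEM_CREATE_EXT_VM_PRIVATE,
        .vm_id = vm_id,
    };

    struct drm_i915_gem_create_ext create = {
        .size = bo_size,
        .extensions = (__u64)(uintptr_t)&priv,
    };

    /* create.handle names the new, VM private, object on success */
    ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);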
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * struct drm_i915_gem_wait_user_fence - Wait on user/memory 
>>>>>>>> fence.
>>>>>>>> + *
>>>>>>>> + * A user/memory fence wait can be woken up either by:
>>>>>>>> + *
>>>>>>>> + * 1. The GPU context indicated by @ctx_id, or,
>>>>>>>> + * 2. The kernel driver async worker upon I915_UFENCE_WAIT_SOFT.
>>>>>>>> + *    @ctx_id is ignored when this flag is set.
>>>>>>>> + *
>>>>>>>> + * Wakeup condition is,
>>>>>>>> + * ``((*addr & mask) op (value & mask))``
>>>>>>>> + *
>>>>>>>> + * See :ref:`Documentation/driver-api/dma-buf.rst 
>>>>>>>> <indefinite_dma_fences>`
>>>>>>>> + */
>>>>>>>> +struct drm_i915_gem_wait_user_fence {
>>>>>>>> +    /** @extensions: Zero-terminated chain of extensions. */
>>>>>>>> +    __u64 extensions;
>>>>>>>> +
>>>>>>>> +    /** @addr: User/Memory fence address */
>>>>>>>> +    __u64 addr;
>>>>>>>> +
>>>>>>>> +    /** @ctx_id: Id of the Context which will signal the fence. */
>>>>>>>> +    __u32 ctx_id;
>>>>>>>> +
>>>>>>>> +    /** @op: Wakeup condition operator */
>>>>>>>> +    __u16 op;
>>>>>>>> +#define I915_UFENCE_WAIT_EQ      0
>>>>>>>> +#define I915_UFENCE_WAIT_NEQ     1
>>>>>>>> +#define I915_UFENCE_WAIT_GT      2
>>>>>>>> +#define I915_UFENCE_WAIT_GTE     3
>>>>>>>> +#define I915_UFENCE_WAIT_LT      4
>>>>>>>> +#define I915_UFENCE_WAIT_LTE     5
>>>>>>>> +#define I915_UFENCE_WAIT_BEFORE  6
>>>>>>>> +#define I915_UFENCE_WAIT_AFTER   7
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>> +     *
>>>>>>>> +     * I915_UFENCE_WAIT_SOFT:
>>>>>>>> +     *
>>>>>>>> +     * To be woken up by i915 driver async worker (not by GPU).
>>>>>>>> +     *
>>>>>>>> +     * I915_UFENCE_WAIT_ABSTIME:
>>>>>>>> +     *
>>>>>>>> +     * Wait timeout specified as absolute time.
>>>>>>>> +     */
>>>>>>>> +    __u16 flags;
>>>>>>>> +#define I915_UFENCE_WAIT_SOFT    0x1
>>>>>>>> +#define I915_UFENCE_WAIT_ABSTIME 0x2
>>>>>>>> +
>>>>>>>> +    /** @value: Wakeup value */
>>>>>>>> +    __u64 value;
>>>>>>>> +
>>>>>>>> +    /** @mask: Wakeup mask */
>>>>>>>> +    __u64 mask;
>>>>>>>> +#define I915_UFENCE_WAIT_U8     0xffu
>>>>>>>> +#define I915_UFENCE_WAIT_U16    0xffffu
>>>>>>>> +#define I915_UFENCE_WAIT_U32    0xfffffffful
>>>>>>>> +#define I915_UFENCE_WAIT_U64    0xffffffffffffffffull
>>>>>>>> +
>>>>>>>> +    /**
>>>>>>>> +     * @timeout: Wait timeout in nanoseconds.
>>>>>>>> +     *
>>>>>>>> +     * If the I915_UFENCE_WAIT_ABSTIME flag is set, then the
>>>>>>>> +     * timeout is the absolute time in nsec.
>>>>>>>> +     */
>>>>>>>> +    __s64 timeout;
>>>>>>>> +};
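Finally, a blocking wait on the fence from the earlier sketches might
look like this (illustrative only; 1 second relative timeout):

    struct drm_i915_gem_wait_user_fence wait = {
        .addr = fence_va,               /* fence address, as above */
        .ctx_id = ctx_id,               /* context that will signal */
        .op = I915_UFENCE_WAIT_GTE,     /* wake when *addr >= value */
        .value = 1,
        .mask = I915_UFENCE_WAIT_U64,
        .timeout = 1000000000ll,        /* ns; relative, no ABSTIME */
    };

    ioctl(fd, DRM_IOCTL_I915_GEM_WAIT_USER_FENCE, &wait);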

