[Intel-gfx] [RFC v3 3/3] drm/doc/rfc: VM_BIND uapi definition

Matthew Auld matthew.auld at intel.com
Fri Jun 10 10:32:14 UTC 2022


On 10/06/2022 11:16, Tvrtko Ursulin wrote:
> 
> On 09/06/2022 19:53, Niranjana Vishwanathapura wrote:
>> On Thu, Jun 09, 2022 at 09:36:48AM +0100, Matthew Auld wrote:
>>> On 08/06/2022 22:32, Niranjana Vishwanathapura wrote:
>>>> On Wed, Jun 08, 2022 at 10:12:05AM +0100, Matthew Auld wrote:
>>>>> On 08/06/2022 08:17, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 07/06/2022 20:37, Niranjana Vishwanathapura wrote:
>>>>>>> On Tue, Jun 07, 2022 at 11:27:14AM +0100, Tvrtko Ursulin wrote:
>>>>>>>>
>>>>>>>> On 17/05/2022 19:32, Niranjana Vishwanathapura wrote:
>>>>>>>>> VM_BIND and related uapi definitions
>>>>>>>>>
>>>>>>>>> v2: Ensure proper kernel-doc formatting with cross references.
>>>>>>>>>     Also add new uapi and documentation as per review comments
>>>>>>>>>     from Daniel.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Niranjana Vishwanathapura 
>>>>>>>>> <niranjana.vishwanathapura at intel.com>
>>>>>>>>> ---
>>>>>>>>>  Documentation/gpu/rfc/i915_vm_bind.h | 399 
>>>>>>>>> +++++++++++++++++++++++++++
>>>>>>>>>  1 file changed, 399 insertions(+)
>>>>>>>>>  create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>>>
>>>>>>>>> diff --git a/Documentation/gpu/rfc/i915_vm_bind.h 
>>>>>>>>> b/Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>>> new file mode 100644
>>>>>>>>> index 000000000000..589c0a009107
>>>>>>>>> --- /dev/null
>>>>>>>>> +++ b/Documentation/gpu/rfc/i915_vm_bind.h
>>>>>>>>> @@ -0,0 +1,399 @@
>>>>>>>>> +/* SPDX-License-Identifier: MIT */
>>>>>>>>> +/*
>>>>>>>>> + * Copyright © 2022 Intel Corporation
>>>>>>>>> + */
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * DOC: I915_PARAM_HAS_VM_BIND
>>>>>>>>> + *
>>>>>>>>> + * VM_BIND feature availability.
>>>>>>>>> + * See typedef drm_i915_getparam_t param.
>>>>>>>>> + */
>>>>>>>>> +#define I915_PARAM_HAS_VM_BIND        57
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * DOC: I915_VM_CREATE_FLAGS_USE_VM_BIND
>>>>>>>>> + *
>>>>>>>>> + * Flag to opt-in for VM_BIND mode of binding during VM creation.
>>>>>>>>> + * See struct drm_i915_gem_vm_control flags.
>>>>>>>>> + *
>>>>>>>>> + * A VM in VM_BIND mode will not support the older execbuff 
>>>>>>>>> mode of binding.
>>>>>>>>> + * In VM_BIND mode, the execbuff ioctl will not accept any execlist
>>>>>>>>> + * (i.e., &drm_i915_gem_execbuffer2.buffer_count must be 0).
>>>>>>>>> + * Also, &drm_i915_gem_execbuffer2.batch_start_offset and
>>>>>>>>> + * &drm_i915_gem_execbuffer2.batch_len must be 0.
>>>>>>>>> + * DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES extension must 
>>>>>>>>> be provided
>>>>>>>>> + * to pass in the batch buffer addresses.
>>>>>>>>> + *
>>>>>>>>> + * Additionally, I915_EXEC_NO_RELOC, I915_EXEC_HANDLE_LUT and
>>>>>>>>> + * I915_EXEC_BATCH_FIRST of &drm_i915_gem_execbuffer2.flags 
>>>>>>>>> must be 0
>>>>>>>>> + * (not used) in VM_BIND mode. I915_EXEC_USE_EXTENSIONS flag 
>>>>>>>>> must always be
>>>>>>>>> + * set (See struct drm_i915_gem_execbuffer_ext_batch_addresses).
>>>>>>>>> + * The buffers_ptr, buffer_count, batch_start_offset and 
>>>>>>>>> batch_len fields
>>>>>>>>> + * of struct drm_i915_gem_execbuffer2 are also not used and 
>>>>>>>>> must be 0.
>>>>>>>>> + */
>>>>>>>>> +#define I915_VM_CREATE_FLAGS_USE_VM_BIND    (1 << 0)
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * DOC: I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING
>>>>>>>>> + *
>>>>>>>>> + * Flag to declare context as long running.
>>>>>>>>> + * See struct drm_i915_gem_context_create_ext flags.
>>>>>>>>> + *
>>>>>>>>> + * dma-fence usage expects fences to complete within a reasonable
>>>>>>>>> + * amount of time. Compute, on the other hand, can be long running.
>>>>>>>>> + * Hence it is not appropriate for compute contexts to export request
>>>>>>>>> + * completion dma-fences to user. dma-fence usage will be limited to
>>>>>>>>> + * in-kernel consumption only. Compute contexts need to use
>>>>>>>>> + * user/memory fences.
>>>>>>>>> + *
>>>>>>>>> + * So, long running contexts do not support output fences. Hence,
>>>>>>>>> + * I915_EXEC_FENCE_OUT (See &drm_i915_gem_execbuffer2.flags) and
>>>>>>>>> + * I915_EXEC_FENCE_SIGNAL (See &drm_i915_gem_exec_fence.flags)
>>>>>>>>> + * must not be used.
>>>>>>>>> + *
>>>>>>>>> + * The DRM_I915_GEM_WAIT ioctl call is also not supported for
>>>>>>>>> + * objects mapped to long running contexts.
>>>>>>>>> + */
>>>>>>>>> +#define I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING   (1u << 2)
>>>>>>>>> +
>>>>>>>>> +/* VM_BIND related ioctls */
>>>>>>>>> +#define DRM_I915_GEM_VM_BIND        0x3d
>>>>>>>>> +#define DRM_I915_GEM_VM_UNBIND        0x3e
>>>>>>>>> +#define DRM_I915_GEM_WAIT_USER_FENCE    0x3f
>>>>>>>>> +
>>>>>>>>> +#define DRM_IOCTL_I915_GEM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + 
>>>>>>>>> DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
>>>>>>>>> +#define DRM_IOCTL_I915_GEM_VM_UNBIND DRM_IOWR(DRM_COMMAND_BASE 
>>>>>>>>> + DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_bind)
>>>>>>>>> +#define DRM_IOCTL_I915_GEM_WAIT_USER_FENCE 
>>>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT_USER_FENCE, 
>>>>>>>>> struct drm_i915_gem_wait_user_fence)
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_vm_bind - VA to object mapping to bind.
>>>>>>>>> + *
>>>>>>>>> + * This structure is passed to VM_BIND ioctl and specifies the 
>>>>>>>>> mapping of GPU
>>>>>>>>> + * virtual address (VA) range to the section of an object that 
>>>>>>>>> should be bound
>>>>>>>>> + * in the device page table of the specified address space (VM).
>>>>>>>>> + * The VA range specified must be unique (i.e., not currently
>>>>>>>>> + * bound) and can be mapped to the whole object or a section of the
>>>>>>>>> + * object (partial binding).
>>>>>>>>> + * Multiple VA mappings can be created to the same section of 
>>>>>>>>> the object
>>>>>>>>> + * (aliasing).
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_vm_bind {
>>>>>>>>> +    /** @vm_id: VM (address space) id to bind */
>>>>>>>>> +    __u32 vm_id;
>>>>>>>>> +
>>>>>>>>> +    /** @handle: Object handle */
>>>>>>>>> +    __u32 handle;
>>>>>>>>> +
>>>>>>>>> +    /** @start: Virtual Address start to bind */
>>>>>>>>> +    __u64 start;
>>>>>>>>> +
>>>>>>>>> +    /** @offset: Offset in object to bind */
>>>>>>>>> +    __u64 offset;
>>>>>>>>> +
>>>>>>>>> +    /** @length: Length of mapping to bind */
>>>>>>>>> +    __u64 length;
>>>>>>>>
>>>>>>>> Does it support, or should it, an equivalent of
>>>>>>>> EXEC_OBJECT_PAD_TO_SIZE? Or if not, is userspace expected to map
>>>>>>>> the remainder of the space to a dummy object? In which case, would
>>>>>>>> there be any alignment/padding issues preventing the two binds from
>>>>>>>> being placed next to each other?
>>>>>>>>
>>>>>>>> I ask because someone from the compute side asked me about a 
>>>>>>>> problem with their strategy of dealing with overfetch and I 
>>>>>>>> suggested pad to size.
>>>>>>>>
>>>>>>>
>>>>>>> Thanks Tvrtko,
>>>>>>> I think we shouldn't be needing it. As with VM_BIND VA assignment
>>>>>>> is completely pushed to userspace, no padding should be necessary
>>>>>>> once the 'start' and 'size' alignment conditions are met.
>>>>>>>
>>>>>>> I will add some documentation on alignment requirement here.
>>>>>>> Generally, 'start' and 'size' should be 4K aligned. But, I think
>>>>>>> when we have 64K lmem page sizes (dg2 and xehpsdv), they need to
>>>>>>> be 64K aligned.
>>>>>>
>>>>>> + Matt
>>>>>>
>>>>>> Align to 64k is enough for all overfetch issues?
>>>>>>
>>>>>> Apparently compute has a situation where a buffer is received by 
>>>>>> one component and another has to apply more alignment to it, to 
>>>>>> deal with overfetch. Since they cannot grow the actual BO if they 
>>>>>> wanted to VM_BIND a scratch area on top? Or perhaps none of this 
>>>>>> is a problem on discrete and original BO should be correctly 
>>>>>> allocated to start with.
>>>>>>
>>>>>> Side question - what about the align to 2MiB mentioned in 
>>>>>> i915_vma_insert to avoid mixing 4k and 64k PTEs? That does not 
>>>>>> apply to discrete?
>>>>>
>>>>> Not sure about the overfetch thing, but yeah dg2 & xehpsdv both 
>>>>> require a minimum of 64K pages underneath for local memory, and the 
>>>>> BO size will also be rounded up accordingly. And yeah the 
>>>>> complication arises due to not being able to mix 4K + 64K GTT pages 
>>>>> within the same page-table (existed since even gen8). Note that 4K 
>>>>> here is what we typically get for system memory.
>>>>>
>>>>> Originally we had a memory coloring scheme to track the "color" of 
>>>>> each page-table, which basically ensures that userspace can't do 
>>>>> something nasty like mixing page sizes. The advantage of that 
>>>>> scheme is that we would only require 64K GTT alignment and no extra 
>>>>> padding, but is perhaps a little complex.
>>>>>
>>>>> The merged solution is just to align and pad (i.e vma->node.size 
>>>>> and not vma->size) out of the vma to 2M, which is dead simple 
>>>>> implementation wise, but does potentially waste some GTT space and 
>>>>> some of the local memory used for the actual page-table. For the 
>>>>> alignment the kernel just validates that the GTT address is aligned 
>>>>> to 2M in vma_insert(), and then for the padding it just inflates it 
>>>>> to 2M, if userspace hasn't already.
>>>>>
>>>>> See the kernel-doc for @size: 
>>>>> https://dri.freedesktop.org/docs/drm/gpu/driver-uapi.html?#c.drm_i915_gem_create_ext 
>>>>>
>>>>>
>>>>>
>>>>
>>>> Ok, those requirements (2M VA alignment) will apply to VM_BIND also.
>>>> This is unfortunate, but it is not something new enforced by VM_BIND.
>>>> The other option is to go with 64K alignment and, in the VM_BIND case,
>>>> the user must ensure there is no mixing of 64K (lmem) and 4K (smem)
>>>> mappings in the same 2M range. But this is not VM_BIND specific
>>>> (it will apply to soft-pinning in execbuf2 also).
>>>>
>>>> I don't think we need any VA padding here as with VM_BIND the VA is
>>>> managed fully by the user. If we enforce the VA to be 2M aligned, it
>>>> will leave holes (if BOs are smaller than 2M), but nobody is going
>>>> to allocate anything from there.
>>>
>>> Note that we only apply the 2M alignment + padding for local memory 
>>> pages, for system memory we don't have/need such restrictions. The VA 
>>> padding then importantly prevents userspace from incorrectly (or 
>>> maliciously) inserting a 4K system memory object into a page-table 
>>> operating in 64K GTT mode.
>>>
>>
>> Thanks Matt.
>> I also synced offline with Matt a bit on this.
>> We don't need an explicit 'pad_to_size'. The i915 driver is implicitly
>> padding the size to a 2M boundary for LMEM BOs, which will apply to
>> VM_BIND also.
>> The remaining question is whether we enforce 2M VA alignment for
>> lmem BOs (just like legacy execbuff path) on dg2 & xehpsdv, or go with
>> just 64K alignment but ensure there is no mixing of 4K and 64K
> 
> "Driver is implicitly padding the size to 2MB boundary" - this is the 
> backing store?

Just the GTT space, i.e. vma->node.size. The backing store just needs to 
use 64K pages.
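
To make that concrete, here is a rough userspace sketch (not part of the 
patch) of what this means for binding an lmem object on dg2/xehpsdv: the 
GPU VA itself is 2M aligned and the allocator reserves a 2M multiple of 
address space, while the mapping length stays the real BO size. It 
assumes the proposed definitions from i915_vm_bind.h quoted above are 
available; the helper names are made up for illustration.

#include <stdint.h>
#include <sys/ioctl.h>
/* plus i915_drm.h and the proposed VM_BIND definitions quoted above */

#define LMEM_GTT_ALIGN	(2ull << 20)	/* 2M: avoids mixing 4K/64K PTEs */

static inline uint64_t align_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

/* 'va' should come from an allocator that hands out 2M aligned addresses
 * and reserves align_up(bo_size, LMEM_GTT_ALIGN) of VA, since the kernel
 * pads the GTT node (vma->node.size) out to 2M for lmem anyway. */
static int vm_bind_lmem(int fd, uint32_t vm_id, uint32_t bo_handle,
			uint64_t va, uint64_t bo_size)
{
	struct drm_i915_gem_vm_bind bind = {
		.vm_id  = vm_id,
		.handle = bo_handle,
		.start  = va,		/* 2M aligned GPU VA */
		.offset = 0,
		.length = bo_size,	/* BO size is already 64K padded */
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind);
}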

> 
>> mappings in the same 2M range. I think we can go with the 2M alignment
>> requirement for VM_BIND also. So, no new requirements here for VM_BIND.
> 
> Are there any considerations here of letting the userspace know? 
> Presumably userspace allocator has to know or it would try to ask for 
> impossible addresses.

It's the existing behaviour with execbuf, so I assume userspace must 
already get this right, on platforms like dg2.
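
As an aside, discovery of the VM_BIND mode itself is explicit in the 
proposed uapi. A rough sketch (again not part of the patch, error 
handling trimmed) of how userspace might check for it and opt a new VM 
into VM_BIND mode, assuming the definitions quoted above:

#include <stdint.h>
#include <sys/ioctl.h>
/* plus i915_drm.h and the proposed VM_BIND definitions quoted above */

static int create_vm_bind_vm(int fd, uint32_t *vm_id)
{
	int has_vm_bind = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_VM_BIND,	/* 57 in this RFC */
		.value = &has_vm_bind,
	};
	struct drm_i915_gem_vm_control ctl = {
		/* Opt the new address space into VM_BIND mode; the legacy
		 * execbuf way of binding is then unavailable for this VM. */
		.flags = I915_VM_CREATE_FLAGS_USE_VM_BIND,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_vm_bind)
		return -1;	/* kernel without VM_BIND support */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl))
		return -1;

	*vm_id = ctl.vm_id;
	return 0;
}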

> 
> Regards,
> 
> Tvrtko
> 
>>
>> I will update the documentation.
>>
>> Niranjana
>>
>>>>
>>>> Niranjana
>>>>
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Tvrtko
>>>>>>
>>>>>>>
>>>>>>> Niranjana
>>>>>>>
>>>>>>>> Regards,
>>>>>>>>
>>>>>>>> Tvrtko
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>>> +     *
>>>>>>>>> +     * I915_GEM_VM_BIND_READONLY:
>>>>>>>>> +     * Mapping is read-only.
>>>>>>>>> +     *
>>>>>>>>> +     * I915_GEM_VM_BIND_CAPTURE:
>>>>>>>>> +     * Capture this mapping in the dump upon GPU error.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 flags;
>>>>>>>>> +#define I915_GEM_VM_BIND_READONLY    (1 << 0)
>>>>>>>>> +#define I915_GEM_VM_BIND_CAPTURE     (1 << 1)
>>>>>>>>> +
>>>>>>>>> +    /** @extensions: 0-terminated chain of extensions for this 
>>>>>>>>> mapping. */
>>>>>>>>> +    __u64 extensions;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_vm_unbind - VA to object mapping to 
>>>>>>>>> unbind.
>>>>>>>>> + *
>>>>>>>>> + * This structure is passed to VM_UNBIND ioctl and specifies 
>>>>>>>>> the GPU virtual
>>>>>>>>> + * address (VA) range that should be unbound from the device 
>>>>>>>>> page table of the
>>>>>>>>> + * specified address space (VM). The specified VA range must 
>>>>>>>>> match one of the
>>>>>>>>> + * mappings created with the VM_BIND ioctl. TLB is flushed 
>>>>>>>>> upon unbind
>>>>>>>>> + * completion.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_vm_unbind {
>>>>>>>>> +    /** @vm_id: VM (address space) id to unbind from */
>>>>>>>>> +    __u32 vm_id;
>>>>>>>>> +
>>>>>>>>> +    /** @rsvd: Reserved for future use; must be zero. */
>>>>>>>>> +    __u32 rsvd;
>>>>>>>>> +
>>>>>>>>> +    /** @start: Virtual Address start to unbind */
>>>>>>>>> +    __u64 start;
>>>>>>>>> +
>>>>>>>>> +    /** @length: Length of mapping to unbind */
>>>>>>>>> +    __u64 length;
>>>>>>>>> +
>>>>>>>>> +    /** @flags: reserved for future usage, currently MBZ */
>>>>>>>>> +    __u64 flags;
>>>>>>>>> +
>>>>>>>>> +    /** @extensions: 0-terminated chain of extensions for this 
>>>>>>>>> mapping. */
>>>>>>>>> +    __u64 extensions;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_vm_bind_fence - An input or output fence 
>>>>>>>>> for the vm_bind
>>>>>>>>> + * or the vm_unbind work.
>>>>>>>>> + *
>>>>>>>>> + * The vm_bind or vm_unbind async worker will wait for the input
>>>>>>>>> + * fence to signal before starting the binding or unbinding.
>>>>>>>>> + *
>>>>>>>>> + * The vm_bind or vm_unbind async worker will signal the 
>>>>>>>>> returned output fence
>>>>>>>>> + * after the completion of binding or unbinding.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_vm_bind_fence {
>>>>>>>>> +    /** @handle: User's handle for a drm_syncobj to wait on or 
>>>>>>>>> signal. */
>>>>>>>>> +    __u32 handle;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>>> +     *
>>>>>>>>> +     * I915_VM_BIND_FENCE_WAIT:
>>>>>>>>> +     * Wait for the input fence before binding/unbinding
>>>>>>>>> +     *
>>>>>>>>> +     * I915_VM_BIND_FENCE_SIGNAL:
>>>>>>>>> +     * Return bind/unbind completion fence as output
>>>>>>>>> +     */
>>>>>>>>> +    __u32 flags;
>>>>>>>>> +#define I915_VM_BIND_FENCE_WAIT            (1<<0)
>>>>>>>>> +#define I915_VM_BIND_FENCE_SIGNAL          (1<<1)
>>>>>>>>> +#define __I915_VM_BIND_FENCE_UNKNOWN_FLAGS 
>>>>>>>>> (-(I915_VM_BIND_FENCE_SIGNAL << 1))
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_vm_bind_ext_timeline_fences - Timeline 
>>>>>>>>> fences for vm_bind
>>>>>>>>> + * and vm_unbind.
>>>>>>>>> + *
>>>>>>>>> + * This structure describes an array of timeline drm_syncobj 
>>>>>>>>> and associated
>>>>>>>>> + * points for timeline variants of drm_syncobj. These timeline 
>>>>>>>>> 'drm_syncobj's
>>>>>>>>> + * can be input or output fences (See struct 
>>>>>>>>> drm_i915_vm_bind_fence).
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_vm_bind_ext_timeline_fences {
>>>>>>>>> +#define I915_VM_BIND_EXT_TIMELINE_FENCES    0
>>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>>> +    struct i915_user_extension base;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @fence_count: Number of elements in the @handles_ptr & 
>>>>>>>>> @value_ptr
>>>>>>>>> +     * arrays.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 fence_count;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @handles_ptr: Pointer to an array of struct 
>>>>>>>>> drm_i915_vm_bind_fence
>>>>>>>>> +     * of length @fence_count.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 handles_ptr;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @values_ptr: Pointer to an array of u64 values of length
>>>>>>>>> +     * @fence_count.
>>>>>>>>> +     * Values must be 0 for a binary drm_syncobj. A value of 0 for a
>>>>>>>>> +     * timeline drm_syncobj is invalid as it turns a 
>>>>>>>>> drm_syncobj into a
>>>>>>>>> +     * binary one.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 values_ptr;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_vm_bind_user_fence - An input or output 
>>>>>>>>> user fence for the
>>>>>>>>> + * vm_bind or the vm_unbind work.
>>>>>>>>> + *
>>>>>>>>> + * The vm_bind or vm_unbind async worker will wait for the input
>>>>>>>>> + * fence (value at @addr to become equal to @val) before starting
>>>>>>>>> + * the binding or unbinding.
>>>>>>>>> + *
>>>>>>>>> + * The vm_bind or vm_unbind async worker will signal the 
>>>>>>>>> output fence after
>>>>>>>>> + * the completion of binding or unbinding by writing @val to the
>>>>>>>>> + * memory location at @addr.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_vm_bind_user_fence {
>>>>>>>>> +    /** @addr: User/Memory fence qword aligned process virtual 
>>>>>>>>> address */
>>>>>>>>> +    __u64 addr;
>>>>>>>>> +
>>>>>>>>> +    /** @val: User/Memory fence value to be written after bind 
>>>>>>>>> completion */
>>>>>>>>> +    __u64 val;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>>> +     *
>>>>>>>>> +     * I915_VM_BIND_USER_FENCE_WAIT:
>>>>>>>>> +     * Wait for the input fence before binding/unbinding
>>>>>>>>> +     *
>>>>>>>>> +     * I915_VM_BIND_USER_FENCE_SIGNAL:
>>>>>>>>> +     * Return bind/unbind completion fence as output
>>>>>>>>> +     */
>>>>>>>>> +    __u32 flags;
>>>>>>>>> +#define I915_VM_BIND_USER_FENCE_WAIT            (1<<0)
>>>>>>>>> +#define I915_VM_BIND_USER_FENCE_SIGNAL          (1<<1)
>>>>>>>>> +#define __I915_VM_BIND_USER_FENCE_UNKNOWN_FLAGS \
>>>>>>>>> +    (-(I915_VM_BIND_USER_FENCE_SIGNAL << 1))
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_vm_bind_ext_user_fence - User/memory fences 
>>>>>>>>> for vm_bind
>>>>>>>>> + * and vm_unbind.
>>>>>>>>> + *
>>>>>>>>> + * These user fences can be input or output fences
>>>>>>>>> + * (See struct drm_i915_vm_bind_user_fence).
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_vm_bind_ext_user_fence {
>>>>>>>>> +#define I915_VM_BIND_EXT_USER_FENCES    1
>>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>>> +    struct i915_user_extension base;
>>>>>>>>> +
>>>>>>>>> +    /** @fence_count: Number of elements in the 
>>>>>>>>> @user_fence_ptr array. */
>>>>>>>>> +    __u64 fence_count;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @user_fence_ptr: Pointer to an array of
>>>>>>>>> +     * struct drm_i915_vm_bind_user_fence of length @fence_count.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 user_fence_ptr;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_execbuffer_ext_batch_addresses - Array 
>>>>>>>>> of batch buffer
>>>>>>>>> + * gpu virtual addresses.
>>>>>>>>> + *
>>>>>>>>> + * In the execbuff ioctl (See struct 
>>>>>>>>> drm_i915_gem_execbuffer2), this extension
>>>>>>>>> + * must always be appended in the VM_BIND mode and it will be 
>>>>>>>>> an error to
>>>>>>>>> + * append this extension in older non-VM_BIND mode.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_execbuffer_ext_batch_addresses {
>>>>>>>>> +#define DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES    1
>>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>>> +    struct i915_user_extension base;
>>>>>>>>> +
>>>>>>>>> +    /** @count: Number of addresses in the addr array. */
>>>>>>>>> +    __u32 count;
>>>>>>>>> +
>>>>>>>>> +    /** @addr: An array of batch gpu virtual addresses. */
>>>>>>>>> +    __u64 addr[0];
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_execbuffer_ext_user_fence - First level 
>>>>>>>>> batch completion
>>>>>>>>> + * signaling extension.
>>>>>>>>> + *
>>>>>>>>> + * This extension allows the user to attach a user fence (@addr,
>>>>>>>>> + * @value pair) to an execbuf, to be signaled by the command
>>>>>>>>> + * streamer after the completion of the first level batch, by
>>>>>>>>> + * writing the @value at the specified @addr and triggering an
>>>>>>>>> + * interrupt.
>>>>>>>>> + * The user can either poll for this user fence to signal or wait
>>>>>>>>> + * on it with the i915_gem_wait_user_fence ioctl.
>>>>>>>>> + * This is very useful for long running contexts where waiting on a
>>>>>>>>> + * dma-fence by the user (like the i915_gem_wait ioctl) is not
>>>>>>>>> + * supported.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_execbuffer_ext_user_fence {
>>>>>>>>> +#define DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE        2
>>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>>> +    struct i915_user_extension base;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @addr: User/Memory fence qword aligned GPU virtual 
>>>>>>>>> address.
>>>>>>>>> +     *
>>>>>>>>> +     * Address has to be a valid GPU virtual address at the 
>>>>>>>>> time of
>>>>>>>>> +     * first level batch completion.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 addr;
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @value: User/Memory fence Value to be written to above 
>>>>>>>>> address
>>>>>>>>> +     * after first level batch completes.
>>>>>>>>> +     */
>>>>>>>>> +    __u64 value;
>>>>>>>>> +
>>>>>>>>> +    /** @rsvd: Reserved for future extensions, MBZ */
>>>>>>>>> +    __u64 rsvd;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_create_ext_vm_private - Extension to 
>>>>>>>>> make the object
>>>>>>>>> + * private to the specified VM.
>>>>>>>>> + *
>>>>>>>>> + * See struct drm_i915_gem_create_ext.
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_create_ext_vm_private {
>>>>>>>>> +#define I915_GEM_CREATE_EXT_VM_PRIVATE        2
>>>>>>>>> +    /** @base: Extension link. See struct i915_user_extension. */
>>>>>>>>> +    struct i915_user_extension base;
>>>>>>>>> +
>>>>>>>>> +    /** @vm_id: Id of the VM to which the object is private */
>>>>>>>>> +    __u32 vm_id;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * struct drm_i915_gem_wait_user_fence - Wait on user/memory 
>>>>>>>>> fence.
>>>>>>>>> + *
>>>>>>>>> + * User/Memory fence can be woken up either by:
>>>>>>>>> + *
>>>>>>>>> + * 1. GPU context indicated by @ctx_id, or,
>>>>>>>>> + * 2. Kernel driver async worker upon I915_UFENCE_WAIT_SOFT.
>>>>>>>>> + *    @ctx_id is ignored when this flag is set.
>>>>>>>>> + *
>>>>>>>>> + * Wakeup condition is,
>>>>>>>>> + * ``((*addr & mask) op (value & mask))``
>>>>>>>>> + *
>>>>>>>>> + * See :ref:`Documentation/driver-api/dma-buf.rst 
>>>>>>>>> <indefinite_dma_fences>`
>>>>>>>>> + */
>>>>>>>>> +struct drm_i915_gem_wait_user_fence {
>>>>>>>>> +    /** @extensions: Zero-terminated chain of extensions. */
>>>>>>>>> +    __u64 extensions;
>>>>>>>>> +
>>>>>>>>> +    /** @addr: User/Memory fence address */
>>>>>>>>> +    __u64 addr;
>>>>>>>>> +
>>>>>>>>> +    /** @ctx_id: Id of the Context which will signal the 
>>>>>>>>> fence. */
>>>>>>>>> +    __u32 ctx_id;
>>>>>>>>> +
>>>>>>>>> +    /** @op: Wakeup condition operator */
>>>>>>>>> +    __u16 op;
>>>>>>>>> +#define I915_UFENCE_WAIT_EQ      0
>>>>>>>>> +#define I915_UFENCE_WAIT_NEQ     1
>>>>>>>>> +#define I915_UFENCE_WAIT_GT      2
>>>>>>>>> +#define I915_UFENCE_WAIT_GTE     3
>>>>>>>>> +#define I915_UFENCE_WAIT_LT      4
>>>>>>>>> +#define I915_UFENCE_WAIT_LTE     5
>>>>>>>>> +#define I915_UFENCE_WAIT_BEFORE  6
>>>>>>>>> +#define I915_UFENCE_WAIT_AFTER   7
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @flags: Supported flags are,
>>>>>>>>> +     *
>>>>>>>>> +     * I915_UFENCE_WAIT_SOFT:
>>>>>>>>> +     *
>>>>>>>>> +     * To be woken up by i915 driver async worker (not by GPU).
>>>>>>>>> +     *
>>>>>>>>> +     * I915_UFENCE_WAIT_ABSTIME:
>>>>>>>>> +     *
>>>>>>>>> +     * Wait timeout specified as absolute time.
>>>>>>>>> +     */
>>>>>>>>> +    __u16 flags;
>>>>>>>>> +#define I915_UFENCE_WAIT_SOFT    0x1
>>>>>>>>> +#define I915_UFENCE_WAIT_ABSTIME 0x2
>>>>>>>>> +
>>>>>>>>> +    /** @value: Wakeup value */
>>>>>>>>> +    __u64 value;
>>>>>>>>> +
>>>>>>>>> +    /** @mask: Wakeup mask */
>>>>>>>>> +    __u64 mask;
>>>>>>>>> +#define I915_UFENCE_WAIT_U8     0xffu
>>>>>>>>> +#define I915_UFENCE_WAIT_U16    0xffffu
>>>>>>>>> +#define I915_UFENCE_WAIT_U32    0xfffffffful
>>>>>>>>> +#define I915_UFENCE_WAIT_U64    0xffffffffffffffffull
>>>>>>>>> +
>>>>>>>>> +    /**
>>>>>>>>> +     * @timeout: Wait timeout in nanoseconds.
>>>>>>>>> +     *
>>>>>>>>> +     * If the I915_UFENCE_WAIT_ABSTIME flag is set, then the
>>>>>>>>> +     * timeout is the absolute time in nsec.
>>>>>>>>> +     */
>>>>>>>>> +    __s64 timeout;
>>>>>>>>> +};
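
Tying the above together, here is a rough sketch (not part of the patch, 
assuming the proposed definitions above and a single batch for 
simplicity) of what an execbuf submission looks like in VM_BIND mode as 
documented in the header: an empty execlist, batch_start_offset/batch_len 
zeroed, and the batch GPU VA passed via the batch-addresses extension 
chained through cliprects_ptr with I915_EXEC_USE_EXTENSIONS.

#include <stdint.h>
#include <sys/ioctl.h>
/* plus i915_drm.h and the proposed VM_BIND definitions above */

struct batch_addr_ext {
	struct drm_i915_gem_execbuffer_ext_batch_addresses ext;
	__u64 addrs[1];		/* backing storage for ext.addr[] */
};

static int execbuf_vm_bind_mode(int fd, uint32_t ctx_id, uint64_t batch_va)
{
	struct batch_addr_ext ba = {
		.ext.base.name = DRM_I915_GEM_EXECBUFFER_EXT_BATCH_ADDRESSES,
		.ext.count = 1,
		.addrs = { batch_va },	/* GPU VA bound earlier via VM_BIND */
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		/* VM_BIND mode: no execlist, no batch offset/len ... */
		.buffer_count = 0,
		.batch_start_offset = 0,
		.batch_len = 0,
		/* ... extensions are chained through cliprects_ptr. */
		.cliprects_ptr = (uintptr_t)&ba,
		.flags = I915_EXEC_USE_EXTENSIONS,
		.rsvd1 = ctx_id,
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}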

