[PATCH drm-misc-next v8 03/12] drm/nouveau: new VM_BIND uapi interfaces
Danilo Krummrich
dakr at redhat.com
Tue Jul 25 02:03:55 UTC 2023
On 7/22/23 00:58, Faith Ekstrand wrote:
>
> On Wed, Jul 19, 2023 at 7:15 PM Danilo Krummrich <dakr at redhat.com
> <mailto:dakr at redhat.com>> wrote:
>
> This commit provides the interfaces for the new UAPI motivated by the
> Vulkan API. It allows user mode drivers (UMDs) to:
>
> 1) Initialize a GPU virtual address (VA) space via the new
> DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved
> VA area.
>
> 2) Bind and unbind GPU VA space mappings via the new
> DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
>
> 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.
>
> Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
> asynchronous processing with DRM syncobjs as the synchronization mechanism.
>
> By default, DRM_IOCTL_NOUVEAU_VM_BIND is processed synchronously;
> DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.
>
> Co-authored-by: Dave Airlie <airlied at redhat.com
> <mailto:airlied at redhat.com>>
> Signed-off-by: Danilo Krummrich <dakr at redhat.com
> <mailto:dakr at redhat.com>>
> ---
> Documentation/gpu/driver-uapi.rst | 8 ++
> include/uapi/drm/nouveau_drm.h | 209 ++++++++++++++++++++++++++++++
> 2 files changed, 217 insertions(+)
>
> diff --git a/Documentation/gpu/driver-uapi.rst
> b/Documentation/gpu/driver-uapi.rst
> index 4411e6919a3d..9c7ca6e33a68 100644
> --- a/Documentation/gpu/driver-uapi.rst
> +++ b/Documentation/gpu/driver-uapi.rst
> @@ -6,3 +6,11 @@ drm/i915 uAPI
> =============
>
> .. kernel-doc:: include/uapi/drm/i915_drm.h
> +
> +drm/nouveau uAPI
> +================
> +
> +VM_BIND / EXEC uAPI
> +-------------------
> +
> +.. kernel-doc:: include/uapi/drm/nouveau_drm.h
> diff --git a/include/uapi/drm/nouveau_drm.h
> b/include/uapi/drm/nouveau_drm.h
> index 853a327433d3..4d3a70529637 100644
> --- a/include/uapi/drm/nouveau_drm.h
> +++ b/include/uapi/drm/nouveau_drm.h
> @@ -126,6 +126,209 @@ struct drm_nouveau_gem_cpu_fini {
> __u32 handle;
> };
>
> +/**
> + * struct drm_nouveau_sync - sync object
> + *
> + * This structure serves as synchronization mechanism for (potentially)
> + * asynchronous operations such as EXEC or VM_BIND.
> + */
> +struct drm_nouveau_sync {
> + /**
> + * @flags: the flags for a sync object
> + *
> + * The first 8 bits are used to determine the type of the
> sync object.
> + */
> + __u32 flags;
> +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
> +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
> +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
> + /**
> + * @handle: the handle of the sync object
> + */
> + __u32 handle;
> + /**
> + * @timeline_value:
> + *
> + * The timeline point of the sync object in case the syncobj
> is of
> + * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
> + */
> + __u64 timeline_value;
> +};
> +
> +/**
> + * struct drm_nouveau_vm_init - GPU VA space init structure
> + *
> + * Used to initialize the GPU's VA space for a user client, telling
> the kernel
> + * which portion of the VA space is managed by the UMD and kernel
> respectively.
>
>
> I assume this has to be called quite early. Like maybe before any BOs or
> channels are created? In any case, it'd be nice to have that documented.
Exactly, doing any of those will disable the new uAPI entirely if it
wasn't yet initialized. I will add some documentation for this.
>
> + */
> +struct drm_nouveau_vm_init {
> + /**
> + * @unmanaged_addr: start address of the kernel managed VA
> space region
> + */
> + __u64 unmanaged_addr;
> + /**
> + * @unmanaged_size: size of the kernel managed VA space
> region in bytes
> + */
> + __u64 unmanaged_size;
>
>
> Over-all, I think this is the right API. My only concern is with the
> word "unmanaged". None of the VA space is unmanaged. Some is
> userspace-managed and some is kernel-managed. I guess "unmanaged" kinda
> makes sense because this is coming from userspace and it's saying which
> bits it manages and which bits it doesn't. Still seems clunky to me.
> Maybe kernel_managed? IDK, that feels weird too. Since we're already
> using UMD in this file, we could call it kmd_managed. IDK. 🤷🏻‍♀️
kernel_managed / kmd_managed both sound fine to me. I'm good with
either one.
>
> Yeah, I know this is a total bikeshed color thing and I'm not going to
> block anything based on it. 😅 Just wanted to see if we can come up with
> anything better. It's documented and that's the important thing.
>
> +};
> +
> +/**
> + * struct drm_nouveau_vm_bind_op - VM_BIND operation
> + *
> + * This structure represents a single VM_BIND operation. UMDs
> should pass
> + * an array of this structure via struct drm_nouveau_vm_bind's
> &op_ptr field.
> + */
> +struct drm_nouveau_vm_bind_op {
> + /**
> + * @op: the operation type
> + */
> + __u32 op;
> +/**
> + * @DRM_NOUVEAU_VM_BIND_OP_MAP:
> + *
> + * Map a GEM object to the GPU's VA space. Optionally, the
> + * &DRM_NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the
> kernel to
> + * create sparse mappings for the given range.
> + */
> +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0
> +/**
> + * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
> + *
> + * Unmap an existing mapping in the GPU's VA space. If the region
> the mapping
> + * is located in is a sparse region, new sparse mappings are
> created where the
> + * unmapped (memory backed) mapping was mapped previously. To
> remove a sparse
> + * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set.
> + */
> +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1
> + /**
> + * @flags: the flags for a &drm_nouveau_vm_bind_op
> + */
> + __u32 flags;
> +/**
> + * @DRM_NOUVEAU_VM_BIND_SPARSE:
> + *
> + * Indicates that an allocated VA space region should be sparse.
> + */
> +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
> + /**
> + * @handle: the handle of the DRM GEM object to map
> + */
> + __u32 handle;
> + /**
> + * @pad: 32 bit padding, should be 0
> + */
> + __u32 pad;
> + /**
> + * @addr:
> + *
> + * the address the VA space region or (memory backed)
> mapping should be mapped to
> + */
> + __u64 addr;
> + /**
> + * @bo_offset: the offset within the BO backing the mapping
> + */
> + __u64 bo_offset;
> + /**
> + * @range: the size of the requested mapping in bytes
> + */
> + __u64 range;
> +};
> +
> +/**
> + * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND
> + */
> +struct drm_nouveau_vm_bind {
> + /**
> + * @op_count: the number of &drm_nouveau_vm_bind_op
> + */
> + __u32 op_count;
>
>
> I've chatted a bit with Dave on IRC about this but both VM_BIND and EXEC
> should support `op_count == 0` and do exactly the same thing that they
> would do if there were real ops. In the case of vm_bind, that just means
> wait on the waits and then signal the signals. In particular, it should
> NOT just return success and do nothing. Dave has a patch for this for
> EXEC but IDK if VM_BIND needs any attention. Of course, if it's not
> ASYNC, then quickly doing nothing after validating inputs is acceptable.
What will this be used for? I guess it would not be important for it to
be executed in order with "regular" (non-noop) jobs? Because the only
thing this would tell you is that, e.g. for VM_BIND, all previous binds
have completed, which is what we have syncobjs for.
- Danilo
>
> + /**
> + * @flags: the flags for a &drm_nouveau_vm_bind ioctl
> + */
> + __u32 flags;
> +/**
> + * @DRM_NOUVEAU_VM_BIND_RUN_ASYNC:
> + *
> + * Indicates that the given VM_BIND operation should be executed
> asynchronously
> + * by the kernel.
> + *
> + * If this flag is not supplied the kernel executes the associated
> operations
> + * synchronously and doesn't accept any &drm_nouveau_sync objects.
> + */
> +#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1
> + /**
> + * @wait_count: the number of wait &drm_nouveau_syncs
> + */
> + __u32 wait_count;
> + /**
> + * @sig_count: the number of &drm_nouveau_syncs to signal
> when finished
> + */
> + __u32 sig_count;
> + /**
> + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
> + */
> + __u64 wait_ptr;
> + /**
> + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when
> finished
> + */
> + __u64 sig_ptr;
> + /**
> + * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute
> + */
> + __u64 op_ptr;
> +};
> +
> +/**
> + * struct drm_nouveau_exec_push - EXEC push operation
> + *
> + * This structure represents a single EXEC push operation. UMDs
> should pass an
> + * array of this structure via struct drm_nouveau_exec's &push_ptr
> field.
> + */
> +struct drm_nouveau_exec_push {
> + /**
> + * @va: the virtual address of the push buffer mapping
> + */
> + __u64 va;
> + /**
> + * @va_len: the length of the push buffer mapping
> + */
> + __u64 va_len;
> +};
> +
> +/**
> + * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC
> + */
> +struct drm_nouveau_exec {
> + /**
> + * @channel: the channel to execute the push buffer in
> + */
> + __u32 channel;
> + /**
> + * @push_count: the number of &drm_nouveau_exec_push ops
> + */
> + __u32 push_count;
>
>
> Same comment as above. We want `push_count == 0` to behave the same as
> any other EXEC just without anything new. In particular, it needs to
> wait on all the waits as well as the previous EXECs on that channel and
> then signal the sigs. I know Dave has a patch for this and it's working
> quite well in my testing.
>
> Other than that, everything looks good. I'm still re-reading all the
> NVK patches but they've been working quite well in my testing this week
> apart from a perf issue I need to dig into. I'll give a real RB once
> we're sure we all agree on the semantics of _count.
>
> ~Faith
>
> + /**
> + * @wait_count: the number of wait &drm_nouveau_syncs
> + */
> + __u32 wait_count;
> + /**
> + * @sig_count: the number of &drm_nouveau_syncs to signal
> when finished
> + */
> + __u32 sig_count;
> + /**
> + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
> + */
> + __u64 wait_ptr;
> + /**
> + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when
> finished
> + */
> + __u64 sig_ptr;
> + /**
> + * @push_ptr: pointer to &drm_nouveau_exec_push ops
> + */
> + __u64 push_ptr;
> +};
> +
> #define DRM_NOUVEAU_GETPARAM 0x00 /* deprecated */
> #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */
> #define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 /* deprecated */
> @@ -136,6 +339,9 @@ struct drm_nouveau_gem_cpu_fini {
> #define DRM_NOUVEAU_NVIF 0x07
> #define DRM_NOUVEAU_SVM_INIT 0x08
> #define DRM_NOUVEAU_SVM_BIND 0x09
> +#define DRM_NOUVEAU_VM_INIT 0x10
> +#define DRM_NOUVEAU_VM_BIND 0x11
> +#define DRM_NOUVEAU_EXEC 0x12
> #define DRM_NOUVEAU_GEM_NEW 0x40
> #define DRM_NOUVEAU_GEM_PUSHBUF 0x41
> #define DRM_NOUVEAU_GEM_CPU_PREP 0x42
> @@ -197,6 +403,9 @@ struct drm_nouveau_svm_bind {
> #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW
> (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct
> drm_nouveau_gem_cpu_fini)
> #define DRM_IOCTL_NOUVEAU_GEM_INFO
> DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct
> drm_nouveau_gem_info)
>
> +#define DRM_IOCTL_NOUVEAU_VM_INIT
> DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct
> drm_nouveau_vm_init)
> +#define DRM_IOCTL_NOUVEAU_VM_BIND
> DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct
> drm_nouveau_vm_bind)
> +#define DRM_IOCTL_NOUVEAU_EXEC
> DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec)
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.41.0
>
More information about the dri-devel
mailing list