[PATCH drm-misc-next v8 03/12] drm/nouveau: new VM_BIND uapi interfaces

Tue Jul 25 02:03:55 UTC 2023

On 7/22/23 00:58, Faith Ekstrand wrote:
> 
> On Wed, Jul 19, 2023 at 7:15 PM Danilo Krummrich <dakr at redhat.com 
> <mailto:dakr at redhat.com>> wrote:
> 
>     This commit provides the interfaces for the new UAPI motivated by the
>     Vulkan API. It allows user mode drivers (UMDs) to:
> 
>     1) Initialize a GPU virtual address (VA) space via the new
>         DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved
>         VA area.
> 
>     2) Bind and unbind GPU VA space mappings via the new
>         DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
> 
>     3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.
> 
>     Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
>     asynchronous processing with DRM syncobjs as the synchronization mechanism.
> 
>     By default, DRM_IOCTL_NOUVEAU_VM_BIND is processed synchronously;
>     DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.
> 
>     Co-authored-by: Dave Airlie <airlied at redhat.com
>     <mailto:airlied at redhat.com>>
>     Signed-off-by: Danilo Krummrich <dakr at redhat.com
>     <mailto:dakr at redhat.com>>
>     ---
>       Documentation/gpu/driver-uapi.rst |   8 ++
>       include/uapi/drm/nouveau_drm.h    | 209 ++++++++++++++++++++++++++++++
>       2 files changed, 217 insertions(+)
> 
>     diff --git a/Documentation/gpu/driver-uapi.rst
>     b/Documentation/gpu/driver-uapi.rst
>     index 4411e6919a3d..9c7ca6e33a68 100644
>     --- a/Documentation/gpu/driver-uapi.rst
>     +++ b/Documentation/gpu/driver-uapi.rst
>     @@ -6,3 +6,11 @@ drm/i915 uAPI
>       =============
> 
>       .. kernel-doc:: include/uapi/drm/i915_drm.h
>     +
>     +drm/nouveau uAPI
>     +================
>     +
>     +VM_BIND / EXEC uAPI
>     +-------------------
>     +
>     +.. kernel-doc:: include/uapi/drm/nouveau_drm.h
>     diff --git a/include/uapi/drm/nouveau_drm.h
>     b/include/uapi/drm/nouveau_drm.h
>     index 853a327433d3..4d3a70529637 100644
>     --- a/include/uapi/drm/nouveau_drm.h
>     +++ b/include/uapi/drm/nouveau_drm.h
>     @@ -126,6 +126,209 @@ struct drm_nouveau_gem_cpu_fini {
>              __u32 handle;
>       };
> 
>     +/**
>     + * struct drm_nouveau_sync - sync object
>     + *
>     + * This structure serves as synchronization mechanism for (potentially)
>     + * asynchronous operations such as EXEC or VM_BIND.
>     + */
>     +struct drm_nouveau_sync {
>     +       /**
>     +        * @flags: the flags for a sync object
>     +        *
>     +        * The first 8 bits are used to determine the type of the
>     sync object.
>     +        */
>     +       __u32 flags;
>     +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
>     +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
>     +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
>     +       /**
>     +        * @handle: the handle of the sync object
>     +        */
>     +       __u32 handle;
>     +       /**
>     +        * @timeline_value:
>     +        *
>     +        * The timeline point of the sync object in case the syncobj
>     is of
>     +        * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
>     +        */
>     +       __u64 timeline_value;
>     +};
>     +
>     +/**
>     + * struct drm_nouveau_vm_init - GPU VA space init structure
>     + *
>     + * Used to initialize the GPU's VA space for a user client, telling
>     the kernel
>     + * which portion of the VA space is managed by the UMD and kernel
>     respectively.
> 
> 
> I assume this has to be called quite early. Like maybe before any BOs or 
> channels are created? In any case, it'd be nice to have that documented.

Exactly. Doing any of those before the VA space has been initialized 
disables the new uAPI entirely. I will add some documentation for this.

> 
>     + */
>     +struct drm_nouveau_vm_init {
>     +       /**
>     +        * @unmanaged_addr: start address of the kernel managed VA
>     space region
>     +        */
>     +       __u64 unmanaged_addr;
>     +       /**
>     +        * @unmanaged_size: size of the kernel managed VA space
>     region in bytes
>     +        */
>     +       __u64 unmanaged_size;
> 
> 
> Overall, I think this is the right API. My only concern is with the 
> word "unmanaged". None of the VA space is unmanaged. Some is 
> userspace-managed and some is kernel-managed.  I guess "unmanaged" kinda 
> makes sense because this is coming from userspace and it's saying which 
> bits it manages and which bits it doesn't.  Still seems clunky to me.  
> Maybe kernel_managed? IDK, that feels weird too. Since we're already 
> using UMD in this file, we could call it kmd_managed. IDK. 🤷🏻‍♀️

kernel_managed / kmd_managed both sound fine to me. I'm good with 
either one.

> 
> Yeah, I know this is a total bikeshed color thing and I'm not going to 
> block anything based on it. 😅 Just wanted to see if we can come up with 
> anything better.  It's documented and that's the important thing.
> 
>     +};
>     +
>     +/**
>     + * struct drm_nouveau_vm_bind_op - VM_BIND operation
>     + *
>     + * This structure represents a single VM_BIND operation. UMDs
>     should pass
>     + * an array of this structure via struct drm_nouveau_vm_bind's
>     &op_ptr field.
>     + */
>     +struct drm_nouveau_vm_bind_op {
>     +       /**
>     +        * @op: the operation type
>     +        */
>     +       __u32 op;
>     +/**
>     + * @DRM_NOUVEAU_VM_BIND_OP_MAP:
>     + *
>     + * Map a GEM object to the GPU's VA space. Optionally, the
>     + * &DRM_NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the
>     kernel to
>     + * create sparse mappings for the given range.
>     + */
>     +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0
>     +/**
>     + * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
>     + *
>     + * Unmap an existing mapping in the GPU's VA space. If the region
>     the mapping
>     + * is located in is a sparse region, new sparse mappings are
>     created where the
>     + * unmapped (memory backed) mapping was mapped previously. To
>     remove a sparse
>     + * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set.
>     + */
>     +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1
>     +       /**
>     +        * @flags: the flags for a &drm_nouveau_vm_bind_op
>     +        */
>     +       __u32 flags;
>     +/**
>     + * @DRM_NOUVEAU_VM_BIND_SPARSE:
>     + *
>     + * Indicates that an allocated VA space region should be sparse.
>     + */
>     +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
>     +       /**
>     +        * @handle: the handle of the DRM GEM object to map
>     +        */
>     +       __u32 handle;
>     +       /**
>     +        * @pad: 32 bit padding, should be 0
>     +        */
>     +       __u32 pad;
>     +       /**
>     +        * @addr:
>     +        *
>     +        * the address the VA space region or (memory backed)
>     mapping should be mapped to
>     +        */
>     +       __u64 addr;
>     +       /**
>     +        * @bo_offset: the offset within the BO backing the mapping
>     +        */
>     +       __u64 bo_offset;
>     +       /**
>     +        * @range: the size of the requested mapping in bytes
>     +        */
>     +       __u64 range;
>     +};
>     +
>     +/**
>     + * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND
>     + */
>     +struct drm_nouveau_vm_bind {
>     +       /**
>     +        * @op_count: the number of &drm_nouveau_vm_bind_op
>     +        */
>     +       __u32 op_count;
> 
> 
> I've chatted a bit with Dave on IRC about this but both VM_BIND and EXEC 
> should support `op_count == 0` and do exactly the same thing that they 
> would do if there were real ops. In the case of vm_bind, that just means 
> wait on the waits and then signal the signals. In particular, it should 
> NOT just return success and do nothing. Dave has a patch for this for 
> EXEC but IDK if VM_BIND needs any attention.  Of course, if it's not 
> ASYNC, then quickly doing nothing after validating inputs is acceptable.

What will this be used for? I guess it would not be important for it to 
be executed in order with "regular" (non-noop) jobs? The only thing 
this would tell you is that, e.g. for VM_BIND, all previous binds have 
completed, which is what we have syncobjs for.

- Danilo

> 
>     +       /**
>     +        * @flags: the flags for a &drm_nouveau_vm_bind ioctl
>     +        */
>     +       __u32 flags;
>     +/**
>     + * @DRM_NOUVEAU_VM_BIND_RUN_ASYNC:
>     + *
>     + * Indicates that the given VM_BIND operation should be executed
>     asynchronously
>     + * by the kernel.
>     + *
>     + * If this flag is not supplied the kernel executes the associated
>     operations
>     + * synchronously and doesn't accept any &drm_nouveau_sync objects.
>     + */
>     +#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1
>     +       /**
>     +        * @wait_count: the number of wait &drm_nouveau_syncs
>     +        */
>     +       __u32 wait_count;
>     +       /**
>     +        * @sig_count: the number of &drm_nouveau_syncs to signal
>     when finished
>     +        */
>     +       __u32 sig_count;
>     +       /**
>     +        * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
>     +        */
>     +       __u64 wait_ptr;
>     +       /**
>     +        * @sig_ptr: pointer to &drm_nouveau_syncs to signal when
>     finished
>     +        */
>     +       __u64 sig_ptr;
>     +       /**
>     +        * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute
>     +        */
>     +       __u64 op_ptr;
>     +};
>     +
>     +/**
>     + * struct drm_nouveau_exec_push - EXEC push operation
>     + *
>     + * This structure represents a single EXEC push operation. UMDs
>     should pass an
>     + * array of this structure via struct drm_nouveau_exec's &push_ptr
>     field.
>     + */
>     +struct drm_nouveau_exec_push {
>     +       /**
>     +        * @va: the virtual address of the push buffer mapping
>     +        */
>     +       __u64 va;
>     +       /**
>     +        * @va_len: the length of the push buffer mapping
>     +        */
>     +       __u64 va_len;
>     +};
>     +
>     +/**
>     + * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC
>     + */
>     +struct drm_nouveau_exec {
>     +       /**
>     +        * @channel: the channel to execute the push buffer in
>     +        */
>     +       __u32 channel;
>     +       /**
>     +        * @push_count: the number of &drm_nouveau_exec_push ops
>     +        */
>     +       __u32 push_count;
> 
> 
> Same comment as above. We want `push_count == 0` to behave the same as 
> any other EXEC just without anything new. In particular, it needs to 
> wait on all the waits as well as the previous EXECs on that channel and 
> then signal the sigs. I know Dave has a patch for this and it's working 
> quite well in my testing.
> 
> Other than that, everything looks good.  I'm still re-reading all the 
> NVK patches but they've been working quite well in my testing this week 
> apart from a perf issue I need to dig into. I'll give a real RB once 
> we're sure we all agree on the semantics of _count.
> 
> ~Faith
> 
>     +       /**
>     +        * @wait_count: the number of wait &drm_nouveau_syncs
>     +        */
>     +       __u32 wait_count;
>     +       /**
>     +        * @sig_count: the number of &drm_nouveau_syncs to signal
>     when finished
>     +        */
>     +       __u32 sig_count;
>     +       /**
>     +        * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
>     +        */
>     +       __u64 wait_ptr;
>     +       /**
>     +        * @sig_ptr: pointer to &drm_nouveau_syncs to signal when
>     finished
>     +        */
>     +       __u64 sig_ptr;
>     +       /**
>     +        * @push_ptr: pointer to &drm_nouveau_exec_push ops
>     +        */
>     +       __u64 push_ptr;
>     +};
>     +
>       #define DRM_NOUVEAU_GETPARAM           0x00 /* deprecated */
>       #define DRM_NOUVEAU_SETPARAM           0x01 /* deprecated */
>       #define DRM_NOUVEAU_CHANNEL_ALLOC      0x02 /* deprecated */
>     @@ -136,6 +339,9 @@ struct drm_nouveau_gem_cpu_fini {
>       #define DRM_NOUVEAU_NVIF               0x07
>       #define DRM_NOUVEAU_SVM_INIT           0x08
>       #define DRM_NOUVEAU_SVM_BIND           0x09
>     +#define DRM_NOUVEAU_VM_INIT            0x10
>     +#define DRM_NOUVEAU_VM_BIND            0x11
>     +#define DRM_NOUVEAU_EXEC               0x12
>       #define DRM_NOUVEAU_GEM_NEW            0x40
>       #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
>       #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
>     @@ -197,6 +403,9 @@ struct drm_nouveau_svm_bind {
>       #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI       DRM_IOW
>     (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct
>     drm_nouveau_gem_cpu_fini)
>       #define DRM_IOCTL_NOUVEAU_GEM_INFO         
>       DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct
>     drm_nouveau_gem_info)
> 
>     +#define DRM_IOCTL_NOUVEAU_VM_INIT           
>     DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct
>     drm_nouveau_vm_init)
>     +#define DRM_IOCTL_NOUVEAU_VM_BIND           
>     DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct
>     drm_nouveau_vm_bind)
>     +#define DRM_IOCTL_NOUVEAU_EXEC             
>       DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec)
>       #if defined(__cplusplus)
>       }
>       #endif
>     -- 
>     2.41.0
>