[RFC 03/10] drm/i915/vm_bind: Support private and shared BOs
Hellstrom, Thomas
thomas.hellstrom at intel.com
Fri Jul 8 13:43:41 UTC 2022
On Fri, 2022-07-08 at 06:14 -0700, Niranjana Vishwanathapura wrote:
> On Thu, Jul 07, 2022 at 03:31:42AM -0700, Hellstrom, Thomas wrote:
> > On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
> > > Add uapi allowing user to specify a BO as private to a specified
> > > VM
> > > during the BO creation.
> > > VM private BOs can only be mapped on the specified VM and can't
> > > be
> > > dma_buf exported. VM private BOs share a single common dma_resv
> > > object,
> > > hence have a performance advantage, requiring only a single dma_resv
> > > object
> > > update in the execbuf path compared to non-private (shared) BOs.
> > >
> > > Signed-off-by: Niranjana Vishwanathapura
> > > <niranjana.vishwanathapura at intel.com>
> > > ---
> > > drivers/gpu/drm/i915/gem/i915_gem_create.c | 41
> > > ++++++++++++++++++-
> > > drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 6 +++
> > > .../gpu/drm/i915/gem/i915_gem_object_types.h | 3 ++
> > > drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 ++
> > > drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h | 11 +++++
> > > .../drm/i915/gem/i915_gem_vm_bind_object.c | 9 ++++
> > > drivers/gpu/drm/i915/gt/intel_gtt.c | 4 ++
> > > drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +
> > > drivers/gpu/drm/i915/i915_vma.c | 1 +
> > > drivers/gpu/drm/i915/i915_vma_types.h | 2 +
> > > include/uapi/drm/i915_drm.h | 30
> > > ++++++++++++++
> > > 11 files changed, 110 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > index 927a87e5ec59..7e264566b51f 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > @@ -11,6 +11,7 @@
> > > #include "pxp/intel_pxp.h"
> > >
> > > #include "i915_drv.h"
> > > +#include "i915_gem_context.h"
> > > #include "i915_gem_create.h"
> > > #include "i915_trace.h"
> > > #include "i915_user_extensions.h"
> > > @@ -243,6 +244,7 @@ struct create_ext {
> > > unsigned int n_placements;
> > > unsigned int placement_mask;
> > > unsigned long flags;
> > > + u32 vm_id;
> > > };
> > >
> > > static void repr_placements(char *buf, size_t size,
> > > @@ -392,9 +394,24 @@ static int ext_set_protected(struct
> > > i915_user_extension __user *base, void *data
> > > return 0;
> > > }
> > >
> > > +static int ext_set_vm_private(struct i915_user_extension __user
> > > *base,
> > > + void *data)
> > > +{
> > > + struct drm_i915_gem_create_ext_vm_private ext;
> > > + struct create_ext *ext_data = data;
> > > +
> > > + if (copy_from_user(&ext, base, sizeof(ext)))
> > > + return -EFAULT;
> > > +
> > > + ext_data->vm_id = ext.vm_id;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static const i915_user_extension_fn create_extensions[] = {
> > > [I915_GEM_CREATE_EXT_MEMORY_REGIONS] =
> > > ext_set_placements,
> > > [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] =
> > > ext_set_protected,
> > > + [I915_GEM_CREATE_EXT_VM_PRIVATE] = ext_set_vm_private,
> > > };
> > >
> > > /**
> > > @@ -410,6 +427,7 @@ i915_gem_create_ext_ioctl(struct drm_device
> > > *dev,
> > > void *data,
> > > struct drm_i915_private *i915 = to_i915(dev);
> > > struct drm_i915_gem_create_ext *args = data;
> > > struct create_ext ext_data = { .i915 = i915 };
> > > + struct i915_address_space *vm = NULL;
> > > struct drm_i915_gem_object *obj;
> > > int ret;
> > >
> > > @@ -423,6 +441,12 @@ i915_gem_create_ext_ioctl(struct drm_device
> > > *dev, void *data,
> > > if (ret)
> > > return ret;
> > >
> > > + if (ext_data.vm_id) {
> > > + vm = i915_gem_vm_lookup(file->driver_priv,
> > > ext_data.vm_id);
> > > + if (unlikely(!vm))
> > > + return -ENOENT;
> > > + }
> > > +
> > > if (!ext_data.n_placements) {
> > > ext_data.placements[0] =
> > > intel_memory_region_by_type(i915,
> > > INTEL_MEMORY_SYSTEM);
> > > @@ -449,8 +473,21 @@ i915_gem_create_ext_ioctl(struct drm_device
> > > *dev, void *data,
> > >
> > > ext_data.placements,
> > >
> > > ext_data.n_placements
> > > ,
> > > ext_data.flags);
> > > - if (IS_ERR(obj))
> > > - return PTR_ERR(obj);
> > > + if (IS_ERR(obj)) {
> > > + ret = PTR_ERR(obj);
> > > + goto vm_put;
> > > + }
> > > +
> > > + if (vm) {
> > > + obj->base.resv = vm->root_obj->base.resv;
> > > + obj->priv_root = i915_gem_object_get(vm-
> > > >root_obj);
> > > + i915_vm_put(vm);
> > > + }
> > >
> > > return i915_gem_publish(obj, file, &args->size, &args-
> > > > handle);
> > > +vm_put:
> > > + if (vm)
> > > + i915_vm_put(vm);
> > > +
> > > + return ret;
> > > }
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > index f5062d0c6333..6433173c3e84 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > @@ -218,6 +218,12 @@ struct dma_buf *i915_gem_prime_export(struct
> > > drm_gem_object *gem_obj, int flags)
> > > struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> > > DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> > >
> > > + if (obj->priv_root) {
> > > + drm_dbg(obj->base.dev,
> > > + "Exporting VM private objects is not
> > > allowed\n");
> > > + return ERR_PTR(-EINVAL);
> > > + }
> > > +
> > > exp_info.ops = &i915_dmabuf_ops;
> > > exp_info.size = gem_obj->size;
> > > exp_info.flags = flags;
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > index 5cf36a130061..9fe3395ad4d9 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > @@ -241,6 +241,9 @@ struct drm_i915_gem_object {
> > >
> > > const struct drm_i915_gem_object_ops *ops;
> > >
> > > + /* Shared root is object private to a VM; NULL otherwise
> > > */
> > > + struct drm_i915_gem_object *priv_root;
> > > +
> > > struct {
> > > /**
> > > * @vma.lock: protect the list/tree of vmas
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > index 7e1f8b83077f..f1912b12db00 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > > @@ -1152,6 +1152,9 @@ void i915_ttm_bo_destroy(struct
> > > ttm_buffer_object *bo)
> > > i915_gem_object_release_memory_region(obj);
> > > mutex_destroy(&obj->ttm.get_io_page.lock);
> > >
> > > + if (obj->priv_root)
> > > + i915_gem_object_put(obj->priv_root);
> >
> > This only works for ttm-based objects. For non-TTM objects on
> > integrated, we'll need to mimic the dma-resv individualization from
> > TTM.
>
> Ah, earlier I was doing this during VM destruction, but ran into a
> problem as vma resources live longer than the VM in the TTM case. So, I
> moved it here.
> Ok, yah, we probably need to mimic the dma-resv individualization
> from TTM, or, we can release priv_root during VM destruction for
> non-TTM objects?
>
> >
> > > +
> > > if (obj->ttm.created) {
> > > /*
> > > * We freely manage the shrinker LRU outside of
> > > the
> > > mm.pages life
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
> > > b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
> > > index 642cdb559f17..ee6e4c52e80e 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
> > > @@ -26,6 +26,17 @@ static inline void
> > > i915_gem_vm_bind_unlock(struct
> > > i915_address_space *vm)
> > > mutex_unlock(&vm->vm_bind_lock);
> > > }
> > >
> > > +static inline int i915_gem_vm_priv_lock(struct
> > > i915_address_space
> > > *vm,
> > > + struct i915_gem_ww_ctx
> > > *ww)
> > > +{
> > > + return i915_gem_object_lock(vm->root_obj, ww);
> > > +}
> >
> > Please make a pass on this patch making sure we provide kerneldoc
> > where
> > supposed to.
> >
> > > +
> > > +static inline void i915_gem_vm_priv_unlock(struct
> > > i915_address_space
> > > *vm)
> > > +{
> > > + i915_gem_object_unlock(vm->root_obj);
> > > +}
> > > +
> > > struct i915_vma *
> > > i915_gem_vm_bind_lookup_vma(struct i915_address_space *vm, u64
> > > va);
> > > void i915_gem_vm_bind_remove(struct i915_vma *vma, bool
> > > release_obj);
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> > > index 43ceb4dcca6c..3201204c8e74 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> > > @@ -85,6 +85,7 @@ void i915_gem_vm_bind_remove(struct i915_vma
> > > *vma,
> > > bool release_obj)
> > >
> > > if (!list_empty(&vma->vm_bind_link)) {
> > > list_del_init(&vma->vm_bind_link);
> > > + list_del_init(&vma->non_priv_vm_bind_link);
> > > i915_vm_bind_it_remove(vma, &vma->vm->va);
> > >
> > > /* Release object */
> > > @@ -185,6 +186,11 @@ int i915_gem_vm_bind_obj(struct
> > > i915_address_space *vm,
> > > goto put_obj;
> > > }
> > >
> > > + if (obj->priv_root && obj->priv_root != vm->root_obj) {
> > > + ret = -EINVAL;
> > > + goto put_obj;
> > > + }
> > > +
> > > ret = i915_gem_vm_bind_lock_interruptible(vm);
> > > if (ret)
> > > goto put_obj;
> > > @@ -211,6 +217,9 @@ int i915_gem_vm_bind_obj(struct
> > > i915_address_space *vm,
> > >
> > > list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list);
> > > i915_vm_bind_it_insert(vma, &vm->va);
> > > + if (!obj->priv_root)
> > > + list_add_tail(&vma->non_priv_vm_bind_link,
> > > + &vm->non_priv_vm_bind_list);
> >
> > I guess I'll find more details in the execbuf patches, but would it
> > work to keep the non-private objects on the vm_bind_list, and just
> > never move them to the vm_bound_list, rather than having a separate
> > list for them?
>
> The vm_bind/bound_list and the non_priv_vm_bind_list are there for
> very different reasons.
>
> The reason for having separate vm_bind_list and vm_bound_list is that
> during the execbuf path, we can rebind the unbound mappings by
> scooping
> all unbound vmas back from the bound list into the bind list and binding
> them. In fact, this probably can be done with a single vm_bind_list
> and
> a 'eb.bind_list' (local to execbuf3 ioctl) for rebinding.
>
> The non_priv_vm_bind_list is just an optimization to loop only
> through
> non-priv objects while taking the locks in eb_lock_persistent_vmas()
> as only non-priv objects need that (private objects are locked in a
> single shot with vm_priv_lock). A non-priv mapping will also be in
> the
> vm_bind/bound_list.
>
> I think, we need to add this as documentation to be more clear.
OK, I understood it as private objects were either on the vm_bind list
or vm_bound_list depending on whether they needed rebinding or not, and
shared objects only on the non_priv_vm_bind list, and were always
locked, validated and fenced...
Need to take a deeper look...
/Thomas
>
> Niranjana
>
> >
> >
> > >
> > > /* Hold object reference until vm_unbind */
> > > i915_gem_object_get(vma->obj);
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > > b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > > index 135dc4a76724..df0a8459c3c6 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > > @@ -176,6 +176,7 @@ int i915_vm_lock_objects(struct
> > > i915_address_space *vm,
> > > void i915_address_space_fini(struct i915_address_space *vm)
> > > {
> > > drm_mm_takedown(&vm->mm);
> > > + i915_gem_object_put(vm->root_obj);
> > > GEM_BUG_ON(!RB_EMPTY_ROOT(&vm->va.rb_root));
> > > mutex_destroy(&vm->vm_bind_lock);
> > > }
> > > @@ -289,6 +290,9 @@ void i915_address_space_init(struct
> > > i915_address_space *vm, int subclass)
> > > INIT_LIST_HEAD(&vm->vm_bind_list);
> > > INIT_LIST_HEAD(&vm->vm_bound_list);
> > > mutex_init(&vm->vm_bind_lock);
> > > + INIT_LIST_HEAD(&vm->non_priv_vm_bind_list);
> > > + vm->root_obj = i915_gem_object_create_internal(vm->i915,
> > > PAGE_SIZE);
> > > + GEM_BUG_ON(IS_ERR(vm->root_obj));
> > > }
> > >
> > > void *__px_vaddr(struct drm_i915_gem_object *p)
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > index d4a6ce65251d..f538ce9115c9 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > @@ -267,6 +267,8 @@ struct i915_address_space {
> > > struct list_head vm_bound_list;
> > > /* va tree of persistent vmas */
> > > struct rb_root_cached va;
> > > + struct list_head non_priv_vm_bind_list;
> > > + struct drm_i915_gem_object *root_obj;
> > >
> > > /* Global GTT */
> > > bool is_ggtt:1;
> > > diff --git a/drivers/gpu/drm/i915/i915_vma.c
> > > b/drivers/gpu/drm/i915/i915_vma.c
> > > index d324e29cef0a..f0226581d342 100644
> > > --- a/drivers/gpu/drm/i915/i915_vma.c
> > > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > > @@ -236,6 +236,7 @@ vma_create(struct drm_i915_gem_object *obj,
> > > mutex_unlock(&vm->mutex);
> > >
> > > INIT_LIST_HEAD(&vma->vm_bind_link);
> > > + INIT_LIST_HEAD(&vma->non_priv_vm_bind_link);
> > > return vma;
> > >
> > > err_unlock:
> > > diff --git a/drivers/gpu/drm/i915/i915_vma_types.h
> > > b/drivers/gpu/drm/i915/i915_vma_types.h
> > > index b6d179bdbfa0..2298b3d6b7c4 100644
> > > --- a/drivers/gpu/drm/i915/i915_vma_types.h
> > > +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> > > @@ -290,6 +290,8 @@ struct i915_vma {
> > > struct list_head vm_link;
> > >
> > > struct list_head vm_bind_link; /* Link in persistent VMA
> > > list
> > > */
> > > + /* Link in non-private persistent VMA list */
> > > + struct list_head non_priv_vm_bind_link;
> > >
> > > /** Interval tree structures for persistent vma */
> > > struct rb_node rb;
> > > diff --git a/include/uapi/drm/i915_drm.h
> > > b/include/uapi/drm/i915_drm.h
> > > index 26cca49717f8..ce1c6592b0d7 100644
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -3542,9 +3542,13 @@ struct drm_i915_gem_create_ext {
> > > *
> > > * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
> > > * struct drm_i915_gem_create_ext_protected_content.
> > > + *
> > > + * For I915_GEM_CREATE_EXT_VM_PRIVATE usage see
> > > + * struct drm_i915_gem_create_ext_vm_private.
> > > */
> > > #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
> > > #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> > > +#define I915_GEM_CREATE_EXT_VM_PRIVATE 2
> > > __u64 extensions;
> > > };
> > >
> > > @@ -3662,6 +3666,32 @@ struct
> > > drm_i915_gem_create_ext_protected_content {
> > > /* ID of the protected content session managed by i915 when PXP
> > > is
> > > active */
> > > #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
> > >
> > > +/**
> > > + * struct drm_i915_gem_create_ext_vm_private - Extension to make
> > > the
> > > object
> > > + * private to the specified VM.
> > > + *
> > > + * See struct drm_i915_gem_create_ext.
> > > + *
> > > + * By default, BOs can be mapped on multiple VMs and can also be
> > > dma-buf
> > > + * exported. Hence these BOs are referred to as Shared BOs.
> > > + * During each execbuf3 submission, the request fence must be
> > > added
> > > to the
> > > + * dma-resv fence list of all shared BOs mapped on the VM.
> > > + *
> > > + * Unlike Shared BOs, these VM private BOs can only be mapped on
> > > the
> > > VM they
> > > + * are private to and can't be dma-buf exported. All private BOs
> > > of
> > > a VM share
> > > + * the dma-resv object. Hence during each execbuf3 submission,
> > > they
> > > need only
> > > + * one dma-resv fence list updated. Thus, the fast path (where
> > > required
> > > + * mappings are already bound) submission latency is O(1) w.r.t
> > > the
> > > number of
> > > + * VM private BOs.
> > > + */
> > > +struct drm_i915_gem_create_ext_vm_private {
> > > + /** @base: Extension link. See struct
> > > i915_user_extension. */
> > > + struct i915_user_extension base;
> > > +
> > > + /** @vm_id: Id of the VM to which the object is private
> > > */
> > > + __u32 vm_id;
> > > +};
> > > +
> > > /**
> > > * struct drm_i915_gem_vm_bind - VA to object mapping to bind.
> > > *
> >
> > Thanks,
> > Thomas
> >
More information about the dri-devel
mailing list