[PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7

Daniel Vetter daniel at ffwll.ch
Wed May 8 09:51:49 UTC 2019


On Tue, May 07, 2019 at 10:13:31AM +0200, Christian König wrote:
> On the exporter side we add optional explicit pinning callbacks. If those
> callbacks are implemented the framework no longer caches sg tables and the
> map/unmap callbacks are always called with the lock of the reservation object
> held.
> 
> On the importer side we add an optional invalidate callback. This callback is
> used by the exporter to inform the importers that their mappings should be
> destroyed as soon as possible.
> 
> This allows the exporter to provide the mappings without the need to pin
> the backing store.
> 
> v2: don't try to invalidate mappings when the callback is NULL,
>     lock the reservation obj while using the attachments,
>     add helper to set the callback
> v3: move flag for invalidation support into the DMA-buf,
>     use new attach_info structure to set the callback
> v4: use importer_priv field instead of mangling exporter priv.
> v5: drop invalidation_supported flag
> v6: squash together with pin/unpin changes

Imo much better, I can keep the entire thing in my brain now (and the sgt
caching patch is simple enough that I can remember what it does).

> v7: pin/unpin takes an attachment now
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/dma-buf/dma-buf.c | 190 ++++++++++++++++++++++++++++++++++++--
>  include/linux/dma-buf.h   |  91 ++++++++++++++++--
>  2 files changed, 261 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 775e13f54083..464a4c38df6c 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -530,10 +530,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
>  EXPORT_SYMBOL_GPL(dma_buf_put);
>  
>  /**
> - * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
> + * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
>   * calls attach() of dma_buf_ops to allow device-specific attach functionality
> - * @dmabuf:	[in]	buffer to attach device to.
> - * @dev:	[in]	device to be attached.
> + * @dmabuf:		[in]	buffer to attach device to.
> + * @dev:		[in]	device to be attached.
> + * @importer_ops	[in]	importer operations for the attachment
> + * @importer_priv	[in]	importer private pointer for the attachment
>   *
>   * Returns struct dma_buf_attachment pointer for this attachment. Attachments
>   * must be cleaned up by calling dma_buf_detach().
> @@ -547,8 +549,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
>   * accessible to @dev, and cannot be moved to a more suitable place. This is
>   * indicated with the error code -EBUSY.
>   */
> -struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -					  struct device *dev)
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv)
>  {
>  	struct dma_buf_attachment *attach;
>  	int ret;
> @@ -562,6 +566,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  
>  	attach->dev = dev;
>  	attach->dmabuf = dmabuf;
> +	attach->importer_ops = importer_ops;
> +	attach->importer_priv = importer_priv;
>  
>  	mutex_lock(&dmabuf->lock);
>  
> @@ -570,7 +576,9 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  		if (ret)
>  			goto err_attach;
>  	}
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_add(&attach->node, &dmabuf->attachments);
> +	reservation_object_unlock(dmabuf->resv);
>  
>  	mutex_unlock(&dmabuf->lock);
>  
> @@ -594,6 +602,21 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  	mutex_unlock(&dmabuf->lock);
>  	return ERR_PTR(ret);
>  }
> +EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
> +
> +/**
> + * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
> + * @dmabuf:	[in]	buffer to attach device to.
> + * @dev:	[in]	device to be attached.
> + *
> + * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
> + * mapping.
> + */
> +struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> +					  struct device *dev)
> +{
> +	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
> +}
>  EXPORT_SYMBOL_GPL(dma_buf_attach);
>  
>  /**
> @@ -614,7 +637,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  					   DMA_BIDIRECTIONAL);
>  
>  	mutex_lock(&dmabuf->lock);
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_del(&attach->node);
> +	reservation_object_unlock(dmabuf->resv);
>  	if (dmabuf->ops->detach)
>  		dmabuf->ops->detach(dmabuf, attach);
>  
> @@ -623,6 +648,100 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_detach);
>  
> +/**
> + * dma_buf_pin - Lock down the DMA-buf
> + *
> + * @attach:	[in]	attachment which should be pinned
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
> + */
> +int dma_buf_pin(struct dma_buf_attachment *attach)

ocd: Needs now dma_buf_attachment_pin, similar for unpin.

> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +	int ret = 0;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->pin)
> +		ret = dmabuf->ops->pin(attach);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_pin);
> +
> +/**
> + * dma_buf_unpin - Remove lock from DMA-buf
> + *
> + * @attach:	[in]	attachment which should be unpinned
> + */
> +void dma_buf_unpin(struct dma_buf_attachment *attach)
> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->unpin)
> +		dmabuf->ops->unpin(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_unpin);
> +
> +/**
> + * dma_buf_map_attachment_locked - Maps the buffer into _device_ address space
> + * with the reservation lock held. Is a wrapper for map_dma_buf() of the
> + *
> + * Returns the scatterlist table of the attachment;
> + * dma_buf_ops.
> + * @attach:	[in]	attachment whose scatterlist is to be returned
> + * @direction:	[in]	direction of DMA transfer
> + *
> + * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
> + * on error. May return -EINTR if it is interrupted by a signal.
> + *
> + * A mapping must be unmapped by using dma_buf_unmap_attachment().
> + */
> +struct sg_table *
> +dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
> +			      enum dma_data_direction direction)
> +{
> +	struct sg_table *sg_table;
> +	int r;
> +
> +	might_sleep();
> +	reservation_object_assert_held(attach->dmabuf->resv);
> +
> +	if (attach->sgt)
> +		return attach->sgt;
> +
> +	if (attach->importer_ops && attach->importer_ops->invalidate) {
> +		/*
> +		 * Mapping a DMA-buf can trigger its invalidation, prevent
> +		 * sending this event to the caller by temporary removing
> +		 * this attachment from the list.
> +		 */
> +		list_del(&attach->node);
I still feel like this is a very fragile hack here, and the only reason is
that we allow multiple mappings of the same attachment to coexist. From my
understanding:

1. import maps
2. exporter wants to move the buffer, calls invalidate
3. importer makes sure all processing has stopped in its ->invalidate
hook, which might involve add more fences to the reservation object
4. exporter does the buffer copy to the new place, involving dma engines
and obeying all the fences in the reservation object
5. importer needs the buffer again, calls map, gets a new location
[ importer now has 2 mappings in-flight, hooray this is fun, especially if
userspace manages your gpu va space and so both need to be mapped in the
same place.]
6. eventually importer retires the old work and finally calls unmap on the
old mapping

This feels like a mess of an api. I'm kinda inclined to require that the
->invalidate hook must call attachment_unmap, and require that exporters
delay the unmapping until all fences are done (they need to do that
anyway, and exporter needs to handle ghost objects anyway so can also hang
a few more sg tables of that I think). Even cleaner interface would be a
dynamic_unmap where you explicitly pass a fence that must be completed
before the attachment is gone.

Irrespective of that api bikeshed I think there's a bigger issue of using
the attachment list for invalidation:
1. importer attaches
2. exporter decides to move, calls invalidate
3. importer gets an ->invalidate for an attachment they haven't even
mapped yet.
-> (probably) boom somewhere, like the recursion you're preventing above
results in tears.

If we go with my suggestion above, which results in there being at most 1
mapping per dynamic dma-buf attachment (plus ghost mappings waiting for a
dma_fence, but they don't count), then all we need is an
attachment->is_mapped flag to filter these out, set in dynamic_map and
cleared in dynamic_unmap. I think that's also much cleaner than your
temporary list_del trickery.

With your overlapping mapping api the tracking gets a bit more tricky, you
need to remember which one is the current mapping so that you don't clear
the ->is_mapped flag when the importer unmaps a ghost mapping.

> +	} else {
> +		r = dma_buf_pin(attach);

Hm, I think this changed compared to older versions, and what we're
defacto doing now with drm prime sgt caching. If you have a dynamic
exporter, but not a dynamic importer, then:
- we will no longer cache the sgt
- pinning will only happen here in attachment_map

Aside from the behaviour change I think this also breaks the locking,
because it forces the reservation_lock into attach_map paths for
non-dynamic importers, which will piss of lockdep.

I think the only option we have is to redefine _is_dynamic as:

dma_buf_attachment_is_dynamic(attach)
{
	attach->buf->ops->pin && attach->ops->invalidate;
}

I.e. only when both importer and exporter are dynamic can we inflict the
reservation_lock onto them in attachment_map/unmap. For all others we need
need to cache the sgt, and if it's a dynamic exporter, pin the buffer
already at attach time.

> +		if (r)
> +			return ERR_PTR(r);
> +	}
> +
> +	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> +	if (attach->importer_ops && attach->importer_ops->invalidate)
> +		list_add(&attach->node, &attach->dmabuf->attachments);
> +	if (!sg_table)
> +		sg_table = ERR_PTR(-ENOMEM);
> +
> +	if (IS_ERR(sg_table)) {
> +		reservation_object_lock(attach->dmabuf->resv, NULL);
> +		dma_buf_unpin(attach);
> +		reservation_object_unlock(attach->dmabuf->resv);
> +	}
> +
> +	return sg_table;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
> +
>  /**
>   * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
>   * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
> @@ -651,14 +770,42 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>  	if (attach->sgt)
>  		return attach->sgt;
>  
> -	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> -	if (!sg_table)
> -		sg_table = ERR_PTR(-ENOMEM);
> +	reservation_object_lock(attach->dmabuf->resv, NULL);
> +	sg_table = dma_buf_map_attachment(attach, direction);
> +	reservation_object_unlock(attach->dmabuf->resv);
>  
>  	return sg_table;
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
>  
> +/**
> + * dma_buf_unmap_attachment_locked - unmaps the buffer with reservation lock
> + * held, should deallocate the associated scatterlist. Is a wrapper for
> + * unmap_dma_buf() of dma_buf_ops.
> + * @attach:	[in]	attachment to unmap buffer from
> + * @sg_table:	[in]	scatterlist info of the buffer to unmap
> + * @direction:  [in]    direction of DMA transfer
> + *
> + * This unmaps a DMA mapping for @attached obtained by
> + * dma_buf_map_attachment_locked().
> + */
> +void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
> +				     struct sg_table *sg_table,
> +				     enum dma_data_direction direction)
> +{
> +	might_sleep();
> +	reservation_object_assert_held(attach->dmabuf->resv);
> +
> +	if (attach->sgt == sg_table)
> +		return;
> +
> +	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> +						direction);
> +	if (!(attach->importer_ops && attach->importer_ops->invalidate))
> +		dma_buf_unpin(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
> +
>  /**
>   * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
>   * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
> @@ -681,11 +828,32 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>  	if (attach->sgt == sg_table)
>  		return;
>  
> -	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> -						direction);
> +	reservation_object_lock(attach->dmabuf->resv, NULL);
> +	dma_buf_unmap_attachment_locked(attach, sg_table, direction);
> +	reservation_object_unlock(attach->dmabuf->resv);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>  
> +/**
> + * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
> + *
> + * @dmabuf:	[in]	buffer which mappings should be invalidated
> + *
> + * Informs all attachmenst that they need to destroy and recreated all their
> + * mappings.
> + */
> +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
> +{
> +	struct dma_buf_attachment *attach;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	list_for_each_entry(attach, &dmabuf->attachments, node)
> +		if (attach->importer_ops && attach->importer_ops->invalidate)
> +			attach->importer_ops->invalidate(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
> +
>  /**
>   * DOC: cpu access
>   *
> @@ -1098,10 +1266,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>  		seq_puts(s, "\tAttached Devices:\n");
>  		attach_count = 0;
>  
> +		reservation_object_lock(buf_obj->resv, NULL);
>  		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
>  			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
>  			attach_count++;
>  		}
> +		reservation_object_unlock(buf_obj->resv);
>  
>  		seq_printf(s, "Total %d devices attached\n\n",
>  				attach_count);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 52031fdc75bb..f5681ca15d09 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -90,14 +90,40 @@ struct dma_buf_ops {
>  	 */
>  	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
>  
> +	/**
> +	 * @pin:
> +	 *
> +	 * This is called by dma_buf_pin and lets the exporter know that the

dma_buf_pin() so it hyperlinks. Similar everywhere else.

> +	 * DMA-buf can't be moved any more.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 *
> +	 * Returns:
> +	 *
> +	 * 0 on success, negative error code on failure.
> +	 */
> +	int (*pin)(struct dma_buf_attachment *);
> +
> +	/**
> +	 * @unpin:
> +	 *
> +	 * This is called by dma_buf_unpin and lets the exporter know that the
> +	 * DMA-buf can be moved again.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*unpin)(struct dma_buf_attachment *);
> +
>  	/**
>  	 * @map_dma_buf:
>  	 *
>  	 * This is called by dma_buf_map_attachment() and is used to map a
>  	 * shared &dma_buf into device address space, and it is mandatory. It
> -	 * can only be called if @attach has been called successfully. This
> -	 * essentially pins the DMA buffer into place, and it cannot be moved
> -	 * any more
> +	 * can only be called if @attach has been called successfully.
>  	 *
>  	 * This call may sleep, e.g. when the backing storage first needs to be
>  	 * allocated, or moved to a location suitable for all currently attached
> @@ -118,6 +144,9 @@ struct dma_buf_ops {
>  	 * any other kind of sharing that the exporter might wish to make
>  	 * available to buffer-users.
>  	 *
> +	 * This is always called with the dmabuf->resv object locked when
> +	 * the pin/unpin callbacks are implemented.
> +	 *
>  	 * Returns:
>  	 *
>  	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
> @@ -135,9 +164,6 @@ struct dma_buf_ops {
>  	 *
>  	 * This is called by dma_buf_unmap_attachment() and should unmap and
>  	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
> -	 * It should also unpin the backing storage if this is the last mapping
> -	 * of the DMA buffer, it the exporter supports backing storage
> -	 * migration.
>  	 */
>  	void (*unmap_dma_buf)(struct dma_buf_attachment *,
>  			      struct sg_table *,
> @@ -302,12 +328,43 @@ struct dma_buf {
>  	} cb_excl, cb_shared;
>  };
>  
> +/**
> + * struct dma_buf_attach_ops - importer operations for an attachment
> + * @invalidate: [optional] invalidate all mappings made using this attachment.

@invalidate twice pisses of kerneldoc.

> + *
> + * Attachment operations implemented by the importer.
> + */
> +struct dma_buf_attach_ops {
> +	/**
> +	 * @invalidate:
> +	 *
> +	 * If the invalidate callback is provided the framework can avoid
> +	 * pinning the backing store while mappings exists.
> +	 *
> +	 * This callback is called with the lock of the reservation object
> +	 * associated with the dma_buf held and the mapping function must be
> +	 * called with this lock held as well. This makes sure that no mapping
> +	 * is created concurrently with an ongoing invalidation.
> +	 *
> +	 * After the callback all existing mappings are still valid until all
> +	 * fences in the dma_bufs reservation object are signaled. After getting an
> +	 * invalidation callback all mappings should be destroyed by the importer using
> +	 * the normal dma_buf_unmap_attachment() function as soon as possible.

I think "as soon as possible" is a bit vague. What breaks if we don't do
that? With my suggestion to immediately call unmap (but with a expclit or
implicit fence) would resolve this question much clearer, by offloading
the duty of "unmapping soon enough" to the exporter.

This also matches more how dynamic_map works: the mapping is only valid
once the explicit fence has signaled, not right away. Doing a similar
"unmap is only ok once the fences have signaled" sounds reasonable to me.
Aside: If we pass an explicit fence to dynamic_unamp, would make sense to
receive an explicit fence from dynamic_map. Passing explicit fences around
also feels a notch cleaner given all the headaches we already have with
leaking implicit fences into reservation objects and causing us trouble
there. Explicit dynamic_map/unmap fences would all an exporter to untangle
these two things (atm not possible, just prep for the future).

Aside: For non-dynamic importer the initial pin/sgt caching also needs to
block for that exclusive fence I think, if it's a dynamic exporter. Not
all drivers wait for the exclusive fence (v4l probably more than drm), so
if the exporter is still moving the buffer we might have a problem.

Or is the pin supposed to be synchronous? Would be another thing to
document I guess ...

> +	 *
> +	 * New mappings can be created immediately, but can't be used before the
> +	 * exclusive fence in the dma_bufs reservation object is signaled.

I guess you can't go right ahead and create a new mapping from the
->invalidate hook?

Need to document that, and maybe enforce?

> +	 */
> +	void (*invalidate)(struct dma_buf_attachment *attach);
> +};
> +
>  /**
>   * struct dma_buf_attachment - holds device-buffer attachment data
>   * @dmabuf: buffer for this attachment.
>   * @dev: device attached to the buffer.
>   * @node: list of dma_buf_attachment.
>   * @priv: exporter specific attachment data.
> + * @importer_ops: importer operations for this attachment.
> + * @importer_priv: importer specific attachment data.
>   *
>   * This structure holds the attachment information between the dma_buf buffer
>   * and its user device(s). The list contains one attachment struct per device
> @@ -324,6 +381,9 @@ struct dma_buf_attachment {
>  	struct list_head node;
>  	struct sg_table *sgt;
>  	void *priv;
> +
> +	const struct dma_buf_attach_ops *importer_ops;
> +	void *importer_priv;
>  };
>  
>  /**
> @@ -383,14 +443,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>   */
>  static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
>  {
> -	/* Always use a static mapping for now */
> -	return false;
> +	return !!dmabuf->ops->pin;

!! is redundant, compiler does that for you when casting too bool.

>  }
>  
>  struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -							struct device *dev);
> +					  struct device *dev);
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv);
>  void dma_buf_detach(struct dma_buf *dmabuf,
> -				struct dma_buf_attachment *dmabuf_attach);
> +		    struct dma_buf_attachment *attach);
> +int dma_buf_pin(struct dma_buf_attachment *attach);
> +void dma_buf_unpin(struct dma_buf_attachment *attach);
>  
>  struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
>  
> @@ -398,10 +463,16 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
>  struct dma_buf *dma_buf_get(int fd);
>  void dma_buf_put(struct dma_buf *dmabuf);
>  
> +struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
> +					       enum dma_data_direction);
>  struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
>  					enum dma_data_direction);
> +void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *,
> +				     struct sg_table *,
> +				     enum dma_data_direction);
>  void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>  				enum dma_data_direction);
> +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
>  int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
>  			     enum dma_data_direction dir);
>  int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the dri-devel mailing list